Print this page
11490 SRS ring polling disabled for VLANs
11491 Want DLS bypass for VLAN traffic
11492 add VLVF bypass to ixgbe core
2869 duplicate packets with vnics over aggrs
11489 DLS stat delete and aggr kstat can deadlock
Portions contributed by: Theo Schlossnagle <jesus@omniti.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/inet/ip/ip_input.c
+++ new/usr/src/uts/common/inet/ip/ip_input.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
↓ open down ↓ |
15 lines elided |
↑ open up ↑ |
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 *
25 25 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
26 + * Copyright 2018 Joyent, Inc.
26 27 */
27 28 /* Copyright (c) 1990 Mentat Inc. */
28 29
29 30 #include <sys/types.h>
30 31 #include <sys/stream.h>
31 32 #include <sys/dlpi.h>
32 33 #include <sys/stropts.h>
33 34 #include <sys/sysmacros.h>
34 35 #include <sys/strsubr.h>
35 36 #include <sys/strlog.h>
36 37 #include <sys/strsun.h>
37 38 #include <sys/zone.h>
38 39 #define _SUN_TPI_VERSION 2
39 40 #include <sys/tihdr.h>
40 41 #include <sys/xti_inet.h>
41 42 #include <sys/ddi.h>
42 43 #include <sys/sunddi.h>
43 44 #include <sys/cmn_err.h>
44 45 #include <sys/debug.h>
45 46 #include <sys/kobj.h>
46 47 #include <sys/modctl.h>
47 48 #include <sys/atomic.h>
48 49 #include <sys/policy.h>
49 50 #include <sys/priv.h>
50 51
51 52 #include <sys/systm.h>
52 53 #include <sys/param.h>
53 54 #include <sys/kmem.h>
54 55 #include <sys/sdt.h>
55 56 #include <sys/socket.h>
56 57 #include <sys/vtrace.h>
57 58 #include <sys/isa_defs.h>
58 59 #include <sys/mac.h>
59 60 #include <net/if.h>
60 61 #include <net/if_arp.h>
61 62 #include <net/route.h>
62 63 #include <sys/sockio.h>
63 64 #include <netinet/in.h>
64 65 #include <net/if_dl.h>
65 66
66 67 #include <inet/common.h>
67 68 #include <inet/mi.h>
68 69 #include <inet/mib2.h>
69 70 #include <inet/nd.h>
70 71 #include <inet/arp.h>
71 72 #include <inet/snmpcom.h>
72 73 #include <inet/kstatcom.h>
73 74
74 75 #include <netinet/igmp_var.h>
75 76 #include <netinet/ip6.h>
76 77 #include <netinet/icmp6.h>
77 78 #include <netinet/sctp.h>
78 79
79 80 #include <inet/ip.h>
80 81 #include <inet/ip_impl.h>
81 82 #include <inet/ip6.h>
82 83 #include <inet/ip6_asp.h>
83 84 #include <inet/optcom.h>
84 85 #include <inet/tcp.h>
85 86 #include <inet/tcp_impl.h>
86 87 #include <inet/ip_multi.h>
87 88 #include <inet/ip_if.h>
88 89 #include <inet/ip_ire.h>
89 90 #include <inet/ip_ftable.h>
90 91 #include <inet/ip_rts.h>
91 92 #include <inet/ip_ndp.h>
92 93 #include <inet/ip_listutils.h>
93 94 #include <netinet/igmp.h>
94 95 #include <netinet/ip_mroute.h>
95 96 #include <inet/ipp_common.h>
96 97
97 98 #include <net/pfkeyv2.h>
98 99 #include <inet/sadb.h>
99 100 #include <inet/ipsec_impl.h>
100 101 #include <inet/ipdrop.h>
101 102 #include <inet/ip_netinfo.h>
102 103 #include <inet/ilb_ip.h>
103 104 #include <sys/squeue_impl.h>
104 105 #include <sys/squeue.h>
105 106
106 107 #include <sys/ethernet.h>
107 108 #include <net/if_types.h>
108 109 #include <sys/cpuvar.h>
109 110
110 111 #include <ipp/ipp.h>
111 112 #include <ipp/ipp_impl.h>
112 113 #include <ipp/ipgpc/ipgpc.h>
113 114
114 115 #include <sys/pattr.h>
115 116 #include <inet/ipclassifier.h>
116 117 #include <inet/sctp_ip.h>
117 118 #include <inet/sctp/sctp_impl.h>
118 119 #include <inet/udp_impl.h>
119 120 #include <sys/sunddi.h>
120 121
121 122 #include <sys/tsol/label.h>
122 123 #include <sys/tsol/tnet.h>
123 124
124 125 #include <sys/clock_impl.h> /* For LBOLT_FASTPATH{,64} */
125 126
126 127 #ifdef DEBUG
127 128 extern boolean_t skip_sctp_cksum;
128 129 #endif
129 130
130 131 static void ip_input_local_v4(ire_t *, mblk_t *, ipha_t *,
131 132 ip_recv_attr_t *);
132 133
133 134 static void ip_input_broadcast_v4(ire_t *, mblk_t *, ipha_t *,
134 135 ip_recv_attr_t *);
135 136 static void ip_input_multicast_v4(ire_t *, mblk_t *, ipha_t *,
136 137 ip_recv_attr_t *);
137 138
138 139 #pragma inline(ip_input_common_v4, ip_input_local_v4, ip_forward_xmit_v4)
↓ open down ↓ |
103 lines elided |
↑ open up ↑ |
139 140
/*
 * Direct read side procedure capable of dealing with chains. GLDv3 based
 * drivers call this function directly with mblk chains while STREAMS
 * read side procedure ip_rput() calls this for a single packet with ip_ring
 * set to NULL to process one packet at a time.
 *
 * The ill will always be valid if this function is called directly from
 * the driver.
 *
 * If this chain is part of a VLAN stream, then the VLAN tag is
 * stripped from the MAC header before being delivered to this
 * function.
 *
 * If the IP header in the packet is not 32-bit aligned, every message in the
 * chain will be aligned before further operations. This is required on SPARC
 * platform.
 */
void
ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
    struct mac_header_info_s *mhip)
{
	(void) ip_input_common_v4(ill, ip_ring, mp_chain, mhip, NULL, NULL,
	    NULL);
}
166 165
/*
 * ip_accept_tcp() - This function is called by the squeue when it retrieves
 * a chain of packets in the poll mode. The packets have gone through the
 * data link processing but not IP processing. For performance and latency
 * reasons, the squeue wants to process the chain in line instead of feeding
 * it back via the ip_input path.
 *
 * We set up the ip_recv_attr_t with IRAF_TARGET_SQP so that ip_fanout_v4
 * will pass back any TCP packets matching the target sqp to
 * ip_input_common_v4 using ira_target_sqp_mp. Other packets are handled by
 * ip_input_v4 and ip_fanout_v4 as normal.
 * The TCP packets that match the target squeue are returned to the caller
 * as a b_next chain after each packet has been prepended with an mblk
 * from ip_recv_attr_to_mblk.
 */
mblk_t *
ip_accept_tcp(ill_t *ill, ill_rx_ring_t *ip_ring, squeue_t *target_sqp,
    mblk_t *mp_chain, mblk_t **last, uint_t *cnt)
{
	return (ip_input_common_v4(ill, ip_ring, mp_chain, NULL, target_sqp,
	    last, cnt));
}
189 188
/*
 * Used by ip_input and ip_accept_tcp.
 * The last three arguments are only used by ip_accept_tcp, and mhip is
 * only used by ip_input.
 */
mblk_t *
ip_input_common_v4(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
    struct mac_header_info_s *mhip, squeue_t *target_sqp,
    mblk_t **last, uint_t *cnt)
{
	mblk_t		*mp;
	ipha_t		*ipha;
	ip_recv_attr_t	iras;	/* Receive attributes */
	rtc_t		rtc;
	iaflags_t	chain_flags = 0;	/* Fixed for chain */
	mblk_t		*ahead = NULL;	/* Accepted head */
	mblk_t		*atail = NULL;	/* Accepted tail */
	uint_t		acnt = 0;	/* Accepted count */

	ASSERT(mp_chain != NULL);
	ASSERT(ill != NULL);

	/* These ones do not change as we loop over packets */
	iras.ira_ill = iras.ira_rill = ill;
	iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex;
	iras.ira_rifindex = iras.ira_ruifindex;
	iras.ira_sqp = NULL;
	iras.ira_ring = ip_ring;
	/* For ECMP and outbound transmit ring selection */
	iras.ira_xmit_hint = ILL_RING_TO_XMIT_HINT(ip_ring);

	iras.ira_target_sqp = target_sqp;
	iras.ira_target_sqp_mp = NULL;
	if (target_sqp != NULL)
		chain_flags |= IRAF_TARGET_SQP;

	/*
	 * We try to have a mhip pointer when possible, but
	 * it might be NULL in some cases. In those cases we
	 * have to assume unicast.
	 */
	iras.ira_mhip = mhip;
	iras.ira_flags = 0;
	if (mhip != NULL) {
		switch (mhip->mhi_dsttype) {
		case MAC_ADDRTYPE_MULTICAST :
			chain_flags |= IRAF_L2DST_MULTICAST;
			break;
		case MAC_ADDRTYPE_BROADCAST :
			chain_flags |= IRAF_L2DST_BROADCAST;
			break;
		}
	}

	/*
	 * Initialize the one-element route cache.
	 *
	 * We do ire caching from one iteration to
	 * another. In the event the packet chain contains
	 * all packets from the same dst, this caching saves
	 * an ire_route_recursive for each of the succeeding
	 * packets in a packet chain.
	 */
	rtc.rtc_ire = NULL;
	rtc.rtc_ipaddr = INADDR_ANY;

	/* Loop over b_next */
	for (mp = mp_chain; mp != NULL; mp = mp_chain) {
		mp_chain = mp->b_next;
		mp->b_next = NULL;

		ASSERT(DB_TYPE(mp) == M_DATA);


		/*
		 * if db_ref > 1 then copymsg and free original. Packet
		 * may be changed and we do not want the other entity
		 * who has a reference to this message to trip over the
		 * changes. This is a blind change because trying to
		 * catch all places that might change the packet is too
		 * difficult.
		 *
		 * This corresponds to the fast path case, where we have
		 * a chain of M_DATA mblks. We check the db_ref count
		 * of only the 1st data block in the mblk chain. There
		 * doesn't seem to be a reason why a device driver would
		 * send up data with varying db_ref counts in the mblk
		 * chain. In any case the Fast path is a private
		 * interface, and our drivers don't do such a thing.
		 * Given the above assumption, there is no need to walk
		 * down the entire mblk chain (which could have a
		 * potential performance problem)
		 *
		 * The "(DB_REF(mp) > 1)" check was moved from ip_rput()
		 * to here because of exclusive ip stacks and vnics.
		 * Packets transmitted from exclusive stack over vnic
		 * can have db_ref > 1 and when it gets looped back to
		 * another vnic in a different zone, you have ip_input()
		 * getting dblks with db_ref > 1. So if someone
		 * complains of TCP performance under this scenario,
		 * take a serious look here on the impact of copymsg().
		 */
		if (DB_REF(mp) > 1) {
			if ((mp = ip_fix_dbref(mp, &iras)) == NULL) {
				/* mhip might point into 1st packet in chain */
				iras.ira_mhip = NULL;
				continue;
			}
		}

		/*
		 * IP header ptr not aligned?
		 * OR IP header not complete in first mblk
		 */
		ipha = (ipha_t *)mp->b_rptr;
		if (!OK_32PTR(ipha) || MBLKL(mp) < IP_SIMPLE_HDR_LENGTH) {
			mp = ip_check_and_align_header(mp, IP_SIMPLE_HDR_LENGTH,
			    &iras);
			if (mp == NULL) {
				/* mhip might point into 1st packet in chain */
				iras.ira_mhip = NULL;
				continue;
			}
			ipha = (ipha_t *)mp->b_rptr;
		}

		/* Protect against a mix of Ethertypes and IP versions */
		if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
			ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
			freemsg(mp);
			/* mhip might point into 1st packet in the chain. */
			iras.ira_mhip = NULL;
			continue;
		}

		/*
		 * Check for Martian addrs; we have to explicitly
		 * test for zero dst since this is also used as
		 * an indication that the rtc is not used.
		 */
		if (ipha->ipha_dst == INADDR_ANY) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
			freemsg(mp);
			/* mhip might point into 1st packet in the chain. */
			iras.ira_mhip = NULL;
			continue;
		}

		/*
		 * Keep L2SRC from a previous packet in chain since mhip
		 * might point into an earlier packet in the chain.
		 * Keep IRAF_VERIFIED_SRC to avoid redoing broadcast
		 * source check in forwarding path.
		 */
		chain_flags |= (iras.ira_flags &
		    (IRAF_L2SRC_SET|IRAF_VERIFIED_SRC));

		iras.ira_flags = IRAF_IS_IPV4 | IRAF_VERIFY_IP_CKSUM |
		    IRAF_VERIFY_ULP_CKSUM | chain_flags;
		iras.ira_free_flags = 0;
		iras.ira_cred = NULL;
		iras.ira_cpid = NOPID;
		iras.ira_tsl = NULL;
		iras.ira_zoneid = ALL_ZONES;	/* Default for forwarding */

		/*
		 * We must count all incoming packets, even if they end
		 * up being dropped later on. Defer counting bytes until
		 * we have the whole IP header in first mblk.
		 */
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives);

		iras.ira_pktlen = ntohs(ipha->ipha_length);
		UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets,
		    iras.ira_pktlen);

		/*
		 * Call one of:
		 *	ill_input_full_v4
		 *	ill_input_short_v4
		 * The former is used in unusual cases. See ill_set_inputfn().
		 */
		(*ill->ill_inputfn)(mp, ipha, &ipha->ipha_dst, &iras, &rtc);

		/* Any references to clean up? No hold on ira_ill */
		if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED))
			ira_cleanup(&iras, B_FALSE);

		if (iras.ira_target_sqp_mp != NULL) {
			/* Better be called from ip_accept_tcp */
			ASSERT(target_sqp != NULL);

			/* Found one packet to accept */
			mp = iras.ira_target_sqp_mp;
			iras.ira_target_sqp_mp = NULL;
			ASSERT(ip_recv_attr_is_mblk(mp));

			if (atail != NULL)
				atail->b_next = mp;
			else
				ahead = mp;
			atail = mp;
			acnt++;
			mp = NULL;
		}
		/* mhip might point into 1st packet in the chain. */
		iras.ira_mhip = NULL;
	}
	/* Any remaining references to the route cache? */
	if (rtc.rtc_ire != NULL) {
		ASSERT(rtc.rtc_ipaddr != INADDR_ANY);
		ire_refrele(rtc.rtc_ire);
	}

	if (ahead != NULL) {
		/* Better be called from ip_accept_tcp */
		ASSERT(target_sqp != NULL);
		*last = atail;
		*cnt = acnt;
		return (ahead);
	}

	return (NULL);
}
416 415
/*
 * This input function is used when
 * - is_system_labeled()
 * - CGTP filtering
 * - DHCP unicast before we have an IP address configured
 * - there is a listener for IPPROTO_RSVP
 */
void
ill_input_full_v4(mblk_t *mp, void *iph_arg, void *nexthop_arg,
    ip_recv_attr_t *ira, rtc_t *rtc)
{
	ipha_t		*ipha = (ipha_t *)iph_arg;
	ipaddr_t	nexthop = *(ipaddr_t *)nexthop_arg;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	int		cgtp_flt_pkt;

	ASSERT(ira->ira_tsl == NULL);

	/*
	 * Attach any necessary label information to
	 * this packet
	 */
	if (is_system_labeled()) {
		ira->ira_flags |= IRAF_SYSTEM_LABELED;

		/*
		 * This updates ira_cred, ira_tsl and ira_free_flags based
		 * on the label.
		 */
		if (!tsol_get_pkt_label(mp, IPV4_VERSION, ira)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards", mp, ill);
			freemsg(mp);
			return;
		}
		/* Note that ira_tsl can be NULL here. */

		/* tsol_get_pkt_label sometimes does pullupmsg */
		ipha = (ipha_t *)mp->b_rptr;
	}

	/*
	 * Invoke the CGTP (multirouting) filtering module to process
	 * the incoming packet. Packets identified as duplicates
	 * must be discarded. Filtering is active only if the
	 * ip_cgtp_filter ndd variable is non-zero.
	 */
	cgtp_flt_pkt = CGTP_IP_PKT_NOT_CGTP;
	if (ipst->ips_ip_cgtp_filter &&
	    ipst->ips_ip_cgtp_filter_ops != NULL) {
		netstackid_t stackid;

		stackid = ipst->ips_netstack->netstack_stackid;
		/*
		 * CGTP and IPMP are mutually exclusive so
		 * phyint_ifindex is fine here.
		 */
		cgtp_flt_pkt =
		    ipst->ips_ip_cgtp_filter_ops->cfo_filter(stackid,
		    ill->ill_phyint->phyint_ifindex, mp);
		if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) {
			ip_drop_input("CGTP_IP_PKT_DUPLICATE", mp, ill);
			freemsg(mp);
			return;
		}
	}

	/*
	 * Brutal hack for DHCPv4 unicast: RFC2131 allows a DHCP
	 * server to unicast DHCP packets to a DHCP client using the
	 * IP address it is offering to the client. This can be
	 * disabled through the "broadcast bit", but not all DHCP
	 * servers honor that bit. Therefore, to interoperate with as
	 * many DHCP servers as possible, the DHCP client allows the
	 * server to unicast, but we treat those packets as broadcast
	 * here. Note that we don't rewrite the packet itself since
	 * (a) that would mess up the checksums and (b) the DHCP
	 * client conn is bound to INADDR_ANY so ip_fanout_udp() will
	 * hand it the packet regardless.
	 */
	if (ill->ill_dhcpinit != 0 &&
	    ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION &&
	    ipha->ipha_protocol == IPPROTO_UDP) {
		udpha_t *udpha;

		ipha = ip_pullup(mp, sizeof (ipha_t) + sizeof (udpha_t), ira);
		if (ipha == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards - dhcp", mp, ill);
			freemsg(mp);
			return;
		}
		/* Reload since pullupmsg() can change b_rptr. */
		udpha = (udpha_t *)&ipha[1];

		if (ntohs(udpha->uha_dst_port) == IPPORT_BOOTPC) {
			DTRACE_PROBE2(ip4__dhcpinit__pkt, ill_t *, ill,
			    mblk_t *, mp);
			/*
			 * This assumes that we deliver to all conns for
			 * multicast and broadcast packets.
			 */
			nexthop = INADDR_BROADCAST;
			ira->ira_flags |= IRAF_DHCP_UNICAST;
		}
	}

	/*
	 * If rsvpd is running, let RSVP daemon handle its processing
	 * and forwarding of RSVP multicast/unicast packets.
	 * If rsvpd is not running but mrouted is running, RSVP
	 * multicast packets are forwarded as multicast traffic
	 * and RSVP unicast packets are forwarded by unicast router.
	 * If neither rsvpd nor mrouted is running, RSVP multicast
	 * packets are not forwarded, but the unicast packets are
	 * forwarded like unicast traffic.
	 */
	if (ipha->ipha_protocol == IPPROTO_RSVP &&
	    ipst->ips_ipcl_proto_fanout_v4[IPPROTO_RSVP].connf_head != NULL) {
		/* RSVP packet and rsvpd running. Treat as ours */
		ip2dbg(("ip_input: RSVP for us: 0x%x\n", ntohl(nexthop)));
		/*
		 * We use a multicast address to get the packet to
		 * ire_recv_multicast_v4. There will not be a membership
		 * check since we set IRAF_RSVP
		 */
		nexthop = htonl(INADDR_UNSPEC_GROUP);
		ira->ira_flags |= IRAF_RSVP;
	}

	ill_input_short_v4(mp, ipha, &nexthop, ira, rtc);
}
550 549
/*
 * This is the tail-end of the full receive side packet handling.
 * It can be used directly when the configuration is simple.
 */
void
ill_input_short_v4(mblk_t *mp, void *iph_arg, void *nexthop_arg,
    ip_recv_attr_t *ira, rtc_t *rtc)
{
	ire_t		*ire;
	uint_t		opt_len;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	uint_t		pkt_len;
	ssize_t		len;
	ipha_t		*ipha = (ipha_t *)iph_arg;
	ipaddr_t	nexthop = *(ipaddr_t *)nexthop_arg;
	ilb_stack_t	*ilbs = ipst->ips_netstack->netstack_ilb;
	uint_t		irr_flags;
#define	rptr	((uchar_t *)ipha)

	ASSERT(DB_TYPE(mp) == M_DATA);

	/*
	 * The following test for loopback is faster than
	 * IP_LOOPBACK_ADDR(), because it avoids any bitwise
	 * operations.
	 * Note that these addresses are always in network byte order
	 */
	if (((*(uchar_t *)&ipha->ipha_dst) == IN_LOOPBACKNET) ||
	    ((*(uchar_t *)&ipha->ipha_src) == IN_LOOPBACKNET)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
		ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
		freemsg(mp);
		return;
	}

	len = mp->b_wptr - rptr;
	pkt_len = ira->ira_pktlen;

	/* multiple mblk or too short */
	len -= pkt_len;
	if (len != 0) {
		mp = ip_check_length(mp, rptr, len, pkt_len,
		    IP_SIMPLE_HDR_LENGTH, ira);
		if (mp == NULL)
			return;
		ipha = (ipha_t *)mp->b_rptr;
	}

	DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
	    ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *, NULL,
	    int, 0);

	/*
	 * The event for packets being received from a 'physical'
	 * interface is placed after validation of the source and/or
	 * destination address as being local so that packets can be
	 * redirected to loopback addresses using ipnat.
	 */
	DTRACE_PROBE4(ip4__physical__in__start,
	    ill_t *, ill, ill_t *, NULL,
	    ipha_t *, ipha, mblk_t *, mp);

	if (HOOKS4_INTERESTED_PHYSICAL_IN(ipst)) {
		int	ll_multicast = 0;
		int	error;
		ipaddr_t orig_dst = ipha->ipha_dst;

		if (ira->ira_flags & IRAF_L2DST_MULTICAST)
			ll_multicast = HPE_MULTICAST;
		else if (ira->ira_flags & IRAF_L2DST_BROADCAST)
			ll_multicast = HPE_BROADCAST;

		FW_HOOKS(ipst->ips_ip4_physical_in_event,
		    ipst->ips_ipv4firewall_physical_in,
		    ill, NULL, ipha, mp, mp, ll_multicast, ipst, error);

		DTRACE_PROBE1(ip4__physical__in__end, mblk_t *, mp);

		if (mp == NULL)
			return;
		/* The length could have changed */
		ipha = (ipha_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ipha->ipha_length);
		pkt_len = ira->ira_pktlen;

		/*
		 * In case the destination changed we override any previous
		 * change to nexthop.
		 */
		if (orig_dst != ipha->ipha_dst)
			nexthop = ipha->ipha_dst;
		if (nexthop == INADDR_ANY) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
			freemsg(mp);
			return;
		}
	}

	if (ipst->ips_ip4_observe.he_interested) {
		zoneid_t dzone;

		/*
		 * On the inbound path the src zone will be unknown as
		 * this packet has come from the wire.
		 */
		dzone = ip_get_zoneid_v4(nexthop, mp, ira, ALL_ZONES);
		ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, ipst);
	}

	/*
	 * If there is a good HW IP header checksum we clear the need
	 * to look at the IP header checksum.
	 */
	if ((DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) &&
	    ILL_HCKSUM_CAPABLE(ill) && dohwcksum) {
		/* Header checksum was ok. Clear the flag */
		DB_CKSUMFLAGS(mp) &= ~HCK_IPV4_HDRCKSUM;
		ira->ira_flags &= ~IRAF_VERIFY_IP_CKSUM;
	}

	/*
	 * Here we check to see if this machine is set up as an
	 * L3 load balancer and if the incoming packet is for a VIP
	 *
	 * Check the following:
	 * - there is at least a rule
	 * - protocol of the packet is supported
	 */
	if (ilb_has_rules(ilbs) && ILB_SUPP_L4(ipha->ipha_protocol)) {
		ipaddr_t lb_dst;
		int lb_ret;

		/* For convenience, we pull up the mblk. */
		if (mp->b_cont != NULL) {
			if (pullupmsg(mp, -1) == 0) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				ip_drop_input("ipIfStatsInDiscards - pullupmsg",
				    mp, ill);
				freemsg(mp);
				return;
			}
			ipha = (ipha_t *)mp->b_rptr;
		}

		/*
		 * We just drop all fragments going to any VIP, at
		 * least for now....
		 */
		if (ntohs(ipha->ipha_fragment_offset_and_flags) &
		    (IPH_MF | IPH_OFFSET)) {
			if (!ilb_rule_match_vip_v4(ilbs, nexthop, NULL)) {
				goto after_ilb;
			}

			ILB_KSTAT_UPDATE(ilbs, ip_frag_in, 1);
			ILB_KSTAT_UPDATE(ilbs, ip_frag_dropped, 1);
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ILB fragment", mp, ill);
			freemsg(mp);
			return;
		}
		lb_ret = ilb_check_v4(ilbs, ill, mp, ipha, ipha->ipha_protocol,
		    (uint8_t *)ipha + IPH_HDR_LENGTH(ipha), &lb_dst);

		if (lb_ret == ILB_DROPPED) {
			/* Is this the right counter to increase? */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ILB_DROPPED", mp, ill);
			freemsg(mp);
			return;
		}
		if (lb_ret == ILB_BALANCED) {
			/* Set the dst to that of the chosen server */
			nexthop = lb_dst;
			DB_CKSUMFLAGS(mp) = 0;
		}
	}

after_ilb:
	opt_len = ipha->ipha_version_and_hdr_length - IP_SIMPLE_HDR_VERSION;
	ira->ira_ip_hdr_length = IP_SIMPLE_HDR_LENGTH;
	if (opt_len != 0) {
		int error = 0;

		ira->ira_ip_hdr_length += (opt_len << 2);
		ira->ira_flags |= IRAF_IPV4_OPTIONS;

		/* IP Options present! Validate the length. */
		mp = ip_check_optlen(mp, ipha, opt_len, pkt_len, ira);
		if (mp == NULL)
			return;

		/* Might have changed */
		ipha = (ipha_t *)mp->b_rptr;

		/* Verify IP header checksum before parsing the options */
		if ((ira->ira_flags & IRAF_VERIFY_IP_CKSUM) &&
		    ip_csum_hdr(ipha)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs);
			ip_drop_input("ipIfStatsInCksumErrs", mp, ill);
			freemsg(mp);
			return;
		}
		ira->ira_flags &= ~IRAF_VERIFY_IP_CKSUM;

		/*
		 * Go off to ip_input_options which returns the next hop
		 * destination address, which may have been affected
		 * by source routing.
		 */
		IP_STAT(ipst, ip_opt);

		nexthop = ip_input_options(ipha, nexthop, mp, ira, &error);
		if (error != 0) {
			/*
			 * An ICMP error has been sent and the packet has
			 * been dropped.
			 */
			return;
		}
	}

	if (ill->ill_flags & ILLF_ROUTER)
		irr_flags = IRR_ALLOCATE;
	else
		irr_flags = IRR_NONE;

	/* Can not use route cache with TX since the labels can differ */
	if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
		if (CLASSD(nexthop)) {
			ire = ire_multicast(ill);
		} else {
			/* Match destination and label */
			ire = ire_route_recursive_v4(nexthop, 0, NULL,
			    ALL_ZONES, ira->ira_tsl, MATCH_IRE_SECATTR,
			    irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL,
			    NULL);
		}
		/* Update the route cache so we do the ire_refrele */
		ASSERT(ire != NULL);
		if (rtc->rtc_ire != NULL)
			ire_refrele(rtc->rtc_ire);
		rtc->rtc_ire = ire;
		rtc->rtc_ipaddr = nexthop;
	} else if (nexthop == rtc->rtc_ipaddr && rtc->rtc_ire != NULL) {
		/* Use the route cache */
		ire = rtc->rtc_ire;
	} else {
		/* Update the route cache */
		if (CLASSD(nexthop)) {
			ire = ire_multicast(ill);
		} else {
			/* Just match the destination */
			ire = ire_route_recursive_dstonly_v4(nexthop, irr_flags,
			    ira->ira_xmit_hint, ipst);
		}
		ASSERT(ire != NULL);
		if (rtc->rtc_ire != NULL)
			ire_refrele(rtc->rtc_ire);
		rtc->rtc_ire = ire;
		rtc->rtc_ipaddr = nexthop;
	}

	ire->ire_ib_pkt_count++;

	/*
	 * Based on ire_type and ire_flags call one of:
	 *	ire_recv_local_v4 - for IRE_LOCAL
	 *	ire_recv_loopback_v4 - for IRE_LOOPBACK
	 *	ire_recv_multirt_v4 - if RTF_MULTIRT
	 *	ire_recv_noroute_v4 - if RTF_REJECT or RTF_BLACKHOLE
	 *	ire_recv_multicast_v4 - for IRE_MULTICAST
	 *	ire_recv_broadcast_v4 - for IRE_BROADCAST
	 *	ire_recv_noaccept_v4 - for ire_noaccept ones
	 *	ire_recv_forward_v4 - for the rest.
	 */
	(*ire->ire_recvfn)(ire, mp, ipha, ira);
}
#undef rptr
832 831
833 832 /*
834 833 * ire_recvfn for IREs that need forwarding
835 834 */
void
ire_recv_forward_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
{
	ipha_t		*ipha = (ipha_t *)iph_arg;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	ill_t		*dst_ill;
	nce_t		*nce;
	ipaddr_t	src = ipha->ipha_src;
	uint32_t	added_tx_len;
	uint32_t	mtu, iremtu;

	/* Never forward a packet that arrived as a link-layer mcast/bcast. */
	if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("l2 multicast not forwarded", mp, ill);
		freemsg(mp);
		return;
	}

	/*
	 * The receiving interface must be acting as a router, except for
	 * source-routed packets which may legitimately transit a host.
	 */
	if (!(ill->ill_flags & ILLF_ROUTER) && !ip_source_routed(ipha, ipst)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("ipIfStatsForwProhibits", mp, ill);
		freemsg(mp);
		return;
	}

	/*
	 * Either ire_nce_capable or ire_dep_parent would be set for the IRE
	 * when it is found by ire_route_recursive, but that some other thread
	 * could have changed the routes with the effect of clearing
	 * ire_dep_parent. In that case we'd end up dropping the packet, or
	 * finding a new nce below.
	 * Get, allocate, or update the nce.
	 * We get a refhold on ire_nce_cache as a result of this to avoid races
	 * where ire_nce_cache is deleted.
	 *
	 * This ensures that we don't forward if the interface is down since
	 * ipif_down removes all the nces.
	 */
	mutex_enter(&ire->ire_lock);
	nce = ire->ire_nce_cache;
	if (nce == NULL) {
		/*
		 * Not yet set up - try to set one up.  Note that
		 * ire_revalidate_nce must be called without ire_lock held.
		 */
		mutex_exit(&ire->ire_lock);
		(void) ire_revalidate_nce(ire);
		mutex_enter(&ire->ire_lock);
		nce = ire->ire_nce_cache;
		if (nce == NULL) {
			mutex_exit(&ire->ire_lock);
			/* The ire_dep_parent chain went bad, or no memory */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("No ire_dep_parent", mp, ill);
			freemsg(mp);
			return;
		}
	}
	/* Hold persists until the transmit (or a drop) below. */
	nce_refhold(nce);
	mutex_exit(&ire->ire_lock);

	if (nce->nce_is_condemned) {
		nce_t *nce1;

		/* Swap the condemned nce for a fresh one, if possible. */
		nce1 = ire_handle_condemned_nce(nce, ire, ipha, NULL, B_FALSE);
		nce_refrele(nce);
		if (nce1 == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("No nce", mp, ill);
			freemsg(mp);
			return;
		}
		nce = nce1;
	}
	dst_ill = nce->nce_ill;

	/*
	 * Unless we are forwarding, drop the packet.
	 * We have to let source routed packets through if they go out
	 * the same interface i.e., they are 'ping -l' packets.
	 */
	if (!(dst_ill->ill_flags & ILLF_ROUTER) &&
	    !(ip_source_routed(ipha, ipst) && dst_ill == ill)) {
		if (ip_source_routed(ipha, ipst)) {
			ip_drop_input("ICMP_SOURCE_ROUTE_FAILED", mp, ill);
			icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED, ira);
			nce_refrele(nce);
			return;
		}
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("ipIfStatsForwProhibits", mp, ill);
		freemsg(mp);
		nce_refrele(nce);
		return;
	}

	/*
	 * The IRE we found was zone-specific: redo the route lookup from the
	 * global zone's viewpoint and dispatch to the resulting IRE's
	 * recvfn.  The inbound packet count is moved from the old IRE to
	 * the new one.
	 */
	if (ire->ire_zoneid != GLOBAL_ZONEID && ire->ire_zoneid != ALL_ZONES) {
		ipaddr_t	dst = ipha->ipha_dst;

		ire->ire_ib_pkt_count--;
		/*
		 * Should only use IREs that are visible from the
		 * global zone for forwarding.
		 * Take a source route into account the same way as ip_input
		 * did.
		 */
		if (ira->ira_flags & IRAF_IPV4_OPTIONS) {
			int		error = 0;

			dst = ip_input_options(ipha, dst, mp, ira, &error);
			ASSERT(error == 0);	/* ip_input checked */
		}
		ire = ire_route_recursive_v4(dst, 0, NULL, GLOBAL_ZONEID,
		    ira->ira_tsl, MATCH_IRE_SECATTR,
		    (ill->ill_flags & ILLF_ROUTER) ? IRR_ALLOCATE : IRR_NONE,
		    ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
		ire->ire_ib_pkt_count++;
		(*ire->ire_recvfn)(ire, mp, ipha, ira);
		ire_refrele(ire);
		nce_refrele(nce);
		return;
	}

	/*
	 * ipIfStatsHCInForwDatagrams should only be incremented if there
	 * will be an attempt to forward the packet, which is why we
	 * increment after the above condition has been checked.
	 */
	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);

	/* Initiate Read side IPPF processing */
	if (IPP_ENABLED(IPP_FWD_IN, ipst)) {
		/* ip_process translates an IS_UNDER_IPMP */
		mp = ip_process(IPP_FWD_IN, mp, ill, ill);
		if (mp == NULL) {
			/* ip_drop_packet and MIB done */
			ip2dbg(("ire_recv_forward_v4: pkt dropped/deferred "
			    "during IPPF processing\n"));
			nce_refrele(nce);
			return;
		}
	}

	DTRACE_PROBE4(ip4__forwarding__start,
	    ill_t *, ill, ill_t *, dst_ill, ipha_t *, ipha, mblk_t *, mp);

	/* Give the firewall hooks a chance to consume or rewrite the pkt. */
	if (HOOKS4_INTERESTED_FORWARDING(ipst)) {
		int error;

		FW_HOOKS(ipst->ips_ip4_forwarding_event,
		    ipst->ips_ipv4firewall_forwarding,
		    ill, dst_ill, ipha, mp, mp, 0, ipst, error);

		DTRACE_PROBE1(ip4__forwarding__end, mblk_t *, mp);

		if (mp == NULL) {
			nce_refrele(nce);
			return;
		}
		/*
		 * Even if the destination was changed by the filter we use the
		 * forwarding decision that was made based on the address
		 * in ip_input.
		 */

		/* Might have changed */
		ipha = (ipha_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ipha->ipha_length);
	}

	/* Packet is being forwarded. Turning off hwcksum flag. */
	DB_CKSUMFLAGS(mp) = 0;

	/*
	 * Martian Address Filtering [RFC 1812, Section 5.3.7]
	 * The loopback address check for both src and dst has already
	 * been checked in ip_input
	 * In the future one can envision adding RPF checks using number 3.
	 * If we already checked the same source address we can skip this.
	 */
	if (!(ira->ira_flags & IRAF_VERIFIED_SRC) ||
	    src != ira->ira_verified_src) {
		switch (ipst->ips_src_check) {
		case 0:
			break;
		case 2:
			/* Strictest mode also rejects broadcast sources. */
			if (ip_type_v4(src, ipst) == IRE_BROADCAST) {
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsForwProhibits);
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsInAddrErrors);
				ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
				freemsg(mp);
				nce_refrele(nce);
				return;
			}
			/* FALLTHRU */

		case 1:
			/* Multicast (class D) source addresses are invalid. */
			if (CLASSD(src)) {
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsForwProhibits);
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsInAddrErrors);
				ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
				freemsg(mp);
				nce_refrele(nce);
				return;
			}
			break;
		}
		/* Remember for next packet */
		ira->ira_flags |= IRAF_VERIFIED_SRC;
		ira->ira_verified_src = src;
	}

	/*
	 * Check if packet is going out the same link on which it arrived.
	 * Means we might need to send a redirect.
	 */
	if (IS_ON_SAME_LAN(dst_ill, ill) && ipst->ips_ip_g_send_redirects) {
		ip_send_potential_redirect_v4(mp, ipha, ire, ira);
	}

	added_tx_len = 0;
	if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
		mblk_t		*mp1;
		uint32_t	old_pkt_len = ira->ira_pktlen;

		/* Verify IP header checksum before adding/removing options */
		if ((ira->ira_flags & IRAF_VERIFY_IP_CKSUM) &&
		    ip_csum_hdr(ipha)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs);
			ip_drop_input("ipIfStatsInCksumErrs", mp, ill);
			freemsg(mp);
			nce_refrele(nce);
			return;
		}
		ira->ira_flags &= ~IRAF_VERIFY_IP_CKSUM;

		/*
		 * Check if it can be forwarded and add/remove
		 * CIPSO options as needed.
		 */
		if ((mp1 = tsol_ip_forward(ire, mp, ira)) == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
			ip_drop_input("tsol_ip_forward", mp, ill);
			freemsg(mp);
			nce_refrele(nce);
			return;
		}
		/*
		 * Size may have changed. Remember amount added in case
		 * IP needs to send an ICMP too big.
		 */
		mp = mp1;
		ipha = (ipha_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ipha->ipha_length);
		ira->ira_ip_hdr_length = IPH_HDR_LENGTH(ipha);
		if (ira->ira_pktlen > old_pkt_len)
			added_tx_len = ira->ira_pktlen - old_pkt_len;

		/* Options can have been added or removed */
		if (ira->ira_ip_hdr_length != IP_SIMPLE_HDR_LENGTH)
			ira->ira_flags |= IRAF_IPV4_OPTIONS;
		else
			ira->ira_flags &= ~IRAF_IPV4_OPTIONS;
	}

	/* Path MTU is the lesser of the link MTU and any route metric MTU. */
	mtu = dst_ill->ill_mtu;
	if ((iremtu = ire->ire_metrics.iulp_mtu) != 0 && iremtu < mtu)
		mtu = iremtu;
	ip_forward_xmit_v4(nce, ill, mp, ipha, ira, mtu, added_tx_len);
	nce_refrele(nce);
}
1109 1108
1110 1109 /*
1111 1110 * Used for sending out unicast and multicast packets that are
1112 1111 * forwarded.
1113 1112 */
1114 1113 void
1115 1114 ip_forward_xmit_v4(nce_t *nce, ill_t *ill, mblk_t *mp, ipha_t *ipha,
1116 1115 ip_recv_attr_t *ira, uint32_t mtu, uint32_t added_tx_len)
1117 1116 {
1118 1117 ill_t *dst_ill = nce->nce_ill;
1119 1118 uint32_t pkt_len;
1120 1119 uint32_t sum;
1121 1120 iaflags_t iraflags = ira->ira_flags;
1122 1121 ip_stack_t *ipst = ill->ill_ipst;
1123 1122 iaflags_t ixaflags;
1124 1123
1125 1124 if (ipha->ipha_ttl <= 1) {
1126 1125 /* Perhaps the checksum was bad */
1127 1126 if ((iraflags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) {
1128 1127 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs);
1129 1128 ip_drop_input("ipIfStatsInCksumErrs", mp, ill);
1130 1129 freemsg(mp);
1131 1130 return;
1132 1131 }
1133 1132 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1134 1133 ip_drop_input("ICMP_TTL_EXCEEDED", mp, ill);
1135 1134 icmp_time_exceeded(mp, ICMP_TTL_EXCEEDED, ira);
1136 1135 return;
1137 1136 }
1138 1137 ipha->ipha_ttl--;
1139 1138 /* Adjust the checksum to reflect the ttl decrement. */
1140 1139 sum = (int)ipha->ipha_hdr_checksum + IP_HDR_CSUM_TTL_ADJUST;
1141 1140 ipha->ipha_hdr_checksum = (uint16_t)(sum + (sum >> 16));
1142 1141
1143 1142 /* Check if there are options to update */
1144 1143 if (iraflags & IRAF_IPV4_OPTIONS) {
1145 1144 ASSERT(ipha->ipha_version_and_hdr_length !=
1146 1145 IP_SIMPLE_HDR_VERSION);
1147 1146 ASSERT(!(iraflags & IRAF_VERIFY_IP_CKSUM));
1148 1147
1149 1148 if (!ip_forward_options(mp, ipha, dst_ill, ira)) {
1150 1149 /* ipIfStatsForwProhibits and ip_drop_input done */
1151 1150 return;
1152 1151 }
1153 1152
1154 1153 ipha->ipha_hdr_checksum = 0;
1155 1154 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
1156 1155 }
1157 1156
1158 1157 /* Initiate Write side IPPF processing before any fragmentation */
1159 1158 if (IPP_ENABLED(IPP_FWD_OUT, ipst)) {
1160 1159 /* ip_process translates an IS_UNDER_IPMP */
1161 1160 mp = ip_process(IPP_FWD_OUT, mp, dst_ill, dst_ill);
1162 1161 if (mp == NULL) {
1163 1162 /* ip_drop_packet and MIB done */
1164 1163 ip2dbg(("ire_recv_forward_v4: pkt dropped/deferred" \
1165 1164 " during IPPF processing\n"));
1166 1165 return;
1167 1166 }
1168 1167 }
1169 1168
1170 1169 pkt_len = ira->ira_pktlen;
1171 1170
1172 1171 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams);
1173 1172
1174 1173 ixaflags = IXAF_IS_IPV4 | IXAF_NO_DEV_FLOW_CTL;
1175 1174
1176 1175 if (pkt_len > mtu) {
1177 1176 /*
1178 1177 * It needs fragging on its way out. If we haven't
1179 1178 * verified the header checksum yet we do it now since
1180 1179 * are going to put a surely good checksum in the
1181 1180 * outgoing header, we have to make sure that it
1182 1181 * was good coming in.
1183 1182 */
1184 1183 if ((iraflags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) {
1185 1184 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs);
1186 1185 ip_drop_input("ipIfStatsInCksumErrs", mp, ill);
1187 1186 freemsg(mp);
1188 1187 return;
1189 1188 }
1190 1189 if (ipha->ipha_fragment_offset_and_flags & IPH_DF_HTONS) {
1191 1190 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutFragFails);
1192 1191 ip_drop_output("ipIfStatsOutFragFails", mp, dst_ill);
1193 1192 if (iraflags & IRAF_SYSTEM_LABELED) {
1194 1193 /*
1195 1194 * Remove any CIPSO option added by
1196 1195 * tsol_ip_forward, and make sure we report
1197 1196 * a path MTU so that there
1198 1197 * is room to add such a CIPSO option for future
1199 1198 * packets.
1200 1199 */
1201 1200 mtu = tsol_pmtu_adjust(mp, mtu, added_tx_len,
1202 1201 AF_INET);
1203 1202 }
1204 1203
1205 1204 icmp_frag_needed(mp, mtu, ira);
1206 1205 return;
1207 1206 }
1208 1207
1209 1208 (void) ip_fragment_v4(mp, nce, ixaflags, pkt_len, mtu,
1210 1209 ira->ira_xmit_hint, GLOBAL_ZONEID, 0, ip_xmit, NULL);
1211 1210 return;
1212 1211 }
1213 1212
1214 1213 ASSERT(pkt_len == ntohs(((ipha_t *)mp->b_rptr)->ipha_length));
1215 1214 if (iraflags & IRAF_LOOPBACK_COPY) {
1216 1215 /*
1217 1216 * IXAF_NO_LOOP_ZONEID is not set hence 7th arg
1218 1217 * is don't care
1219 1218 */
1220 1219 (void) ip_postfrag_loopcheck(mp, nce,
1221 1220 ixaflags | IXAF_LOOPBACK_COPY,
1222 1221 pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL);
1223 1222 } else {
1224 1223 (void) ip_xmit(mp, nce, ixaflags, pkt_len, ira->ira_xmit_hint,
1225 1224 GLOBAL_ZONEID, 0, NULL);
1226 1225 }
1227 1226 }
1228 1227
1229 1228 /*
1230 1229 * ire_recvfn for RTF_REJECT and RTF_BLACKHOLE routes, including IRE_NOROUTE,
1231 1230 * which is what ire_route_recursive returns when there is no matching ire.
1232 1231 * Send ICMP unreachable unless blackhole.
1233 1232 */
1234 1233 void
1235 1234 ire_recv_noroute_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1236 1235 {
1237 1236 ipha_t *ipha = (ipha_t *)iph_arg;
1238 1237 ill_t *ill = ira->ira_ill;
1239 1238 ip_stack_t *ipst = ill->ill_ipst;
1240 1239
1241 1240 /* Would we have forwarded this packet if we had a route? */
1242 1241 if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
1243 1242 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1244 1243 ip_drop_input("l2 multicast not forwarded", mp, ill);
1245 1244 freemsg(mp);
1246 1245 return;
1247 1246 }
1248 1247
1249 1248 if (!(ill->ill_flags & ILLF_ROUTER)) {
1250 1249 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1251 1250 ip_drop_input("ipIfStatsForwProhibits", mp, ill);
1252 1251 freemsg(mp);
1253 1252 return;
1254 1253 }
1255 1254 /*
1256 1255 * If we had a route this could have been forwarded. Count as such.
1257 1256 *
1258 1257 * ipIfStatsHCInForwDatagrams should only be increment if there
1259 1258 * will be an attempt to forward the packet, which is why we
1260 1259 * increment after the above condition has been checked.
1261 1260 */
1262 1261 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);
1263 1262
1264 1263 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes);
1265 1264
1266 1265 ip_rts_change(RTM_MISS, ipha->ipha_dst, 0, 0, 0, 0, 0, 0, RTA_DST,
1267 1266 ipst);
1268 1267
1269 1268 if (ire->ire_flags & RTF_BLACKHOLE) {
1270 1269 ip_drop_input("ipIfStatsInNoRoutes RTF_BLACKHOLE", mp, ill);
1271 1270 freemsg(mp);
1272 1271 } else {
1273 1272 ip_drop_input("ipIfStatsInNoRoutes RTF_REJECT", mp, ill);
1274 1273
1275 1274 if (ip_source_routed(ipha, ipst)) {
1276 1275 icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED, ira);
1277 1276 } else {
1278 1277 icmp_unreachable(mp, ICMP_HOST_UNREACHABLE, ira);
1279 1278 }
1280 1279 }
1281 1280 }
1282 1281
1283 1282 /*
1284 1283 * ire_recvfn for IRE_LOCALs marked with ire_noaccept. Such IREs are used for
1285 1284 * VRRP when in noaccept mode.
1286 1285 * We silently drop the packet. ARP handles packets even if noaccept is set.
1287 1286 */
1288 1287 /* ARGSUSED */
1289 1288 void
1290 1289 ire_recv_noaccept_v4(ire_t *ire, mblk_t *mp, void *iph_arg,
1291 1290 ip_recv_attr_t *ira)
1292 1291 {
1293 1292 ill_t *ill = ira->ira_ill;
1294 1293
1295 1294 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1296 1295 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill);
1297 1296 freemsg(mp);
1298 1297 }
1299 1298
1300 1299 /*
1301 1300 * ire_recvfn for IRE_BROADCAST.
1302 1301 */
void
ire_recv_broadcast_v4(ire_t *ire, mblk_t *mp, void *iph_arg,
    ip_recv_attr_t *ira)
{
	ipha_t		*ipha = (ipha_t *)iph_arg;
	ill_t		*ill = ira->ira_ill;
	ill_t		*dst_ill = ire->ire_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	ire_t		*alt_ire;
	nce_t		*nce;
	ipaddr_t	ipha_dst;

	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInBcastPkts);

	/* Tag for higher-level protocols */
	ira->ira_flags |= IRAF_BROADCAST;

	/*
	 * Whether local or directed broadcast forwarding: don't allow
	 * for TCP.
	 */
	if (ipha->ipha_protocol == IPPROTO_TCP) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
		ip_drop_input("ipIfStatsInDiscards", mp, ill);
		freemsg(mp);
		return;
	}

	/*
	 * So that we don't end up with dups, only one ill in an IPMP group is
	 * nominated to receive broadcast traffic.
	 * If we have no cast_ill we are liberal and accept everything.
	 */
	if (IS_UNDER_IPMP(ill)) {
		/* For an under ill, ill_grp can change under lock */
		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
		if (!ill->ill_nom_cast && ill->ill_grp != NULL &&
		    ill->ill_grp->ig_cast_ill != NULL) {
			rw_exit(&ipst->ips_ill_g_lock);
			/* No MIB since this is normal operation */
			ip_drop_input("not nom_cast", mp, ill);
			freemsg(mp);
			return;
		}
		rw_exit(&ipst->ips_ill_g_lock);

		ira->ira_ruifindex = ill_get_upper_ifindex(ill);
	}

	/*
	 * After reassembly and IPsec we will need to duplicate the
	 * broadcast packet for all matching zones on the ill.
	 */
	ira->ira_zoneid = ALL_ZONES;

	/*
	 * Check for directed broadcast i.e. ire->ire_ill is different than
	 * the incoming ill.
	 * The same broadcast address can be assigned to multiple interfaces
	 * so have to check explicitly for that case by looking up the alt_ire
	 */
	if (dst_ill == ill && !(ire->ire_flags & RTF_MULTIRT)) {
		/* Reassemble on the ill on which the packet arrived */
		ip_input_local_v4(ire, mp, ipha, ira);
		/* Restore */
		ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
		return;
	}

	/* Is there an IRE_BROADCAST on the incoming ill? */
	ipha_dst = ((ira->ira_flags & IRAF_DHCP_UNICAST) ? INADDR_BROADCAST :
	    ipha->ipha_dst);
	alt_ire = ire_ftable_lookup_v4(ipha_dst, 0, 0, IRE_BROADCAST, ill,
	    ALL_ZONES, ira->ira_tsl,
	    MATCH_IRE_TYPE|MATCH_IRE_ILL|MATCH_IRE_SECATTR, 0, ipst, NULL);
	if (alt_ire != NULL) {
		/* Not a directed broadcast */
		/*
		 * In the special case of multirouted broadcast
		 * packets, we unconditionally need to "gateway"
		 * them to the appropriate interface here so that reassembly
		 * works. We know that the IRE_BROADCAST on cgtp0 doesn't
		 * have RTF_MULTIRT set so we look for such an IRE in the
		 * bucket.
		 */
		if (alt_ire->ire_flags & RTF_MULTIRT) {
			irb_t		*irb;
			ire_t		*ire1;

			/*
			 * Walk the shared bucket under irb refhold looking
			 * for a non-MULTIRT IRE_BROADCAST; if found, `ill'
			 * is redirected (with a hold) to that IRE's ill.
			 */
			irb = ire->ire_bucket;
			irb_refhold(irb);
			for (ire1 = irb->irb_ire; ire1 != NULL;
			    ire1 = ire1->ire_next) {
				if (IRE_IS_CONDEMNED(ire1))
					continue;
				if (!(ire1->ire_type & IRE_BROADCAST) ||
				    (ire1->ire_flags & RTF_MULTIRT))
					continue;
				ill = ire1->ire_ill;
				ill_refhold(ill);
				break;
			}
			irb_refrele(irb);
			if (ire1 != NULL) {
				ill_t *orig_ill = ira->ira_ill;

				ire_refrele(alt_ire);
				/* Reassemble on the new ill */
				ira->ira_ill = ill;
				ip_input_local_v4(ire, mp, ipha, ira);
				ill_refrele(ill);
				/* Restore */
				ira->ira_ill = orig_ill;
				ira->ira_ruifindex =
				    orig_ill->ill_phyint->phyint_ifindex;
				return;
			}
		}
		ire_refrele(alt_ire);
		/* Reassemble on the ill on which the packet arrived */
		ip_input_local_v4(ire, mp, ipha, ira);
		goto done;
	}

	/*
	 * This is a directed broadcast
	 *
	 * If directed broadcast is allowed, then forward the packet out
	 * the destination interface with IXAF_LOOPBACK_COPY set. That will
	 * result in ip_input() receiving a copy of the packet on the
	 * appropriate ill. (We could optimize this to avoid the extra trip
	 * via ip_input(), but since directed broadcasts are normally disabled
	 * it doesn't make sense to optimize it.)
	 */
	if (!ipst->ips_ip_g_forward_directed_bcast ||
	    (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST))) {
		ip_drop_input("directed broadcast not allowed", mp, ill);
		freemsg(mp);
		goto done;
	}
	if ((ira->ira_flags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs);
		ip_drop_input("ipIfStatsInCksumErrs", mp, ill);
		freemsg(mp);
		goto done;
	}

	/*
	 * Clear the indication that this may have hardware
	 * checksum as we are not using it for forwarding.
	 */
	DB_CKSUMFLAGS(mp) = 0;

	/*
	 * Adjust ttl to 2 (1+1 - the forward engine will decrement it by one.
	 */
	ipha->ipha_ttl = ipst->ips_ip_broadcast_ttl + 1;
	ipha->ipha_hdr_checksum = 0;
	ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);

	/*
	 * We use ip_forward_xmit to do any fragmentation.
	 * and loopback copy on the outbound interface.
	 *
	 * Make it so that IXAF_LOOPBACK_COPY to be set on transmit side.
	 */
	ira->ira_flags |= IRAF_LOOPBACK_COPY;

	nce = arp_nce_init(dst_ill, ipha->ipha_dst, IRE_BROADCAST);
	if (nce == NULL) {
		BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutDiscards);
		ip_drop_output("No nce", mp, dst_ill);
		freemsg(mp);
		goto done;
	}

	ip_forward_xmit_v4(nce, ill, mp, ipha, ira, dst_ill->ill_mc_mtu, 0);
	nce_refrele(nce);
done:
	/* Restore */
	ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
}
1485 1484
1486 1485 /*
1487 1486 * ire_recvfn for IRE_MULTICAST.
1488 1487 */
1489 1488 void
1490 1489 ire_recv_multicast_v4(ire_t *ire, mblk_t *mp, void *iph_arg,
1491 1490 ip_recv_attr_t *ira)
1492 1491 {
1493 1492 ipha_t *ipha = (ipha_t *)iph_arg;
1494 1493 ill_t *ill = ira->ira_ill;
1495 1494 ip_stack_t *ipst = ill->ill_ipst;
1496 1495
1497 1496 ASSERT(ire->ire_ill == ira->ira_ill);
1498 1497
1499 1498 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts);
1500 1499 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, ira->ira_pktlen);
1501 1500
1502 1501 /* RSVP hook */
1503 1502 if (ira->ira_flags & IRAF_RSVP)
1504 1503 goto forus;
1505 1504
1506 1505 /* Tag for higher-level protocols */
1507 1506 ira->ira_flags |= IRAF_MULTICAST;
1508 1507
1509 1508 /*
1510 1509 * So that we don't end up with dups, only one ill an IPMP group is
1511 1510 * nominated to receive multicast traffic.
1512 1511 * If we have no cast_ill we are liberal and accept everything.
1513 1512 */
1514 1513 if (IS_UNDER_IPMP(ill)) {
1515 1514 ip_stack_t *ipst = ill->ill_ipst;
1516 1515
1517 1516 /* For an under ill_grp can change under lock */
1518 1517 rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1519 1518 if (!ill->ill_nom_cast && ill->ill_grp != NULL &&
1520 1519 ill->ill_grp->ig_cast_ill != NULL) {
1521 1520 rw_exit(&ipst->ips_ill_g_lock);
1522 1521 ip_drop_input("not on cast ill", mp, ill);
1523 1522 freemsg(mp);
1524 1523 return;
1525 1524 }
1526 1525 rw_exit(&ipst->ips_ill_g_lock);
1527 1526 /*
1528 1527 * We switch to the upper ill so that mrouter and hasmembers
1529 1528 * can operate on upper here and in ip_input_multicast.
1530 1529 */
1531 1530 ill = ipmp_ill_hold_ipmp_ill(ill);
1532 1531 if (ill != NULL) {
1533 1532 ASSERT(ill != ira->ira_ill);
1534 1533 ASSERT(ire->ire_ill == ira->ira_ill);
1535 1534 ira->ira_ill = ill;
1536 1535 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
1537 1536 } else {
1538 1537 ill = ira->ira_ill;
1539 1538 }
1540 1539 }
1541 1540
1542 1541 /*
1543 1542 * Check if we are a multicast router - send ip_mforward a copy of
1544 1543 * the packet.
1545 1544 * Due to mroute_decap tunnels we consider forwarding packets even if
1546 1545 * mrouted has not joined the allmulti group on this interface.
1547 1546 */
1548 1547 if (ipst->ips_ip_g_mrouter) {
1549 1548 int retval;
1550 1549
1551 1550 /*
1552 1551 * Clear the indication that this may have hardware
1553 1552 * checksum as we are not using it for forwarding.
1554 1553 */
1555 1554 DB_CKSUMFLAGS(mp) = 0;
1556 1555
1557 1556 /*
1558 1557 * ip_mforward helps us make these distinctions: If received
1559 1558 * on tunnel and not IGMP, then drop.
1560 1559 * If IGMP packet, then don't check membership
1561 1560 * If received on a phyint and IGMP or PIM, then
1562 1561 * don't check membership
1563 1562 */
1564 1563 retval = ip_mforward(mp, ira);
1565 1564 /* ip_mforward updates mib variables if needed */
1566 1565
1567 1566 switch (retval) {
1568 1567 case 0:
1569 1568 /*
1570 1569 * pkt is okay and arrived on phyint.
1571 1570 *
1572 1571 * If we are running as a multicast router
1573 1572 * we need to see all IGMP and/or PIM packets.
1574 1573 */
1575 1574 if ((ipha->ipha_protocol == IPPROTO_IGMP) ||
1576 1575 (ipha->ipha_protocol == IPPROTO_PIM)) {
1577 1576 goto forus;
1578 1577 }
1579 1578 break;
1580 1579 case -1:
1581 1580 /* pkt is mal-formed, toss it */
1582 1581 freemsg(mp);
1583 1582 goto done;
1584 1583 case 1:
1585 1584 /*
1586 1585 * pkt is okay and arrived on a tunnel
1587 1586 *
1588 1587 * If we are running a multicast router
1589 1588 * we need to see all igmp packets.
1590 1589 */
1591 1590 if (ipha->ipha_protocol == IPPROTO_IGMP) {
1592 1591 goto forus;
1593 1592 }
1594 1593 ip_drop_input("Multicast on tunnel ignored", mp, ill);
1595 1594 freemsg(mp);
1596 1595 goto done;
1597 1596 }
1598 1597 }
1599 1598
1600 1599 /*
1601 1600 * Check if we have members on this ill. This is not necessary for
1602 1601 * correctness because even if the NIC/GLD had a leaky filter, we
1603 1602 * filter before passing to each conn_t.
1604 1603 */
1605 1604 if (!ill_hasmembers_v4(ill, ipha->ipha_dst)) {
1606 1605 /*
1607 1606 * Nobody interested
1608 1607 *
1609 1608 * This might just be caused by the fact that
1610 1609 * multiple IP Multicast addresses map to the same
1611 1610 * link layer multicast - no need to increment counter!
1612 1611 */
1613 1612 ip_drop_input("Multicast with no members", mp, ill);
1614 1613 freemsg(mp);
1615 1614 goto done;
1616 1615 }
1617 1616 forus:
1618 1617 ip2dbg(("ire_recv_multicast_v4: multicast for us: 0x%x\n",
1619 1618 ntohl(ipha->ipha_dst)));
1620 1619
1621 1620 /*
1622 1621 * After reassembly and IPsec we will need to duplicate the
1623 1622 * multicast packet for all matching zones on the ill.
1624 1623 */
1625 1624 ira->ira_zoneid = ALL_ZONES;
1626 1625
1627 1626 /* Reassemble on the ill on which the packet arrived */
1628 1627 ip_input_local_v4(ire, mp, ipha, ira);
1629 1628 done:
1630 1629 if (ill != ire->ire_ill) {
1631 1630 ill_refrele(ill);
1632 1631 ira->ira_ill = ire->ire_ill;
1633 1632 ira->ira_ruifindex = ira->ira_ill->ill_phyint->phyint_ifindex;
1634 1633 }
1635 1634 }
1636 1635
1637 1636 /*
1638 1637 * ire_recvfn for IRE_OFFLINK with RTF_MULTIRT.
1639 1638 * Drop packets since we don't forward out multirt routes.
1640 1639 */
1641 1640 /* ARGSUSED */
1642 1641 void
1643 1642 ire_recv_multirt_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1644 1643 {
1645 1644 ill_t *ill = ira->ira_ill;
1646 1645
1647 1646 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes);
1648 1647 ip_drop_input("Not forwarding out MULTIRT", mp, ill);
1649 1648 freemsg(mp);
1650 1649 }
1651 1650
1652 1651 /*
1653 1652 * ire_recvfn for IRE_LOOPBACK. This is only used when a FW_HOOK
1654 1653 * has rewritten the packet to have a loopback destination address (We
1655 1654 * filter out packet with a loopback destination from arriving over the wire).
1656 1655 * We don't know what zone to use, thus we always use the GLOBAL_ZONEID.
1657 1656 */
1658 1657 void
1659 1658 ire_recv_loopback_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1660 1659 {
1661 1660 ipha_t *ipha = (ipha_t *)iph_arg;
1662 1661 ill_t *ill = ira->ira_ill;
1663 1662 ill_t *ire_ill = ire->ire_ill;
1664 1663
1665 1664 ira->ira_zoneid = GLOBAL_ZONEID;
1666 1665
1667 1666 /* Switch to the lo0 ill for further processing */
1668 1667 if (ire_ill != ill) {
1669 1668 /*
1670 1669 * Update ira_ill to be the ILL on which the IP address
1671 1670 * is hosted.
1672 1671 * No need to hold the ill since we have a hold on the ire
1673 1672 */
1674 1673 ASSERT(ira->ira_ill == ira->ira_rill);
1675 1674 ira->ira_ill = ire_ill;
1676 1675
1677 1676 ip_input_local_v4(ire, mp, ipha, ira);
1678 1677
1679 1678 /* Restore */
1680 1679 ASSERT(ira->ira_ill == ire_ill);
1681 1680 ira->ira_ill = ill;
1682 1681 return;
1683 1682
1684 1683 }
1685 1684 ip_input_local_v4(ire, mp, ipha, ira);
1686 1685 }
1687 1686
1688 1687 /*
1689 1688 * ire_recvfn for IRE_LOCAL.
1690 1689 */
void
ire_recv_local_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
{
	ipha_t		*ipha = (ipha_t *)iph_arg;
	ill_t		*ill = ira->ira_ill;
	ill_t		*ire_ill = ire->ire_ill;

	/* Make a note for DAD that this address is in use */
	ire->ire_last_used_time = LBOLT_FASTPATH;

	/* Only target the IRE_LOCAL with the right zoneid. */
	ira->ira_zoneid = ire->ire_zoneid;

	/*
	 * If the packet arrived on the wrong ill, we check that
	 * this is ok.
	 * If it is, then we ensure that we do the reassembly on
	 * the ill on which the address is hosted. We keep ira_rill as
	 * the one on which the packet arrived, so that IP_PKTINFO and
	 * friends can report this.
	 */
	if (ire_ill != ill) {
		ire_t *new_ire;

		/* May return `ire' itself (e.g. for IPMP), or NULL. */
		new_ire = ip_check_multihome(&ipha->ipha_dst, ire, ill);
		if (new_ire == NULL) {
			/* Drop packet */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
			ip_drop_input("ipIfStatsInForwProhibits", mp, ill);
			freemsg(mp);
			return;
		}
		/*
		 * Update ira_ill to be the ILL on which the IP address
		 * is hosted. No need to hold the ill since we have a
		 * hold on the ire. Note that we do the switch even if
		 * new_ire == ire (for IPMP, ire would be the one corresponding
		 * to the IPMP ill).
		 */
		ASSERT(ira->ira_ill == ira->ira_rill);
		ira->ira_ill = new_ire->ire_ill;

		/* ira_ruifindex tracks the upper for ira_rill */
		if (IS_UNDER_IPMP(ill))
			ira->ira_ruifindex = ill_get_upper_ifindex(ill);

		ip_input_local_v4(new_ire, mp, ipha, ira);

		/* Restore */
		ASSERT(ira->ira_ill == new_ire->ire_ill);
		ira->ira_ill = ill;
		ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;

		/* Only drop the extra ref when it is truly a distinct ire. */
		if (new_ire != ire)
			ire_refrele(new_ire);
		return;
	}

	ip_input_local_v4(ire, mp, ipha, ira);
}
1751 1750
1752 1751 /*
1753 1752 * Common function for packets arriving for the host. Handles
1754 1753 * checksum verification, reassembly checks, etc.
1755 1754 */
static void
ip_input_local_v4(ire_t *ire, mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira)
{
	ill_t		*ill = ira->ira_ill;
	iaflags_t	iraflags = ira->ira_flags;

	/*
	 * Verify IP header checksum. If the packet was AH or ESP then
	 * this flag has already been cleared. Likewise if the packet
	 * had a hardware checksum.
	 */
	if ((iraflags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs);
		ip_drop_input("ipIfStatsInCksumErrs", mp, ill);
		freemsg(mp);
		return;
	}

	if (iraflags & IRAF_IPV4_OPTIONS) {
		if (!ip_input_local_options(mp, ipha, ira)) {
			/* Error has been sent and mp consumed */
			return;
		}
		/*
		 * Some old hardware does partial checksum by including the
		 * whole IP header, so the partial checksum value might have
		 * become invalid if any option in the packet have been
		 * updated. Always clear partial checksum flag here.
		 */
		DB_CKSUMFLAGS(mp) &= ~HCK_PARTIALCKSUM;
	}

	/*
	 * Is packet part of fragmented IP packet?
	 * We compare against defined values in network byte order
	 */
	if (ipha->ipha_fragment_offset_and_flags &
	    (IPH_MF_HTONS | IPH_OFFSET_HTONS)) {
		/*
		 * Make sure we have ira_l2src before we lose the original
		 * mblk
		 */
		if (!(ira->ira_flags & IRAF_L2SRC_SET))
			ip_setl2src(mp, ira, ira->ira_rill);

		/* May queue the fragment and return NULL until complete. */
		mp = ip_input_fragment(mp, ipha, ira);
		if (mp == NULL)
			return;
		/* Completed reassembly; re-read the header pointer. */
		ipha = (ipha_t *)mp->b_rptr;
	}

	/*
	 * For broadcast and multicast we need some extra work before
	 * we call ip_fanout_v4(), since in the case of shared-IP zones
	 * we need to pretend that a packet arrived for each zoneid.
	 */
	if (iraflags & IRAF_MULTIBROADCAST) {
		if (iraflags & IRAF_BROADCAST)
			ip_input_broadcast_v4(ire, mp, ipha, ira);
		else
			ip_input_multicast_v4(ire, mp, ipha, ira);
		return;
	}
	ip_fanout_v4(mp, ipha, ira);
}
1822 1821
1823 1822
/*
 * Handle multiple zones which match the same broadcast address
 * and ill by delivering a packet to each of them.
 * Walk the bucket and look for different ire_zoneid but otherwise
 * the same IRE (same ill/addr/mask/type).
 * Note that ire_add() tracks IREs that are identical in all
 * fields (addr/mask/type/gw/ill/zoneid) within a single IRE by
 * increasing ire_identical_cnt. Thus we don't need to be concerned
 * about those.
 *
 * Consumes mp on all paths; each extra zone gets its own copymsg()
 * copy while the original mp is delivered for the main ire last.
 */
static void
ip_input_broadcast_v4(ire_t *ire, mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira)
{
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	netstack_t	*ns = ipst->ips_netstack;
	irb_t		*irb;
	ire_t		*ire1;
	mblk_t		*mp1;
	ipha_t		*ipha1;
	/* Saved so each loop iteration starts from the on-wire values */
	uint_t		ira_pktlen = ira->ira_pktlen;
	uint16_t	ira_ip_hdr_length = ira->ira_ip_hdr_length;

	irb = ire->ire_bucket;

	/*
	 * If we don't have more than one shared-IP zone, or if
	 * there can't be more than one IRE_BROADCAST for this
	 * IP address, then just set the zoneid and proceed.
	 */
	if (ns->netstack_numzones == 1 || irb->irb_ire_cnt == 1) {
		ira->ira_zoneid = ire->ire_zoneid;

		ip_fanout_v4(mp, ipha, ira);
		return;
	}
	/* Hold the bucket so the chain can't change under us */
	irb_refhold(irb);
	for (ire1 = irb->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) {
		/* We do the main IRE after the end of the loop */
		if (ire1 == ire)
			continue;

		/*
		 * Only IREs for the same IP address should be in the same
		 * bucket.
		 * But could have IRE_HOSTs in the case of CGTP.
		 */
		ASSERT(ire1->ire_addr == ire->ire_addr);
		if (!(ire1->ire_type & IRE_BROADCAST))
			continue;

		if (IRE_IS_CONDEMNED(ire1))
			continue;

		mp1 = copymsg(mp);
		if (mp1 == NULL) {
			/* Failed to deliver to one zone */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards", mp, ill);
			continue;
		}
		ira->ira_zoneid = ire1->ire_zoneid;
		ipha1 = (ipha_t *)mp1->b_rptr;
		ip_fanout_v4(mp1, ipha1, ira);
		/*
		 * IPsec might have modified ira_pktlen and ira_ip_hdr_length
		 * so we restore them for a potential next iteration
		 */
		ira->ira_pktlen = ira_pktlen;
		ira->ira_ip_hdr_length = ira_ip_hdr_length;
	}
	irb_refrele(irb);
	/* Do the main ire */
	ira->ira_zoneid = ire->ire_zoneid;
	ip_fanout_v4(mp, ipha, ira);
}
1900 1899
/*
 * Handle multiple zones which want to receive the same multicast packets
 * on this ill by delivering a packet to each of them.
 *
 * Note that for packets delivered to transports we could instead do this
 * as part of the fanout code, but since we need to handle icmp_inbound
 * it is simpler to have multicast work the same as broadcast.
 *
 * The ip_fanout matching for multicast matches based on ilm independent of
 * zoneid since the zoneid restriction is applied when joining a multicast
 * group.
 *
 * Consumes mp on all paths. Each non-global zone with members gets a
 * copymsg() copy; the original mp is delivered to the global zone last.
 */
/* ARGSUSED */
static void
ip_input_multicast_v4(ire_t *ire, mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira)
{
	ill_t		*ill = ira->ira_ill;
	iaflags_t	iraflags = ira->ira_flags;
	ip_stack_t	*ipst = ill->ill_ipst;
	netstack_t	*ns = ipst->ips_netstack;
	zoneid_t	zoneid;
	mblk_t		*mp1;
	ipha_t		*ipha1;
	/* Saved so each loop iteration starts from the on-wire values */
	uint_t		ira_pktlen = ira->ira_pktlen;
	uint16_t	ira_ip_hdr_length = ira->ira_ip_hdr_length;

	/* ire_recv_multicast has switched to the upper ill for IPMP */
	ASSERT(!IS_UNDER_IPMP(ill));

	/*
	 * If we don't have more than one shared-IP zone, or if
	 * there are no members in anything but the global zone,
	 * then just set the zoneid and proceed.
	 */
	if (ns->netstack_numzones == 1 ||
	    !ill_hasmembers_otherzones_v4(ill, ipha->ipha_dst,
	    GLOBAL_ZONEID)) {
		ira->ira_zoneid = GLOBAL_ZONEID;

		/* If sender didn't want this zone to receive it, drop */
		if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
		    ira->ira_no_loop_zoneid == ira->ira_zoneid) {
			ip_drop_input("Multicast but wrong zoneid", mp, ill);
			freemsg(mp);
			return;
		}
		ip_fanout_v4(mp, ipha, ira);
		return;
	}

	/*
	 * Here we loop over all zoneids that have members in the group
	 * and deliver a packet to ip_fanout for each zoneid.
	 *
	 * First find any members in the lowest numeric zoneid by looking for
	 * first zoneid larger than -1 (ALL_ZONES).
	 * We terminate the loop when we receive -1 (ALL_ZONES).
	 */
	zoneid = ill_hasmembers_nextzone_v4(ill, ipha->ipha_dst, ALL_ZONES);
	for (; zoneid != ALL_ZONES;
	    zoneid = ill_hasmembers_nextzone_v4(ill, ipha->ipha_dst, zoneid)) {
		/*
		 * Avoid an extra copymsg/freemsg by skipping global zone here
		 * and doing that at the end.
		 */
		if (zoneid == GLOBAL_ZONEID)
			continue;

		ira->ira_zoneid = zoneid;

		/* If sender didn't want this zone to receive it, skip */
		if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
		    ira->ira_no_loop_zoneid == ira->ira_zoneid)
			continue;

		mp1 = copymsg(mp);
		if (mp1 == NULL) {
			/* Failed to deliver to one zone */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards", mp, ill);
			continue;
		}
		ipha1 = (ipha_t *)mp1->b_rptr;
		ip_fanout_v4(mp1, ipha1, ira);
		/*
		 * IPsec might have modified ira_pktlen and ira_ip_hdr_length
		 * so we restore them for a potential next iteration
		 */
		ira->ira_pktlen = ira_pktlen;
		ira->ira_ip_hdr_length = ira_ip_hdr_length;
	}

	/* Do the main ire */
	ira->ira_zoneid = GLOBAL_ZONEID;
	/* If sender didn't want this zone to receive it, drop */
	if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
	    ira->ira_no_loop_zoneid == ira->ira_zoneid) {
		ip_drop_input("Multicast but wrong zoneid", mp, ill);
		freemsg(mp);
	} else {
		ip_fanout_v4(mp, ipha, ira);
	}
}
2004 2003
2005 2004
/*
 * Determine the zoneid and IRAF_TX_* flags if trusted extensions
 * is in use. Updates ira_zoneid and ira_flags as a result.
 *
 * Caller guarantees that at least 4 bytes of ULP header (the port
 * pair for TCP/UDP/SCTP) are present past ip_hdr_length.
 */
static void
ip_fanout_tx_v4(mblk_t *mp, ipha_t *ipha, uint8_t protocol,
    uint_t ip_hdr_length, ip_recv_attr_t *ira)
{
	uint16_t	*up;	/* Source/dest port pair in the ULP header */
	uint16_t	lport;
	zoneid_t	zoneid;

	ASSERT(ira->ira_flags & IRAF_SYSTEM_LABELED);

	/*
	 * If the packet is unlabeled we might allow read-down
	 * for MAC_EXEMPT. Below we clear this if it is a multi-level
	 * port (MLP).
	 * Note that ira_tsl can be NULL here.
	 */
	if (ira->ira_tsl != NULL && ira->ira_tsl->tsl_flags & TSLF_UNLABELED)
		ira->ira_flags |= IRAF_TX_MAC_EXEMPTABLE;

	/* Already bound to a specific zone; nothing more to determine */
	if (ira->ira_zoneid != ALL_ZONES)
		return;

	ira->ira_flags |= IRAF_TX_SHARED_ADDR;

	up = (uint16_t *)((uchar_t *)ipha + ip_hdr_length);
	switch (protocol) {
	case IPPROTO_TCP:
	case IPPROTO_SCTP:
	case IPPROTO_UDP:
		/* Caller ensures this */
		ASSERT(((uchar_t *)ipha) + ip_hdr_length +4 <= mp->b_wptr);

		/*
		 * Only these transports support MLP.
		 * We know their destination port numbers are in
		 * the same place in the header (network byte order).
		 */
		lport = up[1];

		/*
		 * No need to handle exclusive-stack zones
		 * since ALL_ZONES only applies to the shared IP instance.
		 */
		zoneid = tsol_mlp_findzone(protocol, lport);
		/*
		 * If no shared MLP is found, tsol_mlp_findzone returns
		 * ALL_ZONES. In that case, we assume it's SLP, and
		 * search for the zone based on the packet label.
		 *
		 * If there is such a zone, we prefer to find a
		 * connection in it. Otherwise, we look for a
		 * MAC-exempt connection in any zone whose label
		 * dominates the default label on the packet.
		 */
		if (zoneid == ALL_ZONES)
			zoneid = tsol_attr_to_zoneid(ira);
		else
			ira->ira_flags &= ~IRAF_TX_MAC_EXEMPTABLE;
		break;
	default:
		/* Handle shared address for other protocols */
		zoneid = tsol_attr_to_zoneid(ira);
		break;
	}
	ira->ira_zoneid = zoneid;
}
2076 2075
2077 2076 /*
2078 2077 * Increment checksum failure statistics
2079 2078 */
2080 2079 static void
2081 2080 ip_input_cksum_err_v4(uint8_t protocol, uint16_t hck_flags, ill_t *ill)
2082 2081 {
2083 2082 ip_stack_t *ipst = ill->ill_ipst;
2084 2083
2085 2084 switch (protocol) {
2086 2085 case IPPROTO_TCP:
2087 2086 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs);
2088 2087
2089 2088 if (hck_flags & HCK_FULLCKSUM)
2090 2089 IP_STAT(ipst, ip_tcp_in_full_hw_cksum_err);
2091 2090 else if (hck_flags & HCK_PARTIALCKSUM)
2092 2091 IP_STAT(ipst, ip_tcp_in_part_hw_cksum_err);
2093 2092 else
2094 2093 IP_STAT(ipst, ip_tcp_in_sw_cksum_err);
2095 2094 break;
2096 2095 case IPPROTO_UDP:
2097 2096 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs);
2098 2097 if (hck_flags & HCK_FULLCKSUM)
2099 2098 IP_STAT(ipst, ip_udp_in_full_hw_cksum_err);
2100 2099 else if (hck_flags & HCK_PARTIALCKSUM)
2101 2100 IP_STAT(ipst, ip_udp_in_part_hw_cksum_err);
2102 2101 else
2103 2102 IP_STAT(ipst, ip_udp_in_sw_cksum_err);
2104 2103 break;
2105 2104 case IPPROTO_ICMP:
2106 2105 BUMP_MIB(&ipst->ips_icmp_mib, icmpInCksumErrs);
2107 2106 break;
2108 2107 default:
2109 2108 ASSERT(0);
2110 2109 break;
2111 2110 }
2112 2111 }
2113 2112
2114 2113 /* Calculate the IPv4 pseudo-header checksum */
2115 2114 uint32_t
2116 2115 ip_input_cksum_pseudo_v4(ipha_t *ipha, ip_recv_attr_t *ira)
2117 2116 {
2118 2117 uint_t ulp_len;
2119 2118 uint32_t cksum;
2120 2119 uint8_t protocol = ira->ira_protocol;
2121 2120 uint16_t ip_hdr_length = ira->ira_ip_hdr_length;
2122 2121
2123 2122 #define iphs ((uint16_t *)ipha)
2124 2123
2125 2124 switch (protocol) {
2126 2125 case IPPROTO_TCP:
2127 2126 ulp_len = ira->ira_pktlen - ip_hdr_length;
2128 2127
2129 2128 /* Protocol and length */
2130 2129 cksum = htons(ulp_len) + IP_TCP_CSUM_COMP;
2131 2130 /* IP addresses */
2132 2131 cksum += iphs[6] + iphs[7] + iphs[8] + iphs[9];
2133 2132 break;
2134 2133
2135 2134 case IPPROTO_UDP: {
2136 2135 udpha_t *udpha;
2137 2136
2138 2137 udpha = (udpha_t *)((uchar_t *)ipha + ip_hdr_length);
2139 2138
2140 2139 /* Protocol and length */
2141 2140 cksum = udpha->uha_length + IP_UDP_CSUM_COMP;
2142 2141 /* IP addresses */
2143 2142 cksum += iphs[6] + iphs[7] + iphs[8] + iphs[9];
2144 2143 break;
2145 2144 }
2146 2145
2147 2146 default:
2148 2147 cksum = 0;
2149 2148 break;
2150 2149 }
2151 2150 #undef iphs
2152 2151 return (cksum);
2153 2152 }
2154 2153
2155 2154
2156 2155 /*
2157 2156 * Software verification of the ULP checksums.
2158 2157 * Returns B_TRUE if ok.
2159 2158 * Increments statistics of failed.
2160 2159 */
2161 2160 static boolean_t
2162 2161 ip_input_sw_cksum_v4(mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira)
2163 2162 {
2164 2163 ip_stack_t *ipst = ira->ira_ill->ill_ipst;
2165 2164 uint32_t cksum;
2166 2165 uint8_t protocol = ira->ira_protocol;
2167 2166 uint16_t ip_hdr_length = ira->ira_ip_hdr_length;
2168 2167
2169 2168 IP_STAT(ipst, ip_in_sw_cksum);
2170 2169
2171 2170 ASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP);
2172 2171
2173 2172 cksum = ip_input_cksum_pseudo_v4(ipha, ira);
2174 2173 cksum = IP_CSUM(mp, ip_hdr_length, cksum);
2175 2174 if (cksum == 0)
2176 2175 return (B_TRUE);
2177 2176
2178 2177 ip_input_cksum_err_v4(protocol, 0, ira->ira_ill);
2179 2178 return (B_FALSE);
2180 2179 }
2181 2180
/*
 * Verify the ULP checksums.
 * Returns B_TRUE if ok, or if the ULP doesn't have a well-defined checksum
 * algorithm.
 * Increments statistics if failed.
 *
 * Tries, in order: hardware full-checksum verification, hardware-attached
 * full checksum, hardware partial checksum (with adjustment for any
 * prepended/postpended bytes), and finally a full software computation.
 */
static boolean_t
ip_input_cksum_v4(iaflags_t iraflags, mblk_t *mp, ipha_t *ipha,
    ip_recv_attr_t *ira)
{
	ill_t		*ill = ira->ira_rill;
	uint16_t	hck_flags;
	uint32_t	cksum;
	mblk_t		*mp1;
	int32_t		len;
	uint8_t		protocol = ira->ira_protocol;
	uint16_t	ip_hdr_length = ira->ira_ip_hdr_length;


	switch (protocol) {
	case IPPROTO_TCP:
		break;

	case IPPROTO_UDP: {
		udpha_t	*udpha;

		udpha = (udpha_t *)((uchar_t *)ipha + ip_hdr_length);
		if (udpha->uha_checksum == 0) {
			/* Packet doesn't have a UDP checksum */
			return (B_TRUE);
		}
		break;
	}
	case IPPROTO_SCTP: {
		sctp_hdr_t	*sctph;
		uint32_t	pktsum;

		sctph = (sctp_hdr_t *)((uchar_t *)ipha + ip_hdr_length);
#ifdef	DEBUG
		if (skip_sctp_cksum)
			return (B_TRUE);
#endif
		/*
		 * SCTP's checksum covers the header with the checksum
		 * field zeroed, so save/zero/restore it around the
		 * computation.
		 */
		pktsum = sctph->sh_chksum;
		sctph->sh_chksum = 0;
		cksum = sctp_cksum(mp, ip_hdr_length);
		sctph->sh_chksum = pktsum;
		if (cksum == pktsum)
			return (B_TRUE);

		/*
		 * Defer until later whether a bad checksum is ok
		 * in order to allow RAW sockets to use Adler checksum
		 * with SCTP.
		 */
		ira->ira_flags |= IRAF_SCTP_CSUM_ERR;
		return (B_TRUE);
	}

	default:
		/* No ULP checksum to verify. */
		return (B_TRUE);
	}
	/*
	 * Revert to software checksum calculation if the interface
	 * isn't capable of checksum offload.
	 * We clear DB_CKSUMFLAGS when going through IPsec in ip_fanout.
	 * Note: IRAF_NO_HW_CKSUM is not currently used.
	 */
	ASSERT(!IS_IPMP(ill));
	if ((iraflags & IRAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) ||
	    !dohwcksum) {
		return (ip_input_sw_cksum_v4(mp, ipha, ira));
	}

	/*
	 * We apply this for all ULP protocols. Does the HW know to
	 * not set the flags for SCTP and other protocols.
	 */

	hck_flags = DB_CKSUMFLAGS(mp);

	if (hck_flags & HCK_FULLCKSUM_OK) {
		/*
		 * Hardware has already verified the checksum.
		 */
		return (B_TRUE);
	}

	if (hck_flags & HCK_FULLCKSUM) {
		/*
		 * Full checksum has been computed by the hardware
		 * and has been attached. If the driver wants us to
		 * verify the correctness of the attached value, in
		 * order to protect against faulty hardware, compare
		 * it against -0 (0xFFFF) to see if it's valid.
		 */
		cksum = DB_CKSUM16(mp);
		if (cksum == 0xFFFF)
			return (B_TRUE);
		ip_input_cksum_err_v4(protocol, hck_flags, ira->ira_ill);
		return (B_FALSE);
	}

	mp1 = mp->b_cont;
	if ((hck_flags & HCK_PARTIALCKSUM) &&
	    (mp1 == NULL || mp1->b_cont == NULL) &&
	    ip_hdr_length >= DB_CKSUMSTART(mp) &&
	    ((len = ip_hdr_length - DB_CKSUMSTART(mp)) & 1) == 0) {
		uint32_t	adj;
		uchar_t		*cksum_start;

		cksum = ip_input_cksum_pseudo_v4(ipha, ira);

		cksum_start = ((uchar_t *)ipha + DB_CKSUMSTART(mp));

		/*
		 * Partial checksum has been calculated by hardware
		 * and attached to the packet; in addition, any
		 * prepended extraneous data is even byte aligned,
		 * and there are at most two mblks associated with
		 * the packet. If any such data exists, we adjust
		 * the checksum; also take care any postpended data.
		 */
		IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, len, adj);
		/*
		 * One's complement subtract extraneous checksum
		 */
		cksum += DB_CKSUM16(mp);
		if (adj >= cksum)
			cksum = ~(adj - cksum) & 0xFFFF;
		else
			cksum -= adj;
		/* Fold the carries twice, then test for one's-complement 0 */
		cksum = (cksum & 0xFFFF) + ((int)cksum >> 16);
		cksum = (cksum & 0xFFFF) + ((int)cksum >> 16);
		if (!(~cksum & 0xFFFF))
			return (B_TRUE);

		ip_input_cksum_err_v4(protocol, hck_flags, ira->ira_ill);
		return (B_FALSE);
	}
	/* Partial checksum unusable for this packet; fall back to software */
	return (ip_input_sw_cksum_v4(mp, ipha, ira));
}
2324 2323
2325 2324
2326 2325 /*
2327 2326 * Handle fanout of received packets.
2328 2327 * Unicast packets that are looped back (from ire_send_local_v4) and packets
2329 2328 * from the wire are differentiated by checking IRAF_VERIFY_ULP_CKSUM.
2330 2329 *
2331 2330 * IPQoS Notes
2332 2331 * Before sending it to the client, invoke IPPF processing. Policy processing
2333 2332 * takes place only if the callout_position, IPP_LOCAL_IN, is enabled.
2334 2333 */
2335 2334 void
2336 2335 ip_fanout_v4(mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira)
2337 2336 {
2338 2337 ill_t *ill = ira->ira_ill;
2339 2338 iaflags_t iraflags = ira->ira_flags;
2340 2339 ip_stack_t *ipst = ill->ill_ipst;
2341 2340 uint8_t protocol = ipha->ipha_protocol;
2342 2341 conn_t *connp;
2343 2342 #define rptr ((uchar_t *)ipha)
2344 2343 uint_t ip_hdr_length;
2345 2344 uint_t min_ulp_header_length;
2346 2345 int offset;
2347 2346 ssize_t len;
2348 2347 netstack_t *ns = ipst->ips_netstack;
2349 2348 ipsec_stack_t *ipss = ns->netstack_ipsec;
2350 2349 ill_t *rill = ira->ira_rill;
2351 2350
2352 2351 ASSERT(ira->ira_pktlen == ntohs(ipha->ipha_length));
2353 2352
2354 2353 ip_hdr_length = ira->ira_ip_hdr_length;
2355 2354 ira->ira_protocol = protocol;
2356 2355
2357 2356 /*
2358 2357 * Time for IPP once we've done reassembly and IPsec.
2359 2358 * We skip this for loopback packets since we don't do IPQoS
2360 2359 * on loopback.
2361 2360 */
2362 2361 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) &&
2363 2362 !(iraflags & IRAF_LOOPBACK) &&
2364 2363 (protocol != IPPROTO_ESP && protocol != IPPROTO_AH)) {
2365 2364 /*
2366 2365 * Use the interface on which the packet arrived - not where
2367 2366 * the IP address is hosted.
2368 2367 */
2369 2368 /* ip_process translates an IS_UNDER_IPMP */
2370 2369 mp = ip_process(IPP_LOCAL_IN, mp, rill, ill);
2371 2370 if (mp == NULL) {
2372 2371 /* ip_drop_packet and MIB done */
2373 2372 return;
2374 2373 }
2375 2374 }
2376 2375
2377 2376 /* Determine the minimum required size of the upper-layer header */
2378 2377 /* Need to do this for at least the set of ULPs that TX handles. */
2379 2378 switch (protocol) {
2380 2379 case IPPROTO_TCP:
2381 2380 min_ulp_header_length = TCP_MIN_HEADER_LENGTH;
2382 2381 break;
2383 2382 case IPPROTO_SCTP:
2384 2383 min_ulp_header_length = SCTP_COMMON_HDR_LENGTH;
2385 2384 break;
2386 2385 case IPPROTO_UDP:
2387 2386 min_ulp_header_length = UDPH_SIZE;
2388 2387 break;
2389 2388 case IPPROTO_ICMP:
2390 2389 min_ulp_header_length = ICMPH_SIZE;
2391 2390 break;
2392 2391 default:
2393 2392 min_ulp_header_length = 0;
2394 2393 break;
2395 2394 }
2396 2395 /* Make sure we have the min ULP header length */
2397 2396 len = mp->b_wptr - rptr;
2398 2397 if (len < ip_hdr_length + min_ulp_header_length) {
2399 2398 if (ira->ira_pktlen < ip_hdr_length + min_ulp_header_length) {
2400 2399 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
2401 2400 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
2402 2401 freemsg(mp);
2403 2402 return;
2404 2403 }
2405 2404 IP_STAT(ipst, ip_recv_pullup);
2406 2405 ipha = ip_pullup(mp, ip_hdr_length + min_ulp_header_length,
2407 2406 ira);
2408 2407 if (ipha == NULL)
2409 2408 goto discard;
2410 2409 len = mp->b_wptr - rptr;
2411 2410 }
2412 2411
2413 2412 /*
2414 2413 * If trusted extensions then determine the zoneid and TX specific
2415 2414 * ira_flags.
2416 2415 */
2417 2416 if (iraflags & IRAF_SYSTEM_LABELED) {
2418 2417 /* This can update ira->ira_flags and ira->ira_zoneid */
2419 2418 ip_fanout_tx_v4(mp, ipha, protocol, ip_hdr_length, ira);
2420 2419 iraflags = ira->ira_flags;
2421 2420 }
2422 2421
2423 2422
2424 2423 /* Verify ULP checksum. Handles TCP, UDP, and SCTP */
2425 2424 if (iraflags & IRAF_VERIFY_ULP_CKSUM) {
2426 2425 if (!ip_input_cksum_v4(iraflags, mp, ipha, ira)) {
2427 2426 /* Bad checksum. Stats are already incremented */
2428 2427 ip_drop_input("Bad ULP checksum", mp, ill);
2429 2428 freemsg(mp);
2430 2429 return;
2431 2430 }
2432 2431 /* IRAF_SCTP_CSUM_ERR could have been set */
2433 2432 iraflags = ira->ira_flags;
2434 2433 }
2435 2434 switch (protocol) {
2436 2435 case IPPROTO_TCP:
2437 2436 /* For TCP, discard broadcast and multicast packets. */
2438 2437 if (iraflags & IRAF_MULTIBROADCAST)
2439 2438 goto discard;
2440 2439
2441 2440 /* First mblk contains IP+TCP headers per above check */
2442 2441 ASSERT(len >= ip_hdr_length + TCP_MIN_HEADER_LENGTH);
2443 2442
2444 2443 /* TCP options present? */
2445 2444 offset = ((uchar_t *)ipha)[ip_hdr_length + 12] >> 4;
2446 2445 if (offset != 5) {
2447 2446 if (offset < 5)
2448 2447 goto discard;
2449 2448
2450 2449 /*
2451 2450 * There must be TCP options.
2452 2451 * Make sure we can grab them.
2453 2452 */
2454 2453 offset <<= 2;
2455 2454 offset += ip_hdr_length;
2456 2455 if (len < offset) {
2457 2456 if (ira->ira_pktlen < offset) {
2458 2457 BUMP_MIB(ill->ill_ip_mib,
2459 2458 ipIfStatsInTruncatedPkts);
2460 2459 ip_drop_input(
2461 2460 "ipIfStatsInTruncatedPkts",
2462 2461 mp, ill);
2463 2462 freemsg(mp);
2464 2463 return;
2465 2464 }
2466 2465 IP_STAT(ipst, ip_recv_pullup);
2467 2466 ipha = ip_pullup(mp, offset, ira);
2468 2467 if (ipha == NULL)
2469 2468 goto discard;
2470 2469 len = mp->b_wptr - rptr;
2471 2470 }
2472 2471 }
2473 2472
2474 2473 /*
2475 2474 * Pass up a squeue hint to tcp.
2476 2475 * If ira_sqp is already set (this is loopback) we leave it
2477 2476 * alone.
2478 2477 */
2479 2478 if (ira->ira_sqp == NULL) {
2480 2479 ira->ira_sqp = ip_squeue_get(ira->ira_ring);
2481 2480 }
2482 2481
2483 2482 /* Look for AF_INET or AF_INET6 that matches */
2484 2483 connp = ipcl_classify_v4(mp, IPPROTO_TCP, ip_hdr_length,
2485 2484 ira, ipst);
2486 2485 if (connp == NULL) {
2487 2486 /* Send the TH_RST */
2488 2487 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2489 2488 tcp_xmit_listeners_reset(mp, ira, ipst, NULL);
2490 2489 return;
2491 2490 }
2492 2491 if (connp->conn_incoming_ifindex != 0 &&
2493 2492 connp->conn_incoming_ifindex != ira->ira_ruifindex) {
2494 2493 CONN_DEC_REF(connp);
2495 2494
2496 2495 /* Send the TH_RST */
2497 2496 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2498 2497 tcp_xmit_listeners_reset(mp, ira, ipst, NULL);
2499 2498 return;
2500 2499 }
2501 2500 if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) ||
2502 2501 (iraflags & IRAF_IPSEC_SECURE)) {
2503 2502 mp = ipsec_check_inbound_policy(mp, connp,
2504 2503 ipha, NULL, ira);
2505 2504 if (mp == NULL) {
2506 2505 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2507 2506 /* Note that mp is NULL */
2508 2507 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2509 2508 CONN_DEC_REF(connp);
2510 2509 return;
2511 2510 }
2512 2511 }
2513 2512 /* Found a client; up it goes */
2514 2513 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2515 2514 ira->ira_ill = ira->ira_rill = NULL;
2516 2515 if (!IPCL_IS_TCP(connp)) {
2517 2516 /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
2518 2517 (connp->conn_recv)(connp, mp, NULL, ira);
2519 2518 CONN_DEC_REF(connp);
2520 2519 ira->ira_ill = ill;
2521 2520 ira->ira_rill = rill;
2522 2521 return;
2523 2522 }
2524 2523
2525 2524 /*
2526 2525 * We do different processing whether called from
2527 2526 * ip_accept_tcp and we match the target, don't match
2528 2527 * the target, and when we are called by ip_input.
2529 2528 */
2530 2529 if (iraflags & IRAF_TARGET_SQP) {
2531 2530 if (ira->ira_target_sqp == connp->conn_sqp) {
2532 2531 mblk_t *attrmp;
2533 2532
2534 2533 attrmp = ip_recv_attr_to_mblk(ira);
2535 2534 if (attrmp == NULL) {
2536 2535 BUMP_MIB(ill->ill_ip_mib,
2537 2536 ipIfStatsInDiscards);
2538 2537 ip_drop_input("ipIfStatsInDiscards",
2539 2538 mp, ill);
2540 2539 freemsg(mp);
2541 2540 CONN_DEC_REF(connp);
2542 2541 } else {
2543 2542 SET_SQUEUE(attrmp, connp->conn_recv,
2544 2543 connp);
2545 2544 attrmp->b_cont = mp;
2546 2545 ASSERT(ira->ira_target_sqp_mp == NULL);
2547 2546 ira->ira_target_sqp_mp = attrmp;
2548 2547 /*
2549 2548 * Conn ref release when drained from
2550 2549 * the squeue.
2551 2550 */
2552 2551 }
2553 2552 } else {
2554 2553 SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
2555 2554 connp->conn_recv, connp, ira, SQ_FILL,
2556 2555 SQTAG_IP_TCP_INPUT);
2557 2556 }
2558 2557 } else {
2559 2558 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv,
2560 2559 connp, ira, ip_squeue_flag, SQTAG_IP_TCP_INPUT);
2561 2560 }
2562 2561 ira->ira_ill = ill;
2563 2562 ira->ira_rill = rill;
2564 2563 return;
2565 2564
2566 2565 case IPPROTO_SCTP: {
2567 2566 sctp_hdr_t *sctph;
2568 2567 in6_addr_t map_src, map_dst;
2569 2568 uint32_t ports; /* Source and destination ports */
2570 2569 sctp_stack_t *sctps = ipst->ips_netstack->netstack_sctp;
2571 2570
2572 2571 /* For SCTP, discard broadcast and multicast packets. */
2573 2572 if (iraflags & IRAF_MULTIBROADCAST)
2574 2573 goto discard;
2575 2574
2576 2575 /*
2577 2576 * Since there is no SCTP h/w cksum support yet, just
2578 2577 * clear the flag.
2579 2578 */
2580 2579 DB_CKSUMFLAGS(mp) = 0;
2581 2580
2582 2581 /* Length ensured above */
2583 2582 ASSERT(MBLKL(mp) >= ip_hdr_length + SCTP_COMMON_HDR_LENGTH);
2584 2583 sctph = (sctp_hdr_t *)(rptr + ip_hdr_length);
2585 2584
2586 2585 /* get the ports */
2587 2586 ports = *(uint32_t *)&sctph->sh_sport;
2588 2587
2589 2588 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst);
2590 2589 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src);
2591 2590 if (iraflags & IRAF_SCTP_CSUM_ERR) {
2592 2591 /*
2593 2592 * No potential sctp checksum errors go to the Sun
2594 2593 * sctp stack however they might be Adler-32 summed
2595 2594 * packets a userland stack bound to a raw IP socket
2596 2595 * could reasonably use. Note though that Adler-32 is
2597 2596 * a long deprecated algorithm and customer sctp
2598 2597 * networks should eventually migrate to CRC-32 at
2599 2598 * which time this facility should be removed.
2600 2599 */
2601 2600 ip_fanout_sctp_raw(mp, ipha, NULL, ports, ira);
2602 2601 return;
2603 2602 }
2604 2603 connp = sctp_fanout(&map_src, &map_dst, ports, ira, mp,
2605 2604 sctps, sctph);
2606 2605 if (connp == NULL) {
2607 2606 /* Check for raw socket or OOTB handling */
2608 2607 ip_fanout_sctp_raw(mp, ipha, NULL, ports, ira);
2609 2608 return;
2610 2609 }
2611 2610 if (connp->conn_incoming_ifindex != 0 &&
2612 2611 connp->conn_incoming_ifindex != ira->ira_ruifindex) {
2613 2612 CONN_DEC_REF(connp);
2614 2613 /* Check for raw socket or OOTB handling */
2615 2614 ip_fanout_sctp_raw(mp, ipha, NULL, ports, ira);
2616 2615 return;
2617 2616 }
2618 2617
2619 2618 /* Found a client; up it goes */
2620 2619 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2621 2620 sctp_input(connp, ipha, NULL, mp, ira);
2622 2621 /* sctp_input does a rele of the sctp_t */
2623 2622 return;
2624 2623 }
2625 2624
2626 2625 case IPPROTO_UDP:
2627 2626 /* First mblk contains IP+UDP headers as checked above */
2628 2627 ASSERT(MBLKL(mp) >= ip_hdr_length + UDPH_SIZE);
2629 2628
2630 2629 if (iraflags & IRAF_MULTIBROADCAST) {
2631 2630 uint16_t *up; /* Pointer to ports in ULP header */
2632 2631
2633 2632 up = (uint16_t *)((uchar_t *)ipha + ip_hdr_length);
2634 2633 ip_fanout_udp_multi_v4(mp, ipha, up[1], up[0], ira);
2635 2634 return;
2636 2635 }
2637 2636
2638 2637 /* Look for AF_INET or AF_INET6 that matches */
2639 2638 connp = ipcl_classify_v4(mp, IPPROTO_UDP, ip_hdr_length,
2640 2639 ira, ipst);
2641 2640 if (connp == NULL) {
2642 2641 no_udp_match:
2643 2642 if (ipst->ips_ipcl_proto_fanout_v4[IPPROTO_UDP].
2644 2643 connf_head != NULL) {
2645 2644 ASSERT(ira->ira_protocol == IPPROTO_UDP);
2646 2645 ip_fanout_proto_v4(mp, ipha, ira);
2647 2646 } else {
2648 2647 ip_fanout_send_icmp_v4(mp,
2649 2648 ICMP_DEST_UNREACHABLE,
2650 2649 ICMP_PORT_UNREACHABLE, ira);
2651 2650 }
2652 2651 return;
2653 2652
2654 2653 }
2655 2654 if (connp->conn_incoming_ifindex != 0 &&
2656 2655 connp->conn_incoming_ifindex != ira->ira_ruifindex) {
2657 2656 CONN_DEC_REF(connp);
2658 2657 goto no_udp_match;
2659 2658 }
2660 2659 if (IPCL_IS_NONSTR(connp) ? connp->conn_flow_cntrld :
2661 2660 !canputnext(connp->conn_rq)) {
2662 2661 CONN_DEC_REF(connp);
2663 2662 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows);
2664 2663 ip_drop_input("udpIfStatsInOverflows", mp, ill);
2665 2664 freemsg(mp);
2666 2665 return;
2667 2666 }
2668 2667 if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) ||
2669 2668 (iraflags & IRAF_IPSEC_SECURE)) {
2670 2669 mp = ipsec_check_inbound_policy(mp, connp,
2671 2670 ipha, NULL, ira);
2672 2671 if (mp == NULL) {
2673 2672 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2674 2673 /* Note that mp is NULL */
2675 2674 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2676 2675 CONN_DEC_REF(connp);
2677 2676 return;
2678 2677 }
2679 2678 }
2680 2679 /*
2681 2680 * Remove 0-spi if it's 0, or move everything behind
2682 2681 * the UDP header over it and forward to ESP via
2683 2682 * ip_fanout_v4().
2684 2683 */
2685 2684 if (connp->conn_udp->udp_nat_t_endpoint) {
2686 2685 if (iraflags & IRAF_IPSEC_SECURE) {
2687 2686 ip_drop_packet(mp, B_TRUE, ira->ira_ill,
2688 2687 DROPPER(ipss, ipds_esp_nat_t_ipsec),
2689 2688 &ipss->ipsec_dropper);
2690 2689 CONN_DEC_REF(connp);
2691 2690 return;
2692 2691 }
2693 2692
2694 2693 mp = zero_spi_check(mp, ira);
2695 2694 if (mp == NULL) {
2696 2695 /*
2697 2696 * Packet was consumed - probably sent to
2698 2697 * ip_fanout_v4.
2699 2698 */
2700 2699 CONN_DEC_REF(connp);
2701 2700 return;
2702 2701 }
2703 2702 /* Else continue like a normal UDP packet. */
2704 2703 ipha = (ipha_t *)mp->b_rptr;
2705 2704 protocol = ipha->ipha_protocol;
2706 2705 ira->ira_protocol = protocol;
2707 2706 }
2708 2707 /* Found a client; up it goes */
2709 2708 IP_STAT(ipst, ip_udp_fannorm);
2710 2709 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2711 2710 ira->ira_ill = ira->ira_rill = NULL;
2712 2711 (connp->conn_recv)(connp, mp, NULL, ira);
2713 2712 CONN_DEC_REF(connp);
2714 2713 ira->ira_ill = ill;
2715 2714 ira->ira_rill = rill;
2716 2715 return;
2717 2716 default:
2718 2717 break;
2719 2718 }
2720 2719
2721 2720 /*
2722 2721 * Clear hardware checksumming flag as it is currently only
2723 2722 * used by TCP and UDP.
2724 2723 */
2725 2724 DB_CKSUMFLAGS(mp) = 0;
2726 2725
2727 2726 switch (protocol) {
2728 2727 case IPPROTO_ICMP:
2729 2728 /*
2730 2729 * We need to accomodate icmp messages coming in clear
2731 2730 * until we get everything secure from the wire. If
2732 2731 * icmp_accept_clear_messages is zero we check with
2733 2732 * the global policy and act accordingly. If it is
2734 2733 * non-zero, we accept the message without any checks.
2735 2734 * But *this does not mean* that this will be delivered
2736 2735 * to RAW socket clients. By accepting we might send
2737 2736 * replies back, change our MTU value etc.,
2738 2737 * but delivery to the ULP/clients depends on their
2739 2738 * policy dispositions.
2740 2739 */
2741 2740 if (ipst->ips_icmp_accept_clear_messages == 0) {
2742 2741 mp = ipsec_check_global_policy(mp, NULL,
2743 2742 ipha, NULL, ira, ns);
2744 2743 if (mp == NULL)
2745 2744 return;
2746 2745 }
2747 2746
2748 2747 /*
2749 2748 * On a labeled system, we have to check whether the zone
2750 2749 * itself is permitted to receive raw traffic.
2751 2750 */
2752 2751 if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
2753 2752 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) {
2754 2753 BUMP_MIB(&ipst->ips_icmp_mib, icmpInErrors);
2755 2754 ip_drop_input("tsol_can_accept_raw", mp, ill);
2756 2755 freemsg(mp);
2757 2756 return;
2758 2757 }
2759 2758 }
2760 2759
2761 2760 /*
2762 2761 * ICMP header checksum, including checksum field,
2763 2762 * should be zero.
2764 2763 */
2765 2764 if (IP_CSUM(mp, ip_hdr_length, 0)) {
2766 2765 BUMP_MIB(&ipst->ips_icmp_mib, icmpInCksumErrs);
2767 2766 ip_drop_input("icmpInCksumErrs", mp, ill);
2768 2767 freemsg(mp);
2769 2768 return;
2770 2769 }
2771 2770 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2772 2771 mp = icmp_inbound_v4(mp, ira);
2773 2772 if (mp == NULL) {
2774 2773 /* No need to pass to RAW sockets */
2775 2774 return;
2776 2775 }
2777 2776 break;
2778 2777
2779 2778 case IPPROTO_IGMP:
2780 2779 /*
2781 2780 * If we are not willing to accept IGMP packets in clear,
2782 2781 * then check with global policy.
2783 2782 */
2784 2783 if (ipst->ips_igmp_accept_clear_messages == 0) {
2785 2784 mp = ipsec_check_global_policy(mp, NULL,
2786 2785 ipha, NULL, ira, ns);
2787 2786 if (mp == NULL)
2788 2787 return;
2789 2788 }
2790 2789 if ((ira->ira_flags & IRAF_SYSTEM_LABELED) &&
2791 2790 !tsol_can_accept_raw(mp, ira, B_TRUE)) {
2792 2791 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2793 2792 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2794 2793 freemsg(mp);
2795 2794 return;
2796 2795 }
2797 2796 /*
2798 2797 * Validate checksum
2799 2798 */
2800 2799 if (IP_CSUM(mp, ip_hdr_length, 0)) {
2801 2800 ++ipst->ips_igmpstat.igps_rcv_badsum;
2802 2801 ip_drop_input("igps_rcv_badsum", mp, ill);
2803 2802 freemsg(mp);
2804 2803 return;
2805 2804 }
2806 2805
2807 2806 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2808 2807 mp = igmp_input(mp, ira);
2809 2808 if (mp == NULL) {
2810 2809 /* Bad packet - discarded by igmp_input */
2811 2810 return;
2812 2811 }
2813 2812 break;
2814 2813 case IPPROTO_PIM:
2815 2814 /*
2816 2815 * If we are not willing to accept PIM packets in clear,
2817 2816 * then check with global policy.
2818 2817 */
2819 2818 if (ipst->ips_pim_accept_clear_messages == 0) {
2820 2819 mp = ipsec_check_global_policy(mp, NULL,
2821 2820 ipha, NULL, ira, ns);
2822 2821 if (mp == NULL)
2823 2822 return;
2824 2823 }
2825 2824 if ((ira->ira_flags & IRAF_SYSTEM_LABELED) &&
2826 2825 !tsol_can_accept_raw(mp, ira, B_TRUE)) {
2827 2826 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2828 2827 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2829 2828 freemsg(mp);
2830 2829 return;
2831 2830 }
2832 2831 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2833 2832
2834 2833 /* Checksum is verified in pim_input */
2835 2834 mp = pim_input(mp, ira);
2836 2835 if (mp == NULL) {
2837 2836 /* Bad packet - discarded by pim_input */
2838 2837 return;
2839 2838 }
2840 2839 break;
2841 2840 case IPPROTO_AH:
2842 2841 case IPPROTO_ESP: {
2843 2842 /*
2844 2843 * Fast path for AH/ESP.
2845 2844 */
2846 2845 netstack_t *ns = ipst->ips_netstack;
2847 2846 ipsec_stack_t *ipss = ns->netstack_ipsec;
2848 2847
2849 2848 IP_STAT(ipst, ipsec_proto_ahesp);
2850 2849
2851 2850 if (!ipsec_loaded(ipss)) {
2852 2851 ip_proto_not_sup(mp, ira);
2853 2852 return;
2854 2853 }
2855 2854
2856 2855 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2857 2856 /* select inbound SA and have IPsec process the pkt */
2858 2857 if (protocol == IPPROTO_ESP) {
2859 2858 esph_t *esph;
2860 2859 boolean_t esp_in_udp_sa;
2861 2860 boolean_t esp_in_udp_packet;
2862 2861
2863 2862 mp = ipsec_inbound_esp_sa(mp, ira, &esph);
2864 2863 if (mp == NULL)
2865 2864 return;
2866 2865
2867 2866 ASSERT(esph != NULL);
2868 2867 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
2869 2868 ASSERT(ira->ira_ipsec_esp_sa != NULL);
2870 2869 ASSERT(ira->ira_ipsec_esp_sa->ipsa_input_func != NULL);
2871 2870
2872 2871 esp_in_udp_sa = ((ira->ira_ipsec_esp_sa->ipsa_flags &
2873 2872 IPSA_F_NATT) != 0);
2874 2873 esp_in_udp_packet =
2875 2874 (ira->ira_flags & IRAF_ESP_UDP_PORTS) != 0;
2876 2875
2877 2876 /*
2878 2877 * The following is a fancy, but quick, way of saying:
2879 2878 * ESP-in-UDP SA and Raw ESP packet --> drop
2880 2879 * OR
2881 2880 * ESP SA and ESP-in-UDP packet --> drop
2882 2881 */
2883 2882 if (esp_in_udp_sa != esp_in_udp_packet) {
2884 2883 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2885 2884 ip_drop_packet(mp, B_TRUE, ira->ira_ill,
2886 2885 DROPPER(ipss, ipds_esp_no_sa),
2887 2886 &ipss->ipsec_dropper);
2888 2887 return;
2889 2888 }
2890 2889 mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph,
2891 2890 ira);
2892 2891 } else {
2893 2892 ah_t *ah;
2894 2893
2895 2894 mp = ipsec_inbound_ah_sa(mp, ira, &ah);
2896 2895 if (mp == NULL)
2897 2896 return;
2898 2897
2899 2898 ASSERT(ah != NULL);
2900 2899 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
2901 2900 ASSERT(ira->ira_ipsec_ah_sa != NULL);
2902 2901 ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL);
2903 2902 mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah,
2904 2903 ira);
2905 2904 }
2906 2905
2907 2906 if (mp == NULL) {
2908 2907 /*
2909 2908 * Either it failed or is pending. In the former case
2910 2909 * ipIfStatsInDiscards was increased.
2911 2910 */
2912 2911 return;
2913 2912 }
2914 2913 /* we're done with IPsec processing, send it up */
2915 2914 ip_input_post_ipsec(mp, ira);
2916 2915 return;
2917 2916 }
2918 2917 case IPPROTO_ENCAP: {
2919 2918 ipha_t *inner_ipha;
2920 2919
2921 2920 /*
2922 2921 * Handle self-encapsulated packets (IP-in-IP where
2923 2922 * the inner addresses == the outer addresses).
2924 2923 */
2925 2924 if ((uchar_t *)ipha + ip_hdr_length + sizeof (ipha_t) >
2926 2925 mp->b_wptr) {
2927 2926 if (ira->ira_pktlen <
2928 2927 ip_hdr_length + sizeof (ipha_t)) {
2929 2928 BUMP_MIB(ill->ill_ip_mib,
2930 2929 ipIfStatsInTruncatedPkts);
2931 2930 ip_drop_input("ipIfStatsInTruncatedPkts",
2932 2931 mp, ill);
2933 2932 freemsg(mp);
2934 2933 return;
2935 2934 }
2936 2935 ipha = ip_pullup(mp, (uchar_t *)ipha + ip_hdr_length +
2937 2936 sizeof (ipha_t) - mp->b_rptr, ira);
2938 2937 if (ipha == NULL) {
2939 2938 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2940 2939 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2941 2940 freemsg(mp);
2942 2941 return;
2943 2942 }
2944 2943 }
2945 2944 inner_ipha = (ipha_t *)((uchar_t *)ipha + ip_hdr_length);
2946 2945 /*
2947 2946 * Check the sanity of the inner IP header.
2948 2947 */
2949 2948 if ((IPH_HDR_VERSION(inner_ipha) != IPV4_VERSION)) {
2950 2949 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2951 2950 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2952 2951 freemsg(mp);
2953 2952 return;
2954 2953 }
2955 2954 if (IPH_HDR_LENGTH(inner_ipha) < sizeof (ipha_t)) {
2956 2955 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2957 2956 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2958 2957 freemsg(mp);
2959 2958 return;
2960 2959 }
2961 2960 if (inner_ipha->ipha_src != ipha->ipha_src ||
2962 2961 inner_ipha->ipha_dst != ipha->ipha_dst) {
2963 2962 /* We fallthru to iptun fanout below */
2964 2963 goto iptun;
2965 2964 }
2966 2965
2967 2966 /*
2968 2967 * Self-encapsulated tunnel packet. Remove
2969 2968 * the outer IP header and fanout again.
2970 2969 * We also need to make sure that the inner
2971 2970 * header is pulled up until options.
2972 2971 */
2973 2972 mp->b_rptr = (uchar_t *)inner_ipha;
2974 2973 ipha = inner_ipha;
2975 2974 ip_hdr_length = IPH_HDR_LENGTH(ipha);
2976 2975 if ((uchar_t *)ipha + ip_hdr_length > mp->b_wptr) {
2977 2976 if (ira->ira_pktlen <
2978 2977 (uchar_t *)ipha + ip_hdr_length - mp->b_rptr) {
2979 2978 BUMP_MIB(ill->ill_ip_mib,
2980 2979 ipIfStatsInTruncatedPkts);
2981 2980 ip_drop_input("ipIfStatsInTruncatedPkts",
2982 2981 mp, ill);
2983 2982 freemsg(mp);
2984 2983 return;
2985 2984 }
2986 2985 ipha = ip_pullup(mp,
2987 2986 (uchar_t *)ipha + ip_hdr_length - mp->b_rptr, ira);
2988 2987 if (ipha == NULL) {
2989 2988 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2990 2989 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2991 2990 freemsg(mp);
2992 2991 return;
2993 2992 }
2994 2993 }
2995 2994 if (ip_hdr_length > sizeof (ipha_t)) {
2996 2995 /* We got options on the inner packet. */
2997 2996 ipaddr_t dst = ipha->ipha_dst;
2998 2997 int error = 0;
2999 2998
3000 2999 dst = ip_input_options(ipha, dst, mp, ira, &error);
3001 3000 if (error != 0) {
3002 3001 /*
3003 3002 * An ICMP error has been sent and the packet
3004 3003 * has been dropped.
3005 3004 */
3006 3005 return;
3007 3006 }
3008 3007 if (dst != ipha->ipha_dst) {
3009 3008 /*
3010 3009 * Someone put a source-route in
3011 3010 * the inside header of a self-
3012 3011 * encapsulated packet. Drop it
3013 3012 * with extreme prejudice and let
3014 3013 * the sender know.
3015 3014 */
3016 3015 ip_drop_input("ICMP_SOURCE_ROUTE_FAILED",
3017 3016 mp, ill);
3018 3017 icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED,
3019 3018 ira);
3020 3019 return;
3021 3020 }
3022 3021 }
3023 3022 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) {
3024 3023 /*
3025 3024 * This means that somebody is sending
3026 3025 * Self-encapsualted packets without AH/ESP.
3027 3026 *
3028 3027 * Send this packet to find a tunnel endpoint.
3029 3028 * if I can't find one, an ICMP
3030 3029 * PROTOCOL_UNREACHABLE will get sent.
3031 3030 */
3032 3031 protocol = ipha->ipha_protocol;
3033 3032 ira->ira_protocol = protocol;
3034 3033 goto iptun;
3035 3034 }
3036 3035
3037 3036 /* Update based on removed IP header */
3038 3037 ira->ira_ip_hdr_length = ip_hdr_length;
3039 3038 ira->ira_pktlen = ntohs(ipha->ipha_length);
3040 3039
3041 3040 if (ira->ira_flags & IRAF_IPSEC_DECAPS) {
3042 3041 /*
3043 3042 * This packet is self-encapsulated multiple
3044 3043 * times. We don't want to recurse infinitely.
3045 3044 * To keep it simple, drop the packet.
3046 3045 */
3047 3046 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3048 3047 ip_drop_input("ipIfStatsInDiscards", mp, ill);
3049 3048 freemsg(mp);
3050 3049 return;
3051 3050 }
3052 3051 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
3053 3052 ira->ira_flags |= IRAF_IPSEC_DECAPS;
3054 3053
3055 3054 ip_input_post_ipsec(mp, ira);
3056 3055 return;
3057 3056 }
3058 3057
3059 3058 iptun: /* IPPROTO_ENCAPS that is not self-encapsulated */
3060 3059 case IPPROTO_IPV6:
3061 3060 /* iptun will verify trusted label */
3062 3061 connp = ipcl_classify_v4(mp, protocol, ip_hdr_length,
3063 3062 ira, ipst);
3064 3063 if (connp != NULL) {
3065 3064 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
3066 3065 ira->ira_ill = ira->ira_rill = NULL;
3067 3066 (connp->conn_recv)(connp, mp, NULL, ira);
3068 3067 CONN_DEC_REF(connp);
3069 3068 ira->ira_ill = ill;
3070 3069 ira->ira_rill = rill;
3071 3070 return;
3072 3071 }
3073 3072 /* FALLTHRU */
3074 3073 default:
3075 3074 /*
3076 3075 * On a labeled system, we have to check whether the zone
3077 3076 * itself is permitted to receive raw traffic.
3078 3077 */
3079 3078 if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
3080 3079 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) {
3081 3080 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3082 3081 ip_drop_input("ipIfStatsInDiscards", mp, ill);
3083 3082 freemsg(mp);
3084 3083 return;
3085 3084 }
3086 3085 }
3087 3086 break;
3088 3087 }
3089 3088
3090 3089 /*
3091 3090 * The above input functions may have returned the pulled up message.
3092 3091 * So ipha need to be reinitialized.
3093 3092 */
3094 3093 ipha = (ipha_t *)mp->b_rptr;
3095 3094 ira->ira_protocol = protocol = ipha->ipha_protocol;
3096 3095 if (ipst->ips_ipcl_proto_fanout_v4[protocol].connf_head == NULL) {
3097 3096 /*
3098 3097 * No user-level listener for these packets packets.
3099 3098 * Check for IPPROTO_ENCAP...
3100 3099 */
3101 3100 if (protocol == IPPROTO_ENCAP && ipst->ips_ip_g_mrouter) {
3102 3101 /*
3103 3102 * Check policy here,
3104 3103 * THEN ship off to ip_mroute_decap().
3105 3104 *
3106 3105 * BTW, If I match a configured IP-in-IP
3107 3106 * tunnel above, this path will not be reached, and
3108 3107 * ip_mroute_decap will never be called.
3109 3108 */
3110 3109 mp = ipsec_check_global_policy(mp, connp,
3111 3110 ipha, NULL, ira, ns);
3112 3111 if (mp != NULL) {
3113 3112 ip_mroute_decap(mp, ira);
3114 3113 } /* Else we already freed everything! */
3115 3114 } else {
3116 3115 ip_proto_not_sup(mp, ira);
3117 3116 }
3118 3117 return;
3119 3118 }
3120 3119
3121 3120 /*
3122 3121 * Handle fanout to raw sockets. There
3123 3122 * can be more than one stream bound to a particular
3124 3123 * protocol. When this is the case, each one gets a copy
3125 3124 * of any incoming packets.
3126 3125 */
3127 3126 ASSERT(ira->ira_protocol == ipha->ipha_protocol);
3128 3127 ip_fanout_proto_v4(mp, ipha, ira);
3129 3128 return;
3130 3129
3131 3130 discard:
3132 3131 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3133 3132 ip_drop_input("ipIfStatsInDiscards", mp, ill);
3134 3133 freemsg(mp);
3135 3134 #undef rptr
3136 3135 }
↓ open down ↓ |
2972 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX