1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 24 * 25 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 26 * Copyright 2018 Joyent, Inc. 27 */ 28 /* Copyright (c) 1990 Mentat Inc. 
*/ 29 30 #include <sys/types.h> 31 #include <sys/stream.h> 32 #include <sys/dlpi.h> 33 #include <sys/stropts.h> 34 #include <sys/sysmacros.h> 35 #include <sys/strsubr.h> 36 #include <sys/strlog.h> 37 #include <sys/strsun.h> 38 #include <sys/zone.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/xti_inet.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/cmn_err.h> 45 #include <sys/debug.h> 46 #include <sys/kobj.h> 47 #include <sys/modctl.h> 48 #include <sys/atomic.h> 49 #include <sys/policy.h> 50 #include <sys/priv.h> 51 52 #include <sys/systm.h> 53 #include <sys/param.h> 54 #include <sys/kmem.h> 55 #include <sys/sdt.h> 56 #include <sys/socket.h> 57 #include <sys/vtrace.h> 58 #include <sys/isa_defs.h> 59 #include <sys/mac.h> 60 #include <net/if.h> 61 #include <net/if_arp.h> 62 #include <net/route.h> 63 #include <sys/sockio.h> 64 #include <netinet/in.h> 65 #include <net/if_dl.h> 66 67 #include <inet/common.h> 68 #include <inet/mi.h> 69 #include <inet/mib2.h> 70 #include <inet/nd.h> 71 #include <inet/arp.h> 72 #include <inet/snmpcom.h> 73 #include <inet/kstatcom.h> 74 75 #include <netinet/igmp_var.h> 76 #include <netinet/ip6.h> 77 #include <netinet/icmp6.h> 78 #include <netinet/sctp.h> 79 80 #include <inet/ip.h> 81 #include <inet/ip_impl.h> 82 #include <inet/ip6.h> 83 #include <inet/ip6_asp.h> 84 #include <inet/optcom.h> 85 #include <inet/tcp.h> 86 #include <inet/tcp_impl.h> 87 #include <inet/ip_multi.h> 88 #include <inet/ip_if.h> 89 #include <inet/ip_ire.h> 90 #include <inet/ip_ftable.h> 91 #include <inet/ip_rts.h> 92 #include <inet/ip_ndp.h> 93 #include <inet/ip_listutils.h> 94 #include <netinet/igmp.h> 95 #include <netinet/ip_mroute.h> 96 #include <inet/ipp_common.h> 97 98 #include <net/pfkeyv2.h> 99 #include <inet/sadb.h> 100 #include <inet/ipsec_impl.h> 101 #include <inet/ipdrop.h> 102 #include <inet/ip_netinfo.h> 103 #include <inet/ilb_ip.h> 104 #include <sys/squeue_impl.h> 105 #include <sys/squeue.h> 106 107 
#include <sys/ethernet.h>
#include <net/if_types.h>
#include <sys/cpuvar.h>

#include <ipp/ipp.h>
#include <ipp/ipp_impl.h>
#include <ipp/ipgpc/ipgpc.h>

#include <sys/pattr.h>
#include <inet/ipclassifier.h>
#include <inet/sctp_ip.h>
#include <inet/sctp/sctp_impl.h>
#include <inet/udp_impl.h>
#include <sys/sunddi.h>

#include <sys/tsol/label.h>
#include <sys/tsol/tnet.h>

#include <sys/clock_impl.h>	/* For LBOLT_FASTPATH{,64} */

#ifdef	DEBUG
extern boolean_t skip_sctp_cksum;
#endif

static void	ip_input_local_v4(ire_t *, mblk_t *, ipha_t *,
    ip_recv_attr_t *);

static void	ip_input_broadcast_v4(ire_t *, mblk_t *, ipha_t *,
    ip_recv_attr_t *);
static void	ip_input_multicast_v4(ire_t *, mblk_t *, ipha_t *,
    ip_recv_attr_t *);

#pragma inline(ip_input_common_v4, ip_input_local_v4, ip_forward_xmit_v4)

/*
 * Direct read side procedure capable of dealing with chains. GLDv3 based
 * drivers call this function directly with mblk chains while STREAMS
 * read side procedure ip_rput() calls this for single packet with ip_ring
 * set to NULL to process one packet at a time.
 *
 * The ill will always be valid if this function is called directly from
 * the driver.
 *
 * If this chain is part of a VLAN stream, then the VLAN tag is
 * stripped from the MAC header before being delivered to this
 * function.
 *
 * If the IP header in packet is not 32-bit aligned, every message in the
 * chain will be aligned before further operations. This is required on SPARC
 * platform.
 *
 * This is a thin wrapper: it calls ip_input_common_v4() with no target
 * squeue and no last/cnt out-parameters, and discards the return value.
 */
void
ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
    struct mac_header_info_s *mhip)
{
	(void) ip_input_common_v4(ill, ip_ring, mp_chain, mhip, NULL, NULL,
	    NULL);
}

/*
 * ip_accept_tcp() - This function is called by the squeue when it retrieves
 * a chain of packets in the poll mode.  The packets have gone through the
 * data link processing but not IP processing. For performance and latency
 * reasons, the squeue wants to process the chain in line instead of feeding
 * it back via ip_input path.
 *
 * We set up the ip_recv_attr_t with IRAF_TARGET_SQP so that ip_fanout_v4
 * will pass back any TCP packets matching the target sqp to
 * ip_input_common_v4 using ira_target_sqp_mp. Other packets are handled by
 * ip_input_v4 and ip_fanout_v4 as normal.
 * The TCP packets that match the target squeue are returned to the caller
 * as a b_next chain after each packet has been prepended with an mblk
 * from ip_recv_attr_to_mblk.
 *
 * This is a thin wrapper around ip_input_common_v4() that passes a NULL
 * mhip. See ip_input_common_v4() for when *last and *cnt are written.
 */
mblk_t *
ip_accept_tcp(ill_t *ill, ill_rx_ring_t *ip_ring, squeue_t *target_sqp,
    mblk_t *mp_chain, mblk_t **last, uint_t *cnt)
{
	return (ip_input_common_v4(ill, ip_ring, mp_chain, NULL, target_sqp,
	    last, cnt));
}

/*
 * Used by ip_input and ip_accept_tcp
 * The last three arguments are only used by ip_accept_tcp, and mhip is
 * only used by ip_input.
 *
 * Walks the b_next chain starting at mp_chain, unlinking each packet,
 * performing the basic per-packet sanity checks (db_ref, alignment, IP
 * version, zero destination) and handing the packet to ill->ill_inputfn.
 *
 * Arguments:
 *	ill		receiving ill; must not be NULL (asserted).
 *	ip_ring		rx ring the chain arrived on; stored in ira_ring and
 *			used to derive ira_xmit_hint.
 *	mp_chain	b_next chain of M_DATA packets; must not be NULL
 *			(asserted).
 *	mhip		optional MAC header info. Only honored for the first
 *			packet that reaches the end of a loop iteration or is
 *			dropped; ira_mhip is reset to NULL after that.
 *	target_sqp	when non-NULL, IRAF_TARGET_SQP is set for every
 *			packet in the chain.
 *	last, cnt	out-parameters; written only when at least one packet
 *			was accepted (i.e. when the return value is non-NULL).
 *
 * Returns the head of the b_next chain of accepted packets collected from
 * ira_target_sqp_mp, or NULL if none were accepted.
 */
mblk_t *
ip_input_common_v4(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
    struct mac_header_info_s *mhip, squeue_t *target_sqp,
    mblk_t **last, uint_t *cnt)
{
	mblk_t		*mp;
	ipha_t		*ipha;
	ip_recv_attr_t	iras;	/* Receive attributes */
	rtc_t		rtc;
	iaflags_t	chain_flags = 0;	/* Fixed for chain */
	mblk_t		*ahead = NULL;	/* Accepted head */
	mblk_t		*atail = NULL;	/* Accepted tail */
	uint_t		acnt = 0;	/* Accepted count */

	ASSERT(mp_chain != NULL);
	ASSERT(ill != NULL);

	/* These ones do not change as we loop over packets */
	iras.ira_ill = iras.ira_rill = ill;
	iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex;
	iras.ira_rifindex = iras.ira_ruifindex;
	iras.ira_sqp = NULL;
	iras.ira_ring = ip_ring;
	/* For ECMP and outbound transmit ring selection */
	iras.ira_xmit_hint = ILL_RING_TO_XMIT_HINT(ip_ring);

	iras.ira_target_sqp = target_sqp;
	iras.ira_target_sqp_mp = NULL;
	if (target_sqp != NULL)
		chain_flags |= IRAF_TARGET_SQP;

	/*
	 * We try to have a mhip pointer when possible, but
	 * it might be NULL in some cases. In those cases we
	 * have to assume unicast.
	 */
	iras.ira_mhip = mhip;
	iras.ira_flags = 0;
	if (mhip != NULL) {
		/* Any other destination type leaves the L2DST flags clear. */
		switch (mhip->mhi_dsttype) {
		case MAC_ADDRTYPE_MULTICAST :
			chain_flags |= IRAF_L2DST_MULTICAST;
			break;
		case MAC_ADDRTYPE_BROADCAST :
			chain_flags |= IRAF_L2DST_BROADCAST;
			break;
		}
	}

	/*
	 * Initialize the one-element route cache.
	 *
	 * We do ire caching from one iteration to
	 * another. In the event the packet chain contains
	 * all packets from the same dst, this caching saves
	 * an ire_route_recursive for each of the succeeding
	 * packets in a packet chain.
	 */
	rtc.rtc_ire = NULL;
	rtc.rtc_ipaddr = INADDR_ANY;

	/* Loop over b_next */
	for (mp = mp_chain; mp != NULL; mp = mp_chain) {
		/* Unlink the packet; mp_chain now heads the remainder. */
		mp_chain = mp->b_next;
		mp->b_next = NULL;

		ASSERT(DB_TYPE(mp) == M_DATA);


		/*
		 * if db_ref > 1 then copymsg and free original. Packet
		 * may be changed and we do not want the other entity
		 * who has a reference to this message to trip over the
		 * changes. This is a blind change because trying to
		 * catch all places that might change the packet is too
		 * difficult.
		 *
		 * This corresponds to the fast path case, where we have
		 * a chain of M_DATA mblks.  We check the db_ref count
		 * of only the 1st data block in the mblk chain. There
		 * doesn't seem to be a reason why a device driver would
		 * send up data with varying db_ref counts in the mblk
		 * chain. In any case the Fast path is a private
		 * interface, and our drivers don't do such a thing.
		 * Given the above assumption, there is no need to walk
		 * down the entire mblk chain (which could have a
		 * potential performance problem)
		 *
		 * The "(DB_REF(mp) > 1)" check was moved from ip_rput()
		 * to here because of exclusive ip stacks and vnics.
		 * Packets transmitted from exclusive stack over vnic
		 * can have db_ref > 1 and when it gets looped back to
		 * another vnic in a different zone, you have ip_input()
		 * getting dblks with db_ref > 1. So if someone
		 * complains of TCP performance under this scenario,
		 * take a serious look here on the impact of copymsg().
		 */
		if (DB_REF(mp) > 1) {
			if ((mp = ip_fix_dbref(mp, &iras)) == NULL) {
				/* mhip might point into 1st packet in chain */
				iras.ira_mhip = NULL;
				continue;
			}
		}

		/*
		 * IP header ptr not aligned?
		 * OR IP header not complete in first mblk
		 */
		ipha = (ipha_t *)mp->b_rptr;
		if (!OK_32PTR(ipha) || MBLKL(mp) < IP_SIMPLE_HDR_LENGTH) {
			mp = ip_check_and_align_header(mp, IP_SIMPLE_HDR_LENGTH,
			    &iras);
			if (mp == NULL) {
				/* mhip might point into 1st packet in chain */
				iras.ira_mhip = NULL;
				continue;
			}
			ipha = (ipha_t *)mp->b_rptr;
		}

		/* Protect against a mix of Ethertypes and IP versions */
		if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
			ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
			freemsg(mp);
			/* mhip might point into 1st packet in the chain. */
			iras.ira_mhip = NULL;
			continue;
		}

		/*
		 * Check for Martian addrs; we have to explicitly
		 * test for zero dst since this is also used as
		 * an indication that the rtc is not used.
		 */
		if (ipha->ipha_dst == INADDR_ANY) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
			freemsg(mp);
			/* mhip might point into 1st packet in the chain. */
			iras.ira_mhip = NULL;
			continue;
		}

		/*
		 * Keep L2SRC from a previous packet in chain since mhip
		 * might point into an earlier packet in the chain.
		 * Keep IRAF_VERIFIED_SRC to avoid redoing broadcast
		 * source check in forwarding path.
		 */
		chain_flags |= (iras.ira_flags &
		    (IRAF_L2SRC_SET|IRAF_VERIFIED_SRC));

		/* Reset the per-packet attributes before each dispatch. */
		iras.ira_flags = IRAF_IS_IPV4 | IRAF_VERIFY_IP_CKSUM |
		    IRAF_VERIFY_ULP_CKSUM | chain_flags;
		iras.ira_free_flags = 0;
		iras.ira_cred = NULL;
		iras.ira_cpid = NOPID;
		iras.ira_tsl = NULL;
		iras.ira_zoneid = ALL_ZONES;	/* Default for forwarding */

		/*
		 * We must count all incoming packets, even if they end
		 * up being dropped later on. Defer counting bytes until
		 * we have the whole IP header in first mblk.
		 */
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives);

		iras.ira_pktlen = ntohs(ipha->ipha_length);
		UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets,
		    iras.ira_pktlen);

		/*
		 * Call one of:
		 *	ill_input_full_v4
		 *	ill_input_short_v4
		 * The former is used in unusual cases. See ill_set_inputfn().
		 */
		(*ill->ill_inputfn)(mp, ipha, &ipha->ipha_dst, &iras, &rtc);

		/* Any references to clean up? No hold on ira_ill */
		if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED))
			ira_cleanup(&iras, B_FALSE);

		if (iras.ira_target_sqp_mp != NULL) {
			/* Better be called from ip_accept_tcp */
			ASSERT(target_sqp != NULL);

			/* Found one packet to accept */
			mp = iras.ira_target_sqp_mp;
			iras.ira_target_sqp_mp = NULL;
			ASSERT(ip_recv_attr_is_mblk(mp));

			/* Append to the accepted b_next chain. */
			if (atail != NULL)
				atail->b_next = mp;
			else
				ahead = mp;
			atail = mp;
			acnt++;
			mp = NULL;
		}
		/* mhip might point into 1st packet in the chain. */
		iras.ira_mhip = NULL;
	}
	/* Any remaining references to the route cache? */
	if (rtc.rtc_ire != NULL) {
		ASSERT(rtc.rtc_ipaddr != INADDR_ANY);
		ire_refrele(rtc.rtc_ire);
	}

	if (ahead != NULL) {
		/* Better be called from ip_accept_tcp */
		ASSERT(target_sqp != NULL);
		/* last/cnt are only dereferenced on this path. */
		*last = atail;
		*cnt = acnt;
		return (ahead);
	}

	return (NULL);
}

/*
 * This input function is used when
 *  - is_system_labeled()
 *  - CGTP filtering
 *  - DHCP unicast before we have an IP address configured
 *  - there is a listener for IPPROTO_RSVP
 *
 * iph_arg points at the IPv4 header and nexthop_arg at an ipaddr_t holding
 * the initial next hop. The function may drop (free) the packet, or it may
 * override the local copy of nexthop and set flags in ira before finishing
 * by calling ill_input_short_v4().
 */
void
ill_input_full_v4(mblk_t *mp, void *iph_arg, void *nexthop_arg,
    ip_recv_attr_t *ira, rtc_t *rtc)
{
	ipha_t		*ipha = (ipha_t *)iph_arg;
	ipaddr_t	nexthop = *(ipaddr_t *)nexthop_arg;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	int		cgtp_flt_pkt;

	ASSERT(ira->ira_tsl == NULL);

	/*
	 * Attach any necessary label information to
	 * this packet
	 */
	if (is_system_labeled()) {
		ira->ira_flags |= IRAF_SYSTEM_LABELED;

		/*
		 * This updates ira_cred, ira_tsl and ira_free_flags based
		 * on the label.
		 */
		if (!tsol_get_pkt_label(mp, IPV4_VERSION, ira)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards", mp, ill);
			freemsg(mp);
			return;
		}
		/* Note that ira_tsl can be NULL here. */

		/* tsol_get_pkt_label sometimes does pullupmsg */
		ipha = (ipha_t *)mp->b_rptr;
	}

	/*
	 * Invoke the CGTP (multirouting) filtering module to process
	 * the incoming packet. Packets identified as duplicates
	 * must be discarded. Filtering is active only if the
	 * ip_cgtp_filter ndd variable is non-zero.
	 */
	cgtp_flt_pkt = CGTP_IP_PKT_NOT_CGTP;
	if (ipst->ips_ip_cgtp_filter &&
	    ipst->ips_ip_cgtp_filter_ops != NULL) {
		netstackid_t stackid;

		stackid = ipst->ips_netstack->netstack_stackid;
		/*
		 * CGTP and IPMP are mutually exclusive so
		 * phyint_ifindex is fine here.
		 */
		cgtp_flt_pkt =
		    ipst->ips_ip_cgtp_filter_ops->cfo_filter(stackid,
		    ill->ill_phyint->phyint_ifindex, mp);
		if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) {
			ip_drop_input("CGTP_IP_PKT_DUPLICATE", mp, ill);
			freemsg(mp);
			return;
		}
	}

	/*
	 * Brutal hack for DHCPv4 unicast: RFC2131 allows a DHCP
	 * server to unicast DHCP packets to a DHCP client using the
	 * IP address it is offering to the client.  This can be
	 * disabled through the "broadcast bit", but not all DHCP
	 * servers honor that bit.  Therefore, to interoperate with as
	 * many DHCP servers as possible, the DHCP client allows the
	 * server to unicast, but we treat those packets as broadcast
	 * here.  Note that we don't rewrite the packet itself since
	 * (a) that would mess up the checksums and (b) the DHCP
	 * client conn is bound to INADDR_ANY so ip_fanout_udp() will
	 * hand it the packet regardless.
	 */
	if (ill->ill_dhcpinit != 0 &&
	    ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION &&
	    ipha->ipha_protocol == IPPROTO_UDP) {
		udpha_t *udpha;

		/* Need the UDP header contiguous with the IP header. */
		ipha = ip_pullup(mp, sizeof (ipha_t) + sizeof (udpha_t), ira);
		if (ipha == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards - dhcp", mp, ill);
			freemsg(mp);
			return;
		}
		/* Reload since pullupmsg() can change b_rptr. */
		udpha = (udpha_t *)&ipha[1];

		if (ntohs(udpha->uha_dst_port) == IPPORT_BOOTPC) {
			DTRACE_PROBE2(ip4__dhcpinit__pkt, ill_t *, ill,
			    mblk_t *, mp);
			/*
			 * This assumes that we deliver to all conns for
			 * multicast and broadcast packets.
			 */
			nexthop = INADDR_BROADCAST;
			ira->ira_flags |= IRAF_DHCP_UNICAST;
		}
	}

	/*
	 * If rsvpd is running, let RSVP daemon handle its processing
	 * and forwarding of RSVP multicast/unicast packets.
	 * If rsvpd is not running but mrouted is running, RSVP
	 * multicast packets are forwarded as multicast traffic
	 * and RSVP unicast packets are forwarded by unicast router.
	 * If neither rsvpd nor mrouted is running, RSVP multicast
	 * packets are not forwarded, but the unicast packets are
	 * forwarded like unicast traffic.
	 */
	if (ipha->ipha_protocol == IPPROTO_RSVP &&
	    ipst->ips_ipcl_proto_fanout_v4[IPPROTO_RSVP].connf_head != NULL) {
		/* RSVP packet and rsvpd running. Treat as ours */
		ip2dbg(("ip_input: RSVP for us: 0x%x\n", ntohl(nexthop)));
		/*
		 * We use a multicast address to get the packet to
		 * ire_recv_multicast_v4. There will not be a membership
		 * check since we set IRAF_RSVP
		 */
		nexthop = htonl(INADDR_UNSPEC_GROUP);
		ira->ira_flags |= IRAF_RSVP;
	}

	/* Continue with the common tail using the (possibly new) nexthop. */
	ill_input_short_v4(mp, ipha, &nexthop, ira, rtc);
}

/*
 * This is the tail-end of the full receive side packet handling.
 * It can be used directly when the configuration is simple.
 *
 * iph_arg points at the IPv4 header and nexthop_arg at an ipaddr_t holding
 * the next hop to route on. rtc is the caller's one-element route cache; it
 * is consulted and updated here, and the ire it holds is released by
 * whoever owns the cache, not by this function.
 *
 * Every early return in this function happens after the packet has either
 * been freed here or been consumed (mp returned as NULL) by a helper.
 */
void
ill_input_short_v4(mblk_t *mp, void *iph_arg, void *nexthop_arg,
    ip_recv_attr_t *ira, rtc_t *rtc)
{
	ire_t		*ire;
	uint_t		opt_len;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	uint_t		pkt_len;
	ssize_t		len;
	ipha_t		*ipha = (ipha_t *)iph_arg;
	ipaddr_t	nexthop = *(ipaddr_t *)nexthop_arg;
	ilb_stack_t	*ilbs = ipst->ips_netstack->netstack_ilb;
	uint_t		irr_flags;
#define	rptr	((uchar_t *)ipha)

	ASSERT(DB_TYPE(mp) == M_DATA);

	/*
	 * The following test for loopback is faster than
	 * IP_LOOPBACK_ADDR(), because it avoids any bitwise
	 * operations.
	 * Note that these addresses are always in network byte order
	 */
	if (((*(uchar_t *)&ipha->ipha_dst) == IN_LOOPBACKNET) ||
	    ((*(uchar_t *)&ipha->ipha_src) == IN_LOOPBACKNET)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
		ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
		freemsg(mp);
		return;
	}

	/* Bytes available in the first mblk, starting at the IP header. */
	len = mp->b_wptr - rptr;
	pkt_len = ira->ira_pktlen;

	/* multiple mblk or too short */
	len -= pkt_len;
	if (len != 0) {
		mp = ip_check_length(mp, rptr, len, pkt_len,
		    IP_SIMPLE_HDR_LENGTH, ira);
		if (mp == NULL)
			return;
		ipha = (ipha_t *)mp->b_rptr;
	}

	DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
	    ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *, NULL,
	    int, 0);

	/*
	 * The event for packets being received from a 'physical'
	 * interface is placed after validation of the source and/or
	 * destination address as being local so that packets can be
	 * redirected to loopback addresses using ipnat.
	 */
	DTRACE_PROBE4(ip4__physical__in__start,
	    ill_t *, ill, ill_t *, NULL,
	    ipha_t *, ipha, mblk_t *, mp);

	if (HOOKS4_INTERESTED_PHYSICAL_IN(ipst)) {
		int	ll_multicast = 0;
		int	error;
		ipaddr_t orig_dst = ipha->ipha_dst;

		if (ira->ira_flags & IRAF_L2DST_MULTICAST)
			ll_multicast = HPE_MULTICAST;
		else if (ira->ira_flags & IRAF_L2DST_BROADCAST)
			ll_multicast = HPE_BROADCAST;

		FW_HOOKS(ipst->ips_ip4_physical_in_event,
		    ipst->ips_ipv4firewall_physical_in,
		    ill, NULL, ipha, mp, mp, ll_multicast, ipst, error);

		DTRACE_PROBE1(ip4__physical__in__end, mblk_t *, mp);

		/* The hook consumed the packet. */
		if (mp == NULL)
			return;
		/* The length could have changed */
		ipha = (ipha_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ipha->ipha_length);
		pkt_len = ira->ira_pktlen;

		/*
		 * In case the destination changed we override any previous
		 * change to nexthop.
		 */
		if (orig_dst != ipha->ipha_dst)
			nexthop = ipha->ipha_dst;
		if (nexthop == INADDR_ANY) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
			freemsg(mp);
			return;
		}
	}

	if (ipst->ips_ip4_observe.he_interested) {
		zoneid_t dzone;

		/*
		 * On the inbound path the src zone will be unknown as
		 * this packet has come from the wire.
		 */
		dzone = ip_get_zoneid_v4(nexthop, mp, ira, ALL_ZONES);
		ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, ipst);
	}

	/*
	 * If there is a good HW IP header checksum we clear the need
	 * to look at the IP header checksum.
	 */
	if ((DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) &&
	    ILL_HCKSUM_CAPABLE(ill) && dohwcksum) {
		/* Header checksum was ok. Clear the flag */
		DB_CKSUMFLAGS(mp) &= ~HCK_IPV4_HDRCKSUM;
		ira->ira_flags &= ~IRAF_VERIFY_IP_CKSUM;
	}

	/*
	 * Here we check to see if the machine is set up as an
	 * L3 loadbalancer and if the incoming packet is for a VIP
	 *
	 * Check the following:
	 * - there is at least a rule
	 * - protocol of the packet is supported
	 */
	if (ilb_has_rules(ilbs) && ILB_SUPP_L4(ipha->ipha_protocol)) {
		ipaddr_t	lb_dst;
		int		lb_ret;

		/* For convenience, we pull up the mblk. */
		if (mp->b_cont != NULL) {
			if (pullupmsg(mp, -1) == 0) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				ip_drop_input("ipIfStatsInDiscards - pullupmsg",
				    mp, ill);
				freemsg(mp);
				return;
			}
			ipha = (ipha_t *)mp->b_rptr;
		}

		/*
		 * We just drop all fragments going to any VIP, at
		 * least for now....
		 */
		if (ntohs(ipha->ipha_fragment_offset_and_flags) &
		    (IPH_MF | IPH_OFFSET)) {
			/* Fragments not aimed at a VIP bypass ILB entirely. */
			if (!ilb_rule_match_vip_v4(ilbs, nexthop, NULL)) {
				goto after_ilb;
			}

			ILB_KSTAT_UPDATE(ilbs, ip_frag_in, 1);
			ILB_KSTAT_UPDATE(ilbs, ip_frag_dropped, 1);
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ILB fragment", mp, ill);
			freemsg(mp);
			return;
		}
		lb_ret = ilb_check_v4(ilbs, ill, mp, ipha, ipha->ipha_protocol,
		    (uint8_t *)ipha + IPH_HDR_LENGTH(ipha), &lb_dst);

		if (lb_ret == ILB_DROPPED) {
			/* Is this the right counter to increase? */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ILB_DROPPED", mp, ill);
			freemsg(mp);
			return;
		}
		if (lb_ret == ILB_BALANCED) {
			/* Set the dst to that of the chosen server */
			nexthop = lb_dst;
			DB_CKSUMFLAGS(mp) = 0;
		}
	}

after_ilb:
	/* Non-zero opt_len means the header is longer than the simple one. */
	opt_len = ipha->ipha_version_and_hdr_length - IP_SIMPLE_HDR_VERSION;
	ira->ira_ip_hdr_length = IP_SIMPLE_HDR_LENGTH;
	if (opt_len != 0) {
		int error = 0;

		/* opt_len counts 32-bit words; convert to bytes. */
		ira->ira_ip_hdr_length += (opt_len << 2);
		ira->ira_flags |= IRAF_IPV4_OPTIONS;

		/* IP Options present!  Validate the length. */
		mp = ip_check_optlen(mp, ipha, opt_len, pkt_len, ira);
		if (mp == NULL)
			return;

		/* Might have changed */
		ipha = (ipha_t *)mp->b_rptr;

		/* Verify IP header checksum before parsing the options */
		if ((ira->ira_flags & IRAF_VERIFY_IP_CKSUM) &&
		    ip_csum_hdr(ipha)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs);
			ip_drop_input("ipIfStatsInCksumErrs", mp, ill);
			freemsg(mp);
			return;
		}
		ira->ira_flags &= ~IRAF_VERIFY_IP_CKSUM;

		/*
		 * Go off to ip_input_options which returns the next hop
		 * destination address, which may have been affected
		 * by source routing.
		 */
		IP_STAT(ipst, ip_opt);

		nexthop = ip_input_options(ipha, nexthop, mp, ira, &error);
		if (error != 0) {
			/*
			 * An ICMP error has been sent and the packet has
			 * been dropped.
			 */
			return;
		}
	}

	if (ill->ill_flags & ILLF_ROUTER)
		irr_flags = IRR_ALLOCATE;
	else
		irr_flags = IRR_NONE;

	/* Can not use route cache with TX since the labels can differ */
	if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
		if (CLASSD(nexthop)) {
			ire = ire_multicast(ill);
		} else {
			/* Match destination and label */
			ire = ire_route_recursive_v4(nexthop, 0, NULL,
			    ALL_ZONES, ira->ira_tsl, MATCH_IRE_SECATTR,
			    irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL,
			    NULL);
		}
		/* Update the route cache so we do the ire_refrele */
		ASSERT(ire != NULL);
		if (rtc->rtc_ire != NULL)
			ire_refrele(rtc->rtc_ire);
		rtc->rtc_ire = ire;
		rtc->rtc_ipaddr = nexthop;
	} else if (nexthop == rtc->rtc_ipaddr && rtc->rtc_ire != NULL) {
		/* Use the route cache */
		ire = rtc->rtc_ire;
	} else {
		/* Update the route cache */
		if (CLASSD(nexthop)) {
			ire = ire_multicast(ill);
		} else {
			/* Just match the destination */
			ire = ire_route_recursive_dstonly_v4(nexthop, irr_flags,
			    ira->ira_xmit_hint, ipst);
		}
		ASSERT(ire != NULL);
		if (rtc->rtc_ire != NULL)
			ire_refrele(rtc->rtc_ire);
		rtc->rtc_ire = ire;
		rtc->rtc_ipaddr = nexthop;
	}

	ire->ire_ib_pkt_count++;

	/*
	 * Based on ire_type and ire_flags call one of:
	 *	ire_recv_local_v4 - for IRE_LOCAL
	 *	ire_recv_loopback_v4 - for IRE_LOOPBACK
	 *	ire_recv_multirt_v4 - if RTF_MULTIRT
	 *	ire_recv_noroute_v4 - if RTF_REJECT or RTF_BLACKHOLE
	 *	ire_recv_multicast_v4 - for IRE_MULTICAST
	 *	ire_recv_broadcast_v4 - for IRE_BROADCAST
	 *	ire_recv_noaccept_v4 - for ire_noaccept ones
	 *	ire_recv_forward_v4 - for the rest.
	 */
	(*ire->ire_recvfn)(ire, mp, ipha, ira);
}
#undef rptr

/*
 * ire_recvfn for IREs that need forwarding
 *
 * iph_arg points at the IPv4 header of mp. The packet is either dropped
 * (freed here or consumed by a helper), re-dispatched through another
 * ire's ire_recvfn, or handed to ip_forward_xmit_v4().
 *
 * Once a reference on the nce has been obtained, every return path in this
 * function releases it with nce_refrele().
 */
void
ire_recv_forward_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
{
	ipha_t		*ipha = (ipha_t *)iph_arg;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	ill_t		*dst_ill;
	nce_t		*nce;
	ipaddr_t	src = ipha->ipha_src;
	uint32_t	added_tx_len;
	uint32_t	mtu, iremtu;

	/* Never forward something that arrived as L2 multicast/broadcast. */
	if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("l2 multicast not forwarded", mp, ill);
		freemsg(mp);
		return;
	}

	if (!(ill->ill_flags & ILLF_ROUTER) && !ip_source_routed(ipha, ipst)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("ipIfStatsForwProhibits", mp, ill);
		freemsg(mp);
		return;
	}

	/*
	 * Either ire_nce_capable or ire_dep_parent would be set for the IRE
	 * when it is found by ire_route_recursive, but that some other thread
	 * could have changed the routes with the effect of clearing
	 * ire_dep_parent. In that case we'd end up dropping the packet, or
	 * finding a new nce below.
	 * Get, allocate, or update the nce.
	 * We get a refhold on ire_nce_cache as a result of this to avoid races
	 * where ire_nce_cache is deleted.
	 *
	 * This ensures that we don't forward if the interface is down since
	 * ipif_down removes all the nces.
	 */
	mutex_enter(&ire->ire_lock);
	nce = ire->ire_nce_cache;
	if (nce == NULL) {
		/* Not yet set up - try to set one up */
		mutex_exit(&ire->ire_lock);
		(void) ire_revalidate_nce(ire);
		/* Re-read under the lock; the revalidate result is ignored. */
		mutex_enter(&ire->ire_lock);
		nce = ire->ire_nce_cache;
		if (nce == NULL) {
			mutex_exit(&ire->ire_lock);
			/* The ire_dep_parent chain went bad, or no memory */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("No ire_dep_parent", mp, ill);
			freemsg(mp);
			return;
		}
	}
	nce_refhold(nce);
	mutex_exit(&ire->ire_lock);

	if (nce->nce_is_condemned) {
		nce_t *nce1;

		/*
		 * NOTE(review): nce1 is released with nce_refrele() on the
		 * paths below, so ire_handle_condemned_nce() presumably
		 * returns a held nce - confirm against its definition.
		 */
		nce1 = ire_handle_condemned_nce(nce, ire, ipha, NULL, B_FALSE);
		nce_refrele(nce);
		if (nce1 == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("No nce", mp, ill);
			freemsg(mp);
			return;
		}
		nce = nce1;
	}
	dst_ill = nce->nce_ill;

	/*
	 * Unless we are forwarding, drop the packet.
	 * We have to let source routed packets through if they go out
	 * the same interface i.e., they are 'ping -l' packets.
	 */
	if (!(dst_ill->ill_flags & ILLF_ROUTER) &&
	    !(ip_source_routed(ipha, ipst) && dst_ill == ill)) {
		if (ip_source_routed(ipha, ipst)) {
			ip_drop_input("ICMP_SOURCE_ROUTE_FAILED", mp, ill);
			icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED, ira);
			nce_refrele(nce);
			return;
		}
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("ipIfStatsForwProhibits", mp, ill);
		freemsg(mp);
		nce_refrele(nce);
		return;
	}

	if (ire->ire_zoneid != GLOBAL_ZONEID && ire->ire_zoneid != ALL_ZONES) {
		ipaddr_t	dst = ipha->ipha_dst;

		/* Undo the inbound count taken on the ire we will not use. */
		ire->ire_ib_pkt_count--;
		/*
		 * Should only use IREs that are visible from the
		 * global zone for forwarding.
		 * Take a source route into account the same way as ip_input
		 * did.
		 */
		if (ira->ira_flags & IRAF_IPV4_OPTIONS) {
			int		error = 0;

			dst = ip_input_options(ipha, dst, mp, ira, &error);
			ASSERT(error == 0);	/* ip_input checked */
		}
		ire = ire_route_recursive_v4(dst, 0, NULL, GLOBAL_ZONEID,
		    ira->ira_tsl, MATCH_IRE_SECATTR,
		    (ill->ill_flags & ILLF_ROUTER) ? IRR_ALLOCATE : IRR_NONE,
		    ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
		ire->ire_ib_pkt_count++;
		/* Re-dispatch on the new ire, then drop both references. */
		(*ire->ire_recvfn)(ire, mp, ipha, ira);
		ire_refrele(ire);
		nce_refrele(nce);
		return;
	}

	/*
	 * ipIfStatsHCInForwDatagrams should only be incremented if there
	 * will be an attempt to forward the packet, which is why we
	 * increment after the above condition has been checked.
	 */
	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);

	/* Initiate Read side IPPF processing */
	if (IPP_ENABLED(IPP_FWD_IN, ipst)) {
		/* ip_process translates an IS_UNDER_IPMP */
		mp = ip_process(IPP_FWD_IN, mp, ill, ill);
		if (mp == NULL) {
			/* ip_drop_packet and MIB done */
			ip2dbg(("ire_recv_forward_v4: pkt dropped/deferred "
			    "during IPPF processing\n"));
			nce_refrele(nce);
			return;
		}
	}

	DTRACE_PROBE4(ip4__forwarding__start,
	    ill_t *, ill, ill_t *, dst_ill, ipha_t *, ipha, mblk_t *, mp);

	if (HOOKS4_INTERESTED_FORWARDING(ipst)) {
		int	error;

		FW_HOOKS(ipst->ips_ip4_forwarding_event,
		    ipst->ips_ipv4firewall_forwarding,
		    ill, dst_ill, ipha, mp, mp, 0, ipst, error);

		DTRACE_PROBE1(ip4__forwarding__end, mblk_t *, mp);

		/* The hook consumed the packet. */
		if (mp == NULL) {
			nce_refrele(nce);
			return;
		}
		/*
		 * Even if the destination was changed by the filter we use the
		 * forwarding decision that was made based on the address
		 * in ip_input.
		 */

		/* Might have changed */
		ipha = (ipha_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ipha->ipha_length);
	}

	/* Packet is being forwarded. Turning off hwcksum flag. */
	DB_CKSUMFLAGS(mp) = 0;

	/*
	 * Martian Address Filtering [RFC 1812, Section 5.3.7]
	 * The loopback address check for both src and dst has already
	 * been checked in ip_input
	 * In the future one can envision adding RPF checks using number 3.
	 * If we already checked the same source address we can skip this.
	 *
	 * Note that src was read from the header on entry, before the
	 * forwarding hooks above ran.
	 */
	if (!(ira->ira_flags & IRAF_VERIFIED_SRC) ||
	    src != ira->ira_verified_src) {
		switch (ipst->ips_src_check) {
		case 0:
			break;
		case 2:
			if (ip_type_v4(src, ipst) == IRE_BROADCAST) {
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsForwProhibits);
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsInAddrErrors);
				ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
				freemsg(mp);
				nce_refrele(nce);
				return;
			}
			/* FALLTHRU */

		case 1:
			if (CLASSD(src)) {
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsForwProhibits);
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsInAddrErrors);
				ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
				freemsg(mp);
				nce_refrele(nce);
				return;
			}
			break;
		}
		/* Remember for next packet */
		ira->ira_flags |= IRAF_VERIFIED_SRC;
		ira->ira_verified_src = src;
	}

	/*
	 * Check if packet is going out the same link on which it arrived.
	 * Means we might need to send a redirect.
	 */
	if (IS_ON_SAME_LAN(dst_ill, ill) && ipst->ips_ip_g_send_redirects) {
		ip_send_potential_redirect_v4(mp, ipha, ire, ira);
	}

	added_tx_len = 0;
	if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
		mblk_t		*mp1;
		uint32_t	old_pkt_len = ira->ira_pktlen;

		/* Verify IP header checksum before adding/removing options */
		if ((ira->ira_flags & IRAF_VERIFY_IP_CKSUM) &&
		    ip_csum_hdr(ipha)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs);
			ip_drop_input("ipIfStatsInCksumErrs", mp, ill);
			freemsg(mp);
			nce_refrele(nce);
			return;
		}
		ira->ira_flags &= ~IRAF_VERIFY_IP_CKSUM;

		/*
		 * Check if it can be forwarded and add/remove
		 * CIPSO options as needed.
		 */
		if ((mp1 = tsol_ip_forward(ire, mp, ira)) == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
			ip_drop_input("tsol_ip_forward", mp, ill);
			freemsg(mp);
			nce_refrele(nce);
			return;
		}
		/*
		 * Size may have changed. Remember amount added in case
		 * IP needs to send an ICMP too big.
		 */
		mp = mp1;
		ipha = (ipha_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ipha->ipha_length);
		ira->ira_ip_hdr_length = IPH_HDR_LENGTH(ipha);
		if (ira->ira_pktlen > old_pkt_len)
			added_tx_len = ira->ira_pktlen - old_pkt_len;

		/* Options can have been added or removed */
		if (ira->ira_ip_hdr_length != IP_SIMPLE_HDR_LENGTH)
			ira->ira_flags |= IRAF_IPV4_OPTIONS;
		else
			ira->ira_flags &= ~IRAF_IPV4_OPTIONS;
	}

	/* Use the smaller of the interface MTU and a non-zero route MTU. */
	mtu = dst_ill->ill_mtu;
	if ((iremtu = ire->ire_metrics.iulp_mtu) != 0 && iremtu < mtu)
		mtu = iremtu;
	ip_forward_xmit_v4(nce, ill, mp, ipha, ira, mtu, added_tx_len);
	nce_refrele(nce);
}

/*
 * Used for sending out unicast and multicast packets that are
 * forwarded.
1112 */ 1113 void 1114 ip_forward_xmit_v4(nce_t *nce, ill_t *ill, mblk_t *mp, ipha_t *ipha, 1115 ip_recv_attr_t *ira, uint32_t mtu, uint32_t added_tx_len) 1116 { 1117 ill_t *dst_ill = nce->nce_ill; 1118 uint32_t pkt_len; 1119 uint32_t sum; 1120 iaflags_t iraflags = ira->ira_flags; 1121 ip_stack_t *ipst = ill->ill_ipst; 1122 iaflags_t ixaflags; 1123 1124 if (ipha->ipha_ttl <= 1) { 1125 /* Perhaps the checksum was bad */ 1126 if ((iraflags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1127 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1128 ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1129 freemsg(mp); 1130 return; 1131 } 1132 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1133 ip_drop_input("ICMP_TTL_EXCEEDED", mp, ill); 1134 icmp_time_exceeded(mp, ICMP_TTL_EXCEEDED, ira); 1135 return; 1136 } 1137 ipha->ipha_ttl--; 1138 /* Adjust the checksum to reflect the ttl decrement. */ 1139 sum = (int)ipha->ipha_hdr_checksum + IP_HDR_CSUM_TTL_ADJUST; 1140 ipha->ipha_hdr_checksum = (uint16_t)(sum + (sum >> 16)); 1141 1142 /* Check if there are options to update */ 1143 if (iraflags & IRAF_IPV4_OPTIONS) { 1144 ASSERT(ipha->ipha_version_and_hdr_length != 1145 IP_SIMPLE_HDR_VERSION); 1146 ASSERT(!(iraflags & IRAF_VERIFY_IP_CKSUM)); 1147 1148 if (!ip_forward_options(mp, ipha, dst_ill, ira)) { 1149 /* ipIfStatsForwProhibits and ip_drop_input done */ 1150 return; 1151 } 1152 1153 ipha->ipha_hdr_checksum = 0; 1154 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1155 } 1156 1157 /* Initiate Write side IPPF processing before any fragmentation */ 1158 if (IPP_ENABLED(IPP_FWD_OUT, ipst)) { 1159 /* ip_process translates an IS_UNDER_IPMP */ 1160 mp = ip_process(IPP_FWD_OUT, mp, dst_ill, dst_ill); 1161 if (mp == NULL) { 1162 /* ip_drop_packet and MIB done */ 1163 ip2dbg(("ire_recv_forward_v4: pkt dropped/deferred" \ 1164 " during IPPF processing\n")); 1165 return; 1166 } 1167 } 1168 1169 pkt_len = ira->ira_pktlen; 1170 1171 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 
1172 1173 ixaflags = IXAF_IS_IPV4 | IXAF_NO_DEV_FLOW_CTL; 1174 1175 if (pkt_len > mtu) { 1176 /* 1177 * It needs fragging on its way out. If we haven't 1178 * verified the header checksum yet we do it now since 1179 * are going to put a surely good checksum in the 1180 * outgoing header, we have to make sure that it 1181 * was good coming in. 1182 */ 1183 if ((iraflags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1184 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1185 ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1186 freemsg(mp); 1187 return; 1188 } 1189 if (ipha->ipha_fragment_offset_and_flags & IPH_DF_HTONS) { 1190 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutFragFails); 1191 ip_drop_output("ipIfStatsOutFragFails", mp, dst_ill); 1192 if (iraflags & IRAF_SYSTEM_LABELED) { 1193 /* 1194 * Remove any CIPSO option added by 1195 * tsol_ip_forward, and make sure we report 1196 * a path MTU so that there 1197 * is room to add such a CIPSO option for future 1198 * packets. 1199 */ 1200 mtu = tsol_pmtu_adjust(mp, mtu, added_tx_len, 1201 AF_INET); 1202 } 1203 1204 icmp_frag_needed(mp, mtu, ira); 1205 return; 1206 } 1207 1208 (void) ip_fragment_v4(mp, nce, ixaflags, pkt_len, mtu, 1209 ira->ira_xmit_hint, GLOBAL_ZONEID, 0, ip_xmit, NULL); 1210 return; 1211 } 1212 1213 ASSERT(pkt_len == ntohs(((ipha_t *)mp->b_rptr)->ipha_length)); 1214 if (iraflags & IRAF_LOOPBACK_COPY) { 1215 /* 1216 * IXAF_NO_LOOP_ZONEID is not set hence 7th arg 1217 * is don't care 1218 */ 1219 (void) ip_postfrag_loopcheck(mp, nce, 1220 ixaflags | IXAF_LOOPBACK_COPY, 1221 pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL); 1222 } else { 1223 (void) ip_xmit(mp, nce, ixaflags, pkt_len, ira->ira_xmit_hint, 1224 GLOBAL_ZONEID, 0, NULL); 1225 } 1226 } 1227 1228 /* 1229 * ire_recvfn for RTF_REJECT and RTF_BLACKHOLE routes, including IRE_NOROUTE, 1230 * which is what ire_route_recursive returns when there is no matching ire. 1231 * Send ICMP unreachable unless blackhole. 
1232 */ 1233 void 1234 ire_recv_noroute_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1235 { 1236 ipha_t *ipha = (ipha_t *)iph_arg; 1237 ill_t *ill = ira->ira_ill; 1238 ip_stack_t *ipst = ill->ill_ipst; 1239 1240 /* Would we have forwarded this packet if we had a route? */ 1241 if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) { 1242 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1243 ip_drop_input("l2 multicast not forwarded", mp, ill); 1244 freemsg(mp); 1245 return; 1246 } 1247 1248 if (!(ill->ill_flags & ILLF_ROUTER)) { 1249 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1250 ip_drop_input("ipIfStatsForwProhibits", mp, ill); 1251 freemsg(mp); 1252 return; 1253 } 1254 /* 1255 * If we had a route this could have been forwarded. Count as such. 1256 * 1257 * ipIfStatsHCInForwDatagrams should only be increment if there 1258 * will be an attempt to forward the packet, which is why we 1259 * increment after the above condition has been checked. 1260 */ 1261 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 1262 1263 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 1264 1265 ip_rts_change(RTM_MISS, ipha->ipha_dst, 0, 0, 0, 0, 0, 0, RTA_DST, 1266 ipst); 1267 1268 if (ire->ire_flags & RTF_BLACKHOLE) { 1269 ip_drop_input("ipIfStatsInNoRoutes RTF_BLACKHOLE", mp, ill); 1270 freemsg(mp); 1271 } else { 1272 ip_drop_input("ipIfStatsInNoRoutes RTF_REJECT", mp, ill); 1273 1274 if (ip_source_routed(ipha, ipst)) { 1275 icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED, ira); 1276 } else { 1277 icmp_unreachable(mp, ICMP_HOST_UNREACHABLE, ira); 1278 } 1279 } 1280 } 1281 1282 /* 1283 * ire_recvfn for IRE_LOCALs marked with ire_noaccept. Such IREs are used for 1284 * VRRP when in noaccept mode. 1285 * We silently drop the packet. ARP handles packets even if noaccept is set. 
1286 */ 1287 /* ARGSUSED */ 1288 void 1289 ire_recv_noaccept_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1290 ip_recv_attr_t *ira) 1291 { 1292 ill_t *ill = ira->ira_ill; 1293 1294 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1295 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill); 1296 freemsg(mp); 1297 } 1298 1299 /* 1300 * ire_recvfn for IRE_BROADCAST. 1301 */ 1302 void 1303 ire_recv_broadcast_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1304 ip_recv_attr_t *ira) 1305 { 1306 ipha_t *ipha = (ipha_t *)iph_arg; 1307 ill_t *ill = ira->ira_ill; 1308 ill_t *dst_ill = ire->ire_ill; 1309 ip_stack_t *ipst = ill->ill_ipst; 1310 ire_t *alt_ire; 1311 nce_t *nce; 1312 ipaddr_t ipha_dst; 1313 1314 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInBcastPkts); 1315 1316 /* Tag for higher-level protocols */ 1317 ira->ira_flags |= IRAF_BROADCAST; 1318 1319 /* 1320 * Whether local or directed broadcast forwarding: don't allow 1321 * for TCP. 1322 */ 1323 if (ipha->ipha_protocol == IPPROTO_TCP) { 1324 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1325 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1326 freemsg(mp); 1327 return; 1328 } 1329 1330 /* 1331 * So that we don't end up with dups, only one ill an IPMP group is 1332 * nominated to receive broadcast traffic. 1333 * If we have no cast_ill we are liberal and accept everything. 1334 */ 1335 if (IS_UNDER_IPMP(ill)) { 1336 /* For an under ill_grp can change under lock */ 1337 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1338 if (!ill->ill_nom_cast && ill->ill_grp != NULL && 1339 ill->ill_grp->ig_cast_ill != NULL) { 1340 rw_exit(&ipst->ips_ill_g_lock); 1341 /* No MIB since this is normal operation */ 1342 ip_drop_input("not nom_cast", mp, ill); 1343 freemsg(mp); 1344 return; 1345 } 1346 rw_exit(&ipst->ips_ill_g_lock); 1347 1348 ira->ira_ruifindex = ill_get_upper_ifindex(ill); 1349 } 1350 1351 /* 1352 * After reassembly and IPsec we will need to duplicate the 1353 * broadcast packet for all matching zones on the ill. 
1354 */ 1355 ira->ira_zoneid = ALL_ZONES; 1356 1357 /* 1358 * Check for directed broadcast i.e. ire->ire_ill is different than 1359 * the incoming ill. 1360 * The same broadcast address can be assigned to multiple interfaces 1361 * so have to check explicitly for that case by looking up the alt_ire 1362 */ 1363 if (dst_ill == ill && !(ire->ire_flags & RTF_MULTIRT)) { 1364 /* Reassemble on the ill on which the packet arrived */ 1365 ip_input_local_v4(ire, mp, ipha, ira); 1366 /* Restore */ 1367 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1368 return; 1369 } 1370 1371 /* Is there an IRE_BROADCAST on the incoming ill? */ 1372 ipha_dst = ((ira->ira_flags & IRAF_DHCP_UNICAST) ? INADDR_BROADCAST : 1373 ipha->ipha_dst); 1374 alt_ire = ire_ftable_lookup_v4(ipha_dst, 0, 0, IRE_BROADCAST, ill, 1375 ALL_ZONES, ira->ira_tsl, 1376 MATCH_IRE_TYPE|MATCH_IRE_ILL|MATCH_IRE_SECATTR, 0, ipst, NULL); 1377 if (alt_ire != NULL) { 1378 /* Not a directed broadcast */ 1379 /* 1380 * In the special case of multirouted broadcast 1381 * packets, we unconditionally need to "gateway" 1382 * them to the appropriate interface here so that reassembly 1383 * works. We know that the IRE_BROADCAST on cgtp0 doesn't 1384 * have RTF_MULTIRT set so we look for such an IRE in the 1385 * bucket. 
1386 */ 1387 if (alt_ire->ire_flags & RTF_MULTIRT) { 1388 irb_t *irb; 1389 ire_t *ire1; 1390 1391 irb = ire->ire_bucket; 1392 irb_refhold(irb); 1393 for (ire1 = irb->irb_ire; ire1 != NULL; 1394 ire1 = ire1->ire_next) { 1395 if (IRE_IS_CONDEMNED(ire1)) 1396 continue; 1397 if (!(ire1->ire_type & IRE_BROADCAST) || 1398 (ire1->ire_flags & RTF_MULTIRT)) 1399 continue; 1400 ill = ire1->ire_ill; 1401 ill_refhold(ill); 1402 break; 1403 } 1404 irb_refrele(irb); 1405 if (ire1 != NULL) { 1406 ill_t *orig_ill = ira->ira_ill; 1407 1408 ire_refrele(alt_ire); 1409 /* Reassemble on the new ill */ 1410 ira->ira_ill = ill; 1411 ip_input_local_v4(ire, mp, ipha, ira); 1412 ill_refrele(ill); 1413 /* Restore */ 1414 ira->ira_ill = orig_ill; 1415 ira->ira_ruifindex = 1416 orig_ill->ill_phyint->phyint_ifindex; 1417 return; 1418 } 1419 } 1420 ire_refrele(alt_ire); 1421 /* Reassemble on the ill on which the packet arrived */ 1422 ip_input_local_v4(ire, mp, ipha, ira); 1423 goto done; 1424 } 1425 1426 /* 1427 * This is a directed broadcast 1428 * 1429 * If directed broadcast is allowed, then forward the packet out 1430 * the destination interface with IXAF_LOOPBACK_COPY set. That will 1431 * result in ip_input() receiving a copy of the packet on the 1432 * appropriate ill. (We could optimize this to avoid the extra trip 1433 * via ip_input(), but since directed broadcasts are normally disabled 1434 * it doesn't make sense to optimize it.) 
1435 */ 1436 if (!ipst->ips_ip_g_forward_directed_bcast || 1437 (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST))) { 1438 ip_drop_input("directed broadcast not allowed", mp, ill); 1439 freemsg(mp); 1440 goto done; 1441 } 1442 if ((ira->ira_flags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1443 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1444 ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1445 freemsg(mp); 1446 goto done; 1447 } 1448 1449 /* 1450 * Clear the indication that this may have hardware 1451 * checksum as we are not using it for forwarding. 1452 */ 1453 DB_CKSUMFLAGS(mp) = 0; 1454 1455 /* 1456 * Adjust ttl to 2 (1+1 - the forward engine will decrement it by one. 1457 */ 1458 ipha->ipha_ttl = ipst->ips_ip_broadcast_ttl + 1; 1459 ipha->ipha_hdr_checksum = 0; 1460 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1461 1462 /* 1463 * We use ip_forward_xmit to do any fragmentation. 1464 * and loopback copy on the outbound interface. 1465 * 1466 * Make it so that IXAF_LOOPBACK_COPY to be set on transmit side. 1467 */ 1468 ira->ira_flags |= IRAF_LOOPBACK_COPY; 1469 1470 nce = arp_nce_init(dst_ill, ipha->ipha_dst, IRE_BROADCAST); 1471 if (nce == NULL) { 1472 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutDiscards); 1473 ip_drop_output("No nce", mp, dst_ill); 1474 freemsg(mp); 1475 goto done; 1476 } 1477 1478 ip_forward_xmit_v4(nce, ill, mp, ipha, ira, dst_ill->ill_mc_mtu, 0); 1479 nce_refrele(nce); 1480 done: 1481 /* Restore */ 1482 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1483 } 1484 1485 /* 1486 * ire_recvfn for IRE_MULTICAST. 
1487 */ 1488 void 1489 ire_recv_multicast_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1490 ip_recv_attr_t *ira) 1491 { 1492 ipha_t *ipha = (ipha_t *)iph_arg; 1493 ill_t *ill = ira->ira_ill; 1494 ip_stack_t *ipst = ill->ill_ipst; 1495 1496 ASSERT(ire->ire_ill == ira->ira_ill); 1497 1498 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 1499 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, ira->ira_pktlen); 1500 1501 /* RSVP hook */ 1502 if (ira->ira_flags & IRAF_RSVP) 1503 goto forus; 1504 1505 /* Tag for higher-level protocols */ 1506 ira->ira_flags |= IRAF_MULTICAST; 1507 1508 /* 1509 * So that we don't end up with dups, only one ill an IPMP group is 1510 * nominated to receive multicast traffic. 1511 * If we have no cast_ill we are liberal and accept everything. 1512 */ 1513 if (IS_UNDER_IPMP(ill)) { 1514 ip_stack_t *ipst = ill->ill_ipst; 1515 1516 /* For an under ill_grp can change under lock */ 1517 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1518 if (!ill->ill_nom_cast && ill->ill_grp != NULL && 1519 ill->ill_grp->ig_cast_ill != NULL) { 1520 rw_exit(&ipst->ips_ill_g_lock); 1521 ip_drop_input("not on cast ill", mp, ill); 1522 freemsg(mp); 1523 return; 1524 } 1525 rw_exit(&ipst->ips_ill_g_lock); 1526 /* 1527 * We switch to the upper ill so that mrouter and hasmembers 1528 * can operate on upper here and in ip_input_multicast. 1529 */ 1530 ill = ipmp_ill_hold_ipmp_ill(ill); 1531 if (ill != NULL) { 1532 ASSERT(ill != ira->ira_ill); 1533 ASSERT(ire->ire_ill == ira->ira_ill); 1534 ira->ira_ill = ill; 1535 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1536 } else { 1537 ill = ira->ira_ill; 1538 } 1539 } 1540 1541 /* 1542 * Check if we are a multicast router - send ip_mforward a copy of 1543 * the packet. 1544 * Due to mroute_decap tunnels we consider forwarding packets even if 1545 * mrouted has not joined the allmulti group on this interface. 
1546 */ 1547 if (ipst->ips_ip_g_mrouter) { 1548 int retval; 1549 1550 /* 1551 * Clear the indication that this may have hardware 1552 * checksum as we are not using it for forwarding. 1553 */ 1554 DB_CKSUMFLAGS(mp) = 0; 1555 1556 /* 1557 * ip_mforward helps us make these distinctions: If received 1558 * on tunnel and not IGMP, then drop. 1559 * If IGMP packet, then don't check membership 1560 * If received on a phyint and IGMP or PIM, then 1561 * don't check membership 1562 */ 1563 retval = ip_mforward(mp, ira); 1564 /* ip_mforward updates mib variables if needed */ 1565 1566 switch (retval) { 1567 case 0: 1568 /* 1569 * pkt is okay and arrived on phyint. 1570 * 1571 * If we are running as a multicast router 1572 * we need to see all IGMP and/or PIM packets. 1573 */ 1574 if ((ipha->ipha_protocol == IPPROTO_IGMP) || 1575 (ipha->ipha_protocol == IPPROTO_PIM)) { 1576 goto forus; 1577 } 1578 break; 1579 case -1: 1580 /* pkt is mal-formed, toss it */ 1581 freemsg(mp); 1582 goto done; 1583 case 1: 1584 /* 1585 * pkt is okay and arrived on a tunnel 1586 * 1587 * If we are running a multicast router 1588 * we need to see all igmp packets. 1589 */ 1590 if (ipha->ipha_protocol == IPPROTO_IGMP) { 1591 goto forus; 1592 } 1593 ip_drop_input("Multicast on tunnel ignored", mp, ill); 1594 freemsg(mp); 1595 goto done; 1596 } 1597 } 1598 1599 /* 1600 * Check if we have members on this ill. This is not necessary for 1601 * correctness because even if the NIC/GLD had a leaky filter, we 1602 * filter before passing to each conn_t. 1603 */ 1604 if (!ill_hasmembers_v4(ill, ipha->ipha_dst)) { 1605 /* 1606 * Nobody interested 1607 * 1608 * This might just be caused by the fact that 1609 * multiple IP Multicast addresses map to the same 1610 * link layer multicast - no need to increment counter! 
1611 */ 1612 ip_drop_input("Multicast with no members", mp, ill); 1613 freemsg(mp); 1614 goto done; 1615 } 1616 forus: 1617 ip2dbg(("ire_recv_multicast_v4: multicast for us: 0x%x\n", 1618 ntohl(ipha->ipha_dst))); 1619 1620 /* 1621 * After reassembly and IPsec we will need to duplicate the 1622 * multicast packet for all matching zones on the ill. 1623 */ 1624 ira->ira_zoneid = ALL_ZONES; 1625 1626 /* Reassemble on the ill on which the packet arrived */ 1627 ip_input_local_v4(ire, mp, ipha, ira); 1628 done: 1629 if (ill != ire->ire_ill) { 1630 ill_refrele(ill); 1631 ira->ira_ill = ire->ire_ill; 1632 ira->ira_ruifindex = ira->ira_ill->ill_phyint->phyint_ifindex; 1633 } 1634 } 1635 1636 /* 1637 * ire_recvfn for IRE_OFFLINK with RTF_MULTIRT. 1638 * Drop packets since we don't forward out multirt routes. 1639 */ 1640 /* ARGSUSED */ 1641 void 1642 ire_recv_multirt_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1643 { 1644 ill_t *ill = ira->ira_ill; 1645 1646 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 1647 ip_drop_input("Not forwarding out MULTIRT", mp, ill); 1648 freemsg(mp); 1649 } 1650 1651 /* 1652 * ire_recvfn for IRE_LOOPBACK. This is only used when a FW_HOOK 1653 * has rewritten the packet to have a loopback destination address (We 1654 * filter out packet with a loopback destination from arriving over the wire). 1655 * We don't know what zone to use, thus we always use the GLOBAL_ZONEID. 1656 */ 1657 void 1658 ire_recv_loopback_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1659 { 1660 ipha_t *ipha = (ipha_t *)iph_arg; 1661 ill_t *ill = ira->ira_ill; 1662 ill_t *ire_ill = ire->ire_ill; 1663 1664 ira->ira_zoneid = GLOBAL_ZONEID; 1665 1666 /* Switch to the lo0 ill for further processing */ 1667 if (ire_ill != ill) { 1668 /* 1669 * Update ira_ill to be the ILL on which the IP address 1670 * is hosted. 
1671 * No need to hold the ill since we have a hold on the ire 1672 */ 1673 ASSERT(ira->ira_ill == ira->ira_rill); 1674 ira->ira_ill = ire_ill; 1675 1676 ip_input_local_v4(ire, mp, ipha, ira); 1677 1678 /* Restore */ 1679 ASSERT(ira->ira_ill == ire_ill); 1680 ira->ira_ill = ill; 1681 return; 1682 1683 } 1684 ip_input_local_v4(ire, mp, ipha, ira); 1685 } 1686 1687 /* 1688 * ire_recvfn for IRE_LOCAL. 1689 */ 1690 void 1691 ire_recv_local_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1692 { 1693 ipha_t *ipha = (ipha_t *)iph_arg; 1694 ill_t *ill = ira->ira_ill; 1695 ill_t *ire_ill = ire->ire_ill; 1696 1697 /* Make a note for DAD that this address is in use */ 1698 ire->ire_last_used_time = LBOLT_FASTPATH; 1699 1700 /* Only target the IRE_LOCAL with the right zoneid. */ 1701 ira->ira_zoneid = ire->ire_zoneid; 1702 1703 /* 1704 * If the packet arrived on the wrong ill, we check that 1705 * this is ok. 1706 * If it is, then we ensure that we do the reassembly on 1707 * the ill on which the address is hosted. We keep ira_rill as 1708 * the one on which the packet arrived, so that IP_PKTINFO and 1709 * friends can report this. 1710 */ 1711 if (ire_ill != ill) { 1712 ire_t *new_ire; 1713 1714 new_ire = ip_check_multihome(&ipha->ipha_dst, ire, ill); 1715 if (new_ire == NULL) { 1716 /* Drop packet */ 1717 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1718 ip_drop_input("ipIfStatsInForwProhibits", mp, ill); 1719 freemsg(mp); 1720 return; 1721 } 1722 /* 1723 * Update ira_ill to be the ILL on which the IP address 1724 * is hosted. No need to hold the ill since we have a 1725 * hold on the ire. Note that we do the switch even if 1726 * new_ire == ire (for IPMP, ire would be the one corresponding 1727 * to the IPMP ill). 
1728 */ 1729 ASSERT(ira->ira_ill == ira->ira_rill); 1730 ira->ira_ill = new_ire->ire_ill; 1731 1732 /* ira_ruifindex tracks the upper for ira_rill */ 1733 if (IS_UNDER_IPMP(ill)) 1734 ira->ira_ruifindex = ill_get_upper_ifindex(ill); 1735 1736 ip_input_local_v4(new_ire, mp, ipha, ira); 1737 1738 /* Restore */ 1739 ASSERT(ira->ira_ill == new_ire->ire_ill); 1740 ira->ira_ill = ill; 1741 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1742 1743 if (new_ire != ire) 1744 ire_refrele(new_ire); 1745 return; 1746 } 1747 1748 ip_input_local_v4(ire, mp, ipha, ira); 1749 } 1750 1751 /* 1752 * Common function for packets arriving for the host. Handles 1753 * checksum verification, reassembly checks, etc. 1754 */ 1755 static void 1756 ip_input_local_v4(ire_t *ire, mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 1757 { 1758 ill_t *ill = ira->ira_ill; 1759 iaflags_t iraflags = ira->ira_flags; 1760 1761 /* 1762 * Verify IP header checksum. If the packet was AH or ESP then 1763 * this flag has already been cleared. Likewise if the packet 1764 * had a hardware checksum. 1765 */ 1766 if ((iraflags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1767 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1768 ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1769 freemsg(mp); 1770 return; 1771 } 1772 1773 if (iraflags & IRAF_IPV4_OPTIONS) { 1774 if (!ip_input_local_options(mp, ipha, ira)) { 1775 /* Error has been sent and mp consumed */ 1776 return; 1777 } 1778 /* 1779 * Some old hardware does partial checksum by including the 1780 * whole IP header, so the partial checksum value might have 1781 * become invalid if any option in the packet have been 1782 * updated. Always clear partial checksum flag here. 1783 */ 1784 DB_CKSUMFLAGS(mp) &= ~HCK_PARTIALCKSUM; 1785 } 1786 1787 /* 1788 * Is packet part of fragmented IP packet? 
1789 * We compare against defined values in network byte order 1790 */ 1791 if (ipha->ipha_fragment_offset_and_flags & 1792 (IPH_MF_HTONS | IPH_OFFSET_HTONS)) { 1793 /* 1794 * Make sure we have ira_l2src before we loose the original 1795 * mblk 1796 */ 1797 if (!(ira->ira_flags & IRAF_L2SRC_SET)) 1798 ip_setl2src(mp, ira, ira->ira_rill); 1799 1800 mp = ip_input_fragment(mp, ipha, ira); 1801 if (mp == NULL) 1802 return; 1803 /* Completed reassembly */ 1804 ipha = (ipha_t *)mp->b_rptr; 1805 } 1806 1807 /* 1808 * For broadcast and multicast we need some extra work before 1809 * we call ip_fanout_v4(), since in the case of shared-IP zones 1810 * we need to pretend that a packet arrived for each zoneid. 1811 */ 1812 if (iraflags & IRAF_MULTIBROADCAST) { 1813 if (iraflags & IRAF_BROADCAST) 1814 ip_input_broadcast_v4(ire, mp, ipha, ira); 1815 else 1816 ip_input_multicast_v4(ire, mp, ipha, ira); 1817 return; 1818 } 1819 ip_fanout_v4(mp, ipha, ira); 1820 } 1821 1822 1823 /* 1824 * Handle multiple zones which match the same broadcast address 1825 * and ill by delivering a packet to each of them. 1826 * Walk the bucket and look for different ire_zoneid but otherwise 1827 * the same IRE (same ill/addr/mask/type). 1828 * Note that ire_add() tracks IREs that are identical in all 1829 * fields (addr/mask/type/gw/ill/zoneid) within a single IRE by 1830 * increasing ire_identical_cnt. Thus we don't need to be concerned 1831 * about those. 
1832 */ 1833 static void 1834 ip_input_broadcast_v4(ire_t *ire, mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 1835 { 1836 ill_t *ill = ira->ira_ill; 1837 ip_stack_t *ipst = ill->ill_ipst; 1838 netstack_t *ns = ipst->ips_netstack; 1839 irb_t *irb; 1840 ire_t *ire1; 1841 mblk_t *mp1; 1842 ipha_t *ipha1; 1843 uint_t ira_pktlen = ira->ira_pktlen; 1844 uint16_t ira_ip_hdr_length = ira->ira_ip_hdr_length; 1845 1846 irb = ire->ire_bucket; 1847 1848 /* 1849 * If we don't have more than one shared-IP zone, or if 1850 * there can't be more than one IRE_BROADCAST for this 1851 * IP address, then just set the zoneid and proceed. 1852 */ 1853 if (ns->netstack_numzones == 1 || irb->irb_ire_cnt == 1) { 1854 ira->ira_zoneid = ire->ire_zoneid; 1855 1856 ip_fanout_v4(mp, ipha, ira); 1857 return; 1858 } 1859 irb_refhold(irb); 1860 for (ire1 = irb->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) { 1861 /* We do the main IRE after the end of the loop */ 1862 if (ire1 == ire) 1863 continue; 1864 1865 /* 1866 * Only IREs for the same IP address should be in the same 1867 * bucket. 1868 * But could have IRE_HOSTs in the case of CGTP. 
1869 */ 1870 ASSERT(ire1->ire_addr == ire->ire_addr); 1871 if (!(ire1->ire_type & IRE_BROADCAST)) 1872 continue; 1873 1874 if (IRE_IS_CONDEMNED(ire1)) 1875 continue; 1876 1877 mp1 = copymsg(mp); 1878 if (mp1 == NULL) { 1879 /* Failed to deliver to one zone */ 1880 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1881 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1882 continue; 1883 } 1884 ira->ira_zoneid = ire1->ire_zoneid; 1885 ipha1 = (ipha_t *)mp1->b_rptr; 1886 ip_fanout_v4(mp1, ipha1, ira); 1887 /* 1888 * IPsec might have modified ira_pktlen and ira_ip_hdr_length 1889 * so we restore them for a potential next iteration 1890 */ 1891 ira->ira_pktlen = ira_pktlen; 1892 ira->ira_ip_hdr_length = ira_ip_hdr_length; 1893 } 1894 irb_refrele(irb); 1895 /* Do the main ire */ 1896 ira->ira_zoneid = ire->ire_zoneid; 1897 ip_fanout_v4(mp, ipha, ira); 1898 } 1899 1900 /* 1901 * Handle multiple zones which want to receive the same multicast packets 1902 * on this ill by delivering a packet to each of them. 1903 * 1904 * Note that for packets delivered to transports we could instead do this 1905 * as part of the fanout code, but since we need to handle icmp_inbound 1906 * it is simpler to have multicast work the same as broadcast. 1907 * 1908 * The ip_fanout matching for multicast matches based on ilm independent of 1909 * zoneid since the zoneid restriction is applied when joining a multicast 1910 * group. 
1911 */ 1912 /* ARGSUSED */ 1913 static void 1914 ip_input_multicast_v4(ire_t *ire, mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 1915 { 1916 ill_t *ill = ira->ira_ill; 1917 iaflags_t iraflags = ira->ira_flags; 1918 ip_stack_t *ipst = ill->ill_ipst; 1919 netstack_t *ns = ipst->ips_netstack; 1920 zoneid_t zoneid; 1921 mblk_t *mp1; 1922 ipha_t *ipha1; 1923 uint_t ira_pktlen = ira->ira_pktlen; 1924 uint16_t ira_ip_hdr_length = ira->ira_ip_hdr_length; 1925 1926 /* ire_recv_multicast has switched to the upper ill for IPMP */ 1927 ASSERT(!IS_UNDER_IPMP(ill)); 1928 1929 /* 1930 * If we don't have more than one shared-IP zone, or if 1931 * there are no members in anything but the global zone, 1932 * then just set the zoneid and proceed. 1933 */ 1934 if (ns->netstack_numzones == 1 || 1935 !ill_hasmembers_otherzones_v4(ill, ipha->ipha_dst, 1936 GLOBAL_ZONEID)) { 1937 ira->ira_zoneid = GLOBAL_ZONEID; 1938 1939 /* If sender didn't want this zone to receive it, drop */ 1940 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1941 ira->ira_no_loop_zoneid == ira->ira_zoneid) { 1942 ip_drop_input("Multicast but wrong zoneid", mp, ill); 1943 freemsg(mp); 1944 return; 1945 } 1946 ip_fanout_v4(mp, ipha, ira); 1947 return; 1948 } 1949 1950 /* 1951 * Here we loop over all zoneids that have members in the group 1952 * and deliver a packet to ip_fanout for each zoneid. 1953 * 1954 * First find any members in the lowest numeric zoneid by looking for 1955 * first zoneid larger than -1 (ALL_ZONES). 1956 * We terminate the loop when we receive -1 (ALL_ZONES). 1957 */ 1958 zoneid = ill_hasmembers_nextzone_v4(ill, ipha->ipha_dst, ALL_ZONES); 1959 for (; zoneid != ALL_ZONES; 1960 zoneid = ill_hasmembers_nextzone_v4(ill, ipha->ipha_dst, zoneid)) { 1961 /* 1962 * Avoid an extra copymsg/freemsg by skipping global zone here 1963 * and doing that at the end. 
1964 */ 1965 if (zoneid == GLOBAL_ZONEID) 1966 continue; 1967 1968 ira->ira_zoneid = zoneid; 1969 1970 /* If sender didn't want this zone to receive it, skip */ 1971 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1972 ira->ira_no_loop_zoneid == ira->ira_zoneid) 1973 continue; 1974 1975 mp1 = copymsg(mp); 1976 if (mp1 == NULL) { 1977 /* Failed to deliver to one zone */ 1978 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1979 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1980 continue; 1981 } 1982 ipha1 = (ipha_t *)mp1->b_rptr; 1983 ip_fanout_v4(mp1, ipha1, ira); 1984 /* 1985 * IPsec might have modified ira_pktlen and ira_ip_hdr_length 1986 * so we restore them for a potential next iteration 1987 */ 1988 ira->ira_pktlen = ira_pktlen; 1989 ira->ira_ip_hdr_length = ira_ip_hdr_length; 1990 } 1991 1992 /* Do the main ire */ 1993 ira->ira_zoneid = GLOBAL_ZONEID; 1994 /* If sender didn't want this zone to receive it, drop */ 1995 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1996 ira->ira_no_loop_zoneid == ira->ira_zoneid) { 1997 ip_drop_input("Multicast but wrong zoneid", mp, ill); 1998 freemsg(mp); 1999 } else { 2000 ip_fanout_v4(mp, ipha, ira); 2001 } 2002 } 2003 2004 2005 /* 2006 * Determine the zoneid and IRAF_TX_* flags if trusted extensions 2007 * is in use. Updates ira_zoneid and ira_flags as a result. 2008 */ 2009 static void 2010 ip_fanout_tx_v4(mblk_t *mp, ipha_t *ipha, uint8_t protocol, 2011 uint_t ip_hdr_length, ip_recv_attr_t *ira) 2012 { 2013 uint16_t *up; 2014 uint16_t lport; 2015 zoneid_t zoneid; 2016 2017 ASSERT(ira->ira_flags & IRAF_SYSTEM_LABELED); 2018 2019 /* 2020 * If the packet is unlabeled we might allow read-down 2021 * for MAC_EXEMPT. Below we clear this if it is a multi-level 2022 * port (MLP). 2023 * Note that ira_tsl can be NULL here. 
2024 */ 2025 if (ira->ira_tsl != NULL && ira->ira_tsl->tsl_flags & TSLF_UNLABELED) 2026 ira->ira_flags |= IRAF_TX_MAC_EXEMPTABLE; 2027 2028 if (ira->ira_zoneid != ALL_ZONES) 2029 return; 2030 2031 ira->ira_flags |= IRAF_TX_SHARED_ADDR; 2032 2033 up = (uint16_t *)((uchar_t *)ipha + ip_hdr_length); 2034 switch (protocol) { 2035 case IPPROTO_TCP: 2036 case IPPROTO_SCTP: 2037 case IPPROTO_UDP: 2038 /* Caller ensures this */ 2039 ASSERT(((uchar_t *)ipha) + ip_hdr_length +4 <= mp->b_wptr); 2040 2041 /* 2042 * Only these transports support MLP. 2043 * We know their destination port numbers is in 2044 * the same place in the header. 2045 */ 2046 lport = up[1]; 2047 2048 /* 2049 * No need to handle exclusive-stack zones 2050 * since ALL_ZONES only applies to the shared IP instance. 2051 */ 2052 zoneid = tsol_mlp_findzone(protocol, lport); 2053 /* 2054 * If no shared MLP is found, tsol_mlp_findzone returns 2055 * ALL_ZONES. In that case, we assume it's SLP, and 2056 * search for the zone based on the packet label. 2057 * 2058 * If there is such a zone, we prefer to find a 2059 * connection in it. Otherwise, we look for a 2060 * MAC-exempt connection in any zone whose label 2061 * dominates the default label on the packet. 
2062 */ 2063 if (zoneid == ALL_ZONES) 2064 zoneid = tsol_attr_to_zoneid(ira); 2065 else 2066 ira->ira_flags &= ~IRAF_TX_MAC_EXEMPTABLE; 2067 break; 2068 default: 2069 /* Handle shared address for other protocols */ 2070 zoneid = tsol_attr_to_zoneid(ira); 2071 break; 2072 } 2073 ira->ira_zoneid = zoneid; 2074 } 2075 2076 /* 2077 * Increment checksum failure statistics 2078 */ 2079 static void 2080 ip_input_cksum_err_v4(uint8_t protocol, uint16_t hck_flags, ill_t *ill) 2081 { 2082 ip_stack_t *ipst = ill->ill_ipst; 2083 2084 switch (protocol) { 2085 case IPPROTO_TCP: 2086 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 2087 2088 if (hck_flags & HCK_FULLCKSUM) 2089 IP_STAT(ipst, ip_tcp_in_full_hw_cksum_err); 2090 else if (hck_flags & HCK_PARTIALCKSUM) 2091 IP_STAT(ipst, ip_tcp_in_part_hw_cksum_err); 2092 else 2093 IP_STAT(ipst, ip_tcp_in_sw_cksum_err); 2094 break; 2095 case IPPROTO_UDP: 2096 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs); 2097 if (hck_flags & HCK_FULLCKSUM) 2098 IP_STAT(ipst, ip_udp_in_full_hw_cksum_err); 2099 else if (hck_flags & HCK_PARTIALCKSUM) 2100 IP_STAT(ipst, ip_udp_in_part_hw_cksum_err); 2101 else 2102 IP_STAT(ipst, ip_udp_in_sw_cksum_err); 2103 break; 2104 case IPPROTO_ICMP: 2105 BUMP_MIB(&ipst->ips_icmp_mib, icmpInCksumErrs); 2106 break; 2107 default: 2108 ASSERT(0); 2109 break; 2110 } 2111 } 2112 2113 /* Calculate the IPv4 pseudo-header checksum */ 2114 uint32_t 2115 ip_input_cksum_pseudo_v4(ipha_t *ipha, ip_recv_attr_t *ira) 2116 { 2117 uint_t ulp_len; 2118 uint32_t cksum; 2119 uint8_t protocol = ira->ira_protocol; 2120 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 2121 2122 #define iphs ((uint16_t *)ipha) 2123 2124 switch (protocol) { 2125 case IPPROTO_TCP: 2126 ulp_len = ira->ira_pktlen - ip_hdr_length; 2127 2128 /* Protocol and length */ 2129 cksum = htons(ulp_len) + IP_TCP_CSUM_COMP; 2130 /* IP addresses */ 2131 cksum += iphs[6] + iphs[7] + iphs[8] + iphs[9]; 2132 break; 2133 2134 case IPPROTO_UDP: { 2135 udpha_t *udpha; 2136 2137 
udpha = (udpha_t *)((uchar_t *)ipha + ip_hdr_length); 2138 2139 /* Protocol and length */ 2140 cksum = udpha->uha_length + IP_UDP_CSUM_COMP; 2141 /* IP addresses */ 2142 cksum += iphs[6] + iphs[7] + iphs[8] + iphs[9]; 2143 break; 2144 } 2145 2146 default: 2147 cksum = 0; 2148 break; 2149 } 2150 #undef iphs 2151 return (cksum); 2152 } 2153 2154 2155 /* 2156 * Software verification of the ULP checksums. 2157 * Returns B_TRUE if ok. 2158 * Increments statistics of failed. 2159 */ 2160 static boolean_t 2161 ip_input_sw_cksum_v4(mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 2162 { 2163 ip_stack_t *ipst = ira->ira_ill->ill_ipst; 2164 uint32_t cksum; 2165 uint8_t protocol = ira->ira_protocol; 2166 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 2167 2168 IP_STAT(ipst, ip_in_sw_cksum); 2169 2170 ASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP); 2171 2172 cksum = ip_input_cksum_pseudo_v4(ipha, ira); 2173 cksum = IP_CSUM(mp, ip_hdr_length, cksum); 2174 if (cksum == 0) 2175 return (B_TRUE); 2176 2177 ip_input_cksum_err_v4(protocol, 0, ira->ira_ill); 2178 return (B_FALSE); 2179 } 2180 2181 /* 2182 * Verify the ULP checksums. 2183 * Returns B_TRUE if ok, or if the ULP doesn't have a well-defined checksum 2184 * algorithm. 2185 * Increments statistics if failed. 
2186 */ 2187 static boolean_t 2188 ip_input_cksum_v4(iaflags_t iraflags, mblk_t *mp, ipha_t *ipha, 2189 ip_recv_attr_t *ira) 2190 { 2191 ill_t *ill = ira->ira_rill; 2192 uint16_t hck_flags; 2193 uint32_t cksum; 2194 mblk_t *mp1; 2195 int32_t len; 2196 uint8_t protocol = ira->ira_protocol; 2197 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 2198 2199 2200 switch (protocol) { 2201 case IPPROTO_TCP: 2202 break; 2203 2204 case IPPROTO_UDP: { 2205 udpha_t *udpha; 2206 2207 udpha = (udpha_t *)((uchar_t *)ipha + ip_hdr_length); 2208 if (udpha->uha_checksum == 0) { 2209 /* Packet doesn't have a UDP checksum */ 2210 return (B_TRUE); 2211 } 2212 break; 2213 } 2214 case IPPROTO_SCTP: { 2215 sctp_hdr_t *sctph; 2216 uint32_t pktsum; 2217 2218 sctph = (sctp_hdr_t *)((uchar_t *)ipha + ip_hdr_length); 2219 #ifdef DEBUG 2220 if (skip_sctp_cksum) 2221 return (B_TRUE); 2222 #endif 2223 pktsum = sctph->sh_chksum; 2224 sctph->sh_chksum = 0; 2225 cksum = sctp_cksum(mp, ip_hdr_length); 2226 sctph->sh_chksum = pktsum; 2227 if (cksum == pktsum) 2228 return (B_TRUE); 2229 2230 /* 2231 * Defer until later whether a bad checksum is ok 2232 * in order to allow RAW sockets to use Adler checksum 2233 * with SCTP. 2234 */ 2235 ira->ira_flags |= IRAF_SCTP_CSUM_ERR; 2236 return (B_TRUE); 2237 } 2238 2239 default: 2240 /* No ULP checksum to verify. */ 2241 return (B_TRUE); 2242 } 2243 /* 2244 * Revert to software checksum calculation if the interface 2245 * isn't capable of checksum offload. 2246 * We clear DB_CKSUMFLAGS when going through IPsec in ip_fanout. 2247 * Note: IRAF_NO_HW_CKSUM is not currently used. 2248 */ 2249 ASSERT(!IS_IPMP(ill)); 2250 if ((iraflags & IRAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) || 2251 !dohwcksum) { 2252 return (ip_input_sw_cksum_v4(mp, ipha, ira)); 2253 } 2254 2255 /* 2256 * We apply this for all ULP protocols. Does the HW know to 2257 * not set the flags for SCTP and other protocols. 
2258 */ 2259 2260 hck_flags = DB_CKSUMFLAGS(mp); 2261 2262 if (hck_flags & HCK_FULLCKSUM_OK) { 2263 /* 2264 * Hardware has already verified the checksum. 2265 */ 2266 return (B_TRUE); 2267 } 2268 2269 if (hck_flags & HCK_FULLCKSUM) { 2270 /* 2271 * Full checksum has been computed by the hardware 2272 * and has been attached. If the driver wants us to 2273 * verify the correctness of the attached value, in 2274 * order to protect against faulty hardware, compare 2275 * it against -0 (0xFFFF) to see if it's valid. 2276 */ 2277 cksum = DB_CKSUM16(mp); 2278 if (cksum == 0xFFFF) 2279 return (B_TRUE); 2280 ip_input_cksum_err_v4(protocol, hck_flags, ira->ira_ill); 2281 return (B_FALSE); 2282 } 2283 2284 mp1 = mp->b_cont; 2285 if ((hck_flags & HCK_PARTIALCKSUM) && 2286 (mp1 == NULL || mp1->b_cont == NULL) && 2287 ip_hdr_length >= DB_CKSUMSTART(mp) && 2288 ((len = ip_hdr_length - DB_CKSUMSTART(mp)) & 1) == 0) { 2289 uint32_t adj; 2290 uchar_t *cksum_start; 2291 2292 cksum = ip_input_cksum_pseudo_v4(ipha, ira); 2293 2294 cksum_start = ((uchar_t *)ipha + DB_CKSUMSTART(mp)); 2295 2296 /* 2297 * Partial checksum has been calculated by hardware 2298 * and attached to the packet; in addition, any 2299 * prepended extraneous data is even byte aligned, 2300 * and there are at most two mblks associated with 2301 * the packet. If any such data exists, we adjust 2302 * the checksum; also take care any postpended data. 
2303 */ 2304 IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, len, adj); 2305 /* 2306 * One's complement subtract extraneous checksum 2307 */ 2308 cksum += DB_CKSUM16(mp); 2309 if (adj >= cksum) 2310 cksum = ~(adj - cksum) & 0xFFFF; 2311 else 2312 cksum -= adj; 2313 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16); 2314 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16); 2315 if (!(~cksum & 0xFFFF)) 2316 return (B_TRUE); 2317 2318 ip_input_cksum_err_v4(protocol, hck_flags, ira->ira_ill); 2319 return (B_FALSE); 2320 } 2321 return (ip_input_sw_cksum_v4(mp, ipha, ira)); 2322 } 2323 2324 2325 /* 2326 * Handle fanout of received packets. 2327 * Unicast packets that are looped back (from ire_send_local_v4) and packets 2328 * from the wire are differentiated by checking IRAF_VERIFY_ULP_CKSUM. 2329 * 2330 * IPQoS Notes 2331 * Before sending it to the client, invoke IPPF processing. Policy processing 2332 * takes place only if the callout_position, IPP_LOCAL_IN, is enabled. 2333 */ 2334 void 2335 ip_fanout_v4(mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 2336 { 2337 ill_t *ill = ira->ira_ill; 2338 iaflags_t iraflags = ira->ira_flags; 2339 ip_stack_t *ipst = ill->ill_ipst; 2340 uint8_t protocol = ipha->ipha_protocol; 2341 conn_t *connp; 2342 #define rptr ((uchar_t *)ipha) 2343 uint_t ip_hdr_length; 2344 uint_t min_ulp_header_length; 2345 int offset; 2346 ssize_t len; 2347 netstack_t *ns = ipst->ips_netstack; 2348 ipsec_stack_t *ipss = ns->netstack_ipsec; 2349 ill_t *rill = ira->ira_rill; 2350 2351 ASSERT(ira->ira_pktlen == ntohs(ipha->ipha_length)); 2352 2353 ip_hdr_length = ira->ira_ip_hdr_length; 2354 ira->ira_protocol = protocol; 2355 2356 /* 2357 * Time for IPP once we've done reassembly and IPsec. 2358 * We skip this for loopback packets since we don't do IPQoS 2359 * on loopback. 
2360 */ 2361 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 2362 !(iraflags & IRAF_LOOPBACK) && 2363 (protocol != IPPROTO_ESP && protocol != IPPROTO_AH)) { 2364 /* 2365 * Use the interface on which the packet arrived - not where 2366 * the IP address is hosted. 2367 */ 2368 /* ip_process translates an IS_UNDER_IPMP */ 2369 mp = ip_process(IPP_LOCAL_IN, mp, rill, ill); 2370 if (mp == NULL) { 2371 /* ip_drop_packet and MIB done */ 2372 return; 2373 } 2374 } 2375 2376 /* Determine the minimum required size of the upper-layer header */ 2377 /* Need to do this for at least the set of ULPs that TX handles. */ 2378 switch (protocol) { 2379 case IPPROTO_TCP: 2380 min_ulp_header_length = TCP_MIN_HEADER_LENGTH; 2381 break; 2382 case IPPROTO_SCTP: 2383 min_ulp_header_length = SCTP_COMMON_HDR_LENGTH; 2384 break; 2385 case IPPROTO_UDP: 2386 min_ulp_header_length = UDPH_SIZE; 2387 break; 2388 case IPPROTO_ICMP: 2389 min_ulp_header_length = ICMPH_SIZE; 2390 break; 2391 default: 2392 min_ulp_header_length = 0; 2393 break; 2394 } 2395 /* Make sure we have the min ULP header length */ 2396 len = mp->b_wptr - rptr; 2397 if (len < ip_hdr_length + min_ulp_header_length) { 2398 if (ira->ira_pktlen < ip_hdr_length + min_ulp_header_length) { 2399 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 2400 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 2401 freemsg(mp); 2402 return; 2403 } 2404 IP_STAT(ipst, ip_recv_pullup); 2405 ipha = ip_pullup(mp, ip_hdr_length + min_ulp_header_length, 2406 ira); 2407 if (ipha == NULL) 2408 goto discard; 2409 len = mp->b_wptr - rptr; 2410 } 2411 2412 /* 2413 * If trusted extensions then determine the zoneid and TX specific 2414 * ira_flags. 2415 */ 2416 if (iraflags & IRAF_SYSTEM_LABELED) { 2417 /* This can update ira->ira_flags and ira->ira_zoneid */ 2418 ip_fanout_tx_v4(mp, ipha, protocol, ip_hdr_length, ira); 2419 iraflags = ira->ira_flags; 2420 } 2421 2422 2423 /* Verify ULP checksum. 
Handles TCP, UDP, and SCTP */ 2424 if (iraflags & IRAF_VERIFY_ULP_CKSUM) { 2425 if (!ip_input_cksum_v4(iraflags, mp, ipha, ira)) { 2426 /* Bad checksum. Stats are already incremented */ 2427 ip_drop_input("Bad ULP checksum", mp, ill); 2428 freemsg(mp); 2429 return; 2430 } 2431 /* IRAF_SCTP_CSUM_ERR could have been set */ 2432 iraflags = ira->ira_flags; 2433 } 2434 switch (protocol) { 2435 case IPPROTO_TCP: 2436 /* For TCP, discard broadcast and multicast packets. */ 2437 if (iraflags & IRAF_MULTIBROADCAST) 2438 goto discard; 2439 2440 /* First mblk contains IP+TCP headers per above check */ 2441 ASSERT(len >= ip_hdr_length + TCP_MIN_HEADER_LENGTH); 2442 2443 /* TCP options present? */ 2444 offset = ((uchar_t *)ipha)[ip_hdr_length + 12] >> 4; 2445 if (offset != 5) { 2446 if (offset < 5) 2447 goto discard; 2448 2449 /* 2450 * There must be TCP options. 2451 * Make sure we can grab them. 2452 */ 2453 offset <<= 2; 2454 offset += ip_hdr_length; 2455 if (len < offset) { 2456 if (ira->ira_pktlen < offset) { 2457 BUMP_MIB(ill->ill_ip_mib, 2458 ipIfStatsInTruncatedPkts); 2459 ip_drop_input( 2460 "ipIfStatsInTruncatedPkts", 2461 mp, ill); 2462 freemsg(mp); 2463 return; 2464 } 2465 IP_STAT(ipst, ip_recv_pullup); 2466 ipha = ip_pullup(mp, offset, ira); 2467 if (ipha == NULL) 2468 goto discard; 2469 len = mp->b_wptr - rptr; 2470 } 2471 } 2472 2473 /* 2474 * Pass up a squeue hint to tcp. 2475 * If ira_sqp is already set (this is loopback) we leave it 2476 * alone. 
2477 */ 2478 if (ira->ira_sqp == NULL) { 2479 ira->ira_sqp = ip_squeue_get(ira->ira_ring); 2480 } 2481 2482 /* Look for AF_INET or AF_INET6 that matches */ 2483 connp = ipcl_classify_v4(mp, IPPROTO_TCP, ip_hdr_length, 2484 ira, ipst); 2485 if (connp == NULL) { 2486 /* Send the TH_RST */ 2487 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2488 tcp_xmit_listeners_reset(mp, ira, ipst, NULL); 2489 return; 2490 } 2491 if (connp->conn_incoming_ifindex != 0 && 2492 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2493 CONN_DEC_REF(connp); 2494 2495 /* Send the TH_RST */ 2496 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2497 tcp_xmit_listeners_reset(mp, ira, ipst, NULL); 2498 return; 2499 } 2500 if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || 2501 (iraflags & IRAF_IPSEC_SECURE)) { 2502 mp = ipsec_check_inbound_policy(mp, connp, 2503 ipha, NULL, ira); 2504 if (mp == NULL) { 2505 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2506 /* Note that mp is NULL */ 2507 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2508 CONN_DEC_REF(connp); 2509 return; 2510 } 2511 } 2512 /* Found a client; up it goes */ 2513 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2514 ira->ira_ill = ira->ira_rill = NULL; 2515 if (!IPCL_IS_TCP(connp)) { 2516 /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */ 2517 (connp->conn_recv)(connp, mp, NULL, ira); 2518 CONN_DEC_REF(connp); 2519 ira->ira_ill = ill; 2520 ira->ira_rill = rill; 2521 return; 2522 } 2523 2524 /* 2525 * We do different processing whether called from 2526 * ip_accept_tcp and we match the target, don't match 2527 * the target, and when we are called by ip_input. 
2528 */ 2529 if (iraflags & IRAF_TARGET_SQP) { 2530 if (ira->ira_target_sqp == connp->conn_sqp) { 2531 mblk_t *attrmp; 2532 2533 attrmp = ip_recv_attr_to_mblk(ira); 2534 if (attrmp == NULL) { 2535 BUMP_MIB(ill->ill_ip_mib, 2536 ipIfStatsInDiscards); 2537 ip_drop_input("ipIfStatsInDiscards", 2538 mp, ill); 2539 freemsg(mp); 2540 CONN_DEC_REF(connp); 2541 } else { 2542 SET_SQUEUE(attrmp, connp->conn_recv, 2543 connp); 2544 attrmp->b_cont = mp; 2545 ASSERT(ira->ira_target_sqp_mp == NULL); 2546 ira->ira_target_sqp_mp = attrmp; 2547 /* 2548 * Conn ref release when drained from 2549 * the squeue. 2550 */ 2551 } 2552 } else { 2553 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, 2554 connp->conn_recv, connp, ira, SQ_FILL, 2555 SQTAG_IP_TCP_INPUT); 2556 } 2557 } else { 2558 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv, 2559 connp, ira, ip_squeue_flag, SQTAG_IP_TCP_INPUT); 2560 } 2561 ira->ira_ill = ill; 2562 ira->ira_rill = rill; 2563 return; 2564 2565 case IPPROTO_SCTP: { 2566 sctp_hdr_t *sctph; 2567 in6_addr_t map_src, map_dst; 2568 uint32_t ports; /* Source and destination ports */ 2569 sctp_stack_t *sctps = ipst->ips_netstack->netstack_sctp; 2570 2571 /* For SCTP, discard broadcast and multicast packets. */ 2572 if (iraflags & IRAF_MULTIBROADCAST) 2573 goto discard; 2574 2575 /* 2576 * Since there is no SCTP h/w cksum support yet, just 2577 * clear the flag. 
2578 */ 2579 DB_CKSUMFLAGS(mp) = 0; 2580 2581 /* Length ensured above */ 2582 ASSERT(MBLKL(mp) >= ip_hdr_length + SCTP_COMMON_HDR_LENGTH); 2583 sctph = (sctp_hdr_t *)(rptr + ip_hdr_length); 2584 2585 /* get the ports */ 2586 ports = *(uint32_t *)&sctph->sh_sport; 2587 2588 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst); 2589 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src); 2590 if (iraflags & IRAF_SCTP_CSUM_ERR) { 2591 /* 2592 * No potential sctp checksum errors go to the Sun 2593 * sctp stack however they might be Adler-32 summed 2594 * packets a userland stack bound to a raw IP socket 2595 * could reasonably use. Note though that Adler-32 is 2596 * a long deprecated algorithm and customer sctp 2597 * networks should eventually migrate to CRC-32 at 2598 * which time this facility should be removed. 2599 */ 2600 ip_fanout_sctp_raw(mp, ipha, NULL, ports, ira); 2601 return; 2602 } 2603 connp = sctp_fanout(&map_src, &map_dst, ports, ira, mp, 2604 sctps, sctph); 2605 if (connp == NULL) { 2606 /* Check for raw socket or OOTB handling */ 2607 ip_fanout_sctp_raw(mp, ipha, NULL, ports, ira); 2608 return; 2609 } 2610 if (connp->conn_incoming_ifindex != 0 && 2611 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2612 CONN_DEC_REF(connp); 2613 /* Check for raw socket or OOTB handling */ 2614 ip_fanout_sctp_raw(mp, ipha, NULL, ports, ira); 2615 return; 2616 } 2617 2618 /* Found a client; up it goes */ 2619 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2620 sctp_input(connp, ipha, NULL, mp, ira); 2621 /* sctp_input does a rele of the sctp_t */ 2622 return; 2623 } 2624 2625 case IPPROTO_UDP: 2626 /* First mblk contains IP+UDP headers as checked above */ 2627 ASSERT(MBLKL(mp) >= ip_hdr_length + UDPH_SIZE); 2628 2629 if (iraflags & IRAF_MULTIBROADCAST) { 2630 uint16_t *up; /* Pointer to ports in ULP header */ 2631 2632 up = (uint16_t *)((uchar_t *)ipha + ip_hdr_length); 2633 ip_fanout_udp_multi_v4(mp, ipha, up[1], up[0], ira); 2634 return; 2635 } 2636 2637 /* Look for 
AF_INET or AF_INET6 that matches */ 2638 connp = ipcl_classify_v4(mp, IPPROTO_UDP, ip_hdr_length, 2639 ira, ipst); 2640 if (connp == NULL) { 2641 no_udp_match: 2642 if (ipst->ips_ipcl_proto_fanout_v4[IPPROTO_UDP]. 2643 connf_head != NULL) { 2644 ASSERT(ira->ira_protocol == IPPROTO_UDP); 2645 ip_fanout_proto_v4(mp, ipha, ira); 2646 } else { 2647 ip_fanout_send_icmp_v4(mp, 2648 ICMP_DEST_UNREACHABLE, 2649 ICMP_PORT_UNREACHABLE, ira); 2650 } 2651 return; 2652 2653 } 2654 if (connp->conn_incoming_ifindex != 0 && 2655 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2656 CONN_DEC_REF(connp); 2657 goto no_udp_match; 2658 } 2659 if (IPCL_IS_NONSTR(connp) ? connp->conn_flow_cntrld : 2660 !canputnext(connp->conn_rq)) { 2661 CONN_DEC_REF(connp); 2662 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 2663 ip_drop_input("udpIfStatsInOverflows", mp, ill); 2664 freemsg(mp); 2665 return; 2666 } 2667 if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || 2668 (iraflags & IRAF_IPSEC_SECURE)) { 2669 mp = ipsec_check_inbound_policy(mp, connp, 2670 ipha, NULL, ira); 2671 if (mp == NULL) { 2672 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2673 /* Note that mp is NULL */ 2674 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2675 CONN_DEC_REF(connp); 2676 return; 2677 } 2678 } 2679 /* 2680 * Remove 0-spi if it's 0, or move everything behind 2681 * the UDP header over it and forward to ESP via 2682 * ip_fanout_v4(). 2683 */ 2684 if (connp->conn_udp->udp_nat_t_endpoint) { 2685 if (iraflags & IRAF_IPSEC_SECURE) { 2686 ip_drop_packet(mp, B_TRUE, ira->ira_ill, 2687 DROPPER(ipss, ipds_esp_nat_t_ipsec), 2688 &ipss->ipsec_dropper); 2689 CONN_DEC_REF(connp); 2690 return; 2691 } 2692 2693 mp = zero_spi_check(mp, ira); 2694 if (mp == NULL) { 2695 /* 2696 * Packet was consumed - probably sent to 2697 * ip_fanout_v4. 2698 */ 2699 CONN_DEC_REF(connp); 2700 return; 2701 } 2702 /* Else continue like a normal UDP packet. 
*/ 2703 ipha = (ipha_t *)mp->b_rptr; 2704 protocol = ipha->ipha_protocol; 2705 ira->ira_protocol = protocol; 2706 } 2707 /* Found a client; up it goes */ 2708 IP_STAT(ipst, ip_udp_fannorm); 2709 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2710 ira->ira_ill = ira->ira_rill = NULL; 2711 (connp->conn_recv)(connp, mp, NULL, ira); 2712 CONN_DEC_REF(connp); 2713 ira->ira_ill = ill; 2714 ira->ira_rill = rill; 2715 return; 2716 default: 2717 break; 2718 } 2719 2720 /* 2721 * Clear hardware checksumming flag as it is currently only 2722 * used by TCP and UDP. 2723 */ 2724 DB_CKSUMFLAGS(mp) = 0; 2725 2726 switch (protocol) { 2727 case IPPROTO_ICMP: 2728 /* 2729 * We need to accomodate icmp messages coming in clear 2730 * until we get everything secure from the wire. If 2731 * icmp_accept_clear_messages is zero we check with 2732 * the global policy and act accordingly. If it is 2733 * non-zero, we accept the message without any checks. 2734 * But *this does not mean* that this will be delivered 2735 * to RAW socket clients. By accepting we might send 2736 * replies back, change our MTU value etc., 2737 * but delivery to the ULP/clients depends on their 2738 * policy dispositions. 2739 */ 2740 if (ipst->ips_icmp_accept_clear_messages == 0) { 2741 mp = ipsec_check_global_policy(mp, NULL, 2742 ipha, NULL, ira, ns); 2743 if (mp == NULL) 2744 return; 2745 } 2746 2747 /* 2748 * On a labeled system, we have to check whether the zone 2749 * itself is permitted to receive raw traffic. 2750 */ 2751 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 2752 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) { 2753 BUMP_MIB(&ipst->ips_icmp_mib, icmpInErrors); 2754 ip_drop_input("tsol_can_accept_raw", mp, ill); 2755 freemsg(mp); 2756 return; 2757 } 2758 } 2759 2760 /* 2761 * ICMP header checksum, including checksum field, 2762 * should be zero. 
2763 */ 2764 if (IP_CSUM(mp, ip_hdr_length, 0)) { 2765 BUMP_MIB(&ipst->ips_icmp_mib, icmpInCksumErrs); 2766 ip_drop_input("icmpInCksumErrs", mp, ill); 2767 freemsg(mp); 2768 return; 2769 } 2770 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2771 mp = icmp_inbound_v4(mp, ira); 2772 if (mp == NULL) { 2773 /* No need to pass to RAW sockets */ 2774 return; 2775 } 2776 break; 2777 2778 case IPPROTO_IGMP: 2779 /* 2780 * If we are not willing to accept IGMP packets in clear, 2781 * then check with global policy. 2782 */ 2783 if (ipst->ips_igmp_accept_clear_messages == 0) { 2784 mp = ipsec_check_global_policy(mp, NULL, 2785 ipha, NULL, ira, ns); 2786 if (mp == NULL) 2787 return; 2788 } 2789 if ((ira->ira_flags & IRAF_SYSTEM_LABELED) && 2790 !tsol_can_accept_raw(mp, ira, B_TRUE)) { 2791 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2792 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2793 freemsg(mp); 2794 return; 2795 } 2796 /* 2797 * Validate checksum 2798 */ 2799 if (IP_CSUM(mp, ip_hdr_length, 0)) { 2800 ++ipst->ips_igmpstat.igps_rcv_badsum; 2801 ip_drop_input("igps_rcv_badsum", mp, ill); 2802 freemsg(mp); 2803 return; 2804 } 2805 2806 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2807 mp = igmp_input(mp, ira); 2808 if (mp == NULL) { 2809 /* Bad packet - discarded by igmp_input */ 2810 return; 2811 } 2812 break; 2813 case IPPROTO_PIM: 2814 /* 2815 * If we are not willing to accept PIM packets in clear, 2816 * then check with global policy. 
2817 */ 2818 if (ipst->ips_pim_accept_clear_messages == 0) { 2819 mp = ipsec_check_global_policy(mp, NULL, 2820 ipha, NULL, ira, ns); 2821 if (mp == NULL) 2822 return; 2823 } 2824 if ((ira->ira_flags & IRAF_SYSTEM_LABELED) && 2825 !tsol_can_accept_raw(mp, ira, B_TRUE)) { 2826 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2827 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2828 freemsg(mp); 2829 return; 2830 } 2831 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2832 2833 /* Checksum is verified in pim_input */ 2834 mp = pim_input(mp, ira); 2835 if (mp == NULL) { 2836 /* Bad packet - discarded by pim_input */ 2837 return; 2838 } 2839 break; 2840 case IPPROTO_AH: 2841 case IPPROTO_ESP: { 2842 /* 2843 * Fast path for AH/ESP. 2844 */ 2845 netstack_t *ns = ipst->ips_netstack; 2846 ipsec_stack_t *ipss = ns->netstack_ipsec; 2847 2848 IP_STAT(ipst, ipsec_proto_ahesp); 2849 2850 if (!ipsec_loaded(ipss)) { 2851 ip_proto_not_sup(mp, ira); 2852 return; 2853 } 2854 2855 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2856 /* select inbound SA and have IPsec process the pkt */ 2857 if (protocol == IPPROTO_ESP) { 2858 esph_t *esph; 2859 boolean_t esp_in_udp_sa; 2860 boolean_t esp_in_udp_packet; 2861 2862 mp = ipsec_inbound_esp_sa(mp, ira, &esph); 2863 if (mp == NULL) 2864 return; 2865 2866 ASSERT(esph != NULL); 2867 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2868 ASSERT(ira->ira_ipsec_esp_sa != NULL); 2869 ASSERT(ira->ira_ipsec_esp_sa->ipsa_input_func != NULL); 2870 2871 esp_in_udp_sa = ((ira->ira_ipsec_esp_sa->ipsa_flags & 2872 IPSA_F_NATT) != 0); 2873 esp_in_udp_packet = 2874 (ira->ira_flags & IRAF_ESP_UDP_PORTS) != 0; 2875 2876 /* 2877 * The following is a fancy, but quick, way of saying: 2878 * ESP-in-UDP SA and Raw ESP packet --> drop 2879 * OR 2880 * ESP SA and ESP-in-UDP packet --> drop 2881 */ 2882 if (esp_in_udp_sa != esp_in_udp_packet) { 2883 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2884 ip_drop_packet(mp, B_TRUE, ira->ira_ill, 2885 DROPPER(ipss, 
ipds_esp_no_sa), 2886 &ipss->ipsec_dropper); 2887 return; 2888 } 2889 mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph, 2890 ira); 2891 } else { 2892 ah_t *ah; 2893 2894 mp = ipsec_inbound_ah_sa(mp, ira, &ah); 2895 if (mp == NULL) 2896 return; 2897 2898 ASSERT(ah != NULL); 2899 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2900 ASSERT(ira->ira_ipsec_ah_sa != NULL); 2901 ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL); 2902 mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, 2903 ira); 2904 } 2905 2906 if (mp == NULL) { 2907 /* 2908 * Either it failed or is pending. In the former case 2909 * ipIfStatsInDiscards was increased. 2910 */ 2911 return; 2912 } 2913 /* we're done with IPsec processing, send it up */ 2914 ip_input_post_ipsec(mp, ira); 2915 return; 2916 } 2917 case IPPROTO_ENCAP: { 2918 ipha_t *inner_ipha; 2919 2920 /* 2921 * Handle self-encapsulated packets (IP-in-IP where 2922 * the inner addresses == the outer addresses). 2923 */ 2924 if ((uchar_t *)ipha + ip_hdr_length + sizeof (ipha_t) > 2925 mp->b_wptr) { 2926 if (ira->ira_pktlen < 2927 ip_hdr_length + sizeof (ipha_t)) { 2928 BUMP_MIB(ill->ill_ip_mib, 2929 ipIfStatsInTruncatedPkts); 2930 ip_drop_input("ipIfStatsInTruncatedPkts", 2931 mp, ill); 2932 freemsg(mp); 2933 return; 2934 } 2935 ipha = ip_pullup(mp, (uchar_t *)ipha + ip_hdr_length + 2936 sizeof (ipha_t) - mp->b_rptr, ira); 2937 if (ipha == NULL) { 2938 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2939 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2940 freemsg(mp); 2941 return; 2942 } 2943 } 2944 inner_ipha = (ipha_t *)((uchar_t *)ipha + ip_hdr_length); 2945 /* 2946 * Check the sanity of the inner IP header. 
2947 */ 2948 if ((IPH_HDR_VERSION(inner_ipha) != IPV4_VERSION)) { 2949 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2950 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2951 freemsg(mp); 2952 return; 2953 } 2954 if (IPH_HDR_LENGTH(inner_ipha) < sizeof (ipha_t)) { 2955 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2956 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2957 freemsg(mp); 2958 return; 2959 } 2960 if (inner_ipha->ipha_src != ipha->ipha_src || 2961 inner_ipha->ipha_dst != ipha->ipha_dst) { 2962 /* We fallthru to iptun fanout below */ 2963 goto iptun; 2964 } 2965 2966 /* 2967 * Self-encapsulated tunnel packet. Remove 2968 * the outer IP header and fanout again. 2969 * We also need to make sure that the inner 2970 * header is pulled up until options. 2971 */ 2972 mp->b_rptr = (uchar_t *)inner_ipha; 2973 ipha = inner_ipha; 2974 ip_hdr_length = IPH_HDR_LENGTH(ipha); 2975 if ((uchar_t *)ipha + ip_hdr_length > mp->b_wptr) { 2976 if (ira->ira_pktlen < 2977 (uchar_t *)ipha + ip_hdr_length - mp->b_rptr) { 2978 BUMP_MIB(ill->ill_ip_mib, 2979 ipIfStatsInTruncatedPkts); 2980 ip_drop_input("ipIfStatsInTruncatedPkts", 2981 mp, ill); 2982 freemsg(mp); 2983 return; 2984 } 2985 ipha = ip_pullup(mp, 2986 (uchar_t *)ipha + ip_hdr_length - mp->b_rptr, ira); 2987 if (ipha == NULL) { 2988 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2989 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2990 freemsg(mp); 2991 return; 2992 } 2993 } 2994 if (ip_hdr_length > sizeof (ipha_t)) { 2995 /* We got options on the inner packet. */ 2996 ipaddr_t dst = ipha->ipha_dst; 2997 int error = 0; 2998 2999 dst = ip_input_options(ipha, dst, mp, ira, &error); 3000 if (error != 0) { 3001 /* 3002 * An ICMP error has been sent and the packet 3003 * has been dropped. 3004 */ 3005 return; 3006 } 3007 if (dst != ipha->ipha_dst) { 3008 /* 3009 * Someone put a source-route in 3010 * the inside header of a self- 3011 * encapsulated packet. 
Drop it 3012 * with extreme prejudice and let 3013 * the sender know. 3014 */ 3015 ip_drop_input("ICMP_SOURCE_ROUTE_FAILED", 3016 mp, ill); 3017 icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED, 3018 ira); 3019 return; 3020 } 3021 } 3022 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) { 3023 /* 3024 * This means that somebody is sending 3025 * Self-encapsualted packets without AH/ESP. 3026 * 3027 * Send this packet to find a tunnel endpoint. 3028 * if I can't find one, an ICMP 3029 * PROTOCOL_UNREACHABLE will get sent. 3030 */ 3031 protocol = ipha->ipha_protocol; 3032 ira->ira_protocol = protocol; 3033 goto iptun; 3034 } 3035 3036 /* Update based on removed IP header */ 3037 ira->ira_ip_hdr_length = ip_hdr_length; 3038 ira->ira_pktlen = ntohs(ipha->ipha_length); 3039 3040 if (ira->ira_flags & IRAF_IPSEC_DECAPS) { 3041 /* 3042 * This packet is self-encapsulated multiple 3043 * times. We don't want to recurse infinitely. 3044 * To keep it simple, drop the packet. 3045 */ 3046 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3047 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3048 freemsg(mp); 3049 return; 3050 } 3051 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 3052 ira->ira_flags |= IRAF_IPSEC_DECAPS; 3053 3054 ip_input_post_ipsec(mp, ira); 3055 return; 3056 } 3057 3058 iptun: /* IPPROTO_ENCAPS that is not self-encapsulated */ 3059 case IPPROTO_IPV6: 3060 /* iptun will verify trusted label */ 3061 connp = ipcl_classify_v4(mp, protocol, ip_hdr_length, 3062 ira, ipst); 3063 if (connp != NULL) { 3064 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3065 ira->ira_ill = ira->ira_rill = NULL; 3066 (connp->conn_recv)(connp, mp, NULL, ira); 3067 CONN_DEC_REF(connp); 3068 ira->ira_ill = ill; 3069 ira->ira_rill = rill; 3070 return; 3071 } 3072 /* FALLTHRU */ 3073 default: 3074 /* 3075 * On a labeled system, we have to check whether the zone 3076 * itself is permitted to receive raw traffic. 
3077 */ 3078 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 3079 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) { 3080 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3081 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3082 freemsg(mp); 3083 return; 3084 } 3085 } 3086 break; 3087 } 3088 3089 /* 3090 * The above input functions may have returned the pulled up message. 3091 * So ipha need to be reinitialized. 3092 */ 3093 ipha = (ipha_t *)mp->b_rptr; 3094 ira->ira_protocol = protocol = ipha->ipha_protocol; 3095 if (ipst->ips_ipcl_proto_fanout_v4[protocol].connf_head == NULL) { 3096 /* 3097 * No user-level listener for these packets packets. 3098 * Check for IPPROTO_ENCAP... 3099 */ 3100 if (protocol == IPPROTO_ENCAP && ipst->ips_ip_g_mrouter) { 3101 /* 3102 * Check policy here, 3103 * THEN ship off to ip_mroute_decap(). 3104 * 3105 * BTW, If I match a configured IP-in-IP 3106 * tunnel above, this path will not be reached, and 3107 * ip_mroute_decap will never be called. 3108 */ 3109 mp = ipsec_check_global_policy(mp, connp, 3110 ipha, NULL, ira, ns); 3111 if (mp != NULL) { 3112 ip_mroute_decap(mp, ira); 3113 } /* Else we already freed everything! */ 3114 } else { 3115 ip_proto_not_sup(mp, ira); 3116 } 3117 return; 3118 } 3119 3120 /* 3121 * Handle fanout to raw sockets. There 3122 * can be more than one stream bound to a particular 3123 * protocol. When this is the case, each one gets a copy 3124 * of any incoming packets. 3125 */ 3126 ASSERT(ira->ira_protocol == ipha->ipha_protocol); 3127 ip_fanout_proto_v4(mp, ipha, ira); 3128 return; 3129 3130 discard: 3131 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3132 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3133 freemsg(mp); 3134 #undef rptr 3135 }