1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 24 * 25 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 26 */ 27 /* Copyright (c) 1990 Mentat Inc. 
*/ 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/sysmacros.h> 34 #include <sys/strsubr.h> 35 #include <sys/strlog.h> 36 #include <sys/strsun.h> 37 #include <sys/zone.h> 38 #define _SUN_TPI_VERSION 2 39 #include <sys/tihdr.h> 40 #include <sys/xti_inet.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/cmn_err.h> 44 #include <sys/debug.h> 45 #include <sys/kobj.h> 46 #include <sys/modctl.h> 47 #include <sys/atomic.h> 48 #include <sys/policy.h> 49 #include <sys/priv.h> 50 51 #include <sys/systm.h> 52 #include <sys/param.h> 53 #include <sys/kmem.h> 54 #include <sys/sdt.h> 55 #include <sys/socket.h> 56 #include <sys/vtrace.h> 57 #include <sys/isa_defs.h> 58 #include <sys/mac.h> 59 #include <net/if.h> 60 #include <net/if_arp.h> 61 #include <net/route.h> 62 #include <sys/sockio.h> 63 #include <netinet/in.h> 64 #include <net/if_dl.h> 65 66 #include <inet/common.h> 67 #include <inet/mi.h> 68 #include <inet/mib2.h> 69 #include <inet/nd.h> 70 #include <inet/arp.h> 71 #include <inet/snmpcom.h> 72 #include <inet/kstatcom.h> 73 74 #include <netinet/igmp_var.h> 75 #include <netinet/ip6.h> 76 #include <netinet/icmp6.h> 77 #include <netinet/sctp.h> 78 79 #include <inet/ip.h> 80 #include <inet/ip_impl.h> 81 #include <inet/ip6.h> 82 #include <inet/ip6_asp.h> 83 #include <inet/optcom.h> 84 #include <inet/tcp.h> 85 #include <inet/tcp_impl.h> 86 #include <inet/ip_multi.h> 87 #include <inet/ip_if.h> 88 #include <inet/ip_ire.h> 89 #include <inet/ip_ftable.h> 90 #include <inet/ip_rts.h> 91 #include <inet/ip_ndp.h> 92 #include <inet/ip_listutils.h> 93 #include <netinet/igmp.h> 94 #include <netinet/ip_mroute.h> 95 #include <inet/ipp_common.h> 96 97 #include <net/pfkeyv2.h> 98 #include <inet/sadb.h> 99 #include <inet/ipsec_impl.h> 100 #include <inet/ipdrop.h> 101 #include <inet/ip_netinfo.h> 102 #include <inet/ilb_ip.h> 103 #include <sys/squeue_impl.h> 104 #include <sys/squeue.h> 105 106 
#include <sys/ethernet.h> 107 #include <net/if_types.h> 108 #include <sys/cpuvar.h> 109 110 #include <ipp/ipp.h> 111 #include <ipp/ipp_impl.h> 112 #include <ipp/ipgpc/ipgpc.h> 113 114 #include <sys/pattr.h> 115 #include <inet/ipclassifier.h> 116 #include <inet/sctp_ip.h> 117 #include <inet/sctp/sctp_impl.h> 118 #include <inet/udp_impl.h> 119 #include <sys/sunddi.h> 120 121 #include <sys/tsol/label.h> 122 #include <sys/tsol/tnet.h> 123 124 #include <sys/clock_impl.h> /* For LBOLT_FASTPATH{,64} */ 125 126 #ifdef DEBUG 127 extern boolean_t skip_sctp_cksum; 128 #endif 129 130 static void ip_input_local_v4(ire_t *, mblk_t *, ipha_t *, 131 ip_recv_attr_t *); 132 133 static void ip_input_broadcast_v4(ire_t *, mblk_t *, ipha_t *, 134 ip_recv_attr_t *); 135 static void ip_input_multicast_v4(ire_t *, mblk_t *, ipha_t *, 136 ip_recv_attr_t *); 137 138 #pragma inline(ip_input_common_v4, ip_input_local_v4, ip_forward_xmit_v4) 139 140 /* 141 * Direct read side procedure capable of dealing with chains. GLDv3 based 142 * drivers call this function directly with mblk chains while STREAMS 143 * read side procedure ip_rput() calls this for single packet with ip_ring 144 * set to NULL to process one packet at a time. 145 * 146 * The ill will always be valid if this function is called directly from 147 * the driver. 148 * 149 * If ip_input() is called from GLDv3: 150 * 151 * - This must be a non-VLAN IP stream. 152 * - 'mp' is either an untagged or a special priority-tagged packet. 153 * - Any VLAN tag that was in the MAC header has been stripped. 154 * 155 * If the IP header in packet is not 32-bit aligned, every message in the 156 * chain will be aligned before further operations. This is required on SPARC 157 * platform. 
158 */ 159 void 160 ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, 161 struct mac_header_info_s *mhip) 162 { 163 (void) ip_input_common_v4(ill, ip_ring, mp_chain, mhip, NULL, NULL, 164 NULL); 165 } 166 167 /* 168 * ip_accept_tcp() - This function is called by the squeue when it retrieves 169 * a chain of packets in the poll mode. The packets have gone through the 170 * data link processing but not IP processing. For performance and latency 171 * reasons, the squeue wants to process the chain in line instead of feeding 172 * it back via ip_input path. 173 * 174 * We set up the ip_recv_attr_t with IRAF_TARGET_SQP to that ip_fanout_v4 175 * will pass back any TCP packets matching the target sqp to 176 * ip_input_common_v4 using ira_target_sqp_mp. Other packets are handled by 177 * ip_input_v4 and ip_fanout_v4 as normal. 178 * The TCP packets that match the target squeue are returned to the caller 179 * as a b_next chain after each packet has been prepend with an mblk 180 * from ip_recv_attr_to_mblk. 181 */ 182 mblk_t * 183 ip_accept_tcp(ill_t *ill, ill_rx_ring_t *ip_ring, squeue_t *target_sqp, 184 mblk_t *mp_chain, mblk_t **last, uint_t *cnt) 185 { 186 return (ip_input_common_v4(ill, ip_ring, mp_chain, NULL, target_sqp, 187 last, cnt)); 188 } 189 190 /* 191 * Used by ip_input and ip_accept_tcp 192 * The last three arguments are only used by ip_accept_tcp, and mhip is 193 * only used by ip_input. 
 */
/*
 * Walk the b_next chain starting at mp_chain and run each packet through
 * ill->ill_inputfn with a receive-attribute structure (iras) that lives on
 * this function's stack.
 *
 *   ill        - receiving ill; must not be NULL (asserted).
 *   ip_ring    - rx ring, stored in ira_ring and used for the xmit hint.
 *   mp_chain   - chain of M_DATA packets; must not be NULL (asserted).
 *   mhip       - optional mac header info; used to derive the L2 multicast/
 *                broadcast chain flags and cleared after the first packet.
 *   target_sqp - when non-NULL, IRAF_TARGET_SQP is set for every packet.
 *   last, cnt  - out-parameters for the accepted chain's tail and length.
 *
 * Returns the head of the chain of packets handed back through
 * ira_target_sqp_mp, or NULL if there were none. Note that *last and *cnt
 * are written only when the return value is non-NULL.
 */
mblk_t *
ip_input_common_v4(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
    struct mac_header_info_s *mhip, squeue_t *target_sqp,
    mblk_t **last, uint_t *cnt)
{
	mblk_t		*mp;
	ipha_t		*ipha;
	ip_recv_attr_t	iras;	/* Receive attributes */
	rtc_t		rtc;
	iaflags_t	chain_flags = 0;	/* Fixed for chain */
	mblk_t		*ahead = NULL;	/* Accepted head */
	mblk_t		*atail = NULL;	/* Accepted tail */
	uint_t		acnt = 0;	/* Accepted count */

	ASSERT(mp_chain != NULL);
	ASSERT(ill != NULL);

	/* These ones do not change as we loop over packets */
	iras.ira_ill = iras.ira_rill = ill;
	iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex;
	iras.ira_rifindex = iras.ira_ruifindex;
	iras.ira_sqp = NULL;
	iras.ira_ring = ip_ring;
	/* For ECMP and outbound transmit ring selection */
	iras.ira_xmit_hint = ILL_RING_TO_XMIT_HINT(ip_ring);

	iras.ira_target_sqp = target_sqp;
	iras.ira_target_sqp_mp = NULL;
	if (target_sqp != NULL)
		chain_flags |= IRAF_TARGET_SQP;

	/*
	 * We try to have a mhip pointer when possible, but
	 * it might be NULL in some cases.  In those cases we
	 * have to assume unicast.
	 */
	iras.ira_mhip = mhip;
	iras.ira_flags = 0;
	if (mhip != NULL) {
		/* Other destination types leave chain_flags untouched. */
		switch (mhip->mhi_dsttype) {
		case MAC_ADDRTYPE_MULTICAST :
			chain_flags |= IRAF_L2DST_MULTICAST;
			break;
		case MAC_ADDRTYPE_BROADCAST :
			chain_flags |= IRAF_L2DST_BROADCAST;
			break;
		}
	}

	/*
	 * Initialize the one-element route cache.
	 *
	 * We do ire caching from one iteration to
	 * another. In the event the packet chain contains
	 * all packets from the same dst, this caching saves
	 * an ire_route_recursive for each of the succeeding
	 * packets in a packet chain.
	 */
	rtc.rtc_ire = NULL;
	rtc.rtc_ipaddr = INADDR_ANY;

	/*
	 * Loop over b_next. mp_chain always points at the not-yet-processed
	 * remainder, so each packet is fully unlinked before it is handled.
	 */
	for (mp = mp_chain; mp != NULL; mp = mp_chain) {
		mp_chain = mp->b_next;
		mp->b_next = NULL;

		ASSERT(DB_TYPE(mp) == M_DATA);


		/*
		 * if db_ref > 1 then copymsg and free original. Packet
		 * may be changed and we do not want the other entity
		 * who has a reference to this message to trip over the
		 * changes. This is a blind change because trying to
		 * catch all places that might change the packet is too
		 * difficult.
		 *
		 * This corresponds to the fast path case, where we have
		 * a chain of M_DATA mblks.  We check the db_ref count
		 * of only the 1st data block in the mblk chain. There
		 * doesn't seem to be a reason why a device driver would
		 * send up data with varying db_ref counts in the mblk
		 * chain. In any case the Fast path is a private
		 * interface, and our drivers don't do such a thing.
		 * Given the above assumption, there is no need to walk
		 * down the entire mblk chain (which could have a
		 * potential performance problem)
		 *
		 * The "(DB_REF(mp) > 1)" check was moved from ip_rput()
		 * to here because of exclusive ip stacks and vnics.
		 * Packets transmitted from exclusive stack over vnic
		 * can have db_ref > 1 and when it gets looped back to
		 * another vnic in a different zone, you have ip_input()
		 * getting dblks with db_ref > 1. So if someone
		 * complains of TCP performance under this scenario,
		 * take a serious look here on the impact of copymsg().
		 */
		if (DB_REF(mp) > 1) {
			if ((mp = ip_fix_dbref(mp, &iras)) == NULL) {
				/* mhip might point into 1st packet in chain */
				iras.ira_mhip = NULL;
				continue;
			}
		}

		/*
		 * IP header ptr not aligned?
		 * OR IP header not complete in first mblk
		 */
		ipha = (ipha_t *)mp->b_rptr;
		if (!OK_32PTR(ipha) || MBLKL(mp) < IP_SIMPLE_HDR_LENGTH) {
			mp = ip_check_and_align_header(mp, IP_SIMPLE_HDR_LENGTH,
			    &iras);
			if (mp == NULL) {
				/* mhip might point into 1st packet in chain */
				iras.ira_mhip = NULL;
				continue;
			}
			/* The helper may have returned a different mblk. */
			ipha = (ipha_t *)mp->b_rptr;
		}

		/* Protect against a mix of Ethertypes and IP versions */
		if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
			ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
			freemsg(mp);
			/* mhip might point into 1st packet in the chain. */
			iras.ira_mhip = NULL;
			continue;
		}

		/*
		 * Check for Martian addrs; we have to explicitly
		 * test for zero dst since this is also used as
		 * an indication that the rtc is not used.
		 */
		if (ipha->ipha_dst == INADDR_ANY) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
			freemsg(mp);
			/* mhip might point into 1st packet in the chain. */
			iras.ira_mhip = NULL;
			continue;
		}

		/*
		 * Keep L2SRC from a previous packet in chain since mhip
		 * might point into an earlier packet in the chain.
		 * Keep IRAF_VERIFIED_SRC to avoid redoing broadcast
		 * source check in forwarding path.
		 */
		chain_flags |= (iras.ira_flags &
		    (IRAF_L2SRC_SET|IRAF_VERIFIED_SRC));

		/* Reset the per-packet attributes before each dispatch. */
		iras.ira_flags = IRAF_IS_IPV4 | IRAF_VERIFY_IP_CKSUM |
		    IRAF_VERIFY_ULP_CKSUM | chain_flags;
		iras.ira_free_flags = 0;
		iras.ira_cred = NULL;
		iras.ira_cpid = NOPID;
		iras.ira_tsl = NULL;
		iras.ira_zoneid = ALL_ZONES;	/* Default for forwarding */

		/*
		 * We must count all incoming packets, even if they end
		 * up being dropped later on. Defer counting bytes until
		 * we have the whole IP header in first mblk.
		 */
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives);

		iras.ira_pktlen = ntohs(ipha->ipha_length);
		UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets,
		    iras.ira_pktlen);

		/*
		 * Call one of:
		 *	ill_input_full_v4
		 *	ill_input_short_v4
		 * The former is used in unusual cases. See ill_set_inputfn().
		 */
		(*ill->ill_inputfn)(mp, ipha, &ipha->ipha_dst, &iras, &rtc);

		/* Any references to clean up? No hold on ira_ill */
		if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED))
			ira_cleanup(&iras, B_FALSE);

		if (iras.ira_target_sqp_mp != NULL) {
			/* Better be called from ip_accept_tcp */
			ASSERT(target_sqp != NULL);

			/* Found one packet to accept */
			mp = iras.ira_target_sqp_mp;
			iras.ira_target_sqp_mp = NULL;
			ASSERT(ip_recv_attr_is_mblk(mp));

			/* Append to the accepted b_next chain. */
			if (atail != NULL)
				atail->b_next = mp;
			else
				ahead = mp;
			atail = mp;
			acnt++;
			mp = NULL;
		}
		/* mhip might point into 1st packet in the chain. */
		iras.ira_mhip = NULL;
	}
	/* Any remaining references to the route cache? */
	if (rtc.rtc_ire != NULL) {
		ASSERT(rtc.rtc_ipaddr != INADDR_ANY);
		ire_refrele(rtc.rtc_ire);
	}

	if (ahead != NULL) {
		/* Better be called from ip_accept_tcp */
		ASSERT(target_sqp != NULL);
		*last = atail;
		*cnt = acnt;
		return (ahead);
	}

	return (NULL);
}

/*
 * This input function is used when
 *  - is_system_labeled()
 *  - CGTP filtering
 *  - DHCP unicast before we have an IP address configured
 *  - there is a listener for IPPROTO_RSVP
 *
 * It performs those extra checks, possibly overriding the next hop and
 * setting IRAF_SYSTEM_LABELED, IRAF_DHCP_UNICAST or IRAF_RSVP in
 * ira->ira_flags, and then hands the packet to ill_input_short_v4().
 *
 *   mp          - the packet; freed here on every drop path.
 *   iph_arg     - pointer to the IPv4 header (ipha_t *).
 *   nexthop_arg - pointer to the initial next hop (ipaddr_t *); only read.
 *   ira         - receive attributes for this packet.
 *   rtc         - one-element route cache, passed through unchanged.
 */
void
ill_input_full_v4(mblk_t *mp, void *iph_arg, void *nexthop_arg,
    ip_recv_attr_t *ira, rtc_t *rtc)
{
	ipha_t		*ipha = (ipha_t *)iph_arg;
	ipaddr_t	nexthop = *(ipaddr_t *)nexthop_arg;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	int		cgtp_flt_pkt;

	ASSERT(ira->ira_tsl == NULL);

	/*
	 * Attach any necessary label information to
	 * this packet
	 */
	if (is_system_labeled()) {
		ira->ira_flags |= IRAF_SYSTEM_LABELED;

		/*
		 * This updates ira_cred, ira_tsl and ira_free_flags based
		 * on the label.
		 */
		if (!tsol_get_pkt_label(mp, IPV4_VERSION, ira)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards", mp, ill);
			freemsg(mp);
			return;
		}
		/* Note that ira_tsl can be NULL here. */

		/* tsol_get_pkt_label sometimes does pullupmsg */
		ipha = (ipha_t *)mp->b_rptr;
	}

	/*
	 * Invoke the CGTP (multirouting) filtering module to process
	 * the incoming packet. Packets identified as duplicates
	 * must be discarded. Filtering is active only if the
	 * ip_cgtp_filter ndd variable is non-zero.
	 */
	cgtp_flt_pkt = CGTP_IP_PKT_NOT_CGTP;
	if (ipst->ips_ip_cgtp_filter &&
	    ipst->ips_ip_cgtp_filter_ops != NULL) {
		netstackid_t stackid;

		stackid = ipst->ips_netstack->netstack_stackid;
		/*
		 * CGTP and IPMP are mutually exclusive so
		 * phyint_ifindex is fine here.
		 */
		cgtp_flt_pkt =
		    ipst->ips_ip_cgtp_filter_ops->cfo_filter(stackid,
		    ill->ill_phyint->phyint_ifindex, mp);
		if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) {
			ip_drop_input("CGTP_IP_PKT_DUPLICATE", mp, ill);
			freemsg(mp);
			return;
		}
	}

	/*
	 * Brutal hack for DHCPv4 unicast: RFC2131 allows a DHCP
	 * server to unicast DHCP packets to a DHCP client using the
	 * IP address it is offering to the client.  This can be
	 * disabled through the "broadcast bit", but not all DHCP
	 * servers honor that bit.  Therefore, to interoperate with as
	 * many DHCP servers as possible, the DHCP client allows the
	 * server to unicast, but we treat those packets as broadcast
	 * here.  Note that we don't rewrite the packet itself since
	 * (a) that would mess up the checksums and (b) the DHCP
	 * client conn is bound to INADDR_ANY so ip_fanout_udp() will
	 * hand it the packet regardless.
	 */
	if (ill->ill_dhcpinit != 0 &&
	    ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION &&
	    ipha->ipha_protocol == IPPROTO_UDP) {
		udpha_t *udpha;

		/* Need the UDP header contiguous with the IP header. */
		ipha = ip_pullup(mp, sizeof (ipha_t) + sizeof (udpha_t), ira);
		if (ipha == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards - dhcp", mp, ill);
			freemsg(mp);
			return;
		}
		/* Reload since pullupmsg() can change b_rptr. */
		udpha = (udpha_t *)&ipha[1];

		if (ntohs(udpha->uha_dst_port) == IPPORT_BOOTPC) {
			DTRACE_PROBE2(ip4__dhcpinit__pkt, ill_t *, ill,
			    mblk_t *, mp);
			/*
			 * This assumes that we deliver to all conns for
			 * multicast and broadcast packets.
			 */
			nexthop = INADDR_BROADCAST;
			ira->ira_flags |= IRAF_DHCP_UNICAST;
		}
	}

	/*
	 * If rsvpd is running, let RSVP daemon handle its processing
	 * and forwarding of RSVP multicast/unicast packets.
	 * If rsvpd is not running but mrouted is running, RSVP
	 * multicast packets are forwarded as multicast traffic
	 * and RSVP unicast packets are forwarded by unicast router.
	 * If neither rsvpd nor mrouted is running, RSVP multicast
	 * packets are not forwarded, but the unicast packets are
	 * forwarded like unicast traffic.
	 */
	if (ipha->ipha_protocol == IPPROTO_RSVP &&
	    ipst->ips_ipcl_proto_fanout_v4[IPPROTO_RSVP].connf_head != NULL) {
		/* RSVP packet and rsvpd running. Treat as ours */
		ip2dbg(("ip_input: RSVP for us: 0x%x\n", ntohl(nexthop)));
		/*
		 * We use a multicast address to get the packet to
		 * ire_recv_multicast_v4. There will not be a membership
		 * check since we set IRAF_RSVP
		 */
		nexthop = htonl(INADDR_UNSPEC_GROUP);
		ira->ira_flags |= IRAF_RSVP;
	}

	/* Pass our (possibly overridden) local nexthop, not the caller's. */
	ill_input_short_v4(mp, ipha, &nexthop, ira, rtc);
}

/*
 * This is the tail-end of the full receive side packet handling.
 * It can be used directly when the configuration is simple.
 *
 * In order it: drops packets with a loopback source or destination,
 * validates the length, runs the physical-in firewall and observability
 * hooks, honours a hardware-verified header checksum, applies ILB rules,
 * validates and processes IP options, picks an ire (using and updating the
 * one-element route cache in *rtc) and calls that ire's ire_recvfn.
 *
 *   mp          - the packet; consumed on every path.
 *   iph_arg     - pointer to the IPv4 header (ipha_t *).
 *   nexthop_arg - pointer to the next hop (ipaddr_t *); only read.
 *   ira         - receive attributes; ira_pktlen, ira_ip_hdr_length and
 *                 ira_flags may be updated here.
 *   rtc         - route cache; this function stores the looked-up ire in it
 *                 and releases the ire it replaces.
 */
void
ill_input_short_v4(mblk_t *mp, void *iph_arg, void *nexthop_arg,
    ip_recv_attr_t *ira, rtc_t *rtc)
{
	ire_t		*ire;
	uint_t		opt_len;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	uint_t		pkt_len;
	ssize_t		len;
	ipha_t		*ipha = (ipha_t *)iph_arg;
	ipaddr_t	nexthop = *(ipaddr_t *)nexthop_arg;
	ilb_stack_t	*ilbs = ipst->ips_netstack->netstack_ilb;
	uint_t		irr_flags;
	/* rptr tracks ipha, so it stays valid whenever ipha is reloaded. */
#define	rptr	((uchar_t *)ipha)

	ASSERT(DB_TYPE(mp) == M_DATA);

	/*
	 * The following test for loopback is faster than
	 * IP_LOOPBACK_ADDR(), because it avoids any bitwise
	 * operations.
	 * Note that these addresses are always in network byte order
	 */
	if (((*(uchar_t *)&ipha->ipha_dst) == IN_LOOPBACKNET) ||
	    ((*(uchar_t *)&ipha->ipha_src) == IN_LOOPBACKNET)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
		ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
		freemsg(mp);
		return;
	}

	len = mp->b_wptr - rptr;
	pkt_len = ira->ira_pktlen;

	/*
	 * multiple mblk or too short: len becomes the difference between
	 * the bytes in the first mblk and the length in the IP header.
	 */
	len -= pkt_len;
	if (len != 0) {
		mp = ip_check_length(mp, rptr, len, pkt_len,
		    IP_SIMPLE_HDR_LENGTH, ira);
		if (mp == NULL)
			return;
		ipha = (ipha_t *)mp->b_rptr;
	}

	DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
	    ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *, NULL,
	    int, 0);

	/*
	 * The event for packets being received from a 'physical'
	 * interface is placed after validation of the source and/or
	 * destination address as being local so that packets can be
	 * redirected to loopback addresses using ipnat.
	 */
	DTRACE_PROBE4(ip4__physical__in__start,
	    ill_t *, ill, ill_t *, NULL,
	    ipha_t *, ipha, mblk_t *, mp);

	if (HOOKS4_INTERESTED_PHYSICAL_IN(ipst)) {
		int	ll_multicast = 0;
		int	error;
		ipaddr_t orig_dst = ipha->ipha_dst;

		if (ira->ira_flags & IRAF_L2DST_MULTICAST)
			ll_multicast = HPE_MULTICAST;
		else if (ira->ira_flags & IRAF_L2DST_BROADCAST)
			ll_multicast = HPE_BROADCAST;

		/* mp is passed twice; it is tested for NULL afterwards. */
		FW_HOOKS(ipst->ips_ip4_physical_in_event,
		    ipst->ips_ipv4firewall_physical_in,
		    ill, NULL, ipha, mp, mp, ll_multicast, ipst, error);

		DTRACE_PROBE1(ip4__physical__in__end, mblk_t *, mp);

		if (mp == NULL)
			return;
		/* The length could have changed */
		ipha = (ipha_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ipha->ipha_length);
		pkt_len = ira->ira_pktlen;

		/*
		 * In case the destination changed we override any previous
		 * change to nexthop.
		 */
		if (orig_dst != ipha->ipha_dst)
			nexthop = ipha->ipha_dst;
		if (nexthop == INADDR_ANY) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
			freemsg(mp);
			return;
		}
	}

	if (ipst->ips_ip4_observe.he_interested) {
		zoneid_t dzone;

		/*
		 * On the inbound path the src zone will be unknown as
		 * this packet has come from the wire.
		 */
		dzone = ip_get_zoneid_v4(nexthop, mp, ira, ALL_ZONES);
		ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, ipst);
	}

	/*
	 * If there is a good HW IP header checksum we clear the need
	 * to look at the IP header checksum.
	 */
	if ((DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) &&
	    ILL_HCKSUM_CAPABLE(ill) && dohwcksum) {
		/* Header checksum was ok. Clear the flag */
		DB_CKSUMFLAGS(mp) &= ~HCK_IPV4_HDRCKSUM;
		ira->ira_flags &= ~IRAF_VERIFY_IP_CKSUM;
	}

	/*
	 * Here we check to see if the machine is set up as an
	 * L3 loadbalancer and if the incoming packet is for a VIP
	 *
	 * Check the following:
	 * - there is at least a rule
	 * - protocol of the packet is supported
	 */
	if (ilb_has_rules(ilbs) && ILB_SUPP_L4(ipha->ipha_protocol)) {
		ipaddr_t	lb_dst;
		int		lb_ret;

		/* For convenience, we pull up the mblk. */
		if (mp->b_cont != NULL) {
			if (pullupmsg(mp, -1) == 0) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				ip_drop_input("ipIfStatsInDiscards - pullupmsg",
				    mp, ill);
				freemsg(mp);
				return;
			}
			ipha = (ipha_t *)mp->b_rptr;
		}

		/*
		 * We just drop all fragments going to any VIP, at
		 * least for now....
		 */
		if (ntohs(ipha->ipha_fragment_offset_and_flags) &
		    (IPH_MF | IPH_OFFSET)) {
			/* Fragments not aimed at a VIP skip ILB entirely. */
			if (!ilb_rule_match_vip_v4(ilbs, nexthop, NULL)) {
				goto after_ilb;
			}

			ILB_KSTAT_UPDATE(ilbs, ip_frag_in, 1);
			ILB_KSTAT_UPDATE(ilbs, ip_frag_dropped, 1);
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ILB fragment", mp, ill);
			freemsg(mp);
			return;
		}
		lb_ret = ilb_check_v4(ilbs, ill, mp, ipha, ipha->ipha_protocol,
		    (uint8_t *)ipha + IPH_HDR_LENGTH(ipha), &lb_dst);

		if (lb_ret == ILB_DROPPED) {
			/* Is this the right counter to increase? */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ILB_DROPPED", mp, ill);
			freemsg(mp);
			return;
		}
		if (lb_ret == ILB_BALANCED) {
			/* Set the dst to that of the chosen server */
			nexthop = lb_dst;
			DB_CKSUMFLAGS(mp) = 0;
		}
	}

after_ilb:
	/* Non-zero opt_len means the header is longer than the simple one. */
	opt_len = ipha->ipha_version_and_hdr_length - IP_SIMPLE_HDR_VERSION;
	ira->ira_ip_hdr_length = IP_SIMPLE_HDR_LENGTH;
	if (opt_len != 0) {
		int error = 0;

		/* opt_len counts 32-bit words, hence the shift by two. */
		ira->ira_ip_hdr_length += (opt_len << 2);
		ira->ira_flags |= IRAF_IPV4_OPTIONS;

		/* IP Options present!  Validate the length. */
		mp = ip_check_optlen(mp, ipha, opt_len, pkt_len, ira);
		if (mp == NULL)
			return;

		/* Might have changed */
		ipha = (ipha_t *)mp->b_rptr;

		/* Verify IP header checksum before parsing the options */
		if ((ira->ira_flags & IRAF_VERIFY_IP_CKSUM) &&
		    ip_csum_hdr(ipha)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs);
			ip_drop_input("ipIfStatsInCksumErrs", mp, ill);
			freemsg(mp);
			return;
		}
		ira->ira_flags &= ~IRAF_VERIFY_IP_CKSUM;

		/*
		 * Go off to ip_input_options which returns the next hop
		 * destination address, which may have been affected
		 * by source routing.
		 */
		IP_STAT(ipst, ip_opt);

		nexthop = ip_input_options(ipha, nexthop, mp, ira, &error);
		if (error != 0) {
			/*
			 * An ICMP error has been sent and the packet has
			 * been dropped.
			 */
			return;
		}
	}

	if (ill->ill_flags & ILLF_ROUTER)
		irr_flags = IRR_ALLOCATE;
	else
		irr_flags = IRR_NONE;

	/* Can not use route cache with TX since the labels can differ */
	if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
		if (CLASSD(nexthop)) {
			ire = ire_multicast(ill);
		} else {
			/* Match destination and label */
			ire = ire_route_recursive_v4(nexthop, 0, NULL,
			    ALL_ZONES, ira->ira_tsl, MATCH_IRE_SECATTR,
			    irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL,
			    NULL);
		}
		/* Update the route cache so we do the ire_refrele */
		ASSERT(ire != NULL);
		if (rtc->rtc_ire != NULL)
			ire_refrele(rtc->rtc_ire);
		rtc->rtc_ire = ire;
		rtc->rtc_ipaddr = nexthop;
	} else if (nexthop == rtc->rtc_ipaddr && rtc->rtc_ire != NULL) {
		/* Use the route cache */
		ire = rtc->rtc_ire;
	} else {
		/* Update the route cache */
		if (CLASSD(nexthop)) {
			ire = ire_multicast(ill);
		} else {
			/* Just match the destination */
			ire = ire_route_recursive_dstonly_v4(nexthop, irr_flags,
			    ira->ira_xmit_hint, ipst);
		}
		ASSERT(ire != NULL);
		if (rtc->rtc_ire != NULL)
			ire_refrele(rtc->rtc_ire);
		rtc->rtc_ire = ire;
		rtc->rtc_ipaddr = nexthop;
	}

	/*
	 * The ire is not released here: in every branch above it is the
	 * one stored in rtc->rtc_ire.
	 */
	ire->ire_ib_pkt_count++;

	/*
	 * Based on ire_type and ire_flags call one of:
	 *	ire_recv_local_v4 - for IRE_LOCAL
	 *	ire_recv_loopback_v4 - for IRE_LOOPBACK
	 *	ire_recv_multirt_v4 - if RTF_MULTIRT
	 *	ire_recv_noroute_v4 - if RTF_REJECT or RTF_BLACKHOLE
	 *	ire_recv_multicast_v4 - for IRE_MULTICAST
	 *	ire_recv_broadcast_v4 - for IRE_BROADCAST
	 *	ire_recv_noaccept_v4 - for ire_noaccept ones
	 *	ire_recv_forward_v4 - for the rest.
	 */
	(*ire->ire_recvfn)(ire, mp, ipha, ira);
}
#undef rptr

/*
 * ire_recvfn for IREs that need forwarding
 *
 * Decides whether the packet may be forwarded, obtains a held nce for the
 * ire, runs the forwarding IPPF/firewall hooks and the source-address
 * sanity checks, applies labeled-forwarding processing when
 * IRAF_SYSTEM_LABELED is set, and finally calls ip_forward_xmit_v4() with
 * the smaller of the outgoing ill's MTU and the ire's metric MTU.
 *
 *   ire     - the route entry selected for this packet.
 *   mp      - the packet; consumed on every path.
 *   iph_arg - pointer to the IPv4 header (ipha_t *).
 *   ira     - receive attributes; ira_pktlen, ira_ip_hdr_length,
 *             ira_verified_src and ira_flags may be updated here.
 *
 * The nce reference taken here is released on every exit path.
 */
void
ire_recv_forward_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
{
	ipha_t		*ipha = (ipha_t *)iph_arg;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	ill_t		*dst_ill;
	nce_t		*nce;
	/* Captured on entry; not reloaded if a hook later replaces mp. */
	ipaddr_t	src = ipha->ipha_src;
	uint32_t	added_tx_len;
	uint32_t	mtu, iremtu;

	if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("l2 multicast not forwarded", mp, ill);
		freemsg(mp);
		return;
	}

	if (!(ill->ill_flags & ILLF_ROUTER) && !ip_source_routed(ipha, ipst)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("ipIfStatsForwProhibits", mp, ill);
		freemsg(mp);
		return;
	}

	/*
	 * Either ire_nce_capable or ire_dep_parent would be set for the IRE
	 * when it is found by ire_route_recursive, but some other thread
	 * could have changed the routes with the effect of clearing
	 * ire_dep_parent. In that case we'd end up dropping the packet, or
	 * finding a new nce below.
	 * Get, allocate, or update the nce.
	 * We get a refhold on ire_nce_cache as a result of this to avoid races
	 * where ire_nce_cache is deleted.
	 *
	 * This ensures that we don't forward if the interface is down since
	 * ipif_down removes all the nces.
	 */
	mutex_enter(&ire->ire_lock);
	nce = ire->ire_nce_cache;
	if (nce == NULL) {
		/* Not yet set up - try to set one up */
		mutex_exit(&ire->ire_lock);
		(void) ire_revalidate_nce(ire);
		mutex_enter(&ire->ire_lock);
		nce = ire->ire_nce_cache;
		if (nce == NULL) {
			mutex_exit(&ire->ire_lock);
			/* The ire_dep_parent chain went bad, or no memory */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("No ire_dep_parent", mp, ill);
			freemsg(mp);
			return;
		}
	}
	/* Take the hold while still under ire_lock. */
	nce_refhold(nce);
	mutex_exit(&ire->ire_lock);

	if (nce->nce_is_condemned) {
		nce_t *nce1;

		/* Swap the condemned nce for the replacement, if any. */
		nce1 = ire_handle_condemned_nce(nce, ire, ipha, NULL, B_FALSE);
		nce_refrele(nce);
		if (nce1 == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("No nce", mp, ill);
			freemsg(mp);
			return;
		}
		nce = nce1;
	}
	dst_ill = nce->nce_ill;

	/*
	 * Unless we are forwarding, drop the packet.
	 * We have to let source routed packets through if they go out
	 * the same interface i.e., they are 'ping -l' packets.
	 */
	if (!(dst_ill->ill_flags & ILLF_ROUTER) &&
	    !(ip_source_routed(ipha, ipst) && dst_ill == ill)) {
		if (ip_source_routed(ipha, ipst)) {
			/* No freemsg here; mp is passed to icmp_unreachable */
			ip_drop_input("ICMP_SOURCE_ROUTE_FAILED", mp, ill);
			icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED, ira);
			nce_refrele(nce);
			return;
		}
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("ipIfStatsForwProhibits", mp, ill);
		freemsg(mp);
		nce_refrele(nce);
		return;
	}

	if (ire->ire_zoneid != GLOBAL_ZONEID && ire->ire_zoneid != ALL_ZONES) {
		ipaddr_t	dst = ipha->ipha_dst;

		/* Undo the count taken by the caller for this ire. */
		ire->ire_ib_pkt_count--;
		/*
		 * Should only use IREs that are visible from the
		 * global zone for forwarding.
		 * Take a source route into account the same way as ip_input
		 * did.
		 */
		if (ira->ira_flags & IRAF_IPV4_OPTIONS) {
			int		error = 0;

			dst = ip_input_options(ipha, dst, mp, ira, &error);
			ASSERT(error == 0);	/* ip_input checked */
		}
		/*
		 * NOTE(review): the lookup result is dereferenced without a
		 * NULL check; presumably ire_route_recursive_v4 always
		 * returns an ire -- confirm.
		 */
		ire = ire_route_recursive_v4(dst, 0, NULL, GLOBAL_ZONEID,
		    ira->ira_tsl, MATCH_IRE_SECATTR,
		    (ill->ill_flags & ILLF_ROUTER) ? IRR_ALLOCATE : IRR_NONE,
		    ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
		ire->ire_ib_pkt_count++;
		/* Re-dispatch through the global-zone ire's receive function. */
		(*ire->ire_recvfn)(ire, mp, ipha, ira);
		ire_refrele(ire);
		nce_refrele(nce);
		return;
	}

	/*
	 * ipIfStatsHCInForwDatagrams should only be incremented if there
	 * will be an attempt to forward the packet, which is why we
	 * increment after the above condition has been checked.
	 */
	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);

	/* Initiate Read side IPPF processing */
	if (IPP_ENABLED(IPP_FWD_IN, ipst)) {
		/* ip_process translates an IS_UNDER_IPMP */
		mp = ip_process(IPP_FWD_IN, mp, ill, ill);
		if (mp == NULL) {
			/* ip_drop_packet and MIB done */
			ip2dbg(("ire_recv_forward_v4: pkt dropped/deferred "
			    "during IPPF processing\n"));
			nce_refrele(nce);
			return;
		}
	}

	DTRACE_PROBE4(ip4__forwarding__start,
	    ill_t *, ill, ill_t *, dst_ill, ipha_t *, ipha, mblk_t *, mp);

	if (HOOKS4_INTERESTED_FORWARDING(ipst)) {
		int	error;

		FW_HOOKS(ipst->ips_ip4_forwarding_event,
		    ipst->ips_ipv4firewall_forwarding,
		    ill, dst_ill, ipha, mp, mp, 0, ipst, error);

		DTRACE_PROBE1(ip4__forwarding__end, mblk_t *, mp);

		if (mp == NULL) {
			nce_refrele(nce);
			return;
		}
		/*
		 * Even if the destination was changed by the filter we use the
		 * forwarding decision that was made based on the address
		 * in ip_input.
		 */

		/* Might have changed */
		ipha = (ipha_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ipha->ipha_length);
	}

	/* Packet is being forwarded. Turning off hwcksum flag. */
	DB_CKSUMFLAGS(mp) = 0;

	/*
	 * Martian Address Filtering [RFC 1812, Section 5.3.7]
	 * The loopback address check for both src and dst has already
	 * been checked in ip_input
	 * In the future one can envision adding RPF checks using number 3.
	 * If we already checked the same source address we can skip this.
	 */
	if (!(ira->ira_flags & IRAF_VERIFIED_SRC) ||
	    src != ira->ira_verified_src) {
		/*
		 * ips_src_check: 0 = no check, 1 = reject class D sources,
		 * 2 = additionally reject broadcast sources. Any other value
		 * performs no check.
		 */
		switch (ipst->ips_src_check) {
		case 0:
			break;
		case 2:
			if (ip_type_v4(src, ipst) == IRE_BROADCAST) {
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsForwProhibits);
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsInAddrErrors);
				ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
				freemsg(mp);
				nce_refrele(nce);
				return;
			}
			/* FALLTHRU */

		case 1:
			if (CLASSD(src)) {
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsForwProhibits);
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsInAddrErrors);
				ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
				freemsg(mp);
				nce_refrele(nce);
				return;
			}
			break;
		}
		/* Remember for next packet */
		ira->ira_flags |= IRAF_VERIFIED_SRC;
		ira->ira_verified_src = src;
	}

	/*
	 * Check if packet is going out the same link on which it arrived.
	 * Means we might need to send a redirect.
	 */
	if (IS_ON_SAME_LAN(dst_ill, ill) && ipst->ips_ip_g_send_redirects) {
		ip_send_potential_redirect_v4(mp, ipha, ire, ira);
	}

	added_tx_len = 0;
	if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
		mblk_t		*mp1;
		uint32_t	old_pkt_len = ira->ira_pktlen;

		/* Verify IP header checksum before adding/removing options */
		if ((ira->ira_flags & IRAF_VERIFY_IP_CKSUM) &&
		    ip_csum_hdr(ipha)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs);
			ip_drop_input("ipIfStatsInCksumErrs", mp, ill);
			freemsg(mp);
			nce_refrele(nce);
			return;
		}
		ira->ira_flags &= ~IRAF_VERIFY_IP_CKSUM;

		/*
		 * Check if it can be forwarded and add/remove
		 * CIPSO options as needed.
		 */
		if ((mp1 = tsol_ip_forward(ire, mp, ira)) == NULL) {
			/*
			 * NOTE(review): mp is freed here, so tsol_ip_forward
			 * presumably leaves it intact on failure -- confirm.
			 */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
			ip_drop_input("tsol_ip_forward", mp, ill);
			freemsg(mp);
			nce_refrele(nce);
			return;
		}
		/*
		 * Size may have changed. Remember amount added in case
		 * IP needs to send an ICMP too big.
		 */
		mp = mp1;
		ipha = (ipha_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ipha->ipha_length);
		ira->ira_ip_hdr_length = IPH_HDR_LENGTH(ipha);
		if (ira->ira_pktlen > old_pkt_len)
			added_tx_len = ira->ira_pktlen - old_pkt_len;

		/* Options can have been added or removed */
		if (ira->ira_ip_hdr_length != IP_SIMPLE_HDR_LENGTH)
			ira->ira_flags |= IRAF_IPV4_OPTIONS;
		else
			ira->ira_flags &= ~IRAF_IPV4_OPTIONS;
	}

	/* Use the ire's metric MTU only when it is set and smaller. */
	mtu = dst_ill->ill_mtu;
	if ((iremtu = ire->ire_metrics.iulp_mtu) != 0 && iremtu < mtu)
		mtu = iremtu;
	ip_forward_xmit_v4(nce, ill, mp, ipha, ira, mtu, added_tx_len);
	nce_refrele(nce);
}

/*
 * Used for sending out unicast and multicast packets that are
 * forwarded.
1113 */ 1114 void 1115 ip_forward_xmit_v4(nce_t *nce, ill_t *ill, mblk_t *mp, ipha_t *ipha, 1116 ip_recv_attr_t *ira, uint32_t mtu, uint32_t added_tx_len) 1117 { 1118 ill_t *dst_ill = nce->nce_ill; 1119 uint32_t pkt_len; 1120 uint32_t sum; 1121 iaflags_t iraflags = ira->ira_flags; 1122 ip_stack_t *ipst = ill->ill_ipst; 1123 iaflags_t ixaflags; 1124 1125 if (ipha->ipha_ttl <= 1) { 1126 /* Perhaps the checksum was bad */ 1127 if ((iraflags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1128 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1129 ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1130 freemsg(mp); 1131 return; 1132 } 1133 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1134 ip_drop_input("ICMP_TTL_EXCEEDED", mp, ill); 1135 icmp_time_exceeded(mp, ICMP_TTL_EXCEEDED, ira); 1136 return; 1137 } 1138 ipha->ipha_ttl--; 1139 /* Adjust the checksum to reflect the ttl decrement. */ 1140 sum = (int)ipha->ipha_hdr_checksum + IP_HDR_CSUM_TTL_ADJUST; 1141 ipha->ipha_hdr_checksum = (uint16_t)(sum + (sum >> 16)); 1142 1143 /* Check if there are options to update */ 1144 if (iraflags & IRAF_IPV4_OPTIONS) { 1145 ASSERT(ipha->ipha_version_and_hdr_length != 1146 IP_SIMPLE_HDR_VERSION); 1147 ASSERT(!(iraflags & IRAF_VERIFY_IP_CKSUM)); 1148 1149 if (!ip_forward_options(mp, ipha, dst_ill, ira)) { 1150 /* ipIfStatsForwProhibits and ip_drop_input done */ 1151 return; 1152 } 1153 1154 ipha->ipha_hdr_checksum = 0; 1155 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1156 } 1157 1158 /* Initiate Write side IPPF processing before any fragmentation */ 1159 if (IPP_ENABLED(IPP_FWD_OUT, ipst)) { 1160 /* ip_process translates an IS_UNDER_IPMP */ 1161 mp = ip_process(IPP_FWD_OUT, mp, dst_ill, dst_ill); 1162 if (mp == NULL) { 1163 /* ip_drop_packet and MIB done */ 1164 ip2dbg(("ire_recv_forward_v4: pkt dropped/deferred" \ 1165 " during IPPF processing\n")); 1166 return; 1167 } 1168 } 1169 1170 pkt_len = ira->ira_pktlen; 1171 1172 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 
1173 1174 ixaflags = IXAF_IS_IPV4 | IXAF_NO_DEV_FLOW_CTL; 1175 1176 if (pkt_len > mtu) { 1177 /* 1178 * It needs fragging on its way out. If we haven't 1179 * verified the header checksum yet we do it now since 1180 * are going to put a surely good checksum in the 1181 * outgoing header, we have to make sure that it 1182 * was good coming in. 1183 */ 1184 if ((iraflags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1185 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1186 ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1187 freemsg(mp); 1188 return; 1189 } 1190 if (ipha->ipha_fragment_offset_and_flags & IPH_DF_HTONS) { 1191 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutFragFails); 1192 ip_drop_output("ipIfStatsOutFragFails", mp, dst_ill); 1193 if (iraflags & IRAF_SYSTEM_LABELED) { 1194 /* 1195 * Remove any CIPSO option added by 1196 * tsol_ip_forward, and make sure we report 1197 * a path MTU so that there 1198 * is room to add such a CIPSO option for future 1199 * packets. 1200 */ 1201 mtu = tsol_pmtu_adjust(mp, mtu, added_tx_len, 1202 AF_INET); 1203 } 1204 1205 icmp_frag_needed(mp, mtu, ira); 1206 return; 1207 } 1208 1209 (void) ip_fragment_v4(mp, nce, ixaflags, pkt_len, mtu, 1210 ira->ira_xmit_hint, GLOBAL_ZONEID, 0, ip_xmit, NULL); 1211 return; 1212 } 1213 1214 ASSERT(pkt_len == ntohs(((ipha_t *)mp->b_rptr)->ipha_length)); 1215 if (iraflags & IRAF_LOOPBACK_COPY) { 1216 /* 1217 * IXAF_NO_LOOP_ZONEID is not set hence 7th arg 1218 * is don't care 1219 */ 1220 (void) ip_postfrag_loopcheck(mp, nce, 1221 ixaflags | IXAF_LOOPBACK_COPY, 1222 pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL); 1223 } else { 1224 (void) ip_xmit(mp, nce, ixaflags, pkt_len, ira->ira_xmit_hint, 1225 GLOBAL_ZONEID, 0, NULL); 1226 } 1227 } 1228 1229 /* 1230 * ire_recvfn for RTF_REJECT and RTF_BLACKHOLE routes, including IRE_NOROUTE, 1231 * which is what ire_route_recursive returns when there is no matching ire. 1232 * Send ICMP unreachable unless blackhole. 
1233 */ 1234 void 1235 ire_recv_noroute_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1236 { 1237 ipha_t *ipha = (ipha_t *)iph_arg; 1238 ill_t *ill = ira->ira_ill; 1239 ip_stack_t *ipst = ill->ill_ipst; 1240 1241 /* Would we have forwarded this packet if we had a route? */ 1242 if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) { 1243 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1244 ip_drop_input("l2 multicast not forwarded", mp, ill); 1245 freemsg(mp); 1246 return; 1247 } 1248 1249 if (!(ill->ill_flags & ILLF_ROUTER)) { 1250 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1251 ip_drop_input("ipIfStatsForwProhibits", mp, ill); 1252 freemsg(mp); 1253 return; 1254 } 1255 /* 1256 * If we had a route this could have been forwarded. Count as such. 1257 * 1258 * ipIfStatsHCInForwDatagrams should only be increment if there 1259 * will be an attempt to forward the packet, which is why we 1260 * increment after the above condition has been checked. 1261 */ 1262 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 1263 1264 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 1265 1266 ip_rts_change(RTM_MISS, ipha->ipha_dst, 0, 0, 0, 0, 0, 0, RTA_DST, 1267 ipst); 1268 1269 if (ire->ire_flags & RTF_BLACKHOLE) { 1270 ip_drop_input("ipIfStatsInNoRoutes RTF_BLACKHOLE", mp, ill); 1271 freemsg(mp); 1272 } else { 1273 ip_drop_input("ipIfStatsInNoRoutes RTF_REJECT", mp, ill); 1274 1275 if (ip_source_routed(ipha, ipst)) { 1276 icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED, ira); 1277 } else { 1278 icmp_unreachable(mp, ICMP_HOST_UNREACHABLE, ira); 1279 } 1280 } 1281 } 1282 1283 /* 1284 * ire_recvfn for IRE_LOCALs marked with ire_noaccept. Such IREs are used for 1285 * VRRP when in noaccept mode. 1286 * We silently drop the packet. ARP handles packets even if noaccept is set. 
1287 */ 1288 /* ARGSUSED */ 1289 void 1290 ire_recv_noaccept_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1291 ip_recv_attr_t *ira) 1292 { 1293 ill_t *ill = ira->ira_ill; 1294 1295 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1296 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill); 1297 freemsg(mp); 1298 } 1299 1300 /* 1301 * ire_recvfn for IRE_BROADCAST. 1302 */ 1303 void 1304 ire_recv_broadcast_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1305 ip_recv_attr_t *ira) 1306 { 1307 ipha_t *ipha = (ipha_t *)iph_arg; 1308 ill_t *ill = ira->ira_ill; 1309 ill_t *dst_ill = ire->ire_ill; 1310 ip_stack_t *ipst = ill->ill_ipst; 1311 ire_t *alt_ire; 1312 nce_t *nce; 1313 ipaddr_t ipha_dst; 1314 1315 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInBcastPkts); 1316 1317 /* Tag for higher-level protocols */ 1318 ira->ira_flags |= IRAF_BROADCAST; 1319 1320 /* 1321 * Whether local or directed broadcast forwarding: don't allow 1322 * for TCP. 1323 */ 1324 if (ipha->ipha_protocol == IPPROTO_TCP) { 1325 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1326 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1327 freemsg(mp); 1328 return; 1329 } 1330 1331 /* 1332 * So that we don't end up with dups, only one ill an IPMP group is 1333 * nominated to receive broadcast traffic. 1334 * If we have no cast_ill we are liberal and accept everything. 1335 */ 1336 if (IS_UNDER_IPMP(ill)) { 1337 /* For an under ill_grp can change under lock */ 1338 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1339 if (!ill->ill_nom_cast && ill->ill_grp != NULL && 1340 ill->ill_grp->ig_cast_ill != NULL) { 1341 rw_exit(&ipst->ips_ill_g_lock); 1342 /* No MIB since this is normal operation */ 1343 ip_drop_input("not nom_cast", mp, ill); 1344 freemsg(mp); 1345 return; 1346 } 1347 rw_exit(&ipst->ips_ill_g_lock); 1348 1349 ira->ira_ruifindex = ill_get_upper_ifindex(ill); 1350 } 1351 1352 /* 1353 * After reassembly and IPsec we will need to duplicate the 1354 * broadcast packet for all matching zones on the ill. 
1355 */ 1356 ira->ira_zoneid = ALL_ZONES; 1357 1358 /* 1359 * Check for directed broadcast i.e. ire->ire_ill is different than 1360 * the incoming ill. 1361 * The same broadcast address can be assigned to multiple interfaces 1362 * so have to check explicitly for that case by looking up the alt_ire 1363 */ 1364 if (dst_ill == ill && !(ire->ire_flags & RTF_MULTIRT)) { 1365 /* Reassemble on the ill on which the packet arrived */ 1366 ip_input_local_v4(ire, mp, ipha, ira); 1367 /* Restore */ 1368 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1369 return; 1370 } 1371 1372 /* Is there an IRE_BROADCAST on the incoming ill? */ 1373 ipha_dst = ((ira->ira_flags & IRAF_DHCP_UNICAST) ? INADDR_BROADCAST : 1374 ipha->ipha_dst); 1375 alt_ire = ire_ftable_lookup_v4(ipha_dst, 0, 0, IRE_BROADCAST, ill, 1376 ALL_ZONES, ira->ira_tsl, 1377 MATCH_IRE_TYPE|MATCH_IRE_ILL|MATCH_IRE_SECATTR, 0, ipst, NULL); 1378 if (alt_ire != NULL) { 1379 /* Not a directed broadcast */ 1380 /* 1381 * In the special case of multirouted broadcast 1382 * packets, we unconditionally need to "gateway" 1383 * them to the appropriate interface here so that reassembly 1384 * works. We know that the IRE_BROADCAST on cgtp0 doesn't 1385 * have RTF_MULTIRT set so we look for such an IRE in the 1386 * bucket. 
1387 */ 1388 if (alt_ire->ire_flags & RTF_MULTIRT) { 1389 irb_t *irb; 1390 ire_t *ire1; 1391 1392 irb = ire->ire_bucket; 1393 irb_refhold(irb); 1394 for (ire1 = irb->irb_ire; ire1 != NULL; 1395 ire1 = ire1->ire_next) { 1396 if (IRE_IS_CONDEMNED(ire1)) 1397 continue; 1398 if (!(ire1->ire_type & IRE_BROADCAST) || 1399 (ire1->ire_flags & RTF_MULTIRT)) 1400 continue; 1401 ill = ire1->ire_ill; 1402 ill_refhold(ill); 1403 break; 1404 } 1405 irb_refrele(irb); 1406 if (ire1 != NULL) { 1407 ill_t *orig_ill = ira->ira_ill; 1408 1409 ire_refrele(alt_ire); 1410 /* Reassemble on the new ill */ 1411 ira->ira_ill = ill; 1412 ip_input_local_v4(ire, mp, ipha, ira); 1413 ill_refrele(ill); 1414 /* Restore */ 1415 ira->ira_ill = orig_ill; 1416 ira->ira_ruifindex = 1417 orig_ill->ill_phyint->phyint_ifindex; 1418 return; 1419 } 1420 } 1421 ire_refrele(alt_ire); 1422 /* Reassemble on the ill on which the packet arrived */ 1423 ip_input_local_v4(ire, mp, ipha, ira); 1424 goto done; 1425 } 1426 1427 /* 1428 * This is a directed broadcast 1429 * 1430 * If directed broadcast is allowed, then forward the packet out 1431 * the destination interface with IXAF_LOOPBACK_COPY set. That will 1432 * result in ip_input() receiving a copy of the packet on the 1433 * appropriate ill. (We could optimize this to avoid the extra trip 1434 * via ip_input(), but since directed broadcasts are normally disabled 1435 * it doesn't make sense to optimize it.) 
1436 */ 1437 if (!ipst->ips_ip_g_forward_directed_bcast || 1438 (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST))) { 1439 ip_drop_input("directed broadcast not allowed", mp, ill); 1440 freemsg(mp); 1441 goto done; 1442 } 1443 if ((ira->ira_flags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1444 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1445 ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1446 freemsg(mp); 1447 goto done; 1448 } 1449 1450 /* 1451 * Clear the indication that this may have hardware 1452 * checksum as we are not using it for forwarding. 1453 */ 1454 DB_CKSUMFLAGS(mp) = 0; 1455 1456 /* 1457 * Adjust ttl to 2 (1+1 - the forward engine will decrement it by one. 1458 */ 1459 ipha->ipha_ttl = ipst->ips_ip_broadcast_ttl + 1; 1460 ipha->ipha_hdr_checksum = 0; 1461 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1462 1463 /* 1464 * We use ip_forward_xmit to do any fragmentation. 1465 * and loopback copy on the outbound interface. 1466 * 1467 * Make it so that IXAF_LOOPBACK_COPY to be set on transmit side. 1468 */ 1469 ira->ira_flags |= IRAF_LOOPBACK_COPY; 1470 1471 nce = arp_nce_init(dst_ill, ipha->ipha_dst, IRE_BROADCAST); 1472 if (nce == NULL) { 1473 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutDiscards); 1474 ip_drop_output("No nce", mp, dst_ill); 1475 freemsg(mp); 1476 goto done; 1477 } 1478 1479 ip_forward_xmit_v4(nce, ill, mp, ipha, ira, dst_ill->ill_mc_mtu, 0); 1480 nce_refrele(nce); 1481 done: 1482 /* Restore */ 1483 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1484 } 1485 1486 /* 1487 * ire_recvfn for IRE_MULTICAST. 
1488 */ 1489 void 1490 ire_recv_multicast_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1491 ip_recv_attr_t *ira) 1492 { 1493 ipha_t *ipha = (ipha_t *)iph_arg; 1494 ill_t *ill = ira->ira_ill; 1495 ip_stack_t *ipst = ill->ill_ipst; 1496 1497 ASSERT(ire->ire_ill == ira->ira_ill); 1498 1499 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 1500 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, ira->ira_pktlen); 1501 1502 /* RSVP hook */ 1503 if (ira->ira_flags & IRAF_RSVP) 1504 goto forus; 1505 1506 /* Tag for higher-level protocols */ 1507 ira->ira_flags |= IRAF_MULTICAST; 1508 1509 /* 1510 * So that we don't end up with dups, only one ill an IPMP group is 1511 * nominated to receive multicast traffic. 1512 * If we have no cast_ill we are liberal and accept everything. 1513 */ 1514 if (IS_UNDER_IPMP(ill)) { 1515 ip_stack_t *ipst = ill->ill_ipst; 1516 1517 /* For an under ill_grp can change under lock */ 1518 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1519 if (!ill->ill_nom_cast && ill->ill_grp != NULL && 1520 ill->ill_grp->ig_cast_ill != NULL) { 1521 rw_exit(&ipst->ips_ill_g_lock); 1522 ip_drop_input("not on cast ill", mp, ill); 1523 freemsg(mp); 1524 return; 1525 } 1526 rw_exit(&ipst->ips_ill_g_lock); 1527 /* 1528 * We switch to the upper ill so that mrouter and hasmembers 1529 * can operate on upper here and in ip_input_multicast. 1530 */ 1531 ill = ipmp_ill_hold_ipmp_ill(ill); 1532 if (ill != NULL) { 1533 ASSERT(ill != ira->ira_ill); 1534 ASSERT(ire->ire_ill == ira->ira_ill); 1535 ira->ira_ill = ill; 1536 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1537 } else { 1538 ill = ira->ira_ill; 1539 } 1540 } 1541 1542 /* 1543 * Check if we are a multicast router - send ip_mforward a copy of 1544 * the packet. 1545 * Due to mroute_decap tunnels we consider forwarding packets even if 1546 * mrouted has not joined the allmulti group on this interface. 
1547 */ 1548 if (ipst->ips_ip_g_mrouter) { 1549 int retval; 1550 1551 /* 1552 * Clear the indication that this may have hardware 1553 * checksum as we are not using it for forwarding. 1554 */ 1555 DB_CKSUMFLAGS(mp) = 0; 1556 1557 /* 1558 * ip_mforward helps us make these distinctions: If received 1559 * on tunnel and not IGMP, then drop. 1560 * If IGMP packet, then don't check membership 1561 * If received on a phyint and IGMP or PIM, then 1562 * don't check membership 1563 */ 1564 retval = ip_mforward(mp, ira); 1565 /* ip_mforward updates mib variables if needed */ 1566 1567 switch (retval) { 1568 case 0: 1569 /* 1570 * pkt is okay and arrived on phyint. 1571 * 1572 * If we are running as a multicast router 1573 * we need to see all IGMP and/or PIM packets. 1574 */ 1575 if ((ipha->ipha_protocol == IPPROTO_IGMP) || 1576 (ipha->ipha_protocol == IPPROTO_PIM)) { 1577 goto forus; 1578 } 1579 break; 1580 case -1: 1581 /* pkt is mal-formed, toss it */ 1582 freemsg(mp); 1583 goto done; 1584 case 1: 1585 /* 1586 * pkt is okay and arrived on a tunnel 1587 * 1588 * If we are running a multicast router 1589 * we need to see all igmp packets. 1590 */ 1591 if (ipha->ipha_protocol == IPPROTO_IGMP) { 1592 goto forus; 1593 } 1594 ip_drop_input("Multicast on tunnel ignored", mp, ill); 1595 freemsg(mp); 1596 goto done; 1597 } 1598 } 1599 1600 /* 1601 * Check if we have members on this ill. This is not necessary for 1602 * correctness because even if the NIC/GLD had a leaky filter, we 1603 * filter before passing to each conn_t. 1604 */ 1605 if (!ill_hasmembers_v4(ill, ipha->ipha_dst)) { 1606 /* 1607 * Nobody interested 1608 * 1609 * This might just be caused by the fact that 1610 * multiple IP Multicast addresses map to the same 1611 * link layer multicast - no need to increment counter! 
1612 */ 1613 ip_drop_input("Multicast with no members", mp, ill); 1614 freemsg(mp); 1615 goto done; 1616 } 1617 forus: 1618 ip2dbg(("ire_recv_multicast_v4: multicast for us: 0x%x\n", 1619 ntohl(ipha->ipha_dst))); 1620 1621 /* 1622 * After reassembly and IPsec we will need to duplicate the 1623 * multicast packet for all matching zones on the ill. 1624 */ 1625 ira->ira_zoneid = ALL_ZONES; 1626 1627 /* Reassemble on the ill on which the packet arrived */ 1628 ip_input_local_v4(ire, mp, ipha, ira); 1629 done: 1630 if (ill != ire->ire_ill) { 1631 ill_refrele(ill); 1632 ira->ira_ill = ire->ire_ill; 1633 ira->ira_ruifindex = ira->ira_ill->ill_phyint->phyint_ifindex; 1634 } 1635 } 1636 1637 /* 1638 * ire_recvfn for IRE_OFFLINK with RTF_MULTIRT. 1639 * Drop packets since we don't forward out multirt routes. 1640 */ 1641 /* ARGSUSED */ 1642 void 1643 ire_recv_multirt_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1644 { 1645 ill_t *ill = ira->ira_ill; 1646 1647 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 1648 ip_drop_input("Not forwarding out MULTIRT", mp, ill); 1649 freemsg(mp); 1650 } 1651 1652 /* 1653 * ire_recvfn for IRE_LOOPBACK. This is only used when a FW_HOOK 1654 * has rewritten the packet to have a loopback destination address (We 1655 * filter out packet with a loopback destination from arriving over the wire). 1656 * We don't know what zone to use, thus we always use the GLOBAL_ZONEID. 1657 */ 1658 void 1659 ire_recv_loopback_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1660 { 1661 ipha_t *ipha = (ipha_t *)iph_arg; 1662 ill_t *ill = ira->ira_ill; 1663 ill_t *ire_ill = ire->ire_ill; 1664 1665 ira->ira_zoneid = GLOBAL_ZONEID; 1666 1667 /* Switch to the lo0 ill for further processing */ 1668 if (ire_ill != ill) { 1669 /* 1670 * Update ira_ill to be the ILL on which the IP address 1671 * is hosted. 
1672 * No need to hold the ill since we have a hold on the ire 1673 */ 1674 ASSERT(ira->ira_ill == ira->ira_rill); 1675 ira->ira_ill = ire_ill; 1676 1677 ip_input_local_v4(ire, mp, ipha, ira); 1678 1679 /* Restore */ 1680 ASSERT(ira->ira_ill == ire_ill); 1681 ira->ira_ill = ill; 1682 return; 1683 1684 } 1685 ip_input_local_v4(ire, mp, ipha, ira); 1686 } 1687 1688 /* 1689 * ire_recvfn for IRE_LOCAL. 1690 */ 1691 void 1692 ire_recv_local_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1693 { 1694 ipha_t *ipha = (ipha_t *)iph_arg; 1695 ill_t *ill = ira->ira_ill; 1696 ill_t *ire_ill = ire->ire_ill; 1697 1698 /* Make a note for DAD that this address is in use */ 1699 ire->ire_last_used_time = LBOLT_FASTPATH; 1700 1701 /* Only target the IRE_LOCAL with the right zoneid. */ 1702 ira->ira_zoneid = ire->ire_zoneid; 1703 1704 /* 1705 * If the packet arrived on the wrong ill, we check that 1706 * this is ok. 1707 * If it is, then we ensure that we do the reassembly on 1708 * the ill on which the address is hosted. We keep ira_rill as 1709 * the one on which the packet arrived, so that IP_PKTINFO and 1710 * friends can report this. 1711 */ 1712 if (ire_ill != ill) { 1713 ire_t *new_ire; 1714 1715 new_ire = ip_check_multihome(&ipha->ipha_dst, ire, ill); 1716 if (new_ire == NULL) { 1717 /* Drop packet */ 1718 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1719 ip_drop_input("ipIfStatsInForwProhibits", mp, ill); 1720 freemsg(mp); 1721 return; 1722 } 1723 /* 1724 * Update ira_ill to be the ILL on which the IP address 1725 * is hosted. No need to hold the ill since we have a 1726 * hold on the ire. Note that we do the switch even if 1727 * new_ire == ire (for IPMP, ire would be the one corresponding 1728 * to the IPMP ill). 
1729 */ 1730 ASSERT(ira->ira_ill == ira->ira_rill); 1731 ira->ira_ill = new_ire->ire_ill; 1732 1733 /* ira_ruifindex tracks the upper for ira_rill */ 1734 if (IS_UNDER_IPMP(ill)) 1735 ira->ira_ruifindex = ill_get_upper_ifindex(ill); 1736 1737 ip_input_local_v4(new_ire, mp, ipha, ira); 1738 1739 /* Restore */ 1740 ASSERT(ira->ira_ill == new_ire->ire_ill); 1741 ira->ira_ill = ill; 1742 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1743 1744 if (new_ire != ire) 1745 ire_refrele(new_ire); 1746 return; 1747 } 1748 1749 ip_input_local_v4(ire, mp, ipha, ira); 1750 } 1751 1752 /* 1753 * Common function for packets arriving for the host. Handles 1754 * checksum verification, reassembly checks, etc. 1755 */ 1756 static void 1757 ip_input_local_v4(ire_t *ire, mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 1758 { 1759 ill_t *ill = ira->ira_ill; 1760 iaflags_t iraflags = ira->ira_flags; 1761 1762 /* 1763 * Verify IP header checksum. If the packet was AH or ESP then 1764 * this flag has already been cleared. Likewise if the packet 1765 * had a hardware checksum. 1766 */ 1767 if ((iraflags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1768 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1769 ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1770 freemsg(mp); 1771 return; 1772 } 1773 1774 if (iraflags & IRAF_IPV4_OPTIONS) { 1775 if (!ip_input_local_options(mp, ipha, ira)) { 1776 /* Error has been sent and mp consumed */ 1777 return; 1778 } 1779 /* 1780 * Some old hardware does partial checksum by including the 1781 * whole IP header, so the partial checksum value might have 1782 * become invalid if any option in the packet have been 1783 * updated. Always clear partial checksum flag here. 1784 */ 1785 DB_CKSUMFLAGS(mp) &= ~HCK_PARTIALCKSUM; 1786 } 1787 1788 /* 1789 * Is packet part of fragmented IP packet? 
1790 * We compare against defined values in network byte order 1791 */ 1792 if (ipha->ipha_fragment_offset_and_flags & 1793 (IPH_MF_HTONS | IPH_OFFSET_HTONS)) { 1794 /* 1795 * Make sure we have ira_l2src before we loose the original 1796 * mblk 1797 */ 1798 if (!(ira->ira_flags & IRAF_L2SRC_SET)) 1799 ip_setl2src(mp, ira, ira->ira_rill); 1800 1801 mp = ip_input_fragment(mp, ipha, ira); 1802 if (mp == NULL) 1803 return; 1804 /* Completed reassembly */ 1805 ipha = (ipha_t *)mp->b_rptr; 1806 } 1807 1808 /* 1809 * For broadcast and multicast we need some extra work before 1810 * we call ip_fanout_v4(), since in the case of shared-IP zones 1811 * we need to pretend that a packet arrived for each zoneid. 1812 */ 1813 if (iraflags & IRAF_MULTIBROADCAST) { 1814 if (iraflags & IRAF_BROADCAST) 1815 ip_input_broadcast_v4(ire, mp, ipha, ira); 1816 else 1817 ip_input_multicast_v4(ire, mp, ipha, ira); 1818 return; 1819 } 1820 ip_fanout_v4(mp, ipha, ira); 1821 } 1822 1823 1824 /* 1825 * Handle multiple zones which match the same broadcast address 1826 * and ill by delivering a packet to each of them. 1827 * Walk the bucket and look for different ire_zoneid but otherwise 1828 * the same IRE (same ill/addr/mask/type). 1829 * Note that ire_add() tracks IREs that are identical in all 1830 * fields (addr/mask/type/gw/ill/zoneid) within a single IRE by 1831 * increasing ire_identical_cnt. Thus we don't need to be concerned 1832 * about those. 
1833 */ 1834 static void 1835 ip_input_broadcast_v4(ire_t *ire, mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 1836 { 1837 ill_t *ill = ira->ira_ill; 1838 ip_stack_t *ipst = ill->ill_ipst; 1839 netstack_t *ns = ipst->ips_netstack; 1840 irb_t *irb; 1841 ire_t *ire1; 1842 mblk_t *mp1; 1843 ipha_t *ipha1; 1844 uint_t ira_pktlen = ira->ira_pktlen; 1845 uint16_t ira_ip_hdr_length = ira->ira_ip_hdr_length; 1846 1847 irb = ire->ire_bucket; 1848 1849 /* 1850 * If we don't have more than one shared-IP zone, or if 1851 * there can't be more than one IRE_BROADCAST for this 1852 * IP address, then just set the zoneid and proceed. 1853 */ 1854 if (ns->netstack_numzones == 1 || irb->irb_ire_cnt == 1) { 1855 ira->ira_zoneid = ire->ire_zoneid; 1856 1857 ip_fanout_v4(mp, ipha, ira); 1858 return; 1859 } 1860 irb_refhold(irb); 1861 for (ire1 = irb->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) { 1862 /* We do the main IRE after the end of the loop */ 1863 if (ire1 == ire) 1864 continue; 1865 1866 /* 1867 * Only IREs for the same IP address should be in the same 1868 * bucket. 1869 * But could have IRE_HOSTs in the case of CGTP. 
1870 */ 1871 ASSERT(ire1->ire_addr == ire->ire_addr); 1872 if (!(ire1->ire_type & IRE_BROADCAST)) 1873 continue; 1874 1875 if (IRE_IS_CONDEMNED(ire1)) 1876 continue; 1877 1878 mp1 = copymsg(mp); 1879 if (mp1 == NULL) { 1880 /* Failed to deliver to one zone */ 1881 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1882 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1883 continue; 1884 } 1885 ira->ira_zoneid = ire1->ire_zoneid; 1886 ipha1 = (ipha_t *)mp1->b_rptr; 1887 ip_fanout_v4(mp1, ipha1, ira); 1888 /* 1889 * IPsec might have modified ira_pktlen and ira_ip_hdr_length 1890 * so we restore them for a potential next iteration 1891 */ 1892 ira->ira_pktlen = ira_pktlen; 1893 ira->ira_ip_hdr_length = ira_ip_hdr_length; 1894 } 1895 irb_refrele(irb); 1896 /* Do the main ire */ 1897 ira->ira_zoneid = ire->ire_zoneid; 1898 ip_fanout_v4(mp, ipha, ira); 1899 } 1900 1901 /* 1902 * Handle multiple zones which want to receive the same multicast packets 1903 * on this ill by delivering a packet to each of them. 1904 * 1905 * Note that for packets delivered to transports we could instead do this 1906 * as part of the fanout code, but since we need to handle icmp_inbound 1907 * it is simpler to have multicast work the same as broadcast. 1908 * 1909 * The ip_fanout matching for multicast matches based on ilm independent of 1910 * zoneid since the zoneid restriction is applied when joining a multicast 1911 * group. 
1912 */ 1913 /* ARGSUSED */ 1914 static void 1915 ip_input_multicast_v4(ire_t *ire, mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 1916 { 1917 ill_t *ill = ira->ira_ill; 1918 iaflags_t iraflags = ira->ira_flags; 1919 ip_stack_t *ipst = ill->ill_ipst; 1920 netstack_t *ns = ipst->ips_netstack; 1921 zoneid_t zoneid; 1922 mblk_t *mp1; 1923 ipha_t *ipha1; 1924 uint_t ira_pktlen = ira->ira_pktlen; 1925 uint16_t ira_ip_hdr_length = ira->ira_ip_hdr_length; 1926 1927 /* ire_recv_multicast has switched to the upper ill for IPMP */ 1928 ASSERT(!IS_UNDER_IPMP(ill)); 1929 1930 /* 1931 * If we don't have more than one shared-IP zone, or if 1932 * there are no members in anything but the global zone, 1933 * then just set the zoneid and proceed. 1934 */ 1935 if (ns->netstack_numzones == 1 || 1936 !ill_hasmembers_otherzones_v4(ill, ipha->ipha_dst, 1937 GLOBAL_ZONEID)) { 1938 ira->ira_zoneid = GLOBAL_ZONEID; 1939 1940 /* If sender didn't want this zone to receive it, drop */ 1941 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1942 ira->ira_no_loop_zoneid == ira->ira_zoneid) { 1943 ip_drop_input("Multicast but wrong zoneid", mp, ill); 1944 freemsg(mp); 1945 return; 1946 } 1947 ip_fanout_v4(mp, ipha, ira); 1948 return; 1949 } 1950 1951 /* 1952 * Here we loop over all zoneids that have members in the group 1953 * and deliver a packet to ip_fanout for each zoneid. 1954 * 1955 * First find any members in the lowest numeric zoneid by looking for 1956 * first zoneid larger than -1 (ALL_ZONES). 1957 * We terminate the loop when we receive -1 (ALL_ZONES). 1958 */ 1959 zoneid = ill_hasmembers_nextzone_v4(ill, ipha->ipha_dst, ALL_ZONES); 1960 for (; zoneid != ALL_ZONES; 1961 zoneid = ill_hasmembers_nextzone_v4(ill, ipha->ipha_dst, zoneid)) { 1962 /* 1963 * Avoid an extra copymsg/freemsg by skipping global zone here 1964 * and doing that at the end. 
1965 */ 1966 if (zoneid == GLOBAL_ZONEID) 1967 continue; 1968 1969 ira->ira_zoneid = zoneid; 1970 1971 /* If sender didn't want this zone to receive it, skip */ 1972 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1973 ira->ira_no_loop_zoneid == ira->ira_zoneid) 1974 continue; 1975 1976 mp1 = copymsg(mp); 1977 if (mp1 == NULL) { 1978 /* Failed to deliver to one zone */ 1979 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1980 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1981 continue; 1982 } 1983 ipha1 = (ipha_t *)mp1->b_rptr; 1984 ip_fanout_v4(mp1, ipha1, ira); 1985 /* 1986 * IPsec might have modified ira_pktlen and ira_ip_hdr_length 1987 * so we restore them for a potential next iteration 1988 */ 1989 ira->ira_pktlen = ira_pktlen; 1990 ira->ira_ip_hdr_length = ira_ip_hdr_length; 1991 } 1992 1993 /* Do the main ire */ 1994 ira->ira_zoneid = GLOBAL_ZONEID; 1995 /* If sender didn't want this zone to receive it, drop */ 1996 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1997 ira->ira_no_loop_zoneid == ira->ira_zoneid) { 1998 ip_drop_input("Multicast but wrong zoneid", mp, ill); 1999 freemsg(mp); 2000 } else { 2001 ip_fanout_v4(mp, ipha, ira); 2002 } 2003 } 2004 2005 2006 /* 2007 * Determine the zoneid and IRAF_TX_* flags if trusted extensions 2008 * is in use. Updates ira_zoneid and ira_flags as a result. 2009 */ 2010 static void 2011 ip_fanout_tx_v4(mblk_t *mp, ipha_t *ipha, uint8_t protocol, 2012 uint_t ip_hdr_length, ip_recv_attr_t *ira) 2013 { 2014 uint16_t *up; 2015 uint16_t lport; 2016 zoneid_t zoneid; 2017 2018 ASSERT(ira->ira_flags & IRAF_SYSTEM_LABELED); 2019 2020 /* 2021 * If the packet is unlabeled we might allow read-down 2022 * for MAC_EXEMPT. Below we clear this if it is a multi-level 2023 * port (MLP). 2024 * Note that ira_tsl can be NULL here. 
2025 */ 2026 if (ira->ira_tsl != NULL && ira->ira_tsl->tsl_flags & TSLF_UNLABELED) 2027 ira->ira_flags |= IRAF_TX_MAC_EXEMPTABLE; 2028 2029 if (ira->ira_zoneid != ALL_ZONES) 2030 return; 2031 2032 ira->ira_flags |= IRAF_TX_SHARED_ADDR; 2033 2034 up = (uint16_t *)((uchar_t *)ipha + ip_hdr_length); 2035 switch (protocol) { 2036 case IPPROTO_TCP: 2037 case IPPROTO_SCTP: 2038 case IPPROTO_UDP: 2039 /* Caller ensures this */ 2040 ASSERT(((uchar_t *)ipha) + ip_hdr_length +4 <= mp->b_wptr); 2041 2042 /* 2043 * Only these transports support MLP. 2044 * We know their destination port numbers is in 2045 * the same place in the header. 2046 */ 2047 lport = up[1]; 2048 2049 /* 2050 * No need to handle exclusive-stack zones 2051 * since ALL_ZONES only applies to the shared IP instance. 2052 */ 2053 zoneid = tsol_mlp_findzone(protocol, lport); 2054 /* 2055 * If no shared MLP is found, tsol_mlp_findzone returns 2056 * ALL_ZONES. In that case, we assume it's SLP, and 2057 * search for the zone based on the packet label. 2058 * 2059 * If there is such a zone, we prefer to find a 2060 * connection in it. Otherwise, we look for a 2061 * MAC-exempt connection in any zone whose label 2062 * dominates the default label on the packet. 
2063 */ 2064 if (zoneid == ALL_ZONES) 2065 zoneid = tsol_attr_to_zoneid(ira); 2066 else 2067 ira->ira_flags &= ~IRAF_TX_MAC_EXEMPTABLE; 2068 break; 2069 default: 2070 /* Handle shared address for other protocols */ 2071 zoneid = tsol_attr_to_zoneid(ira); 2072 break; 2073 } 2074 ira->ira_zoneid = zoneid; 2075 } 2076 2077 /* 2078 * Increment checksum failure statistics 2079 */ 2080 static void 2081 ip_input_cksum_err_v4(uint8_t protocol, uint16_t hck_flags, ill_t *ill) 2082 { 2083 ip_stack_t *ipst = ill->ill_ipst; 2084 2085 switch (protocol) { 2086 case IPPROTO_TCP: 2087 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 2088 2089 if (hck_flags & HCK_FULLCKSUM) 2090 IP_STAT(ipst, ip_tcp_in_full_hw_cksum_err); 2091 else if (hck_flags & HCK_PARTIALCKSUM) 2092 IP_STAT(ipst, ip_tcp_in_part_hw_cksum_err); 2093 else 2094 IP_STAT(ipst, ip_tcp_in_sw_cksum_err); 2095 break; 2096 case IPPROTO_UDP: 2097 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs); 2098 if (hck_flags & HCK_FULLCKSUM) 2099 IP_STAT(ipst, ip_udp_in_full_hw_cksum_err); 2100 else if (hck_flags & HCK_PARTIALCKSUM) 2101 IP_STAT(ipst, ip_udp_in_part_hw_cksum_err); 2102 else 2103 IP_STAT(ipst, ip_udp_in_sw_cksum_err); 2104 break; 2105 case IPPROTO_ICMP: 2106 BUMP_MIB(&ipst->ips_icmp_mib, icmpInCksumErrs); 2107 break; 2108 default: 2109 ASSERT(0); 2110 break; 2111 } 2112 } 2113 2114 /* Calculate the IPv4 pseudo-header checksum */ 2115 uint32_t 2116 ip_input_cksum_pseudo_v4(ipha_t *ipha, ip_recv_attr_t *ira) 2117 { 2118 uint_t ulp_len; 2119 uint32_t cksum; 2120 uint8_t protocol = ira->ira_protocol; 2121 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 2122 2123 #define iphs ((uint16_t *)ipha) 2124 2125 switch (protocol) { 2126 case IPPROTO_TCP: 2127 ulp_len = ira->ira_pktlen - ip_hdr_length; 2128 2129 /* Protocol and length */ 2130 cksum = htons(ulp_len) + IP_TCP_CSUM_COMP; 2131 /* IP addresses */ 2132 cksum += iphs[6] + iphs[7] + iphs[8] + iphs[9]; 2133 break; 2134 2135 case IPPROTO_UDP: { 2136 udpha_t *udpha; 2137 2138 
udpha = (udpha_t *)((uchar_t *)ipha + ip_hdr_length); 2139 2140 /* Protocol and length */ 2141 cksum = udpha->uha_length + IP_UDP_CSUM_COMP; 2142 /* IP addresses */ 2143 cksum += iphs[6] + iphs[7] + iphs[8] + iphs[9]; 2144 break; 2145 } 2146 2147 default: 2148 cksum = 0; 2149 break; 2150 } 2151 #undef iphs 2152 return (cksum); 2153 } 2154 2155 2156 /* 2157 * Software verification of the ULP checksums. 2158 * Returns B_TRUE if ok. 2159 * Increments statistics of failed. 2160 */ 2161 static boolean_t 2162 ip_input_sw_cksum_v4(mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 2163 { 2164 ip_stack_t *ipst = ira->ira_ill->ill_ipst; 2165 uint32_t cksum; 2166 uint8_t protocol = ira->ira_protocol; 2167 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 2168 2169 IP_STAT(ipst, ip_in_sw_cksum); 2170 2171 ASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP); 2172 2173 cksum = ip_input_cksum_pseudo_v4(ipha, ira); 2174 cksum = IP_CSUM(mp, ip_hdr_length, cksum); 2175 if (cksum == 0) 2176 return (B_TRUE); 2177 2178 ip_input_cksum_err_v4(protocol, 0, ira->ira_ill); 2179 return (B_FALSE); 2180 } 2181 2182 /* 2183 * Verify the ULP checksums. 2184 * Returns B_TRUE if ok, or if the ULP doesn't have a well-defined checksum 2185 * algorithm. 2186 * Increments statistics if failed. 
2187 */ 2188 static boolean_t 2189 ip_input_cksum_v4(iaflags_t iraflags, mblk_t *mp, ipha_t *ipha, 2190 ip_recv_attr_t *ira) 2191 { 2192 ill_t *ill = ira->ira_rill; 2193 uint16_t hck_flags; 2194 uint32_t cksum; 2195 mblk_t *mp1; 2196 int32_t len; 2197 uint8_t protocol = ira->ira_protocol; 2198 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 2199 2200 2201 switch (protocol) { 2202 case IPPROTO_TCP: 2203 break; 2204 2205 case IPPROTO_UDP: { 2206 udpha_t *udpha; 2207 2208 udpha = (udpha_t *)((uchar_t *)ipha + ip_hdr_length); 2209 if (udpha->uha_checksum == 0) { 2210 /* Packet doesn't have a UDP checksum */ 2211 return (B_TRUE); 2212 } 2213 break; 2214 } 2215 case IPPROTO_SCTP: { 2216 sctp_hdr_t *sctph; 2217 uint32_t pktsum; 2218 2219 sctph = (sctp_hdr_t *)((uchar_t *)ipha + ip_hdr_length); 2220 #ifdef DEBUG 2221 if (skip_sctp_cksum) 2222 return (B_TRUE); 2223 #endif 2224 pktsum = sctph->sh_chksum; 2225 sctph->sh_chksum = 0; 2226 cksum = sctp_cksum(mp, ip_hdr_length); 2227 sctph->sh_chksum = pktsum; 2228 if (cksum == pktsum) 2229 return (B_TRUE); 2230 2231 /* 2232 * Defer until later whether a bad checksum is ok 2233 * in order to allow RAW sockets to use Adler checksum 2234 * with SCTP. 2235 */ 2236 ira->ira_flags |= IRAF_SCTP_CSUM_ERR; 2237 return (B_TRUE); 2238 } 2239 2240 default: 2241 /* No ULP checksum to verify. */ 2242 return (B_TRUE); 2243 } 2244 /* 2245 * Revert to software checksum calculation if the interface 2246 * isn't capable of checksum offload. 2247 * We clear DB_CKSUMFLAGS when going through IPsec in ip_fanout. 2248 * Note: IRAF_NO_HW_CKSUM is not currently used. 2249 */ 2250 ASSERT(!IS_IPMP(ill)); 2251 if ((iraflags & IRAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) || 2252 !dohwcksum) { 2253 return (ip_input_sw_cksum_v4(mp, ipha, ira)); 2254 } 2255 2256 /* 2257 * We apply this for all ULP protocols. Does the HW know to 2258 * not set the flags for SCTP and other protocols. 
2259 */ 2260 2261 hck_flags = DB_CKSUMFLAGS(mp); 2262 2263 if (hck_flags & HCK_FULLCKSUM_OK) { 2264 /* 2265 * Hardware has already verified the checksum. 2266 */ 2267 return (B_TRUE); 2268 } 2269 2270 if (hck_flags & HCK_FULLCKSUM) { 2271 /* 2272 * Full checksum has been computed by the hardware 2273 * and has been attached. If the driver wants us to 2274 * verify the correctness of the attached value, in 2275 * order to protect against faulty hardware, compare 2276 * it against -0 (0xFFFF) to see if it's valid. 2277 */ 2278 cksum = DB_CKSUM16(mp); 2279 if (cksum == 0xFFFF) 2280 return (B_TRUE); 2281 ip_input_cksum_err_v4(protocol, hck_flags, ira->ira_ill); 2282 return (B_FALSE); 2283 } 2284 2285 mp1 = mp->b_cont; 2286 if ((hck_flags & HCK_PARTIALCKSUM) && 2287 (mp1 == NULL || mp1->b_cont == NULL) && 2288 ip_hdr_length >= DB_CKSUMSTART(mp) && 2289 ((len = ip_hdr_length - DB_CKSUMSTART(mp)) & 1) == 0) { 2290 uint32_t adj; 2291 uchar_t *cksum_start; 2292 2293 cksum = ip_input_cksum_pseudo_v4(ipha, ira); 2294 2295 cksum_start = ((uchar_t *)ipha + DB_CKSUMSTART(mp)); 2296 2297 /* 2298 * Partial checksum has been calculated by hardware 2299 * and attached to the packet; in addition, any 2300 * prepended extraneous data is even byte aligned, 2301 * and there are at most two mblks associated with 2302 * the packet. If any such data exists, we adjust 2303 * the checksum; also take care any postpended data. 
2304 */ 2305 IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, len, adj); 2306 /* 2307 * One's complement subtract extraneous checksum 2308 */ 2309 cksum += DB_CKSUM16(mp); 2310 if (adj >= cksum) 2311 cksum = ~(adj - cksum) & 0xFFFF; 2312 else 2313 cksum -= adj; 2314 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16); 2315 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16); 2316 if (!(~cksum & 0xFFFF)) 2317 return (B_TRUE); 2318 2319 ip_input_cksum_err_v4(protocol, hck_flags, ira->ira_ill); 2320 return (B_FALSE); 2321 } 2322 return (ip_input_sw_cksum_v4(mp, ipha, ira)); 2323 } 2324 2325 2326 /* 2327 * Handle fanout of received packets. 2328 * Unicast packets that are looped back (from ire_send_local_v4) and packets 2329 * from the wire are differentiated by checking IRAF_VERIFY_ULP_CKSUM. 2330 * 2331 * IPQoS Notes 2332 * Before sending it to the client, invoke IPPF processing. Policy processing 2333 * takes place only if the callout_position, IPP_LOCAL_IN, is enabled. 2334 */ 2335 void 2336 ip_fanout_v4(mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 2337 { 2338 ill_t *ill = ira->ira_ill; 2339 iaflags_t iraflags = ira->ira_flags; 2340 ip_stack_t *ipst = ill->ill_ipst; 2341 uint8_t protocol = ipha->ipha_protocol; 2342 conn_t *connp; 2343 #define rptr ((uchar_t *)ipha) 2344 uint_t ip_hdr_length; 2345 uint_t min_ulp_header_length; 2346 int offset; 2347 ssize_t len; 2348 netstack_t *ns = ipst->ips_netstack; 2349 ipsec_stack_t *ipss = ns->netstack_ipsec; 2350 ill_t *rill = ira->ira_rill; 2351 2352 ASSERT(ira->ira_pktlen == ntohs(ipha->ipha_length)); 2353 2354 ip_hdr_length = ira->ira_ip_hdr_length; 2355 ira->ira_protocol = protocol; 2356 2357 /* 2358 * Time for IPP once we've done reassembly and IPsec. 2359 * We skip this for loopback packets since we don't do IPQoS 2360 * on loopback. 
2361 */ 2362 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 2363 !(iraflags & IRAF_LOOPBACK) && 2364 (protocol != IPPROTO_ESP && protocol != IPPROTO_AH)) { 2365 /* 2366 * Use the interface on which the packet arrived - not where 2367 * the IP address is hosted. 2368 */ 2369 /* ip_process translates an IS_UNDER_IPMP */ 2370 mp = ip_process(IPP_LOCAL_IN, mp, rill, ill); 2371 if (mp == NULL) { 2372 /* ip_drop_packet and MIB done */ 2373 return; 2374 } 2375 } 2376 2377 /* Determine the minimum required size of the upper-layer header */ 2378 /* Need to do this for at least the set of ULPs that TX handles. */ 2379 switch (protocol) { 2380 case IPPROTO_TCP: 2381 min_ulp_header_length = TCP_MIN_HEADER_LENGTH; 2382 break; 2383 case IPPROTO_SCTP: 2384 min_ulp_header_length = SCTP_COMMON_HDR_LENGTH; 2385 break; 2386 case IPPROTO_UDP: 2387 min_ulp_header_length = UDPH_SIZE; 2388 break; 2389 case IPPROTO_ICMP: 2390 min_ulp_header_length = ICMPH_SIZE; 2391 break; 2392 default: 2393 min_ulp_header_length = 0; 2394 break; 2395 } 2396 /* Make sure we have the min ULP header length */ 2397 len = mp->b_wptr - rptr; 2398 if (len < ip_hdr_length + min_ulp_header_length) { 2399 if (ira->ira_pktlen < ip_hdr_length + min_ulp_header_length) { 2400 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 2401 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 2402 freemsg(mp); 2403 return; 2404 } 2405 IP_STAT(ipst, ip_recv_pullup); 2406 ipha = ip_pullup(mp, ip_hdr_length + min_ulp_header_length, 2407 ira); 2408 if (ipha == NULL) 2409 goto discard; 2410 len = mp->b_wptr - rptr; 2411 } 2412 2413 /* 2414 * If trusted extensions then determine the zoneid and TX specific 2415 * ira_flags. 2416 */ 2417 if (iraflags & IRAF_SYSTEM_LABELED) { 2418 /* This can update ira->ira_flags and ira->ira_zoneid */ 2419 ip_fanout_tx_v4(mp, ipha, protocol, ip_hdr_length, ira); 2420 iraflags = ira->ira_flags; 2421 } 2422 2423 2424 /* Verify ULP checksum. 
Handles TCP, UDP, and SCTP */ 2425 if (iraflags & IRAF_VERIFY_ULP_CKSUM) { 2426 if (!ip_input_cksum_v4(iraflags, mp, ipha, ira)) { 2427 /* Bad checksum. Stats are already incremented */ 2428 ip_drop_input("Bad ULP checksum", mp, ill); 2429 freemsg(mp); 2430 return; 2431 } 2432 /* IRAF_SCTP_CSUM_ERR could have been set */ 2433 iraflags = ira->ira_flags; 2434 } 2435 switch (protocol) { 2436 case IPPROTO_TCP: 2437 /* For TCP, discard broadcast and multicast packets. */ 2438 if (iraflags & IRAF_MULTIBROADCAST) 2439 goto discard; 2440 2441 /* First mblk contains IP+TCP headers per above check */ 2442 ASSERT(len >= ip_hdr_length + TCP_MIN_HEADER_LENGTH); 2443 2444 /* TCP options present? */ 2445 offset = ((uchar_t *)ipha)[ip_hdr_length + 12] >> 4; 2446 if (offset != 5) { 2447 if (offset < 5) 2448 goto discard; 2449 2450 /* 2451 * There must be TCP options. 2452 * Make sure we can grab them. 2453 */ 2454 offset <<= 2; 2455 offset += ip_hdr_length; 2456 if (len < offset) { 2457 if (ira->ira_pktlen < offset) { 2458 BUMP_MIB(ill->ill_ip_mib, 2459 ipIfStatsInTruncatedPkts); 2460 ip_drop_input( 2461 "ipIfStatsInTruncatedPkts", 2462 mp, ill); 2463 freemsg(mp); 2464 return; 2465 } 2466 IP_STAT(ipst, ip_recv_pullup); 2467 ipha = ip_pullup(mp, offset, ira); 2468 if (ipha == NULL) 2469 goto discard; 2470 len = mp->b_wptr - rptr; 2471 } 2472 } 2473 2474 /* 2475 * Pass up a squeue hint to tcp. 2476 * If ira_sqp is already set (this is loopback) we leave it 2477 * alone. 
2478 */ 2479 if (ira->ira_sqp == NULL) { 2480 ira->ira_sqp = ip_squeue_get(ira->ira_ring); 2481 } 2482 2483 /* Look for AF_INET or AF_INET6 that matches */ 2484 connp = ipcl_classify_v4(mp, IPPROTO_TCP, ip_hdr_length, 2485 ira, ipst); 2486 if (connp == NULL) { 2487 /* Send the TH_RST */ 2488 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2489 tcp_xmit_listeners_reset(mp, ira, ipst, NULL); 2490 return; 2491 } 2492 if (connp->conn_incoming_ifindex != 0 && 2493 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2494 CONN_DEC_REF(connp); 2495 2496 /* Send the TH_RST */ 2497 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2498 tcp_xmit_listeners_reset(mp, ira, ipst, NULL); 2499 return; 2500 } 2501 if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || 2502 (iraflags & IRAF_IPSEC_SECURE)) { 2503 mp = ipsec_check_inbound_policy(mp, connp, 2504 ipha, NULL, ira); 2505 if (mp == NULL) { 2506 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2507 /* Note that mp is NULL */ 2508 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2509 CONN_DEC_REF(connp); 2510 return; 2511 } 2512 } 2513 /* Found a client; up it goes */ 2514 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2515 ira->ira_ill = ira->ira_rill = NULL; 2516 if (!IPCL_IS_TCP(connp)) { 2517 /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */ 2518 (connp->conn_recv)(connp, mp, NULL, ira); 2519 CONN_DEC_REF(connp); 2520 ira->ira_ill = ill; 2521 ira->ira_rill = rill; 2522 return; 2523 } 2524 2525 /* 2526 * We do different processing whether called from 2527 * ip_accept_tcp and we match the target, don't match 2528 * the target, and when we are called by ip_input. 
2529 */ 2530 if (iraflags & IRAF_TARGET_SQP) { 2531 if (ira->ira_target_sqp == connp->conn_sqp) { 2532 mblk_t *attrmp; 2533 2534 attrmp = ip_recv_attr_to_mblk(ira); 2535 if (attrmp == NULL) { 2536 BUMP_MIB(ill->ill_ip_mib, 2537 ipIfStatsInDiscards); 2538 ip_drop_input("ipIfStatsInDiscards", 2539 mp, ill); 2540 freemsg(mp); 2541 CONN_DEC_REF(connp); 2542 } else { 2543 SET_SQUEUE(attrmp, connp->conn_recv, 2544 connp); 2545 attrmp->b_cont = mp; 2546 ASSERT(ira->ira_target_sqp_mp == NULL); 2547 ira->ira_target_sqp_mp = attrmp; 2548 /* 2549 * Conn ref release when drained from 2550 * the squeue. 2551 */ 2552 } 2553 } else { 2554 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, 2555 connp->conn_recv, connp, ira, SQ_FILL, 2556 SQTAG_IP_TCP_INPUT); 2557 } 2558 } else { 2559 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv, 2560 connp, ira, ip_squeue_flag, SQTAG_IP_TCP_INPUT); 2561 } 2562 ira->ira_ill = ill; 2563 ira->ira_rill = rill; 2564 return; 2565 2566 case IPPROTO_SCTP: { 2567 sctp_hdr_t *sctph; 2568 in6_addr_t map_src, map_dst; 2569 uint32_t ports; /* Source and destination ports */ 2570 sctp_stack_t *sctps = ipst->ips_netstack->netstack_sctp; 2571 2572 /* For SCTP, discard broadcast and multicast packets. */ 2573 if (iraflags & IRAF_MULTIBROADCAST) 2574 goto discard; 2575 2576 /* 2577 * Since there is no SCTP h/w cksum support yet, just 2578 * clear the flag. 
2579 */ 2580 DB_CKSUMFLAGS(mp) = 0; 2581 2582 /* Length ensured above */ 2583 ASSERT(MBLKL(mp) >= ip_hdr_length + SCTP_COMMON_HDR_LENGTH); 2584 sctph = (sctp_hdr_t *)(rptr + ip_hdr_length); 2585 2586 /* get the ports */ 2587 ports = *(uint32_t *)&sctph->sh_sport; 2588 2589 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst); 2590 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src); 2591 if (iraflags & IRAF_SCTP_CSUM_ERR) { 2592 /* 2593 * No potential sctp checksum errors go to the Sun 2594 * sctp stack however they might be Adler-32 summed 2595 * packets a userland stack bound to a raw IP socket 2596 * could reasonably use. Note though that Adler-32 is 2597 * a long deprecated algorithm and customer sctp 2598 * networks should eventually migrate to CRC-32 at 2599 * which time this facility should be removed. 2600 */ 2601 ip_fanout_sctp_raw(mp, ipha, NULL, ports, ira); 2602 return; 2603 } 2604 connp = sctp_fanout(&map_src, &map_dst, ports, ira, mp, 2605 sctps, sctph); 2606 if (connp == NULL) { 2607 /* Check for raw socket or OOTB handling */ 2608 ip_fanout_sctp_raw(mp, ipha, NULL, ports, ira); 2609 return; 2610 } 2611 if (connp->conn_incoming_ifindex != 0 && 2612 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2613 CONN_DEC_REF(connp); 2614 /* Check for raw socket or OOTB handling */ 2615 ip_fanout_sctp_raw(mp, ipha, NULL, ports, ira); 2616 return; 2617 } 2618 2619 /* Found a client; up it goes */ 2620 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2621 sctp_input(connp, ipha, NULL, mp, ira); 2622 /* sctp_input does a rele of the sctp_t */ 2623 return; 2624 } 2625 2626 case IPPROTO_UDP: 2627 /* First mblk contains IP+UDP headers as checked above */ 2628 ASSERT(MBLKL(mp) >= ip_hdr_length + UDPH_SIZE); 2629 2630 if (iraflags & IRAF_MULTIBROADCAST) { 2631 uint16_t *up; /* Pointer to ports in ULP header */ 2632 2633 up = (uint16_t *)((uchar_t *)ipha + ip_hdr_length); 2634 ip_fanout_udp_multi_v4(mp, ipha, up[1], up[0], ira); 2635 return; 2636 } 2637 2638 /* Look for 
AF_INET or AF_INET6 that matches */ 2639 connp = ipcl_classify_v4(mp, IPPROTO_UDP, ip_hdr_length, 2640 ira, ipst); 2641 if (connp == NULL) { 2642 no_udp_match: 2643 if (ipst->ips_ipcl_proto_fanout_v4[IPPROTO_UDP]. 2644 connf_head != NULL) { 2645 ASSERT(ira->ira_protocol == IPPROTO_UDP); 2646 ip_fanout_proto_v4(mp, ipha, ira); 2647 } else { 2648 ip_fanout_send_icmp_v4(mp, 2649 ICMP_DEST_UNREACHABLE, 2650 ICMP_PORT_UNREACHABLE, ira); 2651 } 2652 return; 2653 2654 } 2655 if (connp->conn_incoming_ifindex != 0 && 2656 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2657 CONN_DEC_REF(connp); 2658 goto no_udp_match; 2659 } 2660 if (IPCL_IS_NONSTR(connp) ? connp->conn_flow_cntrld : 2661 !canputnext(connp->conn_rq)) { 2662 CONN_DEC_REF(connp); 2663 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 2664 ip_drop_input("udpIfStatsInOverflows", mp, ill); 2665 freemsg(mp); 2666 return; 2667 } 2668 if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || 2669 (iraflags & IRAF_IPSEC_SECURE)) { 2670 mp = ipsec_check_inbound_policy(mp, connp, 2671 ipha, NULL, ira); 2672 if (mp == NULL) { 2673 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2674 /* Note that mp is NULL */ 2675 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2676 CONN_DEC_REF(connp); 2677 return; 2678 } 2679 } 2680 /* 2681 * Remove 0-spi if it's 0, or move everything behind 2682 * the UDP header over it and forward to ESP via 2683 * ip_fanout_v4(). 2684 */ 2685 if (connp->conn_udp->udp_nat_t_endpoint) { 2686 if (iraflags & IRAF_IPSEC_SECURE) { 2687 ip_drop_packet(mp, B_TRUE, ira->ira_ill, 2688 DROPPER(ipss, ipds_esp_nat_t_ipsec), 2689 &ipss->ipsec_dropper); 2690 CONN_DEC_REF(connp); 2691 return; 2692 } 2693 2694 mp = zero_spi_check(mp, ira); 2695 if (mp == NULL) { 2696 /* 2697 * Packet was consumed - probably sent to 2698 * ip_fanout_v4. 2699 */ 2700 CONN_DEC_REF(connp); 2701 return; 2702 } 2703 /* Else continue like a normal UDP packet. 
*/ 2704 ipha = (ipha_t *)mp->b_rptr; 2705 protocol = ipha->ipha_protocol; 2706 ira->ira_protocol = protocol; 2707 } 2708 /* Found a client; up it goes */ 2709 IP_STAT(ipst, ip_udp_fannorm); 2710 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2711 ira->ira_ill = ira->ira_rill = NULL; 2712 (connp->conn_recv)(connp, mp, NULL, ira); 2713 CONN_DEC_REF(connp); 2714 ira->ira_ill = ill; 2715 ira->ira_rill = rill; 2716 return; 2717 default: 2718 break; 2719 } 2720 2721 /* 2722 * Clear hardware checksumming flag as it is currently only 2723 * used by TCP and UDP. 2724 */ 2725 DB_CKSUMFLAGS(mp) = 0; 2726 2727 switch (protocol) { 2728 case IPPROTO_ICMP: 2729 /* 2730 * We need to accomodate icmp messages coming in clear 2731 * until we get everything secure from the wire. If 2732 * icmp_accept_clear_messages is zero we check with 2733 * the global policy and act accordingly. If it is 2734 * non-zero, we accept the message without any checks. 2735 * But *this does not mean* that this will be delivered 2736 * to RAW socket clients. By accepting we might send 2737 * replies back, change our MTU value etc., 2738 * but delivery to the ULP/clients depends on their 2739 * policy dispositions. 2740 */ 2741 if (ipst->ips_icmp_accept_clear_messages == 0) { 2742 mp = ipsec_check_global_policy(mp, NULL, 2743 ipha, NULL, ira, ns); 2744 if (mp == NULL) 2745 return; 2746 } 2747 2748 /* 2749 * On a labeled system, we have to check whether the zone 2750 * itself is permitted to receive raw traffic. 2751 */ 2752 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 2753 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) { 2754 BUMP_MIB(&ipst->ips_icmp_mib, icmpInErrors); 2755 ip_drop_input("tsol_can_accept_raw", mp, ill); 2756 freemsg(mp); 2757 return; 2758 } 2759 } 2760 2761 /* 2762 * ICMP header checksum, including checksum field, 2763 * should be zero. 
2764 */ 2765 if (IP_CSUM(mp, ip_hdr_length, 0)) { 2766 BUMP_MIB(&ipst->ips_icmp_mib, icmpInCksumErrs); 2767 ip_drop_input("icmpInCksumErrs", mp, ill); 2768 freemsg(mp); 2769 return; 2770 } 2771 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2772 mp = icmp_inbound_v4(mp, ira); 2773 if (mp == NULL) { 2774 /* No need to pass to RAW sockets */ 2775 return; 2776 } 2777 break; 2778 2779 case IPPROTO_IGMP: 2780 /* 2781 * If we are not willing to accept IGMP packets in clear, 2782 * then check with global policy. 2783 */ 2784 if (ipst->ips_igmp_accept_clear_messages == 0) { 2785 mp = ipsec_check_global_policy(mp, NULL, 2786 ipha, NULL, ira, ns); 2787 if (mp == NULL) 2788 return; 2789 } 2790 if ((ira->ira_flags & IRAF_SYSTEM_LABELED) && 2791 !tsol_can_accept_raw(mp, ira, B_TRUE)) { 2792 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2793 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2794 freemsg(mp); 2795 return; 2796 } 2797 /* 2798 * Validate checksum 2799 */ 2800 if (IP_CSUM(mp, ip_hdr_length, 0)) { 2801 ++ipst->ips_igmpstat.igps_rcv_badsum; 2802 ip_drop_input("igps_rcv_badsum", mp, ill); 2803 freemsg(mp); 2804 return; 2805 } 2806 2807 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2808 mp = igmp_input(mp, ira); 2809 if (mp == NULL) { 2810 /* Bad packet - discarded by igmp_input */ 2811 return; 2812 } 2813 break; 2814 case IPPROTO_PIM: 2815 /* 2816 * If we are not willing to accept PIM packets in clear, 2817 * then check with global policy. 
2818 */ 2819 if (ipst->ips_pim_accept_clear_messages == 0) { 2820 mp = ipsec_check_global_policy(mp, NULL, 2821 ipha, NULL, ira, ns); 2822 if (mp == NULL) 2823 return; 2824 } 2825 if ((ira->ira_flags & IRAF_SYSTEM_LABELED) && 2826 !tsol_can_accept_raw(mp, ira, B_TRUE)) { 2827 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2828 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2829 freemsg(mp); 2830 return; 2831 } 2832 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2833 2834 /* Checksum is verified in pim_input */ 2835 mp = pim_input(mp, ira); 2836 if (mp == NULL) { 2837 /* Bad packet - discarded by pim_input */ 2838 return; 2839 } 2840 break; 2841 case IPPROTO_AH: 2842 case IPPROTO_ESP: { 2843 /* 2844 * Fast path for AH/ESP. 2845 */ 2846 netstack_t *ns = ipst->ips_netstack; 2847 ipsec_stack_t *ipss = ns->netstack_ipsec; 2848 2849 IP_STAT(ipst, ipsec_proto_ahesp); 2850 2851 if (!ipsec_loaded(ipss)) { 2852 ip_proto_not_sup(mp, ira); 2853 return; 2854 } 2855 2856 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2857 /* select inbound SA and have IPsec process the pkt */ 2858 if (protocol == IPPROTO_ESP) { 2859 esph_t *esph; 2860 boolean_t esp_in_udp_sa; 2861 boolean_t esp_in_udp_packet; 2862 2863 mp = ipsec_inbound_esp_sa(mp, ira, &esph); 2864 if (mp == NULL) 2865 return; 2866 2867 ASSERT(esph != NULL); 2868 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2869 ASSERT(ira->ira_ipsec_esp_sa != NULL); 2870 ASSERT(ira->ira_ipsec_esp_sa->ipsa_input_func != NULL); 2871 2872 esp_in_udp_sa = ((ira->ira_ipsec_esp_sa->ipsa_flags & 2873 IPSA_F_NATT) != 0); 2874 esp_in_udp_packet = 2875 (ira->ira_flags & IRAF_ESP_UDP_PORTS) != 0; 2876 2877 /* 2878 * The following is a fancy, but quick, way of saying: 2879 * ESP-in-UDP SA and Raw ESP packet --> drop 2880 * OR 2881 * ESP SA and ESP-in-UDP packet --> drop 2882 */ 2883 if (esp_in_udp_sa != esp_in_udp_packet) { 2884 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2885 ip_drop_packet(mp, B_TRUE, ira->ira_ill, 2886 DROPPER(ipss, 
ipds_esp_no_sa), 2887 &ipss->ipsec_dropper); 2888 return; 2889 } 2890 mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph, 2891 ira); 2892 } else { 2893 ah_t *ah; 2894 2895 mp = ipsec_inbound_ah_sa(mp, ira, &ah); 2896 if (mp == NULL) 2897 return; 2898 2899 ASSERT(ah != NULL); 2900 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2901 ASSERT(ira->ira_ipsec_ah_sa != NULL); 2902 ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL); 2903 mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, 2904 ira); 2905 } 2906 2907 if (mp == NULL) { 2908 /* 2909 * Either it failed or is pending. In the former case 2910 * ipIfStatsInDiscards was increased. 2911 */ 2912 return; 2913 } 2914 /* we're done with IPsec processing, send it up */ 2915 ip_input_post_ipsec(mp, ira); 2916 return; 2917 } 2918 case IPPROTO_ENCAP: { 2919 ipha_t *inner_ipha; 2920 2921 /* 2922 * Handle self-encapsulated packets (IP-in-IP where 2923 * the inner addresses == the outer addresses). 2924 */ 2925 if ((uchar_t *)ipha + ip_hdr_length + sizeof (ipha_t) > 2926 mp->b_wptr) { 2927 if (ira->ira_pktlen < 2928 ip_hdr_length + sizeof (ipha_t)) { 2929 BUMP_MIB(ill->ill_ip_mib, 2930 ipIfStatsInTruncatedPkts); 2931 ip_drop_input("ipIfStatsInTruncatedPkts", 2932 mp, ill); 2933 freemsg(mp); 2934 return; 2935 } 2936 ipha = ip_pullup(mp, (uchar_t *)ipha + ip_hdr_length + 2937 sizeof (ipha_t) - mp->b_rptr, ira); 2938 if (ipha == NULL) { 2939 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2940 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2941 freemsg(mp); 2942 return; 2943 } 2944 } 2945 inner_ipha = (ipha_t *)((uchar_t *)ipha + ip_hdr_length); 2946 /* 2947 * Check the sanity of the inner IP header. 
2948 */ 2949 if ((IPH_HDR_VERSION(inner_ipha) != IPV4_VERSION)) { 2950 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2951 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2952 freemsg(mp); 2953 return; 2954 } 2955 if (IPH_HDR_LENGTH(inner_ipha) < sizeof (ipha_t)) { 2956 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2957 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2958 freemsg(mp); 2959 return; 2960 } 2961 if (inner_ipha->ipha_src != ipha->ipha_src || 2962 inner_ipha->ipha_dst != ipha->ipha_dst) { 2963 /* We fallthru to iptun fanout below */ 2964 goto iptun; 2965 } 2966 2967 /* 2968 * Self-encapsulated tunnel packet. Remove 2969 * the outer IP header and fanout again. 2970 * We also need to make sure that the inner 2971 * header is pulled up until options. 2972 */ 2973 mp->b_rptr = (uchar_t *)inner_ipha; 2974 ipha = inner_ipha; 2975 ip_hdr_length = IPH_HDR_LENGTH(ipha); 2976 if ((uchar_t *)ipha + ip_hdr_length > mp->b_wptr) { 2977 if (ira->ira_pktlen < 2978 (uchar_t *)ipha + ip_hdr_length - mp->b_rptr) { 2979 BUMP_MIB(ill->ill_ip_mib, 2980 ipIfStatsInTruncatedPkts); 2981 ip_drop_input("ipIfStatsInTruncatedPkts", 2982 mp, ill); 2983 freemsg(mp); 2984 return; 2985 } 2986 ipha = ip_pullup(mp, 2987 (uchar_t *)ipha + ip_hdr_length - mp->b_rptr, ira); 2988 if (ipha == NULL) { 2989 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2990 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2991 freemsg(mp); 2992 return; 2993 } 2994 } 2995 if (ip_hdr_length > sizeof (ipha_t)) { 2996 /* We got options on the inner packet. */ 2997 ipaddr_t dst = ipha->ipha_dst; 2998 int error = 0; 2999 3000 dst = ip_input_options(ipha, dst, mp, ira, &error); 3001 if (error != 0) { 3002 /* 3003 * An ICMP error has been sent and the packet 3004 * has been dropped. 3005 */ 3006 return; 3007 } 3008 if (dst != ipha->ipha_dst) { 3009 /* 3010 * Someone put a source-route in 3011 * the inside header of a self- 3012 * encapsulated packet. 
Drop it 3013 * with extreme prejudice and let 3014 * the sender know. 3015 */ 3016 ip_drop_input("ICMP_SOURCE_ROUTE_FAILED", 3017 mp, ill); 3018 icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED, 3019 ira); 3020 return; 3021 } 3022 } 3023 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) { 3024 /* 3025 * This means that somebody is sending 3026 * Self-encapsualted packets without AH/ESP. 3027 * 3028 * Send this packet to find a tunnel endpoint. 3029 * if I can't find one, an ICMP 3030 * PROTOCOL_UNREACHABLE will get sent. 3031 */ 3032 protocol = ipha->ipha_protocol; 3033 ira->ira_protocol = protocol; 3034 goto iptun; 3035 } 3036 3037 /* Update based on removed IP header */ 3038 ira->ira_ip_hdr_length = ip_hdr_length; 3039 ira->ira_pktlen = ntohs(ipha->ipha_length); 3040 3041 if (ira->ira_flags & IRAF_IPSEC_DECAPS) { 3042 /* 3043 * This packet is self-encapsulated multiple 3044 * times. We don't want to recurse infinitely. 3045 * To keep it simple, drop the packet. 3046 */ 3047 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3048 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3049 freemsg(mp); 3050 return; 3051 } 3052 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 3053 ira->ira_flags |= IRAF_IPSEC_DECAPS; 3054 3055 ip_input_post_ipsec(mp, ira); 3056 return; 3057 } 3058 3059 iptun: /* IPPROTO_ENCAPS that is not self-encapsulated */ 3060 case IPPROTO_IPV6: 3061 /* iptun will verify trusted label */ 3062 connp = ipcl_classify_v4(mp, protocol, ip_hdr_length, 3063 ira, ipst); 3064 if (connp != NULL) { 3065 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3066 ira->ira_ill = ira->ira_rill = NULL; 3067 (connp->conn_recv)(connp, mp, NULL, ira); 3068 CONN_DEC_REF(connp); 3069 ira->ira_ill = ill; 3070 ira->ira_rill = rill; 3071 return; 3072 } 3073 /* FALLTHRU */ 3074 default: 3075 /* 3076 * On a labeled system, we have to check whether the zone 3077 * itself is permitted to receive raw traffic. 
3078 */ 3079 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 3080 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) { 3081 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3082 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3083 freemsg(mp); 3084 return; 3085 } 3086 } 3087 break; 3088 } 3089 3090 /* 3091 * The above input functions may have returned the pulled up message. 3092 * So ipha need to be reinitialized. 3093 */ 3094 ipha = (ipha_t *)mp->b_rptr; 3095 ira->ira_protocol = protocol = ipha->ipha_protocol; 3096 if (ipst->ips_ipcl_proto_fanout_v4[protocol].connf_head == NULL) { 3097 /* 3098 * No user-level listener for these packets packets. 3099 * Check for IPPROTO_ENCAP... 3100 */ 3101 if (protocol == IPPROTO_ENCAP && ipst->ips_ip_g_mrouter) { 3102 /* 3103 * Check policy here, 3104 * THEN ship off to ip_mroute_decap(). 3105 * 3106 * BTW, If I match a configured IP-in-IP 3107 * tunnel above, this path will not be reached, and 3108 * ip_mroute_decap will never be called. 3109 */ 3110 mp = ipsec_check_global_policy(mp, connp, 3111 ipha, NULL, ira, ns); 3112 if (mp != NULL) { 3113 ip_mroute_decap(mp, ira); 3114 } /* Else we already freed everything! */ 3115 } else { 3116 ip_proto_not_sup(mp, ira); 3117 } 3118 return; 3119 } 3120 3121 /* 3122 * Handle fanout to raw sockets. There 3123 * can be more than one stream bound to a particular 3124 * protocol. When this is the case, each one gets a copy 3125 * of any incoming packets. 3126 */ 3127 ASSERT(ira->ira_protocol == ipha->ipha_protocol); 3128 ip_fanout_proto_v4(mp, ipha, ira); 3129 return; 3130 3131 discard: 3132 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3133 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3134 freemsg(mp); 3135 #undef rptr 3136 }