1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved
  24  *
  25  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  26  * Copyright 2018 Joyent, Inc.
  27  */
  28 /* Copyright (c) 1990 Mentat Inc. */
  29 
  30 #include <sys/types.h>
  31 #include <sys/stream.h>
  32 #include <sys/dlpi.h>
  33 #include <sys/stropts.h>
  34 #include <sys/sysmacros.h>
  35 #include <sys/strsubr.h>
  36 #include <sys/strlog.h>
  37 #include <sys/strsun.h>
  38 #include <sys/zone.h>
  39 #define _SUN_TPI_VERSION 2
  40 #include <sys/tihdr.h>
  41 #include <sys/xti_inet.h>
  42 #include <sys/ddi.h>
  43 #include <sys/sunddi.h>
  44 #include <sys/cmn_err.h>
  45 #include <sys/debug.h>
  46 #include <sys/kobj.h>
  47 #include <sys/modctl.h>
  48 #include <sys/atomic.h>
  49 #include <sys/policy.h>
  50 #include <sys/priv.h>
  51 
  52 #include <sys/systm.h>
  53 #include <sys/param.h>
  54 #include <sys/kmem.h>
  55 #include <sys/sdt.h>
  56 #include <sys/socket.h>
  57 #include <sys/vtrace.h>
  58 #include <sys/isa_defs.h>
  59 #include <sys/mac.h>
  60 #include <net/if.h>
  61 #include <net/if_arp.h>
  62 #include <net/route.h>
  63 #include <sys/sockio.h>
  64 #include <netinet/in.h>
  65 #include <net/if_dl.h>
  66 
  67 #include <inet/common.h>
  68 #include <inet/mi.h>
  69 #include <inet/mib2.h>
  70 #include <inet/nd.h>
  71 #include <inet/arp.h>
  72 #include <inet/snmpcom.h>
  73 #include <inet/kstatcom.h>
  74 
  75 #include <netinet/igmp_var.h>
  76 #include <netinet/ip6.h>
  77 #include <netinet/icmp6.h>
  78 #include <netinet/sctp.h>
  79 
  80 #include <inet/ip.h>
  81 #include <inet/ip_impl.h>
  82 #include <inet/ip6.h>
  83 #include <inet/ip6_asp.h>
  84 #include <inet/optcom.h>
  85 #include <inet/tcp.h>
  86 #include <inet/tcp_impl.h>
  87 #include <inet/ip_multi.h>
  88 #include <inet/ip_if.h>
  89 #include <inet/ip_ire.h>
  90 #include <inet/ip_ftable.h>
  91 #include <inet/ip_rts.h>
  92 #include <inet/ip_ndp.h>
  93 #include <inet/ip_listutils.h>
  94 #include <netinet/igmp.h>
  95 #include <netinet/ip_mroute.h>
  96 #include <inet/ipp_common.h>
  97 
  98 #include <net/pfkeyv2.h>
  99 #include <inet/sadb.h>
 100 #include <inet/ipsec_impl.h>
 101 #include <inet/ipdrop.h>
 102 #include <inet/ip_netinfo.h>
 103 #include <inet/ilb_ip.h>
 104 #include <sys/squeue_impl.h>
 105 #include <sys/squeue.h>
 106 
 107 #include <sys/ethernet.h>
 108 #include <net/if_types.h>
 109 #include <sys/cpuvar.h>
 110 
 111 #include <ipp/ipp.h>
 112 #include <ipp/ipp_impl.h>
 113 #include <ipp/ipgpc/ipgpc.h>
 114 
 115 #include <sys/pattr.h>
 116 #include <inet/ipclassifier.h>
 117 #include <inet/sctp_ip.h>
 118 #include <inet/sctp/sctp_impl.h>
 119 #include <inet/udp_impl.h>
 120 #include <sys/sunddi.h>
 121 
 122 #include <sys/tsol/label.h>
 123 #include <sys/tsol/tnet.h>
 124 
 125 #include <sys/clock_impl.h>       /* For LBOLT_FASTPATH{,64} */
 126 
 127 #ifdef  DEBUG
 128 extern boolean_t skip_sctp_cksum;
 129 #endif
 130 
 131 static void     ip_input_local_v6(ire_t *, mblk_t *, ip6_t *, ip_recv_attr_t *);
 132 
 133 static void     ip_input_multicast_v6(ire_t *, mblk_t *, ip6_t *,
 134     ip_recv_attr_t *);
 135 
 136 #pragma inline(ip_input_common_v6, ip_input_local_v6, ip_forward_xmit_v6)
 137 
 138 /*
 139  * Direct read side procedure capable of dealing with chains. GLDv3 based
 140  * drivers call this function directly with mblk chains while STREAMS
 141  * read side procedure ip_rput() calls this for single packet with ip_ring
 142  * set to NULL to process one packet at a time.
 143  *
 144  * The ill will always be valid if this function is called directly from
 145  * the driver.
 146  *
 147  * If this chain is part of a VLAN stream, then the VLAN tag is
 148  * stripped from the MAC header before being delivered to this
 149  * function.
 150  *
 151  * If the IP header in packet is not 32-bit aligned, every message in the
 152  * chain will be aligned before further operations. This is required on SPARC
 153  * platform.
 154  */
 155 void
 156 ip_input_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
 157     struct mac_header_info_s *mhip)
 158 {
 159         (void) ip_input_common_v6(ill, ip_ring, mp_chain, mhip, NULL, NULL,
 160             NULL);
 161 }
 162 
 163 /*
 164  * ip_accept_tcp_v6() - This function is called by the squeue when it retrieves
 165  * a chain of packets in the poll mode. The packets have gone through the
 166  * data link processing but not IP processing. For performance and latency
 167  * reasons, the squeue wants to process the chain in line instead of feeding
 168  * it back via ip_input path.
 169  *
 170  * We set up the ip_recv_attr_t with IRAF_TARGET_SQP to that ip_fanout_v6
 171  * will pass back any TCP packets matching the target sqp to
 172  * ip_input_common_v6 using ira_target_sqp_mp. Other packets are handled by
 173  * ip_input_v6 and ip_fanout_v6 as normal.
 174  * The TCP packets that match the target squeue are returned to the caller
 175  * as a b_next chain after each packet has been prepend with an mblk
 176  * from ip_recv_attr_to_mblk.
 177  */
 178 mblk_t *
 179 ip_accept_tcp_v6(ill_t *ill, ill_rx_ring_t *ip_ring, squeue_t *target_sqp,
 180     mblk_t *mp_chain, mblk_t **last, uint_t *cnt)
 181 {
 182         return (ip_input_common_v6(ill, ip_ring, mp_chain, NULL, target_sqp,
 183             last, cnt));
 184 }
 185 
 186 /*
 187  * Used by ip_input_v6 and ip_accept_tcp_v6
 188  * The last three arguments are only used by ip_accept_tcp_v6, and mhip is
 189  * only used by ip_input_v6.
 190  */
 191 mblk_t *
 192 ip_input_common_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
 193     struct mac_header_info_s *mhip, squeue_t *target_sqp,
 194     mblk_t **last, uint_t *cnt)
 195 {
 196         mblk_t          *mp;
 197         ip6_t           *ip6h;
 198         ip_recv_attr_t  iras;   /* Receive attributes */
 199         rtc_t           rtc;
 200         iaflags_t       chain_flags = 0;        /* Fixed for chain */
 201         mblk_t          *ahead = NULL;  /* Accepted head */
 202         mblk_t          *atail = NULL;  /* Accepted tail */
 203         uint_t          acnt = 0;       /* Accepted count */
 204 
 205         ASSERT(mp_chain != NULL);
 206         ASSERT(ill != NULL);
 207 
 208         /* These ones do not change as we loop over packets */
 209         iras.ira_ill = iras.ira_rill = ill;
 210         iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex;
 211         iras.ira_rifindex = iras.ira_ruifindex;
 212         iras.ira_sqp = NULL;
 213         iras.ira_ring = ip_ring;
 214         /* For ECMP and outbound transmit ring selection */
 215         iras.ira_xmit_hint = ILL_RING_TO_XMIT_HINT(ip_ring);
 216 
 217         iras.ira_target_sqp = target_sqp;
 218         iras.ira_target_sqp_mp = NULL;
 219         if (target_sqp != NULL)
 220                 chain_flags |= IRAF_TARGET_SQP;
 221 
 222         /*
 223          * We try to have a mhip pointer when possible, but
 224          * it might be NULL in some cases. In those cases we
 225          * have to assume unicast.
 226          */
 227         iras.ira_mhip = mhip;
 228         iras.ira_flags = 0;
 229         if (mhip != NULL) {
 230                 switch (mhip->mhi_dsttype) {
 231                 case MAC_ADDRTYPE_MULTICAST :
 232                         chain_flags |= IRAF_L2DST_MULTICAST;
 233                         break;
 234                 case MAC_ADDRTYPE_BROADCAST :
 235                         chain_flags |= IRAF_L2DST_BROADCAST;
 236                         break;
 237                 }
 238         }
 239 
 240         /*
 241          * Initialize the one-element route cache.
 242          *
 243          * We do ire caching from one iteration to
 244          * another. In the event the packet chain contains
 245          * all packets from the same dst, this caching saves
 246          * an ire_route_recursive for each of the succeeding
 247          * packets in a packet chain.
 248          */
 249         rtc.rtc_ire = NULL;
 250         rtc.rtc_ip6addr = ipv6_all_zeros;
 251 
 252         /* Loop over b_next */
 253         for (mp = mp_chain; mp != NULL; mp = mp_chain) {
 254                 mp_chain = mp->b_next;
 255                 mp->b_next = NULL;
 256 
 257                 /*
 258                  * if db_ref > 1 then copymsg and free original. Packet
 259                  * may be changed and we do not want the other entity
 260                  * who has a reference to this message to trip over the
 261                  * changes. This is a blind change because trying to
 262                  * catch all places that might change the packet is too
 263                  * difficult.
 264                  *
 265                  * This corresponds to the fast path case, where we have
 266                  * a chain of M_DATA mblks.  We check the db_ref count
 267                  * of only the 1st data block in the mblk chain. There
 268                  * doesn't seem to be a reason why a device driver would
 269                  * send up data with varying db_ref counts in the mblk
 270                  * chain. In any case the Fast path is a private
 271                  * interface, and our drivers don't do such a thing.
 272                  * Given the above assumption, there is no need to walk
 273                  * down the entire mblk chain (which could have a
 274                  * potential performance problem)
 275                  *
 276                  * The "(DB_REF(mp) > 1)" check was moved from ip_rput()
 277                  * to here because of exclusive ip stacks and vnics.
 278                  * Packets transmitted from exclusive stack over vnic
 279                  * can have db_ref > 1 and when it gets looped back to
 280                  * another vnic in a different zone, you have ip_input()
 281                  * getting dblks with db_ref > 1. So if someone
 282                  * complains of TCP performance under this scenario,
 283                  * take a serious look here on the impact of copymsg().
 284                  */
 285                 if (DB_REF(mp) > 1) {
 286                         if ((mp = ip_fix_dbref(mp, &iras)) == NULL)
 287                                 continue;
 288                 }
 289 
 290                 /*
 291                  * IP header ptr not aligned?
 292                  * OR IP header not complete in first mblk
 293                  */
 294                 ip6h = (ip6_t *)mp->b_rptr;
 295                 if (!OK_32PTR(ip6h) || MBLKL(mp) < IPV6_HDR_LEN) {
 296                         mp = ip_check_and_align_header(mp, IPV6_HDR_LEN, &iras);
 297                         if (mp == NULL)
 298                                 continue;
 299                         ip6h = (ip6_t *)mp->b_rptr;
 300                 }
 301 
 302                 /* Protect against a mix of Ethertypes and IP versions */
 303                 if (IPH_HDR_VERSION(ip6h) != IPV6_VERSION) {
 304                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
 305                         ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
 306                         freemsg(mp);
 307                         /* mhip might point into 1st packet in the chain. */
 308                         iras.ira_mhip = NULL;
 309                         continue;
 310                 }
 311 
 312                 /*
 313                  * Check for Martian addrs; we have to explicitly
 314                  * test for for zero dst since this is also used as
 315                  * an indication that the rtc is not used.
 316                  */
 317                 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_dst)) {
 318                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 319                         ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
 320                         freemsg(mp);
 321                         /* mhip might point into 1st packet in the chain. */
 322                         iras.ira_mhip = NULL;
 323                         continue;
 324                 }
 325                 /*
 326                  * Keep L2SRC from a previous packet in chain since mhip
 327                  * might point into an earlier packet in the chain.
 328                  */
 329                 chain_flags |= (iras.ira_flags & IRAF_L2SRC_SET);
 330 
 331                 iras.ira_flags = IRAF_VERIFY_ULP_CKSUM | chain_flags;
 332                 iras.ira_free_flags = 0;
 333                 iras.ira_cred = NULL;
 334                 iras.ira_cpid = NOPID;
 335                 iras.ira_tsl = NULL;
 336                 iras.ira_zoneid = ALL_ZONES;    /* Default for forwarding */
 337 
 338                 /*
 339                  * We must count all incoming packets, even if they end
 340                  * up being dropped later on. Defer counting bytes until
 341                  * we have the whole IP header in first mblk.
 342                  */
 343                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives);
 344 
 345                 iras.ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
 346                 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets,
 347                     iras.ira_pktlen);
 348 
 349                 /*
 350                  * Call one of:
 351                  *      ill_input_full_v6
 352                  *      ill_input_short_v6
 353                  * The former is used in the case of TX. See ill_set_inputfn().
 354                  */
 355                 (*ill->ill_inputfn)(mp, ip6h, &ip6h->ip6_dst, &iras, &rtc);
 356 
 357                 /* Any references to clean up? No hold on ira_ill */
 358                 if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED))
 359                         ira_cleanup(&iras, B_FALSE);
 360 
 361                 if (iras.ira_target_sqp_mp != NULL) {
 362                         /* Better be called from ip_accept_tcp */
 363                         ASSERT(target_sqp != NULL);
 364 
 365                         /* Found one packet to accept */
 366                         mp = iras.ira_target_sqp_mp;
 367                         iras.ira_target_sqp_mp = NULL;
 368                         ASSERT(ip_recv_attr_is_mblk(mp));
 369 
 370                         if (atail != NULL)
 371                                 atail->b_next = mp;
 372                         else
 373                                 ahead = mp;
 374                         atail = mp;
 375                         acnt++;
 376                         mp = NULL;
 377                 }
 378                 /* mhip might point into 1st packet in the chain. */
 379                 iras.ira_mhip = NULL;
 380         }
 381         /* Any remaining references to the route cache? */
 382         if (rtc.rtc_ire != NULL) {
 383                 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&rtc.rtc_ip6addr));
 384                 ire_refrele(rtc.rtc_ire);
 385         }
 386 
 387         if (ahead != NULL) {
 388                 /* Better be called from ip_accept_tcp */
 389                 ASSERT(target_sqp != NULL);
 390                 *last = atail;
 391                 *cnt = acnt;
 392                 return (ahead);
 393         }
 394 
 395         return (NULL);
 396 }
 397 
 398 /*
 399  * This input function is used when
 400  *  - is_system_labeled()
 401  *
 402  * Note that for IPv6 CGTP filtering is handled only when receiving fragment
 403  * headers, and RSVP uses router alert options, thus we don't need anything
 404  * extra for them.
 405  */
 406 void
 407 ill_input_full_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg,
 408     ip_recv_attr_t *ira, rtc_t *rtc)
 409 {
 410         ip6_t           *ip6h = (ip6_t *)iph_arg;
 411         in6_addr_t      *nexthop = (in6_addr_t *)nexthop_arg;
 412         ill_t           *ill = ira->ira_ill;
 413 
 414         ASSERT(ira->ira_tsl == NULL);
 415 
 416         /*
 417          * Attach any necessary label information to
 418          * this packet
 419          */
 420         if (is_system_labeled()) {
 421                 ira->ira_flags |= IRAF_SYSTEM_LABELED;
 422 
 423                 /*
 424                  * This updates ira_cred, ira_tsl and ira_free_flags based
 425                  * on the label.
 426                  */
 427                 if (!tsol_get_pkt_label(mp, IPV6_VERSION, ira)) {
 428                         if (ip6opt_ls != 0)
 429                                 ip0dbg(("tsol_get_pkt_label v6 failed\n"));
 430                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 431                         ip_drop_input("ipIfStatsInDiscards", mp, ill);
 432                         freemsg(mp);
 433                         return;
 434                 }
 435                 /* Note that ira_tsl can be NULL here. */
 436 
 437                 /* tsol_get_pkt_label sometimes does pullupmsg */
 438                 ip6h = (ip6_t *)mp->b_rptr;
 439         }
 440         ill_input_short_v6(mp, ip6h, nexthop, ira, rtc);
 441 }
 442 
 443 /*
 444  * Check for IPv6 addresses that should not appear on the wire
 445  * as either source or destination.
 446  * If we ever implement Stateless IPv6 Translators (SIIT) we'd have
 447  * to revisit the IPv4-mapped part.
 448  */
 449 static boolean_t
 450 ip6_bad_address(in6_addr_t *addr, boolean_t is_src)
 451 {
 452         if (IN6_IS_ADDR_V4MAPPED(addr)) {
 453                 ip1dbg(("ip_input_v6: pkt with IPv4-mapped addr"));
 454                 return (B_TRUE);
 455         }
 456         if (IN6_IS_ADDR_LOOPBACK(addr)) {
 457                 ip1dbg(("ip_input_v6: pkt with loopback addr"));
 458                 return (B_TRUE);
 459         }
 460         if (!is_src && IN6_IS_ADDR_UNSPECIFIED(addr)) {
 461                 /*
 462                  * having :: in the src is ok: it's used for DAD.
 463                  */
 464                 ip1dbg(("ip_input_v6: pkt with unspecified addr"));
 465                 return (B_TRUE);
 466         }
 467         return (B_FALSE);
 468 }
 469 
 470 /*
 471  * Routing lookup for IPv6 link-locals.
 472  * First we look on the inbound interface, then we check for IPMP and
 473  * look on the upper interface.
 474  * We update ira_ruifindex if we find the IRE on the upper interface.
 475  */
 476 static ire_t *
 477 ire_linklocal(const in6_addr_t *nexthop, ill_t *ill, ip_recv_attr_t *ira,
 478     uint_t irr_flags, ip_stack_t *ipst)
 479 {
 480         int match_flags = MATCH_IRE_SECATTR | MATCH_IRE_ILL;
 481         ire_t *ire;
 482 
 483         ASSERT(IN6_IS_ADDR_LINKLOCAL(nexthop));
 484         ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl,
 485             match_flags, irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
 486         if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
 487             !IS_UNDER_IPMP(ill))
 488                 return (ire);
 489 
 490         /*
 491          * When we are using IMP we need to look for an IRE on both the
 492          * under and upper interfaces since there are different
 493          * link-local addresses for the under and upper.
 494          */
 495         ill = ipmp_ill_hold_ipmp_ill(ill);
 496         if (ill == NULL)
 497                 return (ire);
 498 
 499         ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
 500 
 501         ire_refrele(ire);
 502         ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl,
 503             match_flags, irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
 504         ill_refrele(ill);
 505         return (ire);
 506 }
 507 
 508 /*
 509  * This is the tail-end of the full receive side packet handling.
 510  * It can be used directly when the configuration is simple.
 511  */
 512 void
 513 ill_input_short_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg,
 514     ip_recv_attr_t *ira, rtc_t *rtc)
 515 {
 516         ire_t           *ire;
 517         ill_t           *ill = ira->ira_ill;
 518         ip_stack_t      *ipst = ill->ill_ipst;
 519         uint_t          pkt_len;
 520         ssize_t         len;
 521         ip6_t           *ip6h = (ip6_t *)iph_arg;
 522         in6_addr_t      nexthop = *(in6_addr_t *)nexthop_arg;
 523         ilb_stack_t     *ilbs = ipst->ips_netstack->netstack_ilb;
 524         uint_t          irr_flags;
 525 #define rptr    ((uchar_t *)ip6h)
 526 
 527         ASSERT(DB_TYPE(mp) == M_DATA);
 528 
 529         /*
 530          * Check for source/dest being a bad address: loopback, any, or
 531          * v4mapped. All of them start with a 64 bits of zero.
 532          */
 533         if (ip6h->ip6_src.s6_addr32[0] == 0 &&
 534             ip6h->ip6_src.s6_addr32[1] == 0) {
 535                 if (ip6_bad_address(&ip6h->ip6_src, B_TRUE)) {
 536                         ip1dbg(("ip_input_v6: pkt with bad src addr\n"));
 537                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 538                         ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
 539                         freemsg(mp);
 540                         return;
 541                 }
 542         }
 543         if (ip6h->ip6_dst.s6_addr32[0] == 0 &&
 544             ip6h->ip6_dst.s6_addr32[1] == 0) {
 545                 if (ip6_bad_address(&ip6h->ip6_dst, B_FALSE)) {
 546                         ip1dbg(("ip_input_v6: pkt with bad dst addr\n"));
 547                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 548                         ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
 549                         freemsg(mp);
 550                         return;
 551                 }
 552         }
 553 
 554         len = mp->b_wptr - rptr;
 555         pkt_len = ira->ira_pktlen;
 556 
 557         /* multiple mblk or too short */
 558         len -= pkt_len;
 559         if (len != 0) {
 560                 mp = ip_check_length(mp, rptr, len, pkt_len, IPV6_HDR_LEN, ira);
 561                 if (mp == NULL)
 562                         return;
 563                 ip6h = (ip6_t *)mp->b_rptr;
 564         }
 565 
 566         DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
 567             ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h,
 568             int, 0);
 569         /*
 570          * The event for packets being received from a 'physical'
 571          * interface is placed after validation of the source and/or
 572          * destination address as being local so that packets can be
 573          * redirected to loopback addresses using ipnat.
 574          */
 575         DTRACE_PROBE4(ip6__physical__in__start,
 576             ill_t *, ill, ill_t *, NULL,
 577             ip6_t *, ip6h, mblk_t *, mp);
 578 
 579         if (HOOKS6_INTERESTED_PHYSICAL_IN(ipst)) {
 580                 int     ll_multicast = 0;
 581                 int     error;
 582                 in6_addr_t orig_dst = ip6h->ip6_dst;
 583 
 584                 if (ira->ira_flags & IRAF_L2DST_MULTICAST)
 585                         ll_multicast = HPE_MULTICAST;
 586                 else if (ira->ira_flags & IRAF_L2DST_BROADCAST)
 587                         ll_multicast = HPE_BROADCAST;
 588 
 589                 FW_HOOKS6(ipst->ips_ip6_physical_in_event,
 590                     ipst->ips_ipv6firewall_physical_in,
 591                     ill, NULL, ip6h, mp, mp, ll_multicast, ipst, error);
 592 
 593                 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, mp);
 594 
 595                 if (mp == NULL)
 596                         return;
 597 
 598                 /* The length could have changed */
 599                 ip6h = (ip6_t *)mp->b_rptr;
 600                 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
 601                 pkt_len = ira->ira_pktlen;
 602 
 603                 /*
 604                  * In case the destination changed we override any previous
 605                  * change to nexthop.
 606                  */
 607                 if (!IN6_ARE_ADDR_EQUAL(&orig_dst, &ip6h->ip6_dst))
 608                         nexthop = ip6h->ip6_dst;
 609 
 610                 if (IN6_IS_ADDR_UNSPECIFIED(&nexthop)) {
 611                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 612                         ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
 613                         freemsg(mp);
 614                         return;
 615                 }
 616 
 617         }
 618 
 619         if (ipst->ips_ip6_observe.he_interested) {
 620                 zoneid_t dzone;
 621 
 622                 /*
 623                  * On the inbound path the src zone will be unknown as
 624                  * this packet has come from the wire.
 625                  */
 626                 dzone = ip_get_zoneid_v6(&nexthop, mp, ill, ira, ALL_ZONES);
 627                 ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, ipst);
 628         }
 629 
 630         if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) !=
 631             IPV6_DEFAULT_VERS_AND_FLOW) {
 632                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
 633                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion);
 634                 ip_drop_input("ipIfStatsInWrongIPVersion", mp, ill);
 635                 freemsg(mp);
 636                 return;
 637         }
 638 
 639         /*
 640          * For IPv6 we update ira_ip_hdr_length and ira_protocol as
 641          * we parse the headers, starting with the hop-by-hop options header.
 642          */
 643         ira->ira_ip_hdr_length = IPV6_HDR_LEN;
 644         if ((ira->ira_protocol = ip6h->ip6_nxt) == IPPROTO_HOPOPTS) {
 645                 ip6_hbh_t       *hbhhdr;
 646                 uint_t          ehdrlen;
 647                 uint8_t         *optptr;
 648 
 649                 if (pkt_len < IPV6_HDR_LEN + MIN_EHDR_LEN) {
 650                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
 651                         ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
 652                         freemsg(mp);
 653                         return;
 654                 }
 655                 if (mp->b_cont != NULL &&
 656                     rptr + IPV6_HDR_LEN + MIN_EHDR_LEN > mp->b_wptr) {
 657                         ip6h = ip_pullup(mp, IPV6_HDR_LEN + MIN_EHDR_LEN, ira);
 658                         if (ip6h == NULL) {
 659                                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 660                                 ip_drop_input("ipIfStatsInDiscards", mp, ill);
 661                                 freemsg(mp);
 662                                 return;
 663                         }
 664                 }
 665                 hbhhdr = (ip6_hbh_t *)&ip6h[1];
 666                 ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
 667 
 668                 if (pkt_len < IPV6_HDR_LEN + ehdrlen) {
 669                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
 670                         ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
 671                         freemsg(mp);
 672                         return;
 673                 }
 674                 if (mp->b_cont != NULL &&
 675                     rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) {
 676                         ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira);
 677                         if (ip6h == NULL) {
 678                                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 679                                 ip_drop_input("ipIfStatsInDiscards", mp, ill);
 680                                 freemsg(mp);
 681                                 return;
 682                         }
 683                         hbhhdr = (ip6_hbh_t *)&ip6h[1];
 684                 }
 685 
 686                 /*
 687                  * Update ira_ip_hdr_length to skip the hop-by-hop header
 688                  * once we get to ip_fanout_v6
 689                  */
 690                 ira->ira_ip_hdr_length += ehdrlen;
 691                 ira->ira_protocol = hbhhdr->ip6h_nxt;
 692 
 693                 optptr = (uint8_t *)&hbhhdr[1];
 694                 switch (ip_process_options_v6(mp, ip6h, optptr,
 695                     ehdrlen - 2, IPPROTO_HOPOPTS, ira)) {
 696                 case -1:
 697                         /*
 698                          * Packet has been consumed and any
 699                          * needed ICMP messages sent.
 700                          */
 701                         return;
 702                 case 0:
 703                         /* no action needed */
 704                         break;
 705                 case 1:
 706                         /*
 707                          * Known router alert. Make use handle it as local
 708                          * by setting the nexthop to be the all-host multicast
 709                          * address, and skip multicast membership filter by
 710                          * marking as a router alert.
 711                          */
 712                         ira->ira_flags |= IRAF_ROUTER_ALERT;
 713                         nexthop = ipv6_all_hosts_mcast;
 714                         break;
 715                 }
 716         }
 717 
 718         /*
 719          * Here we check to see if we machine is setup as
 720          * L3 loadbalancer and if the incoming packet is for a VIP
 721          *
 722          * Check the following:
 723          * - there is at least a rule
 724          * - protocol of the packet is supported
 725          *
 726          * We don't load balance IPv6 link-locals.
 727          */
 728         if (ilb_has_rules(ilbs) && ILB_SUPP_L4(ira->ira_protocol) &&
 729             !IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
 730                 in6_addr_t      lb_dst;
 731                 int             lb_ret;
 732 
 733                 /* For convenience, we just pull up the mblk. */
 734                 if (mp->b_cont != NULL) {
 735                         if (pullupmsg(mp, -1) == 0) {
 736                                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 737                                 ip_drop_input("ipIfStatsInDiscards - pullupmsg",
 738                                     mp, ill);
 739                                 freemsg(mp);
 740                                 return;
 741                         }
 742                         ip6h = (ip6_t *)mp->b_rptr;
 743                 }
 744                 lb_ret = ilb_check_v6(ilbs, ill, mp, ip6h, ira->ira_protocol,
 745                     (uint8_t *)ip6h + ira->ira_ip_hdr_length, &lb_dst);
 746                 if (lb_ret == ILB_DROPPED) {
 747                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 748                         ip_drop_input("ILB_DROPPED", mp, ill);
 749                         freemsg(mp);
 750                         return;
 751                 }
 752                 if (lb_ret == ILB_BALANCED) {
 753                         /* Set the dst to that of the chosen server */
 754                         nexthop = lb_dst;
 755                         DB_CKSUMFLAGS(mp) = 0;
 756                 }
 757         }
 758 
 759         if (ill->ill_flags & ILLF_ROUTER)
 760                 irr_flags = IRR_ALLOCATE;
 761         else
 762                 irr_flags = IRR_NONE;
 763 
 764         /* Can not use route cache with TX since the labels can differ */
 765         if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
 766                 if (IN6_IS_ADDR_MULTICAST(&nexthop)) {
 767                         ire = ire_multicast(ill);
 768                 } else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
 769                         ire = ire_linklocal(&nexthop, ill, ira, irr_flags,
 770                             ipst);
 771                 } else {
 772                         /* Match destination and label */
 773                         ire = ire_route_recursive_v6(&nexthop, 0, NULL,
 774                             ALL_ZONES, ira->ira_tsl, MATCH_IRE_SECATTR,
 775                             irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL,
 776                             NULL);
 777                 }
 778                 /* Update the route cache so we do the ire_refrele */
 779                 ASSERT(ire != NULL);
 780                 if (rtc->rtc_ire != NULL)
 781                         ire_refrele(rtc->rtc_ire);
 782                 rtc->rtc_ire = ire;
 783                 rtc->rtc_ip6addr = nexthop;
 784         } else if (IN6_ARE_ADDR_EQUAL(&nexthop, &rtc->rtc_ip6addr) &&
 785             rtc->rtc_ire != NULL) {
 786                 /* Use the route cache */
 787                 ire = rtc->rtc_ire;
 788         } else {
 789                 /* Update the route cache */
 790                 if (IN6_IS_ADDR_MULTICAST(&nexthop)) {
 791                         ire = ire_multicast(ill);
 792                 } else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
 793                         ire = ire_linklocal(&nexthop, ill, ira, irr_flags,
 794                             ipst);
 795                 } else {
 796                         ire = ire_route_recursive_dstonly_v6(&nexthop,
 797                             irr_flags, ira->ira_xmit_hint, ipst);
 798                 }
 799                 ASSERT(ire != NULL);
 800                 if (rtc->rtc_ire != NULL)
 801                         ire_refrele(rtc->rtc_ire);
 802                 rtc->rtc_ire = ire;
 803                 rtc->rtc_ip6addr = nexthop;
 804         }
 805 
 806         ire->ire_ib_pkt_count++;
 807 
 808         /*
 809          * Based on ire_type and ire_flags call one of:
 810          *      ire_recv_local_v6 - for IRE_LOCAL
 811          *      ire_recv_loopback_v6 - for IRE_LOOPBACK
 812          *      ire_recv_multirt_v6 - if RTF_MULTIRT
 813          *      ire_recv_noroute_v6 - if RTF_REJECT or RTF_BLACKHOLE
 814          *      ire_recv_multicast_v6 - for IRE_MULTICAST
 815          *      ire_recv_noaccept_v6 - for ire_noaccept ones
 816          *      ire_recv_forward_v6 - for the rest.
 817          */
 818 
 819         (*ire->ire_recvfn)(ire, mp, ip6h, ira);
 820 }
 821 #undef rptr
 822 
 823 /*
 824  * ire_recvfn for IREs that need forwarding
 825  */
 826 void
 827 ire_recv_forward_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
 828 {
 829         ip6_t           *ip6h = (ip6_t *)iph_arg;
 830         ill_t           *ill = ira->ira_ill;
 831         ip_stack_t      *ipst = ill->ill_ipst;
 832         iaflags_t       iraflags = ira->ira_flags;
 833         ill_t           *dst_ill;
 834         nce_t           *nce;
 835         uint32_t        added_tx_len;
 836         uint32_t        mtu, iremtu;
 837 
 838         if (iraflags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
 839                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
 840                 ip_drop_input("l2 multicast not forwarded", mp, ill);
 841                 freemsg(mp);
 842                 return;
 843         }
 844 
 845         if (!(ill->ill_flags & ILLF_ROUTER)) {
 846                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
 847                 ip_drop_input("ipIfStatsForwProhibits", mp, ill);
 848                 freemsg(mp);
 849                 return;
 850         }
 851 
 852         /*
 853          * Either ire_nce_capable or ire_dep_parent would be set for the IRE
 854          * when it is found by ire_route_recursive, but that some other thread
 855          * could have changed the routes with the effect of clearing
 856          * ire_dep_parent. In that case we'd end up dropping the packet, or
 857          * finding a new nce below.
 858          * Get, allocate, or update the nce.
 859          * We get a refhold on ire_nce_cache as a result of this to avoid races
 860          * where ire_nce_cache is deleted.
 861          *
 862          * This ensures that we don't forward if the interface is down since
 863          * ipif_down removes all the nces.
 864          */
 865         mutex_enter(&ire->ire_lock);
 866         nce = ire->ire_nce_cache;
 867         if (nce == NULL) {
 868                 /* Not yet set up - try to set one up */
 869                 mutex_exit(&ire->ire_lock);
 870                 (void) ire_revalidate_nce(ire);
 871                 mutex_enter(&ire->ire_lock);
 872                 nce = ire->ire_nce_cache;
 873                 if (nce == NULL) {
 874                         mutex_exit(&ire->ire_lock);
 875                         /* The ire_dep_parent chain went bad, or no memory */
 876                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 877                         ip_drop_input("No ire_dep_parent", mp, ill);
 878                         freemsg(mp);
 879                         return;
 880                 }
 881         }
 882         nce_refhold(nce);
 883         mutex_exit(&ire->ire_lock);
 884 
 885         if (nce->nce_is_condemned) {
 886                 nce_t *nce1;
 887 
 888                 nce1 = ire_handle_condemned_nce(nce, ire, NULL, ip6h, B_FALSE);
 889                 nce_refrele(nce);
 890                 if (nce1 == NULL) {
 891                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 892                         ip_drop_input("No nce", mp, ill);
 893                         freemsg(mp);
 894                         return;
 895                 }
 896                 nce = nce1;
 897         }
 898         dst_ill = nce->nce_ill;
 899 
 900         /*
 901          * Unless we are forwarding, drop the packet.
 902          * Unlike IPv4 we don't allow source routed packets out the same
 903          * interface when we are not a router.
 904          * Note that ill_forward_set() will set the ILLF_ROUTER on
 905          * all the group members when it gets an ipmp-ill or under-ill.
 906          */
 907         if (!(dst_ill->ill_flags & ILLF_ROUTER)) {
 908                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
 909                 ip_drop_input("ipIfStatsForwProhibits", mp, ill);
 910                 freemsg(mp);
 911                 nce_refrele(nce);
 912                 return;
 913         }
 914 
 915         if (ire->ire_zoneid != GLOBAL_ZONEID && ire->ire_zoneid != ALL_ZONES) {
 916                 ire->ire_ib_pkt_count--;
 917                 /*
 918                  * Should only use IREs that are visible from the
 919                  * global zone for forwarding.
 920                  * For IPv6 any source route would have already been
 921                  * advanced in ip_fanout_v6
 922                  */
 923                 ire = ire_route_recursive_v6(&ip6h->ip6_dst, 0, NULL,
 924                     GLOBAL_ZONEID, ira->ira_tsl, MATCH_IRE_SECATTR,
 925                     (ill->ill_flags & ILLF_ROUTER) ? IRR_ALLOCATE : IRR_NONE,
 926                     ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
 927                 ire->ire_ib_pkt_count++;
 928                 (*ire->ire_recvfn)(ire, mp, ip6h, ira);
 929                 ire_refrele(ire);
 930                 nce_refrele(nce);
 931                 return;
 932         }
 933         /*
 934          * ipIfStatsHCInForwDatagrams should only be increment if there
 935          * will be an attempt to forward the packet, which is why we
 936          * increment after the above condition has been checked.
 937          */
 938         BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);
 939 
 940         /* Initiate Read side IPPF processing */
 941         if (IPP_ENABLED(IPP_FWD_IN, ipst)) {
 942                 /* ip_process translates an IS_UNDER_IPMP */
 943                 mp = ip_process(IPP_FWD_IN, mp, ill, ill);
 944                 if (mp == NULL) {
 945                         /* ip_drop_packet and MIB done */
 946                         ip2dbg(("ire_recv_forward_v6: pkt dropped/deferred "
 947                             "during IPPF processing\n"));
 948                         nce_refrele(nce);
 949                         return;
 950                 }
 951         }
 952 
 953         DTRACE_PROBE4(ip6__forwarding__start,
 954             ill_t *, ill, ill_t *, dst_ill, ip6_t *, ip6h, mblk_t *, mp);
 955 
 956         if (HOOKS6_INTERESTED_FORWARDING(ipst)) {
 957                 int     error;
 958 
 959                 FW_HOOKS(ipst->ips_ip6_forwarding_event,
 960                     ipst->ips_ipv6firewall_forwarding,
 961                     ill, dst_ill, ip6h, mp, mp, 0, ipst, error);
 962 
 963                 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp);
 964 
 965                 if (mp == NULL) {
 966                         nce_refrele(nce);
 967                         return;
 968                 }
 969                 /*
 970                  * Even if the destination was changed by the filter we use the
 971                  * forwarding decision that was made based on the address
 972                  * in ip_input.
 973                  */
 974 
 975                 /* Might have changed */
 976                 ip6h = (ip6_t *)mp->b_rptr;
 977                 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
 978         }
 979 
 980         /* Packet is being forwarded. Turning off hwcksum flag. */
 981         DB_CKSUMFLAGS(mp) = 0;
 982 
 983         /*
 984          * Per RFC 3513 section 2.5.2, we must not forward packets with
 985          * an unspecified source address.
 986          * The loopback address check for both src and dst has already
 987          * been checked in ip_input_v6
 988          * In the future one can envision adding RPF checks using number 3.
 989          */
 990         switch (ipst->ips_src_check) {
 991         case 0:
 992                 break;
 993         case 1:
 994         case 2:
 995                 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) ||
 996                     IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) {
 997                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
 998                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 999                         ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
1000                         nce_refrele(nce);
1001                         freemsg(mp);
1002                         return;
1003                 }
1004                 break;
1005         }
1006 
1007         /*
1008          * Check to see if we're forwarding the packet to a
1009          * different link from which it came.  If so, check the
1010          * source and destination addresses since routers must not
1011          * forward any packets with link-local source or
1012          * destination addresses to other links.  Otherwise (if
1013          * we're forwarding onto the same link), conditionally send
1014          * a redirect message.
1015          */
1016         if (!IS_ON_SAME_LAN(dst_ill, ill)) {
1017                 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ||
1018                     IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) {
1019                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
1020                         ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
1021                         freemsg(mp);
1022                         nce_refrele(nce);
1023                         return;
1024                 }
1025                 /* TBD add site-local check at site boundary? */
1026         } else if (ipst->ips_ipv6_send_redirects) {
1027                 ip_send_potential_redirect_v6(mp, ip6h, ire, ira);
1028         }
1029 
1030         added_tx_len = 0;
1031         if (iraflags & IRAF_SYSTEM_LABELED) {
1032                 mblk_t          *mp1;
1033                 uint32_t        old_pkt_len = ira->ira_pktlen;
1034 
1035                 /*
1036                  * Check if it can be forwarded and add/remove
1037                  * CIPSO options as needed.
1038                  */
1039                 if ((mp1 = tsol_ip_forward(ire, mp, ira)) == NULL) {
1040                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1041                         ip_drop_input("tsol_ip_forward", mp, ill);
1042                         freemsg(mp);
1043                         nce_refrele(nce);
1044                         return;
1045                 }
1046                 /*
1047                  * Size may have changed. Remember amount added in case
1048                  * ip_fragment needs to send an ICMP too big.
1049                  */
1050                 mp = mp1;
1051                 ip6h = (ip6_t *)mp->b_rptr;
1052                 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
1053                 ira->ira_ip_hdr_length = IPV6_HDR_LEN;
1054                 if (ira->ira_pktlen > old_pkt_len)
1055                         added_tx_len = ira->ira_pktlen - old_pkt_len;
1056         }
1057 
1058         mtu = dst_ill->ill_mtu;
1059         if ((iremtu = ire->ire_metrics.iulp_mtu) != 0 && iremtu < mtu)
1060                 mtu = iremtu;
1061         ip_forward_xmit_v6(nce, mp, ip6h, ira, mtu, added_tx_len);
1062         nce_refrele(nce);
1063         return;
1064 
1065 }
1066 
1067 /*
1068  * Used for sending out unicast and multicast packets that are
1069  * forwarded.
1070  */
1071 void
1072 ip_forward_xmit_v6(nce_t *nce, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira,
1073     uint32_t mtu, uint32_t added_tx_len)
1074 {
1075         ill_t           *dst_ill = nce->nce_ill;
1076         uint32_t        pkt_len;
1077         iaflags_t       iraflags = ira->ira_flags;
1078         ip_stack_t      *ipst = dst_ill->ill_ipst;
1079 
1080         if (ip6h->ip6_hops-- <= 1) {
1081                 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1082                 ip_drop_input("ICMP6_TIME_EXCEED_TRANSIT", mp, ira->ira_ill);
1083                 icmp_time_exceeded_v6(mp, ICMP6_TIME_EXCEED_TRANSIT, B_FALSE,
1084                     ira);
1085                 return;
1086         }
1087 
1088         /* Initiate Write side IPPF processing before any fragmentation */
1089         if (IPP_ENABLED(IPP_FWD_OUT, ipst)) {
1090                 /* ip_process translates an IS_UNDER_IPMP */
1091                 mp = ip_process(IPP_FWD_OUT, mp, dst_ill, dst_ill);
1092                 if (mp == NULL) {
1093                         /* ip_drop_packet and MIB done */
1094                         ip2dbg(("ire_recv_forward_v6: pkt dropped/deferred" \
1095                             " during IPPF processing\n"));
1096                         return;
1097                 }
1098         }
1099 
1100         pkt_len = ira->ira_pktlen;
1101 
1102         BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams);
1103 
1104         if (pkt_len > mtu) {
1105                 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutFragFails);
1106                 ip_drop_output("ipIfStatsOutFragFails", mp, dst_ill);
1107                 if (iraflags & IRAF_SYSTEM_LABELED) {
1108                         /*
1109                          * Remove any CIPSO option added by
1110                          * tsol_ip_forward, and make sure we report
1111                          * a path MTU so that there
1112                          * is room to add such a CIPSO option for future
1113                          * packets.
1114                          */
1115                         mtu = tsol_pmtu_adjust(mp, mtu, added_tx_len, AF_INET6);
1116                 }
1117                 icmp_pkt2big_v6(mp, mtu, B_TRUE, ira);
1118                 return;
1119         }
1120 
1121         ASSERT(pkt_len ==
1122             ntohs(((ip6_t *)mp->b_rptr)->ip6_plen) + IPV6_HDR_LEN);
1123 
1124         if (iraflags & IRAF_LOOPBACK_COPY) {
1125                 /*
1126                  * IXAF_NO_LOOP_ZONEID is not set hence 6th arg
1127                  * is don't care
1128                  */
1129                 (void) ip_postfrag_loopcheck(mp, nce,
1130                     (IXAF_LOOPBACK_COPY | IXAF_NO_DEV_FLOW_CTL),
1131                     pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL);
1132         } else {
1133                 (void) ip_xmit(mp, nce, IXAF_NO_DEV_FLOW_CTL,
1134                     pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL);
1135         }
1136 }
1137 
1138 /*
1139  * ire_recvfn for RTF_REJECT and RTF_BLACKHOLE routes, including IRE_NOROUTE,
1140  * which is what ire_route_recursive returns when there is no matching ire.
1141  * Send ICMP unreachable unless blackhole.
1142  */
1143 void
1144 ire_recv_noroute_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1145 {
1146         ip6_t           *ip6h = (ip6_t *)iph_arg;
1147         ill_t           *ill = ira->ira_ill;
1148         ip_stack_t      *ipst = ill->ill_ipst;
1149 
1150         /* Would we have forwarded this packet if we had a route? */
1151         if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
1152                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1153                 ip_drop_input("l2 multicast not forwarded", mp, ill);
1154                 freemsg(mp);
1155                 return;
1156         }
1157 
1158         if (!(ill->ill_flags & ILLF_ROUTER)) {
1159                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1160                 ip_drop_input("ipIfStatsForwProhibits", mp, ill);
1161                 freemsg(mp);
1162                 return;
1163         }
1164         /*
1165          * If we had a route this could have been forwarded. Count as such.
1166          *
1167          * ipIfStatsHCInForwDatagrams should only be increment if there
1168          * will be an attempt to forward the packet, which is why we
1169          * increment after the above condition has been checked.
1170          */
1171         BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);
1172 
1173         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes);
1174 
1175         ip_rts_change_v6(RTM_MISS, &ip6h->ip6_dst, 0, 0, 0, 0, 0, 0, RTA_DST,
1176             ipst);
1177 
1178         if (ire->ire_flags & RTF_BLACKHOLE) {
1179                 ip_drop_input("ipIfStatsInNoRoutes RTF_BLACKHOLE", mp, ill);
1180                 freemsg(mp);
1181         } else {
1182                 ip_drop_input("ipIfStatsInNoRoutes RTF_REJECT", mp, ill);
1183 
1184                 icmp_unreachable_v6(mp, ICMP6_DST_UNREACH_NOROUTE, B_FALSE,
1185                     ira);
1186         }
1187 }
1188 
1189 /*
1190  * ire_recvfn for IRE_LOCALs marked with ire_noaccept. Such IREs are used for
1191  * VRRP when in noaccept mode.
1192  * We silently drop packets except for Neighbor Solicitations and
1193  * Neighbor Advertisements.
1194  */
1195 void
1196 ire_recv_noaccept_v6(ire_t *ire, mblk_t *mp, void *iph_arg,
1197     ip_recv_attr_t *ira)
1198 {
1199         ip6_t           *ip6h = (ip6_t *)iph_arg;
1200         ill_t           *ill = ira->ira_ill;
1201         icmp6_t         *icmp6;
1202         int             ip_hdr_length;
1203 
1204         if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
1205                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1206                 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill);
1207                 freemsg(mp);
1208                 return;
1209         }
1210         ip_hdr_length = ira->ira_ip_hdr_length;
1211         if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) {
1212                 if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) {
1213                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
1214                         ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
1215                         freemsg(mp);
1216                         return;
1217                 }
1218                 ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira);
1219                 if (ip6h == NULL) {
1220                         BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
1221                         freemsg(mp);
1222                         return;
1223                 }
1224         }
1225         icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
1226 
1227         if (icmp6->icmp6_type != ND_NEIGHBOR_SOLICIT &&
1228             icmp6->icmp6_type != ND_NEIGHBOR_ADVERT) {
1229                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1230                 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill);
1231                 freemsg(mp);
1232                 return;
1233         }
1234         ire_recv_local_v6(ire, mp, ip6h, ira);
1235 }
1236 
1237 /*
1238  * ire_recvfn for IRE_MULTICAST.
1239  */
1240 void
1241 ire_recv_multicast_v6(ire_t *ire, mblk_t *mp, void *iph_arg,
1242     ip_recv_attr_t *ira)
1243 {
1244         ip6_t           *ip6h = (ip6_t *)iph_arg;
1245         ill_t           *ill = ira->ira_ill;
1246 
1247         ASSERT(ire->ire_ill == ira->ira_ill);
1248 
1249         BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts);
1250         UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, ira->ira_pktlen);
1251 
1252         /* Tag for higher-level protocols */
1253         ira->ira_flags |= IRAF_MULTICAST;
1254 
1255         /*
1256          * So that we don't end up with dups, only one ill an IPMP group is
1257          * nominated to receive multicast traffic.
1258          * If we have no cast_ill we are liberal and accept everything.
1259          */
1260         if (IS_UNDER_IPMP(ill)) {
1261                 ip_stack_t      *ipst = ill->ill_ipst;
1262 
1263                 /* For an under ill_grp can change under lock */
1264                 rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1265                 if (!ill->ill_nom_cast && ill->ill_grp != NULL &&
1266                     ill->ill_grp->ig_cast_ill != NULL) {
1267                         rw_exit(&ipst->ips_ill_g_lock);
1268                         ip_drop_input("not on cast ill", mp, ill);
1269                         freemsg(mp);
1270                         return;
1271                 }
1272                 rw_exit(&ipst->ips_ill_g_lock);
1273                 /*
1274                  * We switch to the upper ill so that mrouter and hasmembers
1275                  * can operate on upper here and in ip_input_multicast.
1276                  */
1277                 ill = ipmp_ill_hold_ipmp_ill(ill);
1278                 if (ill != NULL) {
1279                         ASSERT(ill != ira->ira_ill);
1280                         ASSERT(ire->ire_ill == ira->ira_ill);
1281                         ira->ira_ill = ill;
1282                         ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
1283                 } else {
1284                         ill = ira->ira_ill;
1285                 }
1286         }
1287 
1288 #ifdef notdef
1289         /*
1290          * Check if we are a multicast router - send ip_mforward a copy of
1291          * the packet.
1292          * Due to mroute_decap tunnels we consider forwarding packets even if
1293          * mrouted has not joined the allmulti group on this interface.
1294          */
1295         if (ipst->ips_ip_g_mrouter) {
1296                 int retval;
1297 
1298                 /*
1299                  * Clear the indication that this may have hardware
1300                  * checksum as we are not using it for forwarding.
1301                  */
1302                 DB_CKSUMFLAGS(mp) = 0;
1303 
1304                 /*
1305                  * ip_mforward helps us make these distinctions: If received
1306                  * on tunnel and not IGMP, then drop.
1307                  * If IGMP packet, then don't check membership
1308                  * If received on a phyint and IGMP or PIM, then
1309                  * don't check membership
1310                  */
1311                 retval = ip_mforward_v6(mp, ira);
1312                 /* ip_mforward updates mib variables if needed */
1313 
1314                 switch (retval) {
1315                 case 0:
1316                         /*
1317                          * pkt is okay and arrived on phyint.
1318                          */
1319                         break;
1320                 case -1:
1321                         /* pkt is mal-formed, toss it */
1322                         freemsg(mp);
1323                         goto done;
1324                 case 1:
1325                         /*
1326                          * pkt is okay and arrived on a tunnel
1327                          *
1328                          * If we are running a multicast router
1329                          * we need to see all mld packets, which
1330                          * are marked with router alerts.
1331                          */
1332                         if (ira->ira_flags & IRAF_ROUTER_ALERT)
1333                                 goto forus;
1334                         ip_drop_input("Multicast on tunnel ignored", mp, ill);
1335                         freemsg(mp);
1336                         goto done;
1337                 }
1338         }
1339 #endif /* notdef */
1340 
1341         /*
1342          * If this was a router alert we skip the group membership check.
1343          */
1344         if (ira->ira_flags & IRAF_ROUTER_ALERT)
1345                 goto forus;
1346 
1347         /*
1348          * Check if we have members on this ill. This is not necessary for
1349          * correctness because even if the NIC/GLD had a leaky filter, we
1350          * filter before passing to each conn_t.
1351          */
1352         if (!ill_hasmembers_v6(ill, &ip6h->ip6_dst)) {
1353                 /*
1354                  * Nobody interested
1355                  *
1356                  * This might just be caused by the fact that
1357                  * multiple IP Multicast addresses map to the same
1358                  * link layer multicast - no need to increment counter!
1359                  */
1360                 ip_drop_input("Multicast with no members", mp, ill);
1361                 freemsg(mp);
1362                 goto done;
1363         }
1364 forus:
1365         ip2dbg(("ire_recv_multicast_v6: multicast for us\n"));
1366 
1367         /*
1368          * After reassembly and IPsec we will need to duplicate the
1369          * multicast packet for all matching zones on the ill.
1370          */
1371         ira->ira_zoneid = ALL_ZONES;
1372 
1373         /* Reassemble on the ill on which the packet arrived */
1374         ip_input_local_v6(ire, mp, ip6h, ira);
1375 done:
1376         if (ill != ire->ire_ill) {
1377                 ill_refrele(ill);
1378                 ira->ira_ill = ire->ire_ill;
1379                 ira->ira_ruifindex = ira->ira_ill->ill_phyint->phyint_ifindex;
1380         }
1381 }
1382 
1383 /*
1384  * ire_recvfn for IRE_OFFLINK with RTF_MULTIRT.
1385  * Drop packets since we don't forward out multirt routes.
1386  */
1387 /* ARGSUSED */
1388 void
1389 ire_recv_multirt_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1390 {
1391         ill_t           *ill = ira->ira_ill;
1392 
1393         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes);
1394         ip_drop_input("Not forwarding out MULTIRT", mp, ill);
1395         freemsg(mp);
1396 }
1397 
1398 /*
1399  * ire_recvfn for IRE_LOOPBACK. This is only used when a FW_HOOK
1400  * has rewritten the packet to have a loopback destination address (We
1401  * filter out packet with a loopback destination from arriving over the wire).
1402  * We don't know what zone to use, thus we always use the GLOBAL_ZONEID.
1403  */
1404 void
1405 ire_recv_loopback_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1406 {
1407         ip6_t           *ip6h = (ip6_t *)iph_arg;
1408         ill_t           *ill = ira->ira_ill;
1409         ill_t           *ire_ill = ire->ire_ill;
1410 
1411         ira->ira_zoneid = GLOBAL_ZONEID;
1412 
1413         /* Switch to the lo0 ill for further processing  */
1414         if (ire_ill != ill) {
1415                 /*
1416                  * Update ira_ill to be the ILL on which the IP address
1417                  * is hosted.
1418                  * No need to hold the ill since we have a hold on the ire
1419                  */
1420                 ASSERT(ira->ira_ill == ira->ira_rill);
1421                 ira->ira_ill = ire_ill;
1422 
1423                 ip_input_local_v6(ire, mp, ip6h, ira);
1424 
1425                 /* Restore */
1426                 ASSERT(ira->ira_ill == ire_ill);
1427                 ira->ira_ill = ill;
1428                 return;
1429 
1430         }
1431         ip_input_local_v6(ire, mp, ip6h, ira);
1432 }
1433 
1434 /*
1435  * ire_recvfn for IRE_LOCAL.
1436  */
1437 void
1438 ire_recv_local_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1439 {
1440         ip6_t           *ip6h = (ip6_t *)iph_arg;
1441         ill_t           *ill = ira->ira_ill;
1442         ill_t           *ire_ill = ire->ire_ill;
1443 
1444         /* Make a note for DAD that this address is in use */
1445         ire->ire_last_used_time = LBOLT_FASTPATH;
1446 
1447         /* Only target the IRE_LOCAL with the right zoneid. */
1448         ira->ira_zoneid = ire->ire_zoneid;
1449 
1450         /*
1451          * If the packet arrived on the wrong ill, we check that
1452          * this is ok.
1453          * If it is, then we ensure that we do the reassembly on
1454          * the ill on which the address is hosted. We keep ira_rill as
1455          * the one on which the packet arrived, so that IP_PKTINFO and
1456          * friends can report this.
1457          */
1458         if (ire_ill != ill) {
1459                 ire_t *new_ire;
1460 
1461                 new_ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill);
1462                 if (new_ire == NULL) {
1463                         /* Drop packet */
1464                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1465                         ip_drop_input("ipIfStatsInForwProhibits", mp, ill);
1466                         freemsg(mp);
1467                         return;
1468                 }
1469                 /*
1470                  * Update ira_ill to be the ILL on which the IP address
1471                  * is hosted. No need to hold the ill since we have a
1472                  * hold on the ire. Note that we do the switch even if
1473                  * new_ire == ire (for IPMP, ire would be the one corresponding
1474                  * to the IPMP ill).
1475                  */
1476                 ASSERT(ira->ira_ill == ira->ira_rill);
1477                 ira->ira_ill = new_ire->ire_ill;
1478 
1479                 /* ira_ruifindex tracks the upper for ira_rill */
1480                 if (IS_UNDER_IPMP(ill))
1481                         ira->ira_ruifindex = ill_get_upper_ifindex(ill);
1482 
1483                 ip_input_local_v6(new_ire, mp, ip6h, ira);
1484 
1485                 /* Restore */
1486                 ASSERT(ira->ira_ill == new_ire->ire_ill);
1487                 ira->ira_ill = ill;
1488                 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
1489 
1490                 if (new_ire != ire)
1491                         ire_refrele(new_ire);
1492                 return;
1493         }
1494 
1495         ip_input_local_v6(ire, mp, ip6h, ira);
1496 }
1497 
1498 /*
1499  * Common function for packets arriving for the host. Handles
1500  * checksum verification, reassembly checks, etc.
1501  */
1502 static void
1503 ip_input_local_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1504 {
1505         iaflags_t       iraflags = ira->ira_flags;
1506 
1507         /*
1508          * For multicast we need some extra work before
1509          * we call ip_fanout_v6(), since in the case of shared-IP zones
1510          * we need to pretend that a packet arrived for each zoneid.
1511          */
1512         if (iraflags & IRAF_MULTICAST) {
1513                 ip_input_multicast_v6(ire, mp, ip6h, ira);
1514                 return;
1515         }
1516         ip_fanout_v6(mp, ip6h, ira);
1517 }
1518 
1519 /*
1520  * Handle multiple zones which want to receive the same multicast packets
1521  * on this ill by delivering a packet to each of them.
1522  *
1523  * Note that for packets delivered to transports we could instead do this
1524  * as part of the fanout code, but since we need to handle icmp_inbound
1525  * it is simpler to have multicast work the same as IPv4 broadcast.
1526  *
1527  * The ip_fanout matching for multicast matches based on ilm independent of
1528  * zoneid since the zoneid restriction is applied when joining a multicast
1529  * group.
1530  */
1531 /* ARGSUSED */
1532 static void
1533 ip_input_multicast_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1534 {
1535         ill_t           *ill = ira->ira_ill;
1536         iaflags_t       iraflags = ira->ira_flags;
1537         ip_stack_t      *ipst = ill->ill_ipst;
1538         netstack_t      *ns = ipst->ips_netstack;
1539         zoneid_t        zoneid;
1540         mblk_t          *mp1;
1541         ip6_t           *ip6h1;
1542         uint_t          ira_pktlen = ira->ira_pktlen;
1543         uint16_t        ira_ip_hdr_length = ira->ira_ip_hdr_length;
1544 
1545         /* ire_recv_multicast has switched to the upper ill for IPMP */
1546         ASSERT(!IS_UNDER_IPMP(ill));
1547 
1548         /*
1549          * If we don't have more than one shared-IP zone, or if
1550          * there are no members in anything but the global zone,
1551          * then just set the zoneid and proceed.
1552          */
1553         if (ns->netstack_numzones == 1 ||
1554             !ill_hasmembers_otherzones_v6(ill, &ip6h->ip6_dst,
1555             GLOBAL_ZONEID)) {
1556                 ira->ira_zoneid = GLOBAL_ZONEID;
1557 
1558                 /* If sender didn't want this zone to receive it, drop */
1559                 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
1560                     ira->ira_no_loop_zoneid == ira->ira_zoneid) {
1561                         ip_drop_input("Multicast but wrong zoneid", mp, ill);
1562                         freemsg(mp);
1563                         return;
1564                 }
1565                 ip_fanout_v6(mp, ip6h, ira);
1566                 return;
1567         }
1568 
1569         /*
1570          * Here we loop over all zoneids that have members in the group
1571          * and deliver a packet to ip_fanout for each zoneid.
1572          *
1573          * First find any members in the lowest numeric zoneid by looking for
1574          * first zoneid larger than -1 (ALL_ZONES).
1575          * We terminate the loop when we receive -1 (ALL_ZONES).
1576          */
1577         zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, ALL_ZONES);
1578         for (; zoneid != ALL_ZONES;
1579             zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, zoneid)) {
1580                 /*
1581                  * Avoid an extra copymsg/freemsg by skipping global zone here
1582                  * and doing that at the end.
1583                  */
1584                 if (zoneid == GLOBAL_ZONEID)
1585                         continue;
1586 
1587                 ira->ira_zoneid = zoneid;
1588 
1589                 /* If sender didn't want this zone to receive it, skip */
1590                 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
1591                     ira->ira_no_loop_zoneid == ira->ira_zoneid)
1592                         continue;
1593 
1594                 mp1 = copymsg(mp);
1595                 if (mp1 == NULL) {
1596                         /* Failed to deliver to one zone */
1597                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1598                         ip_drop_input("ipIfStatsInDiscards", mp, ill);
1599                         continue;
1600                 }
1601                 ip6h1 = (ip6_t *)mp1->b_rptr;
1602                 ip_fanout_v6(mp1, ip6h1, ira);
1603                 /*
1604                  * IPsec might have modified ira_pktlen and ira_ip_hdr_length
1605                  * so we restore them for a potential next iteration
1606                  */
1607                 ira->ira_pktlen = ira_pktlen;
1608                 ira->ira_ip_hdr_length = ira_ip_hdr_length;
1609         }
1610 
1611         /* Do the main ire */
1612         ira->ira_zoneid = GLOBAL_ZONEID;
1613         /* If sender didn't want this zone to receive it, drop */
1614         if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
1615             ira->ira_no_loop_zoneid == ira->ira_zoneid) {
1616                 ip_drop_input("Multicast but wrong zoneid", mp, ill);
1617                 freemsg(mp);
1618         } else {
1619                 ip_fanout_v6(mp, ip6h, ira);
1620         }
1621 }
1622 
1623 
1624 /*
1625  * Determine the zoneid and IRAF_TX_MAC_EXEMPTABLE if trusted extensions
1626  * is in use. Updates ira_zoneid and ira_flags as a result.
1627  */
1628 static void
1629 ip_fanout_tx_v6(mblk_t *mp, ip6_t *ip6h, uint8_t protocol, uint_t ip_hdr_length,
1630     ip_recv_attr_t *ira)
1631 {
1632         uint16_t        *up;
1633         uint16_t        lport;
1634         zoneid_t        zoneid;
1635 
1636         ASSERT(ira->ira_flags & IRAF_SYSTEM_LABELED);
1637 
1638         /*
1639          * If the packet is unlabeled we might allow read-down
1640          * for MAC_EXEMPT. Below we clear this if it is a multi-level
1641          * port (MLP).
1642          * Note that ira_tsl can be NULL here.
1643          */
1644         if (ira->ira_tsl != NULL && ira->ira_tsl->tsl_flags & TSLF_UNLABELED)
1645                 ira->ira_flags |= IRAF_TX_MAC_EXEMPTABLE;
1646 
1647         if (ira->ira_zoneid != ALL_ZONES)
1648                 return;
1649 
1650         ira->ira_flags |= IRAF_TX_SHARED_ADDR;
1651 
1652         up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length);
1653         switch (protocol) {
1654         case IPPROTO_TCP:
1655         case IPPROTO_SCTP:
1656         case IPPROTO_UDP:
1657                 /* Caller ensures this */
1658                 ASSERT(((uchar_t *)ip6h) + ip_hdr_length +4 <= mp->b_wptr);
1659 
1660                 /*
1661                  * Only these transports support MLP.
1662                  * We know their destination port numbers is in
1663                  * the same place in the header.
1664                  */
1665                 lport = up[1];
1666 
1667                 /*
1668                  * No need to handle exclusive-stack zones
1669                  * since ALL_ZONES only applies to the shared IP instance.
1670                  */
1671                 zoneid = tsol_mlp_findzone(protocol, lport);
1672                 /*
1673                  * If no shared MLP is found, tsol_mlp_findzone returns
1674                  * ALL_ZONES.  In that case, we assume it's SLP, and
1675                  * search for the zone based on the packet label.
1676                  *
1677                  * If there is such a zone, we prefer to find a
1678                  * connection in it.  Otherwise, we look for a
1679                  * MAC-exempt connection in any zone whose label
1680                  * dominates the default label on the packet.
1681                  */
1682                 if (zoneid == ALL_ZONES)
1683                         zoneid = tsol_attr_to_zoneid(ira);
1684                 else
1685                         ira->ira_flags &= ~IRAF_TX_MAC_EXEMPTABLE;
1686                 break;
1687         default:
1688                 /* Handle shared address for other protocols */
1689                 zoneid = tsol_attr_to_zoneid(ira);
1690                 break;
1691         }
1692         ira->ira_zoneid = zoneid;
1693 }
1694 
1695 /*
1696  * Increment checksum failure statistics
1697  */
1698 static void
1699 ip_input_cksum_err_v6(uint8_t protocol, uint16_t hck_flags, ill_t *ill)
1700 {
1701         ip_stack_t      *ipst = ill->ill_ipst;
1702 
1703         switch (protocol) {
1704         case IPPROTO_TCP:
1705                 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs);
1706 
1707                 if (hck_flags & HCK_FULLCKSUM)
1708                         IP6_STAT(ipst, ip6_tcp_in_full_hw_cksum_err);
1709                 else if (hck_flags & HCK_PARTIALCKSUM)
1710                         IP6_STAT(ipst, ip6_tcp_in_part_hw_cksum_err);
1711                 else
1712                         IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err);
1713                 break;
1714         case IPPROTO_UDP:
1715                 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs);
1716                 if (hck_flags & HCK_FULLCKSUM)
1717                         IP6_STAT(ipst, ip6_udp_in_full_hw_cksum_err);
1718                 else if (hck_flags & HCK_PARTIALCKSUM)
1719                         IP6_STAT(ipst, ip6_udp_in_part_hw_cksum_err);
1720                 else
1721                         IP6_STAT(ipst, ip6_udp_in_sw_cksum_err);
1722                 break;
1723         case IPPROTO_ICMPV6:
1724                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
1725                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
1726                 break;
1727         default:
1728                 ASSERT(0);
1729                 break;
1730         }
1731 }
1732 
1733 /* Calculate the IPv6 pseudo-header checksum for TCP, UDP, and ICMPV6 */
1734 uint32_t
1735 ip_input_cksum_pseudo_v6(ip6_t *ip6h, ip_recv_attr_t *ira)
1736 {
1737         uint_t          ulp_len;
1738         uint32_t        cksum;
1739         uint8_t         protocol = ira->ira_protocol;
1740         uint16_t        ip_hdr_length = ira->ira_ip_hdr_length;
1741 
1742 #define iphs    ((uint16_t *)ip6h)
1743 
1744         switch (protocol) {
1745         case IPPROTO_TCP:
1746                 ulp_len = ira->ira_pktlen - ip_hdr_length;
1747 
1748                 /* Protocol and length */
1749                 cksum = htons(ulp_len) + IP_TCP_CSUM_COMP;
1750                 /* IP addresses */
1751                 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] +
1752                     iphs[8] + iphs[9] + iphs[10] + iphs[11] +
1753                     iphs[12] + iphs[13] + iphs[14] + iphs[15] +
1754                     iphs[16] + iphs[17] + iphs[18] + iphs[19];
1755                 break;
1756 
1757         case IPPROTO_UDP: {
1758                 udpha_t         *udpha;
1759 
1760                 udpha = (udpha_t  *)((uchar_t *)ip6h + ip_hdr_length);
1761 
1762                 /* Protocol and length */
1763                 cksum = udpha->uha_length + IP_UDP_CSUM_COMP;
1764                 /* IP addresses */
1765                 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] +
1766                     iphs[8] + iphs[9] + iphs[10] + iphs[11] +
1767                     iphs[12] + iphs[13] + iphs[14] + iphs[15] +
1768                     iphs[16] + iphs[17] + iphs[18] + iphs[19];
1769                 break;
1770         }
1771         case IPPROTO_ICMPV6:
1772                 ulp_len = ira->ira_pktlen - ip_hdr_length;
1773 
1774                 /* Protocol and length */
1775                 cksum = htons(ulp_len) + IP_ICMPV6_CSUM_COMP;
1776                 /* IP addresses */
1777                 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] +
1778                     iphs[8] + iphs[9] + iphs[10] + iphs[11] +
1779                     iphs[12] + iphs[13] + iphs[14] + iphs[15] +
1780                     iphs[16] + iphs[17] + iphs[18] + iphs[19];
1781                 break;
1782         default:
1783                 cksum = 0;
1784                 break;
1785         }
1786 #undef  iphs
1787         return (cksum);
1788 }
1789 
1790 
1791 /*
1792  * Software verification of the ULP checksums.
1793  * Returns B_TRUE if ok.
1794  * Increments statistics of failed.
1795  */
1796 static boolean_t
1797 ip_input_sw_cksum_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1798 {
1799         ip_stack_t      *ipst = ira->ira_ill->ill_ipst;
1800         uint32_t        cksum;
1801         uint8_t         protocol = ira->ira_protocol;
1802         uint16_t        ip_hdr_length = ira->ira_ip_hdr_length;
1803 
1804         IP6_STAT(ipst, ip6_in_sw_cksum);
1805 
1806         ASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP ||
1807             protocol == IPPROTO_ICMPV6);
1808 
1809         cksum = ip_input_cksum_pseudo_v6(ip6h, ira);
1810         cksum = IP_CSUM(mp, ip_hdr_length, cksum);
1811         if (cksum == 0)
1812                 return (B_TRUE);
1813 
1814         ip_input_cksum_err_v6(protocol, 0, ira->ira_ill);
1815         return (B_FALSE);
1816 }
1817 
1818 /*
1819  * Verify the ULP checksums.
1820  * Returns B_TRUE if ok, or if the ULP doesn't have a well-defined checksum
1821  * algorithm.
1822  * Increments statistics if failed.
1823  */
1824 static boolean_t
1825 ip_input_cksum_v6(iaflags_t iraflags, mblk_t *mp, ip6_t *ip6h,
1826     ip_recv_attr_t *ira)
1827 {
1828         ill_t           *ill = ira->ira_rill;
1829         uint16_t        hck_flags;
1830         uint32_t        cksum;
1831         mblk_t          *mp1;
1832         uint_t          len;
1833         uint8_t         protocol = ira->ira_protocol;
1834         uint16_t        ip_hdr_length = ira->ira_ip_hdr_length;
1835 
1836 
1837         switch (protocol) {
1838         case IPPROTO_TCP:
1839         case IPPROTO_ICMPV6:
1840                 break;
1841 
1842         case IPPROTO_UDP: {
1843                 udpha_t         *udpha;
1844 
1845                 udpha = (udpha_t  *)((uchar_t *)ip6h + ip_hdr_length);
1846                 /*
1847                  *  Before going through the regular checksum
1848                  *  calculation, make sure the received checksum
1849                  *  is non-zero. RFC 2460 says, a 0x0000 checksum
1850                  *  in a UDP packet (within IPv6 packet) is invalid
1851                  *  and should be replaced by 0xffff. This makes
1852                  *  sense as regular checksum calculation will
1853                  *  pass for both the cases i.e. 0x0000 and 0xffff.
1854                  *  Removing one of the case makes error detection
1855                  *  stronger.
1856                  */
1857                 if (udpha->uha_checksum == 0) {
1858                         /* 0x0000 checksum is invalid */
1859                         BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs);
1860                         return (B_FALSE);
1861                 }
1862                 break;
1863         }
1864         case IPPROTO_SCTP: {
1865                 sctp_hdr_t      *sctph;
1866                 uint32_t        pktsum;
1867 
1868                 sctph = (sctp_hdr_t *)((uchar_t *)ip6h + ip_hdr_length);
1869 #ifdef  DEBUG
1870                 if (skip_sctp_cksum)
1871                         return (B_TRUE);
1872 #endif
1873                 pktsum = sctph->sh_chksum;
1874                 sctph->sh_chksum = 0;
1875                 cksum = sctp_cksum(mp, ip_hdr_length);
1876                 sctph->sh_chksum = pktsum;
1877                 if (cksum == pktsum)
1878                         return (B_TRUE);
1879 
1880                 /*
1881                  * Defer until later whether a bad checksum is ok
1882                  * in order to allow RAW sockets to use Adler checksum
1883                  * with SCTP.
1884                  */
1885                 ira->ira_flags |= IRAF_SCTP_CSUM_ERR;
1886                 return (B_TRUE);
1887         }
1888 
1889         default:
1890                 /* No ULP checksum to verify. */
1891                 return (B_TRUE);
1892         }
1893 
1894         /*
1895          * Revert to software checksum calculation if the interface
1896          * isn't capable of checksum offload.
1897          * We clear DB_CKSUMFLAGS when going through IPsec in ip_fanout.
1898          * Note: IRAF_NO_HW_CKSUM is not currently used.
1899          */
1900         ASSERT(!IS_IPMP(ill));
1901         if ((iraflags & IRAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) ||
1902             !dohwcksum) {
1903                 return (ip_input_sw_cksum_v6(mp, ip6h, ira));
1904         }
1905 
1906         /*
1907          * We apply this for all ULP protocols. Does the HW know to
1908          * not set the flags for SCTP and other protocols.
1909          */
1910 
1911         hck_flags = DB_CKSUMFLAGS(mp);
1912 
1913         if (hck_flags & HCK_FULLCKSUM_OK) {
1914                 /*
1915                  * Hardware has already verified the checksum.
1916                  */
1917                 return (B_TRUE);
1918         }
1919 
1920         if (hck_flags & HCK_FULLCKSUM) {
1921                 /*
1922                  * Full checksum has been computed by the hardware
1923                  * and has been attached.  If the driver wants us to
1924                  * verify the correctness of the attached value, in
1925                  * order to protect against faulty hardware, compare
1926                  * it against -0 (0xFFFF) to see if it's valid.
1927                  */
1928                 cksum = DB_CKSUM16(mp);
1929                 if (cksum == 0xFFFF)
1930                         return (B_TRUE);
1931                 ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill);
1932                 return (B_FALSE);
1933         }
1934 
1935         mp1 = mp->b_cont;
1936         if ((hck_flags & HCK_PARTIALCKSUM) &&
1937             (mp1 == NULL || mp1->b_cont == NULL) &&
1938             ip_hdr_length >= DB_CKSUMSTART(mp) &&
1939             ((len = ip_hdr_length - DB_CKSUMSTART(mp)) & 1) == 0) {
1940                 uint32_t        adj;
1941                 uchar_t         *cksum_start;
1942 
1943                 cksum = ip_input_cksum_pseudo_v6(ip6h, ira);
1944 
1945                 cksum_start = ((uchar_t *)ip6h + DB_CKSUMSTART(mp));
1946 
1947                 /*
1948                  * Partial checksum has been calculated by hardware
1949                  * and attached to the packet; in addition, any
1950                  * prepended extraneous data is even byte aligned,
1951                  * and there are at most two mblks associated with
1952                  * the packet.  If any such data exists, we adjust
1953                  * the checksum; also take care any postpended data.
1954                  */
1955                 IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, len, adj);
1956                 /*
1957                  * One's complement subtract extraneous checksum
1958                  */
1959                 cksum += DB_CKSUM16(mp);
1960                 if (adj >= cksum)
1961                         cksum = ~(adj - cksum) & 0xFFFF;
1962                 else
1963                         cksum -= adj;
1964                 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16);
1965                 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16);
1966                 if (!(~cksum & 0xFFFF))
1967                         return (B_TRUE);
1968 
1969                 ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill);
1970                 return (B_FALSE);
1971         }
1972         return (ip_input_sw_cksum_v6(mp, ip6h, ira));
1973 }
1974 
1975 
1976 /*
1977  * Handle fanout of received packets.
1978  * Unicast packets that are looped back (from ire_send_local_v6) and packets
1979  * from the wire are differentiated by checking IRAF_VERIFY_ULP_CKSUM.
1980  *
1981  * IPQoS Notes
1982  * Before sending it to the client, invoke IPPF processing. Policy processing
1983  * takes place only if the callout_position, IPP_LOCAL_IN, is enabled.
1984  */
1985 void
1986 ip_fanout_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1987 {
1988         ill_t           *ill = ira->ira_ill;
1989         iaflags_t       iraflags = ira->ira_flags;
1990         ip_stack_t      *ipst = ill->ill_ipst;
1991         uint8_t         protocol;
1992         conn_t          *connp;
1993 #define rptr    ((uchar_t *)ip6h)
1994         uint_t          ip_hdr_length;
1995         uint_t          min_ulp_header_length;
1996         int             offset;
1997         ssize_t         len;
1998         netstack_t      *ns = ipst->ips_netstack;
1999         ipsec_stack_t   *ipss = ns->netstack_ipsec;
2000         ill_t           *rill = ira->ira_rill;
2001 
2002         ASSERT(ira->ira_pktlen == ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN);
2003 
2004         /*
2005          * We repeat this as we parse over destination options header and
2006          * fragment headers (earlier we've handled any hop-by-hop options
2007          * header.)
2008          * We update ira_protocol and ira_ip_hdr_length as we skip past
2009          * the intermediate headers; they already point past any
2010          * hop-by-hop header.
2011          */
2012 repeat:
2013         protocol = ira->ira_protocol;
2014         ip_hdr_length = ira->ira_ip_hdr_length;
2015 
2016         /*
2017          * Time for IPP once we've done reassembly and IPsec.
2018          * We skip this for loopback packets since we don't do IPQoS
2019          * on loopback.
2020          */
2021         if (IPP_ENABLED(IPP_LOCAL_IN, ipst) &&
2022             !(iraflags & IRAF_LOOPBACK) &&
2023             (protocol != IPPROTO_ESP && protocol != IPPROTO_AH &&
2024             protocol != IPPROTO_DSTOPTS && protocol != IPPROTO_ROUTING &&
2025             protocol != IPPROTO_FRAGMENT)) {
2026                 /*
2027                  * Use the interface on which the packet arrived - not where
2028                  * the IP address is hosted.
2029                  */
2030                 /* ip_process translates an IS_UNDER_IPMP */
2031                 mp = ip_process(IPP_LOCAL_IN, mp, rill, ill);
2032                 if (mp == NULL) {
2033                         /* ip_drop_packet and MIB done */
2034                         return;
2035                 }
2036         }
2037 
2038         /* Determine the minimum required size of the upper-layer header */
2039         /* Need to do this for at least the set of ULPs that TX handles. */
2040         switch (protocol) {
2041         case IPPROTO_TCP:
2042                 min_ulp_header_length = TCP_MIN_HEADER_LENGTH;
2043                 break;
2044         case IPPROTO_SCTP:
2045                 min_ulp_header_length = SCTP_COMMON_HDR_LENGTH;
2046                 break;
2047         case IPPROTO_UDP:
2048                 min_ulp_header_length = UDPH_SIZE;
2049                 break;
2050         case IPPROTO_ICMP:
2051         case IPPROTO_ICMPV6:
2052                 min_ulp_header_length = ICMPH_SIZE;
2053                 break;
2054         case IPPROTO_FRAGMENT:
2055         case IPPROTO_DSTOPTS:
2056         case IPPROTO_ROUTING:
2057                 min_ulp_header_length = MIN_EHDR_LEN;
2058                 break;
2059         default:
2060                 min_ulp_header_length = 0;
2061                 break;
2062         }
2063         /* Make sure we have the min ULP header length */
2064         len = mp->b_wptr - rptr;
2065         if (len < ip_hdr_length + min_ulp_header_length) {
2066                 if (ira->ira_pktlen < ip_hdr_length + min_ulp_header_length)
2067                         goto pkt_too_short;
2068 
2069                 IP6_STAT(ipst, ip6_recv_pullup);
2070                 ip6h = ip_pullup(mp, ip_hdr_length + min_ulp_header_length,
2071                     ira);
2072                 if (ip6h == NULL)
2073                         goto discard;
2074                 len = mp->b_wptr - rptr;
2075         }
2076 
2077         /*
2078          * If trusted extensions then determine the zoneid and TX specific
2079          * ira_flags.
2080          */
2081         if (iraflags & IRAF_SYSTEM_LABELED) {
2082                 /* This can update ira->ira_flags and ira->ira_zoneid */
2083                 ip_fanout_tx_v6(mp, ip6h, protocol, ip_hdr_length, ira);
2084                 iraflags = ira->ira_flags;
2085         }
2086 
2087 
2088         /* Verify ULP checksum. Handles TCP, UDP, and SCTP */
2089         if (iraflags & IRAF_VERIFY_ULP_CKSUM) {
2090                 if (!ip_input_cksum_v6(iraflags, mp, ip6h, ira)) {
2091                         /* Bad checksum. Stats are already incremented */
2092                         ip_drop_input("Bad ULP checksum", mp, ill);
2093                         freemsg(mp);
2094                         return;
2095                 }
2096                 /* IRAF_SCTP_CSUM_ERR could have been set */
2097                 iraflags = ira->ira_flags;
2098         }
2099         switch (protocol) {
2100         case IPPROTO_TCP:
2101                 /* For TCP, discard multicast packets. */
2102                 if (iraflags & IRAF_MULTIBROADCAST)
2103                         goto discard;
2104 
2105                 /* First mblk contains IP+TCP headers per above check */
2106                 ASSERT(len >= ip_hdr_length + TCP_MIN_HEADER_LENGTH);
2107 
2108                 /* TCP options present? */
2109                 offset = ((uchar_t *)ip6h)[ip_hdr_length + 12] >> 4;
2110                 if (offset != 5) {
2111                         if (offset < 5)
2112                                 goto discard;
2113 
2114                         /*
2115                          * There must be TCP options.
2116                          * Make sure we can grab them.
2117                          */
2118                         offset <<= 2;
2119                         offset += ip_hdr_length;
2120                         if (len < offset) {
2121                                 if (ira->ira_pktlen < offset)
2122                                         goto pkt_too_short;
2123 
2124                                 IP6_STAT(ipst, ip6_recv_pullup);
2125                                 ip6h = ip_pullup(mp, offset, ira);
2126                                 if (ip6h == NULL)
2127                                         goto discard;
2128                                 len = mp->b_wptr - rptr;
2129                         }
2130                 }
2131 
2132                 /*
2133                  * Pass up a squeue hint to tcp.
2134                  * If ira_sqp is already set (this is loopback) we leave it
2135                  * alone.
2136                  */
2137                 if (ira->ira_sqp == NULL) {
2138                         ira->ira_sqp = ip_squeue_get(ira->ira_ring);
2139                 }
2140 
2141                 /* Look for AF_INET or AF_INET6 that matches */
2142                 connp = ipcl_classify_v6(mp, IPPROTO_TCP, ip_hdr_length,
2143                     ira, ipst);
2144                 if (connp == NULL) {
2145                         /* Send the TH_RST */
2146                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2147                         tcp_xmit_listeners_reset(mp, ira, ipst, NULL);
2148                         return;
2149                 }
2150                 if (connp->conn_incoming_ifindex != 0 &&
2151                     connp->conn_incoming_ifindex != ira->ira_ruifindex) {
2152                         CONN_DEC_REF(connp);
2153 
2154                         /* Send the TH_RST */
2155                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2156                         tcp_xmit_listeners_reset(mp, ira, ipst, NULL);
2157                         return;
2158                 }
2159                 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
2160                     (iraflags & IRAF_IPSEC_SECURE)) {
2161                         mp = ipsec_check_inbound_policy(mp, connp,
2162                             NULL, ip6h, ira);
2163                         if (mp == NULL) {
2164                                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2165                                 /* Note that mp is NULL */
2166                                 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2167                                 CONN_DEC_REF(connp);
2168                                 return;
2169                         }
2170                 }
2171                 /* Found a client; up it goes */
2172                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2173                 ira->ira_ill = ira->ira_rill = NULL;
2174                 if (!IPCL_IS_TCP(connp)) {
2175                         /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
2176                         (connp->conn_recv)(connp, mp, NULL, ira);
2177                         CONN_DEC_REF(connp);
2178                         ira->ira_ill = ill;
2179                         ira->ira_rill = rill;
2180                         return;
2181                 }
2182 
2183                 /*
2184                  * We do different processing whether called from
2185                  * ip_accept_tcp and we match the target, don't match
2186                  * the target, and when we are called by ip_input.
2187                  */
2188                 if (iraflags & IRAF_TARGET_SQP) {
2189                         if (ira->ira_target_sqp == connp->conn_sqp) {
2190                                 mblk_t  *attrmp;
2191 
2192                                 attrmp = ip_recv_attr_to_mblk(ira);
2193                                 if (attrmp == NULL) {
2194                                         BUMP_MIB(ill->ill_ip_mib,
2195                                             ipIfStatsInDiscards);
2196                                         ip_drop_input("ipIfStatsInDiscards",
2197                                             mp, ill);
2198                                         freemsg(mp);
2199                                         CONN_DEC_REF(connp);
2200                                 } else {
2201                                         SET_SQUEUE(attrmp, connp->conn_recv,
2202                                             connp);
2203                                         attrmp->b_cont = mp;
2204                                         ASSERT(ira->ira_target_sqp_mp == NULL);
2205                                         ira->ira_target_sqp_mp = attrmp;
2206                                         /*
2207                                          * Conn ref release when drained from
2208                                          * the squeue.
2209                                          */
2210                                 }
2211                         } else {
2212                                 SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
2213                                     connp->conn_recv, connp, ira, SQ_FILL,
2214                                     SQTAG_IP6_TCP_INPUT);
2215                         }
2216                 } else {
2217                         SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv,
2218                             connp, ira, ip_squeue_flag, SQTAG_IP6_TCP_INPUT);
2219                 }
2220                 ira->ira_ill = ill;
2221                 ira->ira_rill = rill;
2222                 return;
2223 
2224         case IPPROTO_SCTP: {
2225                 sctp_hdr_t      *sctph;
2226                 uint32_t        ports;  /* Source and destination ports */
2227                 sctp_stack_t    *sctps = ipst->ips_netstack->netstack_sctp;
2228 
2229                 /* For SCTP, discard multicast packets. */
2230                 if (iraflags & IRAF_MULTIBROADCAST)
2231                         goto discard;
2232 
2233                 /*
2234                  * Since there is no SCTP h/w cksum support yet, just
2235                  * clear the flag.
2236                  */
2237                 DB_CKSUMFLAGS(mp) = 0;
2238 
2239                 /* Length ensured above */
2240                 ASSERT(MBLKL(mp) >= ip_hdr_length + SCTP_COMMON_HDR_LENGTH);
2241                 sctph = (sctp_hdr_t *)(rptr + ip_hdr_length);
2242 
2243                 /* get the ports */
2244                 ports = *(uint32_t *)&sctph->sh_sport;
2245 
2246                 if (iraflags & IRAF_SCTP_CSUM_ERR) {
2247                         /*
2248                          * Packets with a potential sctp checksum error never
2249                          * go to the Sun sctp stack; however, they might be
2250                          * Adler-32 summed packets that a userland stack bound
2251                          * to a raw IP socket could reasonably use. Note though
2252                          * that Adler-32 is a long deprecated algorithm and
2253                          * customer sctp networks should eventually migrate to
2254                          * CRC-32 at which time this facility should be removed.
2255                          */
2256                         ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira);
2257                         return;
2258                 }
2259                 connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, ports,
2260                     ira, mp, sctps, sctph);
2261                 if (connp == NULL) {
2262                         /* Check for raw socket or OOTB handling */
2263                         ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira);
2264                         return;
2265                 }
2266                 if (connp->conn_incoming_ifindex != 0 &&
2267                     connp->conn_incoming_ifindex != ira->ira_ruifindex) {
2268                         CONN_DEC_REF(connp);
2269 
2270                         /* Check for raw socket or OOTB handling */
2271                         ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira);
2272                         return;
2273                 }
2274 
2275                 /* Found a client; up it goes */
2276                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2277                 sctp_input(connp, NULL, ip6h, mp, ira);
2278                 /* sctp_input does a rele of the sctp_t */
2279                 return;
2280         }
2281 
2282         case IPPROTO_UDP:
2283                 /* First mblk contains IP+UDP headers as checked above */
2284                 ASSERT(MBLKL(mp) >= ip_hdr_length + UDPH_SIZE);
2285 
2286                 if (iraflags & IRAF_MULTIBROADCAST) {
2287                         uint16_t *up;   /* Pointer to ports in ULP header */
2288 
2289                         up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length);
2290 
2291                         ip_fanout_udp_multi_v6(mp, ip6h, up[1], up[0], ira);
2292                         return;
2293                 }
2294 
2295                 /* Look for AF_INET or AF_INET6 that matches */
2296                 connp = ipcl_classify_v6(mp, IPPROTO_UDP, ip_hdr_length,
2297                     ira, ipst);
2298                 if (connp == NULL) {
2299         no_udp_match:
2300                         if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].
2301                             connf_head != NULL) {
2302                                 ASSERT(ira->ira_protocol == IPPROTO_UDP);
2303                                 ip_fanout_proto_v6(mp, ip6h, ira);
2304                         } else {
2305                                 ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH,
2306                                     ICMP6_DST_UNREACH_NOPORT, ira);
2307                         }
2308                         return;
2309 
2310                 }
2311                 if (connp->conn_incoming_ifindex != 0 &&
2312                     connp->conn_incoming_ifindex != ira->ira_ruifindex) {
2313                         CONN_DEC_REF(connp);
2314                         goto no_udp_match;
2315                 }
2316                 if (IPCL_IS_NONSTR(connp) ? connp->conn_flow_cntrld :
2317                     !canputnext(connp->conn_rq)) {
2318                         CONN_DEC_REF(connp);
2319                         BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows);
2320                         ip_drop_input("udpIfStatsInOverflows", mp, ill);
2321                         freemsg(mp);
2322                         return;
2323                 }
2324                 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
2325                     (iraflags & IRAF_IPSEC_SECURE)) {
2326                         mp = ipsec_check_inbound_policy(mp, connp,
2327                             NULL, ip6h, ira);
2328                         if (mp == NULL) {
2329                                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2330                                 /* Note that mp is NULL */
2331                                 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2332                                 CONN_DEC_REF(connp);
2333                                 return;
2334                         }
2335                 }
2336 
2337                 /* Found a client; up it goes */
2338                 IP6_STAT(ipst, ip6_udp_fannorm);
2339                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2340                 ira->ira_ill = ira->ira_rill = NULL;
2341                 (connp->conn_recv)(connp, mp, NULL, ira);
2342                 CONN_DEC_REF(connp);
2343                 ira->ira_ill = ill;
2344                 ira->ira_rill = rill;
2345                 return;
2346         default:
2347                 break;
2348         }
2349 
2350         /*
2351          * Clear hardware checksumming flag as it is currently only
2352          * used by TCP and UDP.
2353          */
2354         DB_CKSUMFLAGS(mp) = 0;
2355 
2356         switch (protocol) {
2357         case IPPROTO_ICMPV6:
2358                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
2359 
2360                 /* Check variable for testing applications */
2361                 if (ipst->ips_ipv6_drop_inbound_icmpv6) {
2362                         ip_drop_input("ipv6_drop_inbound_icmpv6", mp, ill);
2363                         freemsg(mp);
2364                         return;
2365                 }
2366                 /*
2367                  * We need to accommodate icmp messages coming in clear
2368                  * until we get everything secure from the wire. If
2369                  * icmp_accept_clear_messages is zero we check with
2370                  * the global policy and act accordingly. If it is
2371                  * non-zero, we accept the message without any checks.
2372                  * But *this does not mean* that this will be delivered
2373                  * to RAW socket clients. By accepting we might send
2374                  * replies back, change our MTU value etc.,
2375                  * but delivery to the ULP/clients depends on their
2376                  * policy dispositions.
2377                  */
2378                 if (ipst->ips_icmp_accept_clear_messages == 0) {
2379                         mp = ipsec_check_global_policy(mp, NULL,
2380                             NULL, ip6h, ira, ns);
2381                         if (mp == NULL)
2382                                 return;
2383                 }
2384 
2385                 /*
2386                  * On a labeled system, we have to check whether the zone
2387                  * itself is permitted to receive raw traffic.
2388                  */
2389                 if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
2390                         if (!tsol_can_accept_raw(mp, ira, B_FALSE)) {
2391                                 BUMP_MIB(ill->ill_icmp6_mib,
2392                                     ipv6IfIcmpInErrors);
2393                                 ip_drop_input("tsol_can_accept_raw", mp, ill);
2394                                 freemsg(mp);
2395                                 return;
2396                         }
2397                 }
2398 
2399                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2400                 mp = icmp_inbound_v6(mp, ira);
2401                 if (mp == NULL) {
2402                         /* No need to pass to RAW sockets */
2403                         return;
2404                 }
2405                 break;
2406 
2407         case IPPROTO_DSTOPTS: {
2408                 ip6_dest_t      *desthdr;
2409                 uint_t          ehdrlen;
2410                 uint8_t         *optptr;
2411 
2412                 /* We already check for MIN_EHDR_LEN above */
2413 
2414                 /* Check if AH is present and needs to be processed. */
2415                 mp = ipsec_early_ah_v6(mp, ira);
2416                 if (mp == NULL)
2417                         return;
2418 
2419                 /*
2420                  * Reinitialize pointers, as ipsec_early_ah_v6() does
2421                  * complete pullups.  We don't have to do more pullups
2422                  * as a result.
2423                  */
2424                 ip6h = (ip6_t *)mp->b_rptr;
2425 
2426                 if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN)
2427                         goto pkt_too_short;
2428 
2429                 if (mp->b_cont != NULL &&
2430                     rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) {
2431                         ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira);
2432                         if (ip6h == NULL)
2433                                 goto discard;
2434                 }
2435                 desthdr = (ip6_dest_t *)(rptr + ip_hdr_length);
2436                 ehdrlen = 8 * (desthdr->ip6d_len + 1);
2437                 if (ira->ira_pktlen - ip_hdr_length < ehdrlen)
2438                         goto pkt_too_short;
2439                 if (mp->b_cont != NULL &&
2440                     rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) {
2441                         ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira);
2442                         if (ip6h == NULL)
2443                                 goto discard;
2444 
2445                         desthdr = (ip6_dest_t *)(rptr + ip_hdr_length);
2446                 }
2447                 optptr = (uint8_t *)&desthdr[1];
2448 
2449                 /*
2450                  * Update ira_ip_hdr_length to skip the destination header
2451                  * when we repeat.
2452                  */
2453                 ira->ira_ip_hdr_length += ehdrlen;
2454 
2455                 ira->ira_protocol = desthdr->ip6d_nxt;
2456 
2457                 /*
2458                  * Note: XXX This code does not seem to make
2459                  * distinction between Destination Options Header
2460                  * being before/after Routing Header which can
2461                  * happen if we are at the end of source route.
2462                  * This may become significant in future.
2463                  * (No real significant Destination Options are
2464                  * defined/implemented yet ).
2465                  */
2466                 switch (ip_process_options_v6(mp, ip6h, optptr,
2467                     ehdrlen - 2, IPPROTO_DSTOPTS, ira)) {
2468                 case -1:
2469                         /*
2470                          * Packet has been consumed and any needed
2471                          * ICMP errors sent.
2472                          */
2473                         return;
2474                 case 0:
2475                         /* No action needed  continue */
2476                         break;
2477                 case 1:
2478                         /*
2479                          * Unexpected return value
2480                          * (Router alert is a Hop-by-Hop option)
2481                          */
2482 #ifdef DEBUG
2483                         panic("ip_fanout_v6: router "
2484                             "alert hbh opt indication in dest opt");
2485                         /*NOTREACHED*/
2486 #else
2487                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2488                         ip_drop_input("ipIfStatsInDiscards", mp, ill);
2489                         freemsg(mp);
2490                         return;
2491 #endif
2492                 }
2493                 goto repeat;
2494         }
2495         case IPPROTO_FRAGMENT: {
2496                 ip6_frag_t *fraghdr;
2497 
2498                 if (ira->ira_pktlen - ip_hdr_length < sizeof (ip6_frag_t))
2499                         goto pkt_too_short;
2500 
2501                 if (mp->b_cont != NULL &&
2502                     rptr + ip_hdr_length + sizeof (ip6_frag_t) > mp->b_wptr) {
2503                         ip6h = ip_pullup(mp,
2504                             ip_hdr_length + sizeof (ip6_frag_t), ira);
2505                         if (ip6h == NULL)
2506                                 goto discard;
2507                 }
2508 
2509                 fraghdr = (ip6_frag_t *)(rptr + ip_hdr_length);
2510                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds);
2511 
2512                 /*
2513                  * Invoke the CGTP (multirouting) filtering module to
2514                  * process the incoming packet. Packets identified as
2515                  * duplicates must be discarded. Filtering is active
2516                  * only if the ip_cgtp_filter ndd variable is
2517                  * non-zero.
2518                  */
2519                 if (ipst->ips_ip_cgtp_filter &&
2520                     ipst->ips_ip_cgtp_filter_ops != NULL) {
2521                         int cgtp_flt_pkt;
2522                         netstackid_t stackid;
2523 
2524                         stackid = ipst->ips_netstack->netstack_stackid;
2525 
2526                         /*
2527                          * CGTP and IPMP are mutually exclusive so
2528                          * phyint_ifindex is fine here.
2529                          */
2530                         cgtp_flt_pkt =
2531                             ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6(
2532                             stackid, ill->ill_phyint->phyint_ifindex,
2533                             ip6h, fraghdr);
2534                         if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) {
2535                                 ip_drop_input("CGTP_IP_PKT_DUPLICATE", mp, ill);
2536                                 freemsg(mp);
2537                                 return;
2538                         }
2539                 }
2540 
2541                 /*
2542                  * Update ip_hdr_length to skip the frag header
2543                  * ip_input_fragment_v6 will determine the extension header
2544                  * prior to the fragment header and update its nexthdr value,
2545                  * and also set ira_protocol to the nexthdr that follows the
2546                  * completed fragment.
2547                  */
2548                 ip_hdr_length += sizeof (ip6_frag_t);
2549 
2550                 /*
2551                  * Make sure we have ira_l2src before we lose the original
2552                  * mblk
2553                  */
2554                 if (!(ira->ira_flags & IRAF_L2SRC_SET))
2555                         ip_setl2src(mp, ira, ira->ira_rill);
2556 
2557                 mp = ip_input_fragment_v6(mp, ip6h, fraghdr,
2558                     ira->ira_pktlen - ip_hdr_length, ira);
2559                 if (mp == NULL) {
2560                         /* Reassembly is still pending */
2561                         return;
2562                 }
2563                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs);
2564 
2565                 /*
2566                  * The mblk chain has the frag header removed and
2567                  * ira_protocol, ira_pktlen, ira_ip_hdr_length as well as the
2568                  * IP header have been updated to reflect the result.
2569                  */
2570                 ip6h = (ip6_t *)mp->b_rptr;
2571                 ip_hdr_length = ira->ira_ip_hdr_length;
2572                 goto repeat;
2573         }
2574         case IPPROTO_HOPOPTS:
2575                 /*
2576                  * Illegal header sequence.
2577                  * (Hop-by-hop headers are processed above
2578                  *  and required to immediately follow IPv6 header)
2579                  */
2580                 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
2581                 icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira);
2582                 return;
2583 
2584         case IPPROTO_ROUTING: {
2585                 uint_t ehdrlen;
2586                 ip6_rthdr_t *rthdr;
2587 
2588                 /* Check if AH is present and needs to be processed. */
2589                 mp = ipsec_early_ah_v6(mp, ira);
2590                 if (mp == NULL)
2591                         return;
2592 
2593                 /*
2594                  * Reinitialize pointers, as ipsec_early_ah_v6() does
2595                  * complete pullups.  We don't have to do more pullups
2596                  * as a result.
2597                  */
2598                 ip6h = (ip6_t *)mp->b_rptr;
2599 
2600                 if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN)
2601                         goto pkt_too_short;
2602 
2603                 if (mp->b_cont != NULL &&
2604                     rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) {
2605                         ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira);
2606                         if (ip6h == NULL)
2607                                 goto discard;
2608                 }
2609                 rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length);
2610                 protocol = ira->ira_protocol = rthdr->ip6r_nxt;
2611                 ehdrlen = 8 * (rthdr->ip6r_len + 1);
2612                 if (ira->ira_pktlen - ip_hdr_length < ehdrlen)
2613                         goto pkt_too_short;
2614                 if (mp->b_cont != NULL &&
2615                     rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) {
2616                         ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira);
2617                         if (ip6h == NULL)
2618                                 goto discard;
2619                         rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length);
2620                 }
2621                 if (rthdr->ip6r_segleft != 0) {
2622                         /* Not end of source route */
2623                         if (ira->ira_flags &
2624                             (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
2625                                 BUMP_MIB(ill->ill_ip_mib,
2626                                     ipIfStatsForwProhibits);
2627                                 ip_drop_input("ipIfStatsInForwProhibits",
2628                                     mp, ill);
2629                                 freemsg(mp);
2630                                 return;
2631                         }
2632                         ip_process_rthdr(mp, ip6h, rthdr, ira);
2633                         return;
2634                 }
2635                 ira->ira_ip_hdr_length += ehdrlen;
2636                 goto repeat;
2637         }
2638 
2639         case IPPROTO_AH:
2640         case IPPROTO_ESP: {
2641                 /*
2642                  * Fast path for AH/ESP.
2643                  */
2644                 netstack_t *ns = ipst->ips_netstack;
2645                 ipsec_stack_t *ipss = ns->netstack_ipsec;
2646 
2647                 IP_STAT(ipst, ipsec_proto_ahesp);
2648 
2649                 if (!ipsec_loaded(ipss)) {
2650                         ip_proto_not_sup(mp, ira);
2651                         return;
2652                 }
2653 
2654                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2655                 /* select inbound SA and have IPsec process the pkt */
2656                 if (protocol == IPPROTO_ESP) {
2657                         esph_t *esph;
2658 
2659                         mp = ipsec_inbound_esp_sa(mp, ira, &esph);
2660                         if (mp == NULL)
2661                                 return;
2662 
2663                         ASSERT(esph != NULL);
2664                         ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
2665                         ASSERT(ira->ira_ipsec_esp_sa != NULL);
2666                         ASSERT(ira->ira_ipsec_esp_sa->ipsa_input_func != NULL);
2667 
2668                         mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph,
2669                             ira);
2670                 } else {
2671                         ah_t *ah;
2672 
2673                         mp = ipsec_inbound_ah_sa(mp, ira, &ah);
2674                         if (mp == NULL)
2675                                 return;
2676 
2677                         ASSERT(ah != NULL);
2678                         ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
2679                         ASSERT(ira->ira_ipsec_ah_sa != NULL);
2680                         ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL);
2681                         mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah,
2682                             ira);
2683                 }
2684 
2685                 if (mp == NULL) {
2686                         /*
2687                          * Either it failed or is pending. In the former case
2688                          * ipIfStatsInDiscards was increased.
2689                          */
2690                         return;
2691                 }
2692                 /* we're done with IPsec processing, send it up */
2693                 ip_input_post_ipsec(mp, ira);
2694                 return;
2695         }
2696         case IPPROTO_NONE:
2697                 /* All processing is done. Count as "delivered". */
2698                 freemsg(mp);
2699                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2700                 return;
2701 
2702         case IPPROTO_ENCAP:
2703         case IPPROTO_IPV6:
2704                 /* iptun will verify trusted label */
2705                 connp = ipcl_classify_v6(mp, protocol, ip_hdr_length,
2706                     ira, ipst);
2707                 if (connp != NULL) {
2708                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2709                         ira->ira_ill = ira->ira_rill = NULL;
2710                         connp->conn_recv(connp, mp, NULL, ira);
2711                         CONN_DEC_REF(connp);
2712                         ira->ira_ill = ill;
2713                         ira->ira_rill = rill;
2714                         return;
2715                 }
2716                 /* FALLTHRU */
2717         default:
2718                 /*
2719                  * On a labeled system, we have to check whether the zone
2720                  * itself is permitted to receive raw traffic.
2721                  */
2722                 if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
2723                         if (!tsol_can_accept_raw(mp, ira, B_FALSE)) {
2724                                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2725                                 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2726                                 freemsg(mp);
2727                                 return;
2728                         }
2729                 }
2730                 break;
2731         }
2732 
2733         /*
2734          * The above input functions may have returned the pulled up message.
2735          * So ip6h need to be reinitialized.
2736          */
2737         ip6h = (ip6_t *)mp->b_rptr;
2738         ira->ira_protocol = protocol;
2739         if (ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head == NULL) {
2740                 /* No user-level listener for these packets */
2741                 ip_proto_not_sup(mp, ira);
2742                 return;
2743         }
2744 
2745         /*
2746          * Handle fanout to raw sockets.  There
2747          * can be more than one stream bound to a particular
2748          * protocol.  When this is the case, each one gets a copy
2749          * of any incoming packets.
2750          */
2751         ASSERT(ira->ira_protocol == protocol);
2752         ip_fanout_proto_v6(mp, ip6h, ira);
2753         return;
2754 
2755 pkt_too_short:
2756         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
2757         ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
2758         freemsg(mp);
2759         return;
2760 
2761 discard:
2762         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2763         ip_drop_input("ipIfStatsInDiscards", mp, ill);
2764         freemsg(mp);
2765 #undef rptr
2766 }