1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved
  24  *
  25  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  26  */
  27 /* Copyright (c) 1990 Mentat Inc. */
  28 
  29 #include <sys/types.h>
  30 #include <sys/stream.h>
  31 #include <sys/dlpi.h>
  32 #include <sys/stropts.h>
  33 #include <sys/sysmacros.h>
  34 #include <sys/strsubr.h>
  35 #include <sys/strlog.h>
  36 #include <sys/strsun.h>
  37 #include <sys/zone.h>
  38 #define _SUN_TPI_VERSION 2
  39 #include <sys/tihdr.h>
  40 #include <sys/xti_inet.h>
  41 #include <sys/ddi.h>
  42 #include <sys/sunddi.h>
  43 #include <sys/cmn_err.h>
  44 #include <sys/debug.h>
  45 #include <sys/kobj.h>
  46 #include <sys/modctl.h>
  47 #include <sys/atomic.h>
  48 #include <sys/policy.h>
  49 #include <sys/priv.h>
  50 
  51 #include <sys/systm.h>
  52 #include <sys/param.h>
  53 #include <sys/kmem.h>
  54 #include <sys/sdt.h>
  55 #include <sys/socket.h>
  56 #include <sys/vtrace.h>
  57 #include <sys/isa_defs.h>
  58 #include <sys/mac.h>
  59 #include <net/if.h>
  60 #include <net/if_arp.h>
  61 #include <net/route.h>
  62 #include <sys/sockio.h>
  63 #include <netinet/in.h>
  64 #include <net/if_dl.h>
  65 
  66 #include <inet/common.h>
  67 #include <inet/mi.h>
  68 #include <inet/mib2.h>
  69 #include <inet/nd.h>
  70 #include <inet/arp.h>
  71 #include <inet/snmpcom.h>
  72 #include <inet/kstatcom.h>
  73 
  74 #include <netinet/igmp_var.h>
  75 #include <netinet/ip6.h>
  76 #include <netinet/icmp6.h>
  77 #include <netinet/sctp.h>
  78 
  79 #include <inet/ip.h>
  80 #include <inet/ip_impl.h>
  81 #include <inet/ip6.h>
  82 #include <inet/ip6_asp.h>
  83 #include <inet/optcom.h>
  84 #include <inet/tcp.h>
  85 #include <inet/tcp_impl.h>
  86 #include <inet/ip_multi.h>
  87 #include <inet/ip_if.h>
  88 #include <inet/ip_ire.h>
  89 #include <inet/ip_ftable.h>
  90 #include <inet/ip_rts.h>
  91 #include <inet/ip_ndp.h>
  92 #include <inet/ip_listutils.h>
  93 #include <netinet/igmp.h>
  94 #include <netinet/ip_mroute.h>
  95 #include <inet/ipp_common.h>
  96 
  97 #include <net/pfkeyv2.h>
  98 #include <inet/sadb.h>
  99 #include <inet/ipsec_impl.h>
 100 #include <inet/ipdrop.h>
 101 #include <inet/ip_netinfo.h>
 102 #include <inet/ilb_ip.h>
 103 #include <sys/squeue_impl.h>
 104 #include <sys/squeue.h>
 105 
 106 #include <sys/ethernet.h>
 107 #include <net/if_types.h>
 108 #include <sys/cpuvar.h>
 109 
 110 #include <ipp/ipp.h>
 111 #include <ipp/ipp_impl.h>
 112 #include <ipp/ipgpc/ipgpc.h>
 113 
 114 #include <sys/pattr.h>
 115 #include <inet/ipclassifier.h>
 116 #include <inet/sctp_ip.h>
 117 #include <inet/sctp/sctp_impl.h>
 118 #include <inet/udp_impl.h>
 119 #include <sys/sunddi.h>
 120 
 121 #include <sys/tsol/label.h>
 122 #include <sys/tsol/tnet.h>
 123 
 124 #include <sys/clock_impl.h>       /* For LBOLT_FASTPATH{,64} */
 125 
/*
 * Declared here, defined outside this file; only visible in DEBUG builds.
 * NOTE(review): presumably a knob to skip SCTP checksum verification --
 * confirm at the definition.
 */
#ifdef  DEBUG
extern boolean_t skip_sctp_cksum;
#endif

/* Forward declarations for file-local helpers defined later in this file. */
static void     ip_input_local_v6(ire_t *, mblk_t *, ip6_t *, ip_recv_attr_t *);

static void     ip_input_multicast_v6(ire_t *, mblk_t *, ip6_t *,
    ip_recv_attr_t *);

/* Ask the compiler to inline the listed functions. */
#pragma inline(ip_input_common_v6, ip_input_local_v6, ip_forward_xmit_v6)
 136 
 137 /*
 138  * Direct read side procedure capable of dealing with chains. GLDv3 based
 139  * drivers call this function directly with mblk chains while STREAMS
 140  * read side procedure ip_rput() calls this for single packet with ip_ring
 141  * set to NULL to process one packet at a time.
 142  *
 143  * The ill will always be valid if this function is called directly from
 144  * the driver.
 145  *
 146  * If ip_input_v6() is called from GLDv3:
 147  *
 148  *   - This must be a non-VLAN IP stream.
 149  *   - 'mp' is either an untagged or a special priority-tagged packet.
 150  *   - Any VLAN tag that was in the MAC header has been stripped.
 151  *
 152  * If the IP header in packet is not 32-bit aligned, every message in the
 153  * chain will be aligned before further operations. This is required on SPARC
 154  * platform.
 155  */
 156 void
 157 ip_input_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
 158     struct mac_header_info_s *mhip)
 159 {
 160         (void) ip_input_common_v6(ill, ip_ring, mp_chain, mhip, NULL, NULL,
 161             NULL);
 162 }
 163 
 164 /*
 165  * ip_accept_tcp_v6() - This function is called by the squeue when it retrieves
 166  * a chain of packets in the poll mode. The packets have gone through the
 167  * data link processing but not IP processing. For performance and latency
 168  * reasons, the squeue wants to process the chain in line instead of feeding
 169  * it back via ip_input path.
 170  *
 171  * We set up the ip_recv_attr_t with IRAF_TARGET_SQP to that ip_fanout_v6
 172  * will pass back any TCP packets matching the target sqp to
 173  * ip_input_common_v6 using ira_target_sqp_mp. Other packets are handled by
 174  * ip_input_v6 and ip_fanout_v6 as normal.
 175  * The TCP packets that match the target squeue are returned to the caller
 176  * as a b_next chain after each packet has been prepend with an mblk
 177  * from ip_recv_attr_to_mblk.
 178  */
 179 mblk_t *
 180 ip_accept_tcp_v6(ill_t *ill, ill_rx_ring_t *ip_ring, squeue_t *target_sqp,
 181     mblk_t *mp_chain, mblk_t **last, uint_t *cnt)
 182 {
 183         return (ip_input_common_v6(ill, ip_ring, mp_chain, NULL, target_sqp,
 184             last, cnt));
 185 }
 186 
 187 /*
 188  * Used by ip_input_v6 and ip_accept_tcp_v6
 189  * The last three arguments are only used by ip_accept_tcp_v6, and mhip is
 190  * only used by ip_input_v6.
 191  */
 192 mblk_t *
 193 ip_input_common_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
 194     struct mac_header_info_s *mhip, squeue_t *target_sqp,
 195     mblk_t **last, uint_t *cnt)
 196 {
 197         mblk_t          *mp;
 198         ip6_t           *ip6h;
 199         ip_recv_attr_t  iras;   /* Receive attributes */
 200         rtc_t           rtc;
 201         iaflags_t       chain_flags = 0;        /* Fixed for chain */
 202         mblk_t          *ahead = NULL;  /* Accepted head */
 203         mblk_t          *atail = NULL;  /* Accepted tail */
 204         uint_t          acnt = 0;       /* Accepted count */
 205 
 206         ASSERT(mp_chain != NULL);
 207         ASSERT(ill != NULL);
 208 
 209         /* These ones do not change as we loop over packets */
 210         iras.ira_ill = iras.ira_rill = ill;
 211         iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex;
 212         iras.ira_rifindex = iras.ira_ruifindex;
 213         iras.ira_sqp = NULL;
 214         iras.ira_ring = ip_ring;
 215         /* For ECMP and outbound transmit ring selection */
 216         iras.ira_xmit_hint = ILL_RING_TO_XMIT_HINT(ip_ring);
 217 
 218         iras.ira_target_sqp = target_sqp;
 219         iras.ira_target_sqp_mp = NULL;
 220         if (target_sqp != NULL)
 221                 chain_flags |= IRAF_TARGET_SQP;
 222 
 223         /*
 224          * We try to have a mhip pointer when possible, but
 225          * it might be NULL in some cases. In those cases we
 226          * have to assume unicast.
 227          */
 228         iras.ira_mhip = mhip;
 229         iras.ira_flags = 0;
 230         if (mhip != NULL) {
 231                 switch (mhip->mhi_dsttype) {
 232                 case MAC_ADDRTYPE_MULTICAST :
 233                         chain_flags |= IRAF_L2DST_MULTICAST;
 234                         break;
 235                 case MAC_ADDRTYPE_BROADCAST :
 236                         chain_flags |= IRAF_L2DST_BROADCAST;
 237                         break;
 238                 }
 239         }
 240 
 241         /*
 242          * Initialize the one-element route cache.
 243          *
 244          * We do ire caching from one iteration to
 245          * another. In the event the packet chain contains
 246          * all packets from the same dst, this caching saves
 247          * an ire_route_recursive for each of the succeeding
 248          * packets in a packet chain.
 249          */
 250         rtc.rtc_ire = NULL;
 251         rtc.rtc_ip6addr = ipv6_all_zeros;
 252 
 253         /* Loop over b_next */
 254         for (mp = mp_chain; mp != NULL; mp = mp_chain) {
 255                 mp_chain = mp->b_next;
 256                 mp->b_next = NULL;
 257 
 258                 /*
 259                  * if db_ref > 1 then copymsg and free original. Packet
 260                  * may be changed and we do not want the other entity
 261                  * who has a reference to this message to trip over the
 262                  * changes. This is a blind change because trying to
 263                  * catch all places that might change the packet is too
 264                  * difficult.
 265                  *
 266                  * This corresponds to the fast path case, where we have
 267                  * a chain of M_DATA mblks.  We check the db_ref count
 268                  * of only the 1st data block in the mblk chain. There
 269                  * doesn't seem to be a reason why a device driver would
 270                  * send up data with varying db_ref counts in the mblk
 271                  * chain. In any case the Fast path is a private
 272                  * interface, and our drivers don't do such a thing.
 273                  * Given the above assumption, there is no need to walk
 274                  * down the entire mblk chain (which could have a
 275                  * potential performance problem)
 276                  *
 277                  * The "(DB_REF(mp) > 1)" check was moved from ip_rput()
 278                  * to here because of exclusive ip stacks and vnics.
 279                  * Packets transmitted from exclusive stack over vnic
 280                  * can have db_ref > 1 and when it gets looped back to
 281                  * another vnic in a different zone, you have ip_input()
 282                  * getting dblks with db_ref > 1. So if someone
 283                  * complains of TCP performance under this scenario,
 284                  * take a serious look here on the impact of copymsg().
 285                  */
 286                 if (DB_REF(mp) > 1) {
 287                         if ((mp = ip_fix_dbref(mp, &iras)) == NULL)
 288                                 continue;
 289                 }
 290 
 291                 /*
 292                  * IP header ptr not aligned?
 293                  * OR IP header not complete in first mblk
 294                  */
 295                 ip6h = (ip6_t *)mp->b_rptr;
 296                 if (!OK_32PTR(ip6h) || MBLKL(mp) < IPV6_HDR_LEN) {
 297                         mp = ip_check_and_align_header(mp, IPV6_HDR_LEN, &iras);
 298                         if (mp == NULL)
 299                                 continue;
 300                         ip6h = (ip6_t *)mp->b_rptr;
 301                 }
 302 
 303                 /* Protect against a mix of Ethertypes and IP versions */
 304                 if (IPH_HDR_VERSION(ip6h) != IPV6_VERSION) {
 305                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
 306                         ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
 307                         freemsg(mp);
 308                         /* mhip might point into 1st packet in the chain. */
 309                         iras.ira_mhip = NULL;
 310                         continue;
 311                 }
 312 
 313                 /*
 314                  * Check for Martian addrs; we have to explicitly
 315                  * test for for zero dst since this is also used as
 316                  * an indication that the rtc is not used.
 317                  */
 318                 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_dst)) {
 319                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 320                         ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
 321                         freemsg(mp);
 322                         /* mhip might point into 1st packet in the chain. */
 323                         iras.ira_mhip = NULL;
 324                         continue;
 325                 }
 326                 /*
 327                  * Keep L2SRC from a previous packet in chain since mhip
 328                  * might point into an earlier packet in the chain.
 329                  */
 330                 chain_flags |= (iras.ira_flags & IRAF_L2SRC_SET);
 331 
 332                 iras.ira_flags = IRAF_VERIFY_ULP_CKSUM | chain_flags;
 333                 iras.ira_free_flags = 0;
 334                 iras.ira_cred = NULL;
 335                 iras.ira_cpid = NOPID;
 336                 iras.ira_tsl = NULL;
 337                 iras.ira_zoneid = ALL_ZONES;    /* Default for forwarding */
 338 
 339                 /*
 340                  * We must count all incoming packets, even if they end
 341                  * up being dropped later on. Defer counting bytes until
 342                  * we have the whole IP header in first mblk.
 343                  */
 344                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives);
 345 
 346                 iras.ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
 347                 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets,
 348                     iras.ira_pktlen);
 349 
 350                 /*
 351                  * Call one of:
 352                  *      ill_input_full_v6
 353                  *      ill_input_short_v6
 354                  * The former is used in the case of TX. See ill_set_inputfn().
 355                  */
 356                 (*ill->ill_inputfn)(mp, ip6h, &ip6h->ip6_dst, &iras, &rtc);
 357 
 358                 /* Any references to clean up? No hold on ira_ill */
 359                 if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED))
 360                         ira_cleanup(&iras, B_FALSE);
 361 
 362                 if (iras.ira_target_sqp_mp != NULL) {
 363                         /* Better be called from ip_accept_tcp */
 364                         ASSERT(target_sqp != NULL);
 365 
 366                         /* Found one packet to accept */
 367                         mp = iras.ira_target_sqp_mp;
 368                         iras.ira_target_sqp_mp = NULL;
 369                         ASSERT(ip_recv_attr_is_mblk(mp));
 370 
 371                         if (atail != NULL)
 372                                 atail->b_next = mp;
 373                         else
 374                                 ahead = mp;
 375                         atail = mp;
 376                         acnt++;
 377                         mp = NULL;
 378                 }
 379                 /* mhip might point into 1st packet in the chain. */
 380                 iras.ira_mhip = NULL;
 381         }
 382         /* Any remaining references to the route cache? */
 383         if (rtc.rtc_ire != NULL) {
 384                 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&rtc.rtc_ip6addr));
 385                 ire_refrele(rtc.rtc_ire);
 386         }
 387 
 388         if (ahead != NULL) {
 389                 /* Better be called from ip_accept_tcp */
 390                 ASSERT(target_sqp != NULL);
 391                 *last = atail;
 392                 *cnt = acnt;
 393                 return (ahead);
 394         }
 395 
 396         return (NULL);
 397 }
 398 
 399 /*
 400  * This input function is used when
 401  *  - is_system_labeled()
 402  *
 403  * Note that for IPv6 CGTP filtering is handled only when receiving fragment
 404  * headers, and RSVP uses router alert options, thus we don't need anything
 405  * extra for them.
 406  */
 407 void
 408 ill_input_full_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg,
 409     ip_recv_attr_t *ira, rtc_t *rtc)
 410 {
 411         ip6_t           *ip6h = (ip6_t *)iph_arg;
 412         in6_addr_t      *nexthop = (in6_addr_t *)nexthop_arg;
 413         ill_t           *ill = ira->ira_ill;
 414 
 415         ASSERT(ira->ira_tsl == NULL);
 416 
 417         /*
 418          * Attach any necessary label information to
 419          * this packet
 420          */
 421         if (is_system_labeled()) {
 422                 ira->ira_flags |= IRAF_SYSTEM_LABELED;
 423 
 424                 /*
 425                  * This updates ira_cred, ira_tsl and ira_free_flags based
 426                  * on the label.
 427                  */
 428                 if (!tsol_get_pkt_label(mp, IPV6_VERSION, ira)) {
 429                         if (ip6opt_ls != 0)
 430                                 ip0dbg(("tsol_get_pkt_label v6 failed\n"));
 431                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 432                         ip_drop_input("ipIfStatsInDiscards", mp, ill);
 433                         freemsg(mp);
 434                         return;
 435                 }
 436                 /* Note that ira_tsl can be NULL here. */
 437 
 438                 /* tsol_get_pkt_label sometimes does pullupmsg */
 439                 ip6h = (ip6_t *)mp->b_rptr;
 440         }
 441         ill_input_short_v6(mp, ip6h, nexthop, ira, rtc);
 442 }
 443 
 444 /*
 445  * Check for IPv6 addresses that should not appear on the wire
 446  * as either source or destination.
 447  * If we ever implement Stateless IPv6 Translators (SIIT) we'd have
 448  * to revisit the IPv4-mapped part.
 449  */
 450 static boolean_t
 451 ip6_bad_address(in6_addr_t *addr, boolean_t is_src)
 452 {
 453         if (IN6_IS_ADDR_V4MAPPED(addr)) {
 454                 ip1dbg(("ip_input_v6: pkt with IPv4-mapped addr"));
 455                 return (B_TRUE);
 456         }
 457         if (IN6_IS_ADDR_LOOPBACK(addr)) {
 458                 ip1dbg(("ip_input_v6: pkt with loopback addr"));
 459                 return (B_TRUE);
 460         }
 461         if (!is_src && IN6_IS_ADDR_UNSPECIFIED(addr)) {
 462                 /*
 463                  * having :: in the src is ok: it's used for DAD.
 464                  */
 465                 ip1dbg(("ip_input_v6: pkt with unspecified addr"));
 466                 return (B_TRUE);
 467         }
 468         return (B_FALSE);
 469 }
 470 
 471 /*
 472  * Routing lookup for IPv6 link-locals.
 473  * First we look on the inbound interface, then we check for IPMP and
 474  * look on the upper interface.
 475  * We update ira_ruifindex if we find the IRE on the upper interface.
 476  */
 477 static ire_t *
 478 ire_linklocal(const in6_addr_t *nexthop, ill_t *ill, ip_recv_attr_t *ira,
 479     uint_t irr_flags, ip_stack_t *ipst)
 480 {
 481         int match_flags = MATCH_IRE_SECATTR | MATCH_IRE_ILL;
 482         ire_t *ire;
 483 
 484         ASSERT(IN6_IS_ADDR_LINKLOCAL(nexthop));
 485         ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl,
 486             match_flags, irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
 487         if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
 488             !IS_UNDER_IPMP(ill))
 489                 return (ire);
 490 
 491         /*
 492          * When we are using IMP we need to look for an IRE on both the
 493          * under and upper interfaces since there are different
 494          * link-local addresses for the under and upper.
 495          */
 496         ill = ipmp_ill_hold_ipmp_ill(ill);
 497         if (ill == NULL)
 498                 return (ire);
 499 
 500         ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
 501 
 502         ire_refrele(ire);
 503         ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl,
 504             match_flags, irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
 505         ill_refrele(ill);
 506         return (ire);
 507 }
 508 
 509 /*
 510  * This is the tail-end of the full receive side packet handling.
 511  * It can be used directly when the configuration is simple.
 512  */
 513 void
 514 ill_input_short_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg,
 515     ip_recv_attr_t *ira, rtc_t *rtc)
 516 {
 517         ire_t           *ire;
 518         ill_t           *ill = ira->ira_ill;
 519         ip_stack_t      *ipst = ill->ill_ipst;
 520         uint_t          pkt_len;
 521         ssize_t         len;
 522         ip6_t           *ip6h = (ip6_t *)iph_arg;
 523         in6_addr_t      nexthop = *(in6_addr_t *)nexthop_arg;
 524         ilb_stack_t     *ilbs = ipst->ips_netstack->netstack_ilb;
 525         uint_t          irr_flags;
 526 #define rptr    ((uchar_t *)ip6h)
 527 
 528         ASSERT(DB_TYPE(mp) == M_DATA);
 529 
        /*
         * Check for source/dest being a bad address: loopback, any, or
         * v4mapped. All of them start with 64 bits of zero.
         */
 534         if (ip6h->ip6_src.s6_addr32[0] == 0 &&
 535             ip6h->ip6_src.s6_addr32[1] == 0) {
 536                 if (ip6_bad_address(&ip6h->ip6_src, B_TRUE)) {
 537                         ip1dbg(("ip_input_v6: pkt with bad src addr\n"));
 538                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 539                         ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
 540                         freemsg(mp);
 541                         return;
 542                 }
 543         }
 544         if (ip6h->ip6_dst.s6_addr32[0] == 0 &&
 545             ip6h->ip6_dst.s6_addr32[1] == 0) {
 546                 if (ip6_bad_address(&ip6h->ip6_dst, B_FALSE)) {
 547                         ip1dbg(("ip_input_v6: pkt with bad dst addr\n"));
 548                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 549                         ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
 550                         freemsg(mp);
 551                         return;
 552                 }
 553         }
 554 
 555         len = mp->b_wptr - rptr;
 556         pkt_len = ira->ira_pktlen;
 557 
 558         /* multiple mblk or too short */
 559         len -= pkt_len;
 560         if (len != 0) {
 561                 mp = ip_check_length(mp, rptr, len, pkt_len, IPV6_HDR_LEN, ira);
 562                 if (mp == NULL)
 563                         return;
 564                 ip6h = (ip6_t *)mp->b_rptr;
 565         }
 566 
 567         DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
 568             ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h,
 569             int, 0);
 570         /*
 571          * The event for packets being received from a 'physical'
 572          * interface is placed after validation of the source and/or
 573          * destination address as being local so that packets can be
 574          * redirected to loopback addresses using ipnat.
 575          */
 576         DTRACE_PROBE4(ip6__physical__in__start,
 577             ill_t *, ill, ill_t *, NULL,
 578             ip6_t *, ip6h, mblk_t *, mp);
 579 
 580         if (HOOKS6_INTERESTED_PHYSICAL_IN(ipst)) {
 581                 int     ll_multicast = 0;
 582                 int     error;
 583                 in6_addr_t orig_dst = ip6h->ip6_dst;
 584 
 585                 if (ira->ira_flags & IRAF_L2DST_MULTICAST)
 586                         ll_multicast = HPE_MULTICAST;
 587                 else if (ira->ira_flags & IRAF_L2DST_BROADCAST)
 588                         ll_multicast = HPE_BROADCAST;
 589 
 590                 FW_HOOKS6(ipst->ips_ip6_physical_in_event,
 591                     ipst->ips_ipv6firewall_physical_in,
 592                     ill, NULL, ip6h, mp, mp, ll_multicast, ipst, error);
 593 
 594                 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, mp);
 595 
 596                 if (mp == NULL)
 597                         return;
 598 
 599                 /* The length could have changed */
 600                 ip6h = (ip6_t *)mp->b_rptr;
 601                 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
 602                 pkt_len = ira->ira_pktlen;
 603 
 604                 /*
 605                  * In case the destination changed we override any previous
 606                  * change to nexthop.
 607                  */
 608                 if (!IN6_ARE_ADDR_EQUAL(&orig_dst, &ip6h->ip6_dst))
 609                         nexthop = ip6h->ip6_dst;
 610 
 611                 if (IN6_IS_ADDR_UNSPECIFIED(&nexthop)) {
 612                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 613                         ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
 614                         freemsg(mp);
 615                         return;
 616                 }
 617 
 618         }
 619 
 620         if (ipst->ips_ip6_observe.he_interested) {
 621                 zoneid_t dzone;
 622 
 623                 /*
 624                  * On the inbound path the src zone will be unknown as
 625                  * this packet has come from the wire.
 626                  */
 627                 dzone = ip_get_zoneid_v6(&nexthop, mp, ill, ira, ALL_ZONES);
 628                 ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, ipst);
 629         }
 630 
 631         if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) !=
 632             IPV6_DEFAULT_VERS_AND_FLOW) {
 633                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
 634                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion);
 635                 ip_drop_input("ipIfStatsInWrongIPVersion", mp, ill);
 636                 freemsg(mp);
 637                 return;
 638         }
 639 
 640         /*
 641          * For IPv6 we update ira_ip_hdr_length and ira_protocol as
 642          * we parse the headers, starting with the hop-by-hop options header.
 643          */
 644         ira->ira_ip_hdr_length = IPV6_HDR_LEN;
 645         if ((ira->ira_protocol = ip6h->ip6_nxt) == IPPROTO_HOPOPTS) {
 646                 ip6_hbh_t       *hbhhdr;
 647                 uint_t          ehdrlen;
 648                 uint8_t         *optptr;
 649 
 650                 if (pkt_len < IPV6_HDR_LEN + MIN_EHDR_LEN) {
 651                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
 652                         ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
 653                         freemsg(mp);
 654                         return;
 655                 }
 656                 if (mp->b_cont != NULL &&
 657                     rptr + IPV6_HDR_LEN + MIN_EHDR_LEN > mp->b_wptr) {
 658                         ip6h = ip_pullup(mp, IPV6_HDR_LEN + MIN_EHDR_LEN, ira);
 659                         if (ip6h == NULL) {
 660                                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 661                                 ip_drop_input("ipIfStatsInDiscards", mp, ill);
 662                                 freemsg(mp);
 663                                 return;
 664                         }
 665                 }
 666                 hbhhdr = (ip6_hbh_t *)&ip6h[1];
 667                 ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
 668 
 669                 if (pkt_len < IPV6_HDR_LEN + ehdrlen) {
 670                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
 671                         ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
 672                         freemsg(mp);
 673                         return;
 674                 }
 675                 if (mp->b_cont != NULL &&
 676                     rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) {
 677                         ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira);
 678                         if (ip6h == NULL) {
 679                                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 680                                 ip_drop_input("ipIfStatsInDiscards", mp, ill);
 681                                 freemsg(mp);
 682                                 return;
 683                         }
 684                         hbhhdr = (ip6_hbh_t *)&ip6h[1];
 685                 }
 686 
 687                 /*
 688                  * Update ira_ip_hdr_length to skip the hop-by-hop header
 689                  * once we get to ip_fanout_v6
 690                  */
 691                 ira->ira_ip_hdr_length += ehdrlen;
 692                 ira->ira_protocol = hbhhdr->ip6h_nxt;
 693 
 694                 optptr = (uint8_t *)&hbhhdr[1];
 695                 switch (ip_process_options_v6(mp, ip6h, optptr,
 696                     ehdrlen - 2, IPPROTO_HOPOPTS, ira)) {
 697                 case -1:
 698                         /*
 699                          * Packet has been consumed and any
 700                          * needed ICMP messages sent.
 701                          */
 702                         return;
 703                 case 0:
 704                         /* no action needed */
 705                         break;
 706                 case 1:
                        /*
                         * Known router alert. Make us handle it as local
                         * by setting the nexthop to be the all-host multicast
                         * address, and skip multicast membership filter by
                         * marking as a router alert.
                         */
 713                         ira->ira_flags |= IRAF_ROUTER_ALERT;
 714                         nexthop = ipv6_all_hosts_mcast;
 715                         break;
 716                 }
 717         }
 718 
        /*
         * Here we check to see if the machine is set up as an
         * L3 load balancer and if the incoming packet is for a VIP.
         *
         * Check the following:
         * - there is at least one rule
         * - protocol of the packet is supported
         *
         * We don't load balance IPv6 link-locals.
         */
 729         if (ilb_has_rules(ilbs) && ILB_SUPP_L4(ira->ira_protocol) &&
 730             !IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
 731                 in6_addr_t      lb_dst;
 732                 int             lb_ret;
 733 
 734                 /* For convenience, we just pull up the mblk. */
 735                 if (mp->b_cont != NULL) {
 736                         if (pullupmsg(mp, -1) == 0) {
 737                                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 738                                 ip_drop_input("ipIfStatsInDiscards - pullupmsg",
 739                                     mp, ill);
 740                                 freemsg(mp);
 741                                 return;
 742                         }
 743                         ip6h = (ip6_t *)mp->b_rptr;
 744                 }
 745                 lb_ret = ilb_check_v6(ilbs, ill, mp, ip6h, ira->ira_protocol,
 746                     (uint8_t *)ip6h + ira->ira_ip_hdr_length, &lb_dst);
 747                 if (lb_ret == ILB_DROPPED) {
 748                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 749                         ip_drop_input("ILB_DROPPED", mp, ill);
 750                         freemsg(mp);
 751                         return;
 752                 }
 753                 if (lb_ret == ILB_BALANCED) {
 754                         /* Set the dst to that of the chosen server */
 755                         nexthop = lb_dst;
 756                         DB_CKSUMFLAGS(mp) = 0;
 757                 }
 758         }
 759 
 760         if (ill->ill_flags & ILLF_ROUTER)
 761                 irr_flags = IRR_ALLOCATE;
 762         else
 763                 irr_flags = IRR_NONE;
 764 
 765         /* Can not use route cache with TX since the labels can differ */
 766         if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
 767                 if (IN6_IS_ADDR_MULTICAST(&nexthop)) {
 768                         ire = ire_multicast(ill);
 769                 } else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
 770                         ire = ire_linklocal(&nexthop, ill, ira, irr_flags,
 771                             ipst);
 772                 } else {
 773                         /* Match destination and label */
 774                         ire = ire_route_recursive_v6(&nexthop, 0, NULL,
 775                             ALL_ZONES, ira->ira_tsl, MATCH_IRE_SECATTR,
 776                             irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL,
 777                             NULL);
 778                 }
 779                 /* Update the route cache so we do the ire_refrele */
 780                 ASSERT(ire != NULL);
 781                 if (rtc->rtc_ire != NULL)
 782                         ire_refrele(rtc->rtc_ire);
 783                 rtc->rtc_ire = ire;
 784                 rtc->rtc_ip6addr = nexthop;
 785         } else if (IN6_ARE_ADDR_EQUAL(&nexthop, &rtc->rtc_ip6addr) &&
 786             rtc->rtc_ire != NULL) {
 787                 /* Use the route cache */
 788                 ire = rtc->rtc_ire;
 789         } else {
 790                 /* Update the route cache */
 791                 if (IN6_IS_ADDR_MULTICAST(&nexthop)) {
 792                         ire = ire_multicast(ill);
 793                 } else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
 794                         ire = ire_linklocal(&nexthop, ill, ira, irr_flags,
 795                             ipst);
 796                 } else {
 797                         ire = ire_route_recursive_dstonly_v6(&nexthop,
 798                             irr_flags, ira->ira_xmit_hint, ipst);
 799                 }
 800                 ASSERT(ire != NULL);
 801                 if (rtc->rtc_ire != NULL)
 802                         ire_refrele(rtc->rtc_ire);
 803                 rtc->rtc_ire = ire;
 804                 rtc->rtc_ip6addr = nexthop;
 805         }
 806 
 807         ire->ire_ib_pkt_count++;
 808 
 809         /*
 810          * Based on ire_type and ire_flags call one of:
 811          *      ire_recv_local_v6 - for IRE_LOCAL
 812          *      ire_recv_loopback_v6 - for IRE_LOOPBACK
 813          *      ire_recv_multirt_v6 - if RTF_MULTIRT
 814          *      ire_recv_noroute_v6 - if RTF_REJECT or RTF_BLACKHOLE
 815          *      ire_recv_multicast_v6 - for IRE_MULTICAST
 816          *      ire_recv_noaccept_v6 - for ire_noaccept ones
 817          *      ire_recv_forward_v6 - for the rest.
 818          */
 819 
 820         (*ire->ire_recvfn)(ire, mp, ip6h, ira);
 821 }
 822 #undef rptr
 823 
 824 /*
 825  * ire_recvfn for IREs that need forwarding
 826  */
 827 void
 828 ire_recv_forward_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
 829 {
 830         ip6_t           *ip6h = (ip6_t *)iph_arg;
 831         ill_t           *ill = ira->ira_ill;
 832         ip_stack_t      *ipst = ill->ill_ipst;
 833         iaflags_t       iraflags = ira->ira_flags;
 834         ill_t           *dst_ill;
 835         nce_t           *nce;
 836         uint32_t        added_tx_len;
 837         uint32_t        mtu, iremtu;
 838 
 839         if (iraflags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
 840                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
 841                 ip_drop_input("l2 multicast not forwarded", mp, ill);
 842                 freemsg(mp);
 843                 return;
 844         }
 845 
 846         if (!(ill->ill_flags & ILLF_ROUTER)) {
 847                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
 848                 ip_drop_input("ipIfStatsForwProhibits", mp, ill);
 849                 freemsg(mp);
 850                 return;
 851         }
 852 
 853         /*
 854          * Either ire_nce_capable or ire_dep_parent would be set for the IRE
 855          * when it is found by ire_route_recursive, but that some other thread
 856          * could have changed the routes with the effect of clearing
 857          * ire_dep_parent. In that case we'd end up dropping the packet, or
 858          * finding a new nce below.
 859          * Get, allocate, or update the nce.
 860          * We get a refhold on ire_nce_cache as a result of this to avoid races
 861          * where ire_nce_cache is deleted.
 862          *
 863          * This ensures that we don't forward if the interface is down since
 864          * ipif_down removes all the nces.
 865          */
 866         mutex_enter(&ire->ire_lock);
 867         nce = ire->ire_nce_cache;
 868         if (nce == NULL) {
 869                 /* Not yet set up - try to set one up */
 870                 mutex_exit(&ire->ire_lock);
 871                 (void) ire_revalidate_nce(ire);
 872                 mutex_enter(&ire->ire_lock);
 873                 nce = ire->ire_nce_cache;
 874                 if (nce == NULL) {
 875                         mutex_exit(&ire->ire_lock);
 876                         /* The ire_dep_parent chain went bad, or no memory */
 877                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 878                         ip_drop_input("No ire_dep_parent", mp, ill);
 879                         freemsg(mp);
 880                         return;
 881                 }
 882         }
 883         nce_refhold(nce);
 884         mutex_exit(&ire->ire_lock);
 885 
 886         if (nce->nce_is_condemned) {
 887                 nce_t *nce1;
 888 
 889                 nce1 = ire_handle_condemned_nce(nce, ire, NULL, ip6h, B_FALSE);
 890                 nce_refrele(nce);
 891                 if (nce1 == NULL) {
 892                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 893                         ip_drop_input("No nce", mp, ill);
 894                         freemsg(mp);
 895                         return;
 896                 }
 897                 nce = nce1;
 898         }
 899         dst_ill = nce->nce_ill;
 900 
 901         /*
 902          * Unless we are forwarding, drop the packet.
 903          * Unlike IPv4 we don't allow source routed packets out the same
 904          * interface when we are not a router.
 905          * Note that ill_forward_set() will set the ILLF_ROUTER on
 906          * all the group members when it gets an ipmp-ill or under-ill.
 907          */
 908         if (!(dst_ill->ill_flags & ILLF_ROUTER)) {
 909                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
 910                 ip_drop_input("ipIfStatsForwProhibits", mp, ill);
 911                 freemsg(mp);
 912                 nce_refrele(nce);
 913                 return;
 914         }
 915 
 916         if (ire->ire_zoneid != GLOBAL_ZONEID && ire->ire_zoneid != ALL_ZONES) {
 917                 ire->ire_ib_pkt_count--;
 918                 /*
 919                  * Should only use IREs that are visible from the
 920                  * global zone for forwarding.
 921                  * For IPv6 any source route would have already been
 922                  * advanced in ip_fanout_v6
 923                  */
 924                 ire = ire_route_recursive_v6(&ip6h->ip6_dst, 0, NULL,
 925                     GLOBAL_ZONEID, ira->ira_tsl, MATCH_IRE_SECATTR,
 926                     (ill->ill_flags & ILLF_ROUTER) ? IRR_ALLOCATE : IRR_NONE,
 927                     ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
 928                 ire->ire_ib_pkt_count++;
 929                 (*ire->ire_recvfn)(ire, mp, ip6h, ira);
 930                 ire_refrele(ire);
 931                 nce_refrele(nce);
 932                 return;
 933         }
 934         /*
 935          * ipIfStatsHCInForwDatagrams should only be increment if there
 936          * will be an attempt to forward the packet, which is why we
 937          * increment after the above condition has been checked.
 938          */
 939         BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);
 940 
 941         /* Initiate Read side IPPF processing */
 942         if (IPP_ENABLED(IPP_FWD_IN, ipst)) {
 943                 /* ip_process translates an IS_UNDER_IPMP */
 944                 mp = ip_process(IPP_FWD_IN, mp, ill, ill);
 945                 if (mp == NULL) {
 946                         /* ip_drop_packet and MIB done */
 947                         ip2dbg(("ire_recv_forward_v6: pkt dropped/deferred "
 948                             "during IPPF processing\n"));
 949                         nce_refrele(nce);
 950                         return;
 951                 }
 952         }
 953 
 954         DTRACE_PROBE4(ip6__forwarding__start,
 955             ill_t *, ill, ill_t *, dst_ill, ip6_t *, ip6h, mblk_t *, mp);
 956 
 957         if (HOOKS6_INTERESTED_FORWARDING(ipst)) {
 958                 int     error;
 959 
 960                 FW_HOOKS(ipst->ips_ip6_forwarding_event,
 961                     ipst->ips_ipv6firewall_forwarding,
 962                     ill, dst_ill, ip6h, mp, mp, 0, ipst, error);
 963 
 964                 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp);
 965 
 966                 if (mp == NULL) {
 967                         nce_refrele(nce);
 968                         return;
 969                 }
 970                 /*
 971                  * Even if the destination was changed by the filter we use the
 972                  * forwarding decision that was made based on the address
 973                  * in ip_input.
 974                  */
 975 
 976                 /* Might have changed */
 977                 ip6h = (ip6_t *)mp->b_rptr;
 978                 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
 979         }
 980 
 981         /* Packet is being forwarded. Turning off hwcksum flag. */
 982         DB_CKSUMFLAGS(mp) = 0;
 983 
 984         /*
 985          * Per RFC 3513 section 2.5.2, we must not forward packets with
 986          * an unspecified source address.
 987          * The loopback address check for both src and dst has already
 988          * been checked in ip_input_v6
 989          * In the future one can envision adding RPF checks using number 3.
 990          */
 991         switch (ipst->ips_src_check) {
 992         case 0:
 993                 break;
 994         case 1:
 995         case 2:
 996                 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) ||
 997                     IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) {
 998                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
 999                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
1000                         ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
1001                         nce_refrele(nce);
1002                         freemsg(mp);
1003                         return;
1004                 }
1005                 break;
1006         }
1007 
1008         /*
1009          * Check to see if we're forwarding the packet to a
1010          * different link from which it came.  If so, check the
1011          * source and destination addresses since routers must not
1012          * forward any packets with link-local source or
1013          * destination addresses to other links.  Otherwise (if
1014          * we're forwarding onto the same link), conditionally send
1015          * a redirect message.
1016          */
1017         if (!IS_ON_SAME_LAN(dst_ill, ill)) {
1018                 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ||
1019                     IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) {
1020                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
1021                         ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
1022                         freemsg(mp);
1023                         nce_refrele(nce);
1024                         return;
1025                 }
1026                 /* TBD add site-local check at site boundary? */
1027         } else if (ipst->ips_ipv6_send_redirects) {
1028                 ip_send_potential_redirect_v6(mp, ip6h, ire, ira);
1029         }
1030 
1031         added_tx_len = 0;
1032         if (iraflags & IRAF_SYSTEM_LABELED) {
1033                 mblk_t          *mp1;
1034                 uint32_t        old_pkt_len = ira->ira_pktlen;
1035 
1036                 /*
1037                  * Check if it can be forwarded and add/remove
1038                  * CIPSO options as needed.
1039                  */
1040                 if ((mp1 = tsol_ip_forward(ire, mp, ira)) == NULL) {
1041                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1042                         ip_drop_input("tsol_ip_forward", mp, ill);
1043                         freemsg(mp);
1044                         nce_refrele(nce);
1045                         return;
1046                 }
1047                 /*
1048                  * Size may have changed. Remember amount added in case
1049                  * ip_fragment needs to send an ICMP too big.
1050                  */
1051                 mp = mp1;
1052                 ip6h = (ip6_t *)mp->b_rptr;
1053                 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
1054                 ira->ira_ip_hdr_length = IPV6_HDR_LEN;
1055                 if (ira->ira_pktlen > old_pkt_len)
1056                         added_tx_len = ira->ira_pktlen - old_pkt_len;
1057         }
1058 
1059         mtu = dst_ill->ill_mtu;
1060         if ((iremtu = ire->ire_metrics.iulp_mtu) != 0 && iremtu < mtu)
1061                 mtu = iremtu;
1062         ip_forward_xmit_v6(nce, mp, ip6h, ira, mtu, added_tx_len);
1063         nce_refrele(nce);
1064         return;
1065 
1066 }
1067 
1068 /*
1069  * Used for sending out unicast and multicast packets that are
1070  * forwarded.
1071  */
1072 void
1073 ip_forward_xmit_v6(nce_t *nce, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira,
1074     uint32_t mtu, uint32_t added_tx_len)
1075 {
1076         ill_t           *dst_ill = nce->nce_ill;
1077         uint32_t        pkt_len;
1078         iaflags_t       iraflags = ira->ira_flags;
1079         ip_stack_t      *ipst = dst_ill->ill_ipst;
1080 
1081         if (ip6h->ip6_hops-- <= 1) {
1082                 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1083                 ip_drop_input("ICMP6_TIME_EXCEED_TRANSIT", mp, ira->ira_ill);
1084                 icmp_time_exceeded_v6(mp, ICMP6_TIME_EXCEED_TRANSIT, B_FALSE,
1085                     ira);
1086                 return;
1087         }
1088 
1089         /* Initiate Write side IPPF processing before any fragmentation */
1090         if (IPP_ENABLED(IPP_FWD_OUT, ipst)) {
1091                 /* ip_process translates an IS_UNDER_IPMP */
1092                 mp = ip_process(IPP_FWD_OUT, mp, dst_ill, dst_ill);
1093                 if (mp == NULL) {
1094                         /* ip_drop_packet and MIB done */
1095                         ip2dbg(("ire_recv_forward_v6: pkt dropped/deferred" \
1096                             " during IPPF processing\n"));
1097                         return;
1098                 }
1099         }
1100 
1101         pkt_len = ira->ira_pktlen;
1102 
1103         BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams);
1104 
1105         if (pkt_len > mtu) {
1106                 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutFragFails);
1107                 ip_drop_output("ipIfStatsOutFragFails", mp, dst_ill);
1108                 if (iraflags & IRAF_SYSTEM_LABELED) {
1109                         /*
1110                          * Remove any CIPSO option added by
1111                          * tsol_ip_forward, and make sure we report
1112                          * a path MTU so that there
1113                          * is room to add such a CIPSO option for future
1114                          * packets.
1115                          */
1116                         mtu = tsol_pmtu_adjust(mp, mtu, added_tx_len, AF_INET6);
1117                 }
1118                 icmp_pkt2big_v6(mp, mtu, B_TRUE, ira);
1119                 return;
1120         }
1121 
1122         ASSERT(pkt_len ==
1123             ntohs(((ip6_t *)mp->b_rptr)->ip6_plen) + IPV6_HDR_LEN);
1124 
1125         if (iraflags & IRAF_LOOPBACK_COPY) {
1126                 /*
1127                  * IXAF_NO_LOOP_ZONEID is not set hence 6th arg
1128                  * is don't care
1129                  */
1130                 (void) ip_postfrag_loopcheck(mp, nce,
1131                     (IXAF_LOOPBACK_COPY | IXAF_NO_DEV_FLOW_CTL),
1132                     pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL);
1133         } else {
1134                 (void) ip_xmit(mp, nce, IXAF_NO_DEV_FLOW_CTL,
1135                     pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL);
1136         }
1137 }
1138 
1139 /*
1140  * ire_recvfn for RTF_REJECT and RTF_BLACKHOLE routes, including IRE_NOROUTE,
1141  * which is what ire_route_recursive returns when there is no matching ire.
1142  * Send ICMP unreachable unless blackhole.
1143  */
1144 void
1145 ire_recv_noroute_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1146 {
1147         ip6_t           *ip6h = (ip6_t *)iph_arg;
1148         ill_t           *ill = ira->ira_ill;
1149         ip_stack_t      *ipst = ill->ill_ipst;
1150 
1151         /* Would we have forwarded this packet if we had a route? */
1152         if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
1153                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1154                 ip_drop_input("l2 multicast not forwarded", mp, ill);
1155                 freemsg(mp);
1156                 return;
1157         }
1158 
1159         if (!(ill->ill_flags & ILLF_ROUTER)) {
1160                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1161                 ip_drop_input("ipIfStatsForwProhibits", mp, ill);
1162                 freemsg(mp);
1163                 return;
1164         }
1165         /*
1166          * If we had a route this could have been forwarded. Count as such.
1167          *
1168          * ipIfStatsHCInForwDatagrams should only be increment if there
1169          * will be an attempt to forward the packet, which is why we
1170          * increment after the above condition has been checked.
1171          */
1172         BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);
1173 
1174         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes);
1175 
1176         ip_rts_change_v6(RTM_MISS, &ip6h->ip6_dst, 0, 0, 0, 0, 0, 0, RTA_DST,
1177             ipst);
1178 
1179         if (ire->ire_flags & RTF_BLACKHOLE) {
1180                 ip_drop_input("ipIfStatsInNoRoutes RTF_BLACKHOLE", mp, ill);
1181                 freemsg(mp);
1182         } else {
1183                 ip_drop_input("ipIfStatsInNoRoutes RTF_REJECT", mp, ill);
1184 
1185                 icmp_unreachable_v6(mp, ICMP6_DST_UNREACH_NOROUTE, B_FALSE,
1186                     ira);
1187         }
1188 }
1189 
1190 /*
1191  * ire_recvfn for IRE_LOCALs marked with ire_noaccept. Such IREs are used for
1192  * VRRP when in noaccept mode.
1193  * We silently drop packets except for Neighbor Solicitations and
1194  * Neighbor Advertisements.
1195  */
1196 void
1197 ire_recv_noaccept_v6(ire_t *ire, mblk_t *mp, void *iph_arg,
1198     ip_recv_attr_t *ira)
1199 {
1200         ip6_t           *ip6h = (ip6_t *)iph_arg;
1201         ill_t           *ill = ira->ira_ill;
1202         icmp6_t         *icmp6;
1203         int             ip_hdr_length;
1204 
1205         if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
1206                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1207                 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill);
1208                 freemsg(mp);
1209                 return;
1210         }
1211         ip_hdr_length = ira->ira_ip_hdr_length;
1212         if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) {
1213                 if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) {
1214                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
1215                         ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
1216                         freemsg(mp);
1217                         return;
1218                 }
1219                 ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira);
1220                 if (ip6h == NULL) {
1221                         BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
1222                         freemsg(mp);
1223                         return;
1224                 }
1225         }
1226         icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
1227 
1228         if (icmp6->icmp6_type != ND_NEIGHBOR_SOLICIT &&
1229             icmp6->icmp6_type != ND_NEIGHBOR_ADVERT) {
1230                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1231                 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill);
1232                 freemsg(mp);
1233                 return;
1234         }
1235         ire_recv_local_v6(ire, mp, ip6h, ira);
1236 }
1237 
1238 /*
1239  * ire_recvfn for IRE_MULTICAST.
1240  */
1241 void
1242 ire_recv_multicast_v6(ire_t *ire, mblk_t *mp, void *iph_arg,
1243     ip_recv_attr_t *ira)
1244 {
1245         ip6_t           *ip6h = (ip6_t *)iph_arg;
1246         ill_t           *ill = ira->ira_ill;
1247 
1248         ASSERT(ire->ire_ill == ira->ira_ill);
1249 
1250         BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts);
1251         UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, ira->ira_pktlen);
1252 
1253         /* Tag for higher-level protocols */
1254         ira->ira_flags |= IRAF_MULTICAST;
1255 
1256         /*
1257          * So that we don't end up with dups, only one ill an IPMP group is
1258          * nominated to receive multicast traffic.
1259          * If we have no cast_ill we are liberal and accept everything.
1260          */
1261         if (IS_UNDER_IPMP(ill)) {
1262                 ip_stack_t      *ipst = ill->ill_ipst;
1263 
1264                 /* For an under ill_grp can change under lock */
1265                 rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1266                 if (!ill->ill_nom_cast && ill->ill_grp != NULL &&
1267                     ill->ill_grp->ig_cast_ill != NULL) {
1268                         rw_exit(&ipst->ips_ill_g_lock);
1269                         ip_drop_input("not on cast ill", mp, ill);
1270                         freemsg(mp);
1271                         return;
1272                 }
1273                 rw_exit(&ipst->ips_ill_g_lock);
1274                 /*
1275                  * We switch to the upper ill so that mrouter and hasmembers
1276                  * can operate on upper here and in ip_input_multicast.
1277                  */
1278                 ill = ipmp_ill_hold_ipmp_ill(ill);
1279                 if (ill != NULL) {
1280                         ASSERT(ill != ira->ira_ill);
1281                         ASSERT(ire->ire_ill == ira->ira_ill);
1282                         ira->ira_ill = ill;
1283                         ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
1284                 } else {
1285                         ill = ira->ira_ill;
1286                 }
1287         }
1288 
1289 #ifdef notdef
1290         /*
1291          * Check if we are a multicast router - send ip_mforward a copy of
1292          * the packet.
1293          * Due to mroute_decap tunnels we consider forwarding packets even if
1294          * mrouted has not joined the allmulti group on this interface.
1295          */
1296         if (ipst->ips_ip_g_mrouter) {
1297                 int retval;
1298 
1299                 /*
1300                  * Clear the indication that this may have hardware
1301                  * checksum as we are not using it for forwarding.
1302                  */
1303                 DB_CKSUMFLAGS(mp) = 0;
1304 
1305                 /*
1306                  * ip_mforward helps us make these distinctions: If received
1307                  * on tunnel and not IGMP, then drop.
1308                  * If IGMP packet, then don't check membership
1309                  * If received on a phyint and IGMP or PIM, then
1310                  * don't check membership
1311                  */
1312                 retval = ip_mforward_v6(mp, ira);
1313                 /* ip_mforward updates mib variables if needed */
1314 
1315                 switch (retval) {
1316                 case 0:
1317                         /*
1318                          * pkt is okay and arrived on phyint.
1319                          */
1320                         break;
1321                 case -1:
1322                         /* pkt is mal-formed, toss it */
1323                         freemsg(mp);
1324                         goto done;
1325                 case 1:
1326                         /*
1327                          * pkt is okay and arrived on a tunnel
1328                          *
1329                          * If we are running a multicast router
1330                          * we need to see all mld packets, which
1331                          * are marked with router alerts.
1332                          */
1333                         if (ira->ira_flags & IRAF_ROUTER_ALERT)
1334                                 goto forus;
1335                         ip_drop_input("Multicast on tunnel ignored", mp, ill);
1336                         freemsg(mp);
1337                         goto done;
1338                 }
1339         }
1340 #endif /* notdef */
1341 
1342         /*
1343          * If this was a router alert we skip the group membership check.
1344          */
1345         if (ira->ira_flags & IRAF_ROUTER_ALERT)
1346                 goto forus;
1347 
1348         /*
1349          * Check if we have members on this ill. This is not necessary for
1350          * correctness because even if the NIC/GLD had a leaky filter, we
1351          * filter before passing to each conn_t.
1352          */
1353         if (!ill_hasmembers_v6(ill, &ip6h->ip6_dst)) {
1354                 /*
1355                  * Nobody interested
1356                  *
1357                  * This might just be caused by the fact that
1358                  * multiple IP Multicast addresses map to the same
1359                  * link layer multicast - no need to increment counter!
1360                  */
1361                 ip_drop_input("Multicast with no members", mp, ill);
1362                 freemsg(mp);
1363                 goto done;
1364         }
1365 forus:
1366         ip2dbg(("ire_recv_multicast_v6: multicast for us\n"));
1367 
1368         /*
1369          * After reassembly and IPsec we will need to duplicate the
1370          * multicast packet for all matching zones on the ill.
1371          */
1372         ira->ira_zoneid = ALL_ZONES;
1373 
1374         /* Reassemble on the ill on which the packet arrived */
1375         ip_input_local_v6(ire, mp, ip6h, ira);
1376 done:
1377         if (ill != ire->ire_ill) {
1378                 ill_refrele(ill);
1379                 ira->ira_ill = ire->ire_ill;
1380                 ira->ira_ruifindex = ira->ira_ill->ill_phyint->phyint_ifindex;
1381         }
1382 }
1383 
1384 /*
1385  * ire_recvfn for IRE_OFFLINK with RTF_MULTIRT.
1386  * Drop packets since we don't forward out multirt routes.
1387  */
1388 /* ARGSUSED */
1389 void
1390 ire_recv_multirt_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1391 {
1392         ill_t           *ill = ira->ira_ill;
1393 
1394         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes);
1395         ip_drop_input("Not forwarding out MULTIRT", mp, ill);
1396         freemsg(mp);
1397 }
1398 
1399 /*
1400  * ire_recvfn for IRE_LOOPBACK. This is only used when a FW_HOOK
1401  * has rewritten the packet to have a loopback destination address (We
1402  * filter out packet with a loopback destination from arriving over the wire).
1403  * We don't know what zone to use, thus we always use the GLOBAL_ZONEID.
1404  */
1405 void
1406 ire_recv_loopback_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1407 {
1408         ip6_t           *ip6h = (ip6_t *)iph_arg;
1409         ill_t           *ill = ira->ira_ill;
1410         ill_t           *ire_ill = ire->ire_ill;
1411 
1412         ira->ira_zoneid = GLOBAL_ZONEID;
1413 
1414         /* Switch to the lo0 ill for further processing  */
1415         if (ire_ill != ill) {
1416                 /*
1417                  * Update ira_ill to be the ILL on which the IP address
1418                  * is hosted.
1419                  * No need to hold the ill since we have a hold on the ire
1420                  */
1421                 ASSERT(ira->ira_ill == ira->ira_rill);
1422                 ira->ira_ill = ire_ill;
1423 
1424                 ip_input_local_v6(ire, mp, ip6h, ira);
1425 
1426                 /* Restore */
1427                 ASSERT(ira->ira_ill == ire_ill);
1428                 ira->ira_ill = ill;
1429                 return;
1430 
1431         }
1432         ip_input_local_v6(ire, mp, ip6h, ira);
1433 }
1434 
1435 /*
1436  * ire_recvfn for IRE_LOCAL.
1437  */
1438 void
1439 ire_recv_local_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1440 {
1441         ip6_t           *ip6h = (ip6_t *)iph_arg;
1442         ill_t           *ill = ira->ira_ill;
1443         ill_t           *ire_ill = ire->ire_ill;
1444 
1445         /* Make a note for DAD that this address is in use */
1446         ire->ire_last_used_time = LBOLT_FASTPATH;
1447 
1448         /* Only target the IRE_LOCAL with the right zoneid. */
1449         ira->ira_zoneid = ire->ire_zoneid;
1450 
1451         /*
1452          * If the packet arrived on the wrong ill, we check that
1453          * this is ok.
1454          * If it is, then we ensure that we do the reassembly on
1455          * the ill on which the address is hosted. We keep ira_rill as
1456          * the one on which the packet arrived, so that IP_PKTINFO and
1457          * friends can report this.
1458          */
1459         if (ire_ill != ill) {
1460                 ire_t *new_ire;
1461 
1462                 new_ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill);
1463                 if (new_ire == NULL) {
1464                         /* Drop packet */
1465                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1466                         ip_drop_input("ipIfStatsInForwProhibits", mp, ill);
1467                         freemsg(mp);
1468                         return;
1469                 }
1470                 /*
1471                  * Update ira_ill to be the ILL on which the IP address
1472                  * is hosted. No need to hold the ill since we have a
1473                  * hold on the ire. Note that we do the switch even if
1474                  * new_ire == ire (for IPMP, ire would be the one corresponding
1475                  * to the IPMP ill).
1476                  */
1477                 ASSERT(ira->ira_ill == ira->ira_rill);
1478                 ira->ira_ill = new_ire->ire_ill;
1479 
1480                 /* ira_ruifindex tracks the upper for ira_rill */
1481                 if (IS_UNDER_IPMP(ill))
1482                         ira->ira_ruifindex = ill_get_upper_ifindex(ill);
1483 
1484                 ip_input_local_v6(new_ire, mp, ip6h, ira);
1485 
1486                 /* Restore */
1487                 ASSERT(ira->ira_ill == new_ire->ire_ill);
1488                 ira->ira_ill = ill;
1489                 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
1490 
1491                 if (new_ire != ire)
1492                         ire_refrele(new_ire);
1493                 return;
1494         }
1495 
1496         ip_input_local_v6(ire, mp, ip6h, ira);
1497 }
1498 
1499 /*
1500  * Common function for packets arriving for the host. Handles
1501  * checksum verification, reassembly checks, etc.
1502  */
1503 static void
1504 ip_input_local_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1505 {
1506         iaflags_t       iraflags = ira->ira_flags;
1507 
1508         /*
1509          * For multicast we need some extra work before
1510          * we call ip_fanout_v6(), since in the case of shared-IP zones
1511          * we need to pretend that a packet arrived for each zoneid.
1512          */
1513         if (iraflags & IRAF_MULTICAST) {
1514                 ip_input_multicast_v6(ire, mp, ip6h, ira);
1515                 return;
1516         }
1517         ip_fanout_v6(mp, ip6h, ira);
1518 }
1519 
1520 /*
1521  * Handle multiple zones which want to receive the same multicast packets
1522  * on this ill by delivering a packet to each of them.
1523  *
1524  * Note that for packets delivered to transports we could instead do this
1525  * as part of the fanout code, but since we need to handle icmp_inbound
1526  * it is simpler to have multicast work the same as IPv4 broadcast.
1527  *
1528  * The ip_fanout matching for multicast matches based on ilm independent of
1529  * zoneid since the zoneid restriction is applied when joining a multicast
1530  * group.
1531  */
1532 /* ARGSUSED */
1533 static void
1534 ip_input_multicast_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1535 {
1536         ill_t           *ill = ira->ira_ill;
1537         iaflags_t       iraflags = ira->ira_flags;
1538         ip_stack_t      *ipst = ill->ill_ipst;
1539         netstack_t      *ns = ipst->ips_netstack;
1540         zoneid_t        zoneid;
1541         mblk_t          *mp1;
1542         ip6_t           *ip6h1;
1543         uint_t          ira_pktlen = ira->ira_pktlen;
1544         uint16_t        ira_ip_hdr_length = ira->ira_ip_hdr_length;
1545 
1546         /* ire_recv_multicast has switched to the upper ill for IPMP */
1547         ASSERT(!IS_UNDER_IPMP(ill));
1548 
1549         /*
1550          * If we don't have more than one shared-IP zone, or if
1551          * there are no members in anything but the global zone,
1552          * then just set the zoneid and proceed.
1553          */
1554         if (ns->netstack_numzones == 1 ||
1555             !ill_hasmembers_otherzones_v6(ill, &ip6h->ip6_dst,
1556             GLOBAL_ZONEID)) {
1557                 ira->ira_zoneid = GLOBAL_ZONEID;
1558 
1559                 /* If sender didn't want this zone to receive it, drop */
1560                 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
1561                     ira->ira_no_loop_zoneid == ira->ira_zoneid) {
1562                         ip_drop_input("Multicast but wrong zoneid", mp, ill);
1563                         freemsg(mp);
1564                         return;
1565                 }
1566                 ip_fanout_v6(mp, ip6h, ira);
1567                 return;
1568         }
1569 
1570         /*
1571          * Here we loop over all zoneids that have members in the group
1572          * and deliver a packet to ip_fanout for each zoneid.
1573          *
1574          * First find any members in the lowest numeric zoneid by looking for
1575          * first zoneid larger than -1 (ALL_ZONES).
1576          * We terminate the loop when we receive -1 (ALL_ZONES).
1577          */
1578         zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, ALL_ZONES);
1579         for (; zoneid != ALL_ZONES;
1580             zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, zoneid)) {
1581                 /*
1582                  * Avoid an extra copymsg/freemsg by skipping global zone here
1583                  * and doing that at the end.
1584                  */
1585                 if (zoneid == GLOBAL_ZONEID)
1586                         continue;
1587 
1588                 ira->ira_zoneid = zoneid;
1589 
1590                 /* If sender didn't want this zone to receive it, skip */
1591                 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
1592                     ira->ira_no_loop_zoneid == ira->ira_zoneid)
1593                         continue;
1594 
1595                 mp1 = copymsg(mp);
1596                 if (mp1 == NULL) {
1597                         /* Failed to deliver to one zone */
1598                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1599                         ip_drop_input("ipIfStatsInDiscards", mp, ill);
1600                         continue;
1601                 }
1602                 ip6h1 = (ip6_t *)mp1->b_rptr;
1603                 ip_fanout_v6(mp1, ip6h1, ira);
1604                 /*
1605                  * IPsec might have modified ira_pktlen and ira_ip_hdr_length
1606                  * so we restore them for a potential next iteration
1607                  */
1608                 ira->ira_pktlen = ira_pktlen;
1609                 ira->ira_ip_hdr_length = ira_ip_hdr_length;
1610         }
1611 
1612         /* Do the main ire */
1613         ira->ira_zoneid = GLOBAL_ZONEID;
1614         /* If sender didn't want this zone to receive it, drop */
1615         if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
1616             ira->ira_no_loop_zoneid == ira->ira_zoneid) {
1617                 ip_drop_input("Multicast but wrong zoneid", mp, ill);
1618                 freemsg(mp);
1619         } else {
1620                 ip_fanout_v6(mp, ip6h, ira);
1621         }
1622 }
1623 
1624 
1625 /*
1626  * Determine the zoneid and IRAF_TX_MAC_EXEMPTABLE if trusted extensions
1627  * is in use. Updates ira_zoneid and ira_flags as a result.
1628  */
1629 static void
1630 ip_fanout_tx_v6(mblk_t *mp, ip6_t *ip6h, uint8_t protocol, uint_t ip_hdr_length,
1631     ip_recv_attr_t *ira)
1632 {
1633         uint16_t        *up;
1634         uint16_t        lport;
1635         zoneid_t        zoneid;
1636 
1637         ASSERT(ira->ira_flags & IRAF_SYSTEM_LABELED);
1638 
1639         /*
1640          * If the packet is unlabeled we might allow read-down
1641          * for MAC_EXEMPT. Below we clear this if it is a multi-level
1642          * port (MLP).
1643          * Note that ira_tsl can be NULL here.
1644          */
1645         if (ira->ira_tsl != NULL && ira->ira_tsl->tsl_flags & TSLF_UNLABELED)
1646                 ira->ira_flags |= IRAF_TX_MAC_EXEMPTABLE;
1647 
1648         if (ira->ira_zoneid != ALL_ZONES)
1649                 return;
1650 
1651         ira->ira_flags |= IRAF_TX_SHARED_ADDR;
1652 
1653         up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length);
1654         switch (protocol) {
1655         case IPPROTO_TCP:
1656         case IPPROTO_SCTP:
1657         case IPPROTO_UDP:
1658                 /* Caller ensures this */
1659                 ASSERT(((uchar_t *)ip6h) + ip_hdr_length +4 <= mp->b_wptr);
1660 
1661                 /*
1662                  * Only these transports support MLP.
1663                  * We know their destination port numbers is in
1664                  * the same place in the header.
1665                  */
1666                 lport = up[1];
1667 
1668                 /*
1669                  * No need to handle exclusive-stack zones
1670                  * since ALL_ZONES only applies to the shared IP instance.
1671                  */
1672                 zoneid = tsol_mlp_findzone(protocol, lport);
1673                 /*
1674                  * If no shared MLP is found, tsol_mlp_findzone returns
1675                  * ALL_ZONES.  In that case, we assume it's SLP, and
1676                  * search for the zone based on the packet label.
1677                  *
1678                  * If there is such a zone, we prefer to find a
1679                  * connection in it.  Otherwise, we look for a
1680                  * MAC-exempt connection in any zone whose label
1681                  * dominates the default label on the packet.
1682                  */
1683                 if (zoneid == ALL_ZONES)
1684                         zoneid = tsol_attr_to_zoneid(ira);
1685                 else
1686                         ira->ira_flags &= ~IRAF_TX_MAC_EXEMPTABLE;
1687                 break;
1688         default:
1689                 /* Handle shared address for other protocols */
1690                 zoneid = tsol_attr_to_zoneid(ira);
1691                 break;
1692         }
1693         ira->ira_zoneid = zoneid;
1694 }
1695 
1696 /*
1697  * Increment checksum failure statistics
1698  */
1699 static void
1700 ip_input_cksum_err_v6(uint8_t protocol, uint16_t hck_flags, ill_t *ill)
1701 {
1702         ip_stack_t      *ipst = ill->ill_ipst;
1703 
1704         switch (protocol) {
1705         case IPPROTO_TCP:
1706                 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs);
1707 
1708                 if (hck_flags & HCK_FULLCKSUM)
1709                         IP6_STAT(ipst, ip6_tcp_in_full_hw_cksum_err);
1710                 else if (hck_flags & HCK_PARTIALCKSUM)
1711                         IP6_STAT(ipst, ip6_tcp_in_part_hw_cksum_err);
1712                 else
1713                         IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err);
1714                 break;
1715         case IPPROTO_UDP:
1716                 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs);
1717                 if (hck_flags & HCK_FULLCKSUM)
1718                         IP6_STAT(ipst, ip6_udp_in_full_hw_cksum_err);
1719                 else if (hck_flags & HCK_PARTIALCKSUM)
1720                         IP6_STAT(ipst, ip6_udp_in_part_hw_cksum_err);
1721                 else
1722                         IP6_STAT(ipst, ip6_udp_in_sw_cksum_err);
1723                 break;
1724         case IPPROTO_ICMPV6:
1725                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
1726                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
1727                 break;
1728         default:
1729                 ASSERT(0);
1730                 break;
1731         }
1732 }
1733 
1734 /* Calculate the IPv6 pseudo-header checksum for TCP, UDP, and ICMPV6 */
1735 uint32_t
1736 ip_input_cksum_pseudo_v6(ip6_t *ip6h, ip_recv_attr_t *ira)
1737 {
1738         uint_t          ulp_len;
1739         uint32_t        cksum;
1740         uint8_t         protocol = ira->ira_protocol;
1741         uint16_t        ip_hdr_length = ira->ira_ip_hdr_length;
1742 
1743 #define iphs    ((uint16_t *)ip6h)
1744 
1745         switch (protocol) {
1746         case IPPROTO_TCP:
1747                 ulp_len = ira->ira_pktlen - ip_hdr_length;
1748 
1749                 /* Protocol and length */
1750                 cksum = htons(ulp_len) + IP_TCP_CSUM_COMP;
1751                 /* IP addresses */
1752                 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] +
1753                     iphs[8] + iphs[9] + iphs[10] + iphs[11] +
1754                     iphs[12] + iphs[13] + iphs[14] + iphs[15] +
1755                     iphs[16] + iphs[17] + iphs[18] + iphs[19];
1756                 break;
1757 
1758         case IPPROTO_UDP: {
1759                 udpha_t         *udpha;
1760 
1761                 udpha = (udpha_t  *)((uchar_t *)ip6h + ip_hdr_length);
1762 
1763                 /* Protocol and length */
1764                 cksum = udpha->uha_length + IP_UDP_CSUM_COMP;
1765                 /* IP addresses */
1766                 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] +
1767                     iphs[8] + iphs[9] + iphs[10] + iphs[11] +
1768                     iphs[12] + iphs[13] + iphs[14] + iphs[15] +
1769                     iphs[16] + iphs[17] + iphs[18] + iphs[19];
1770                 break;
1771         }
1772         case IPPROTO_ICMPV6:
1773                 ulp_len = ira->ira_pktlen - ip_hdr_length;
1774 
1775                 /* Protocol and length */
1776                 cksum = htons(ulp_len) + IP_ICMPV6_CSUM_COMP;
1777                 /* IP addresses */
1778                 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] +
1779                     iphs[8] + iphs[9] + iphs[10] + iphs[11] +
1780                     iphs[12] + iphs[13] + iphs[14] + iphs[15] +
1781                     iphs[16] + iphs[17] + iphs[18] + iphs[19];
1782                 break;
1783         default:
1784                 cksum = 0;
1785                 break;
1786         }
1787 #undef  iphs
1788         return (cksum);
1789 }
1790 
1791 
1792 /*
1793  * Software verification of the ULP checksums.
1794  * Returns B_TRUE if ok.
1795  * Increments statistics of failed.
1796  */
1797 static boolean_t
1798 ip_input_sw_cksum_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1799 {
1800         ip_stack_t      *ipst = ira->ira_ill->ill_ipst;
1801         uint32_t        cksum;
1802         uint8_t         protocol = ira->ira_protocol;
1803         uint16_t        ip_hdr_length = ira->ira_ip_hdr_length;
1804 
1805         IP6_STAT(ipst, ip6_in_sw_cksum);
1806 
1807         ASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP ||
1808             protocol == IPPROTO_ICMPV6);
1809 
1810         cksum = ip_input_cksum_pseudo_v6(ip6h, ira);
1811         cksum = IP_CSUM(mp, ip_hdr_length, cksum);
1812         if (cksum == 0)
1813                 return (B_TRUE);
1814 
1815         ip_input_cksum_err_v6(protocol, 0, ira->ira_ill);
1816         return (B_FALSE);
1817 }
1818 
1819 /*
1820  * Verify the ULP checksums.
1821  * Returns B_TRUE if ok, or if the ULP doesn't have a well-defined checksum
1822  * algorithm.
1823  * Increments statistics if failed.
1824  */
1825 static boolean_t
1826 ip_input_cksum_v6(iaflags_t iraflags, mblk_t *mp, ip6_t *ip6h,
1827     ip_recv_attr_t *ira)
1828 {
1829         ill_t           *ill = ira->ira_rill;
1830         uint16_t        hck_flags;
1831         uint32_t        cksum;
1832         mblk_t          *mp1;
1833         uint_t          len;
1834         uint8_t         protocol = ira->ira_protocol;
1835         uint16_t        ip_hdr_length = ira->ira_ip_hdr_length;
1836 
1837 
1838         switch (protocol) {
1839         case IPPROTO_TCP:
1840         case IPPROTO_ICMPV6:
1841                 break;
1842 
1843         case IPPROTO_UDP: {
1844                 udpha_t         *udpha;
1845 
1846                 udpha = (udpha_t  *)((uchar_t *)ip6h + ip_hdr_length);
1847                 /*
1848                  *  Before going through the regular checksum
1849                  *  calculation, make sure the received checksum
1850                  *  is non-zero. RFC 2460 says, a 0x0000 checksum
1851                  *  in a UDP packet (within IPv6 packet) is invalid
1852                  *  and should be replaced by 0xffff. This makes
1853                  *  sense as regular checksum calculation will
1854                  *  pass for both the cases i.e. 0x0000 and 0xffff.
1855                  *  Removing one of the case makes error detection
1856                  *  stronger.
1857                  */
1858                 if (udpha->uha_checksum == 0) {
1859                         /* 0x0000 checksum is invalid */
1860                         BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs);
1861                         return (B_FALSE);
1862                 }
1863                 break;
1864         }
1865         case IPPROTO_SCTP: {
1866                 sctp_hdr_t      *sctph;
1867                 uint32_t        pktsum;
1868 
1869                 sctph = (sctp_hdr_t *)((uchar_t *)ip6h + ip_hdr_length);
1870 #ifdef  DEBUG
1871                 if (skip_sctp_cksum)
1872                         return (B_TRUE);
1873 #endif
1874                 pktsum = sctph->sh_chksum;
1875                 sctph->sh_chksum = 0;
1876                 cksum = sctp_cksum(mp, ip_hdr_length);
1877                 sctph->sh_chksum = pktsum;
1878                 if (cksum == pktsum)
1879                         return (B_TRUE);
1880 
1881                 /*
1882                  * Defer until later whether a bad checksum is ok
1883                  * in order to allow RAW sockets to use Adler checksum
1884                  * with SCTP.
1885                  */
1886                 ira->ira_flags |= IRAF_SCTP_CSUM_ERR;
1887                 return (B_TRUE);
1888         }
1889 
1890         default:
1891                 /* No ULP checksum to verify. */
1892                 return (B_TRUE);
1893         }
1894 
1895         /*
1896          * Revert to software checksum calculation if the interface
1897          * isn't capable of checksum offload.
1898          * We clear DB_CKSUMFLAGS when going through IPsec in ip_fanout.
1899          * Note: IRAF_NO_HW_CKSUM is not currently used.
1900          */
1901         ASSERT(!IS_IPMP(ill));
1902         if ((iraflags & IRAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) ||
1903             !dohwcksum) {
1904                 return (ip_input_sw_cksum_v6(mp, ip6h, ira));
1905         }
1906 
1907         /*
1908          * We apply this for all ULP protocols. Does the HW know to
1909          * not set the flags for SCTP and other protocols.
1910          */
1911 
1912         hck_flags = DB_CKSUMFLAGS(mp);
1913 
1914         if (hck_flags & HCK_FULLCKSUM_OK) {
1915                 /*
1916                  * Hardware has already verified the checksum.
1917                  */
1918                 return (B_TRUE);
1919         }
1920 
1921         if (hck_flags & HCK_FULLCKSUM) {
1922                 /*
1923                  * Full checksum has been computed by the hardware
1924                  * and has been attached.  If the driver wants us to
1925                  * verify the correctness of the attached value, in
1926                  * order to protect against faulty hardware, compare
1927                  * it against -0 (0xFFFF) to see if it's valid.
1928                  */
1929                 cksum = DB_CKSUM16(mp);
1930                 if (cksum == 0xFFFF)
1931                         return (B_TRUE);
1932                 ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill);
1933                 return (B_FALSE);
1934         }
1935 
1936         mp1 = mp->b_cont;
1937         if ((hck_flags & HCK_PARTIALCKSUM) &&
1938             (mp1 == NULL || mp1->b_cont == NULL) &&
1939             ip_hdr_length >= DB_CKSUMSTART(mp) &&
1940             ((len = ip_hdr_length - DB_CKSUMSTART(mp)) & 1) == 0) {
1941                 uint32_t        adj;
1942                 uchar_t         *cksum_start;
1943 
1944                 cksum = ip_input_cksum_pseudo_v6(ip6h, ira);
1945 
1946                 cksum_start = ((uchar_t *)ip6h + DB_CKSUMSTART(mp));
1947 
1948                 /*
1949                  * Partial checksum has been calculated by hardware
1950                  * and attached to the packet; in addition, any
1951                  * prepended extraneous data is even byte aligned,
1952                  * and there are at most two mblks associated with
1953                  * the packet.  If any such data exists, we adjust
1954                  * the checksum; also take care any postpended data.
1955                  */
1956                 IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, len, adj);
1957                 /*
1958                  * One's complement subtract extraneous checksum
1959                  */
1960                 cksum += DB_CKSUM16(mp);
1961                 if (adj >= cksum)
1962                         cksum = ~(adj - cksum) & 0xFFFF;
1963                 else
1964                         cksum -= adj;
1965                 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16);
1966                 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16);
1967                 if (!(~cksum & 0xFFFF))
1968                         return (B_TRUE);
1969 
1970                 ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill);
1971                 return (B_FALSE);
1972         }
1973         return (ip_input_sw_cksum_v6(mp, ip6h, ira));
1974 }
1975 
1976 
1977 /*
1978  * Handle fanout of received packets.
1979  * Unicast packets that are looped back (from ire_send_local_v6) and packets
1980  * from the wire are differentiated by checking IRAF_VERIFY_ULP_CKSUM.
1981  *
1982  * IPQoS Notes
1983  * Before sending it to the client, invoke IPPF processing. Policy processing
1984  * takes place only if the callout_position, IPP_LOCAL_IN, is enabled.
1985  */
1986 void
1987 ip_fanout_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1988 {
1989         ill_t           *ill = ira->ira_ill;
1990         iaflags_t       iraflags = ira->ira_flags;
1991         ip_stack_t      *ipst = ill->ill_ipst;
1992         uint8_t         protocol;
1993         conn_t          *connp;
1994 #define rptr    ((uchar_t *)ip6h)
1995         uint_t          ip_hdr_length;
1996         uint_t          min_ulp_header_length;
1997         int             offset;
1998         ssize_t         len;
1999         netstack_t      *ns = ipst->ips_netstack;
2000         ipsec_stack_t   *ipss = ns->netstack_ipsec;
2001         ill_t           *rill = ira->ira_rill;
2002 
2003         ASSERT(ira->ira_pktlen == ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN);
2004 
2005         /*
2006          * We repeat this as we parse over destination options header and
2007          * fragment headers (earlier we've handled any hop-by-hop options
2008          * header.)
2009          * We update ira_protocol and ira_ip_hdr_length as we skip past
2010          * the intermediate headers; they already point past any
2011          * hop-by-hop header.
2012          */
2013 repeat:
2014         protocol = ira->ira_protocol;
2015         ip_hdr_length = ira->ira_ip_hdr_length;
2016 
2017         /*
2018          * Time for IPP once we've done reassembly and IPsec.
2019          * We skip this for loopback packets since we don't do IPQoS
2020          * on loopback.
2021          */
2022         if (IPP_ENABLED(IPP_LOCAL_IN, ipst) &&
2023             !(iraflags & IRAF_LOOPBACK) &&
2024             (protocol != IPPROTO_ESP && protocol != IPPROTO_AH &&
2025             protocol != IPPROTO_DSTOPTS && protocol != IPPROTO_ROUTING &&
2026             protocol != IPPROTO_FRAGMENT)) {
2027                 /*
2028                  * Use the interface on which the packet arrived - not where
2029                  * the IP address is hosted.
2030                  */
2031                 /* ip_process translates an IS_UNDER_IPMP */
2032                 mp = ip_process(IPP_LOCAL_IN, mp, rill, ill);
2033                 if (mp == NULL) {
2034                         /* ip_drop_packet and MIB done */
2035                         return;
2036                 }
2037         }
2038 
2039         /* Determine the minimum required size of the upper-layer header */
2040         /* Need to do this for at least the set of ULPs that TX handles. */
2041         switch (protocol) {
2042         case IPPROTO_TCP:
2043                 min_ulp_header_length = TCP_MIN_HEADER_LENGTH;
2044                 break;
2045         case IPPROTO_SCTP:
2046                 min_ulp_header_length = SCTP_COMMON_HDR_LENGTH;
2047                 break;
2048         case IPPROTO_UDP:
2049                 min_ulp_header_length = UDPH_SIZE;
2050                 break;
2051         case IPPROTO_ICMP:
2052         case IPPROTO_ICMPV6:
2053                 min_ulp_header_length = ICMPH_SIZE;
2054                 break;
2055         case IPPROTO_FRAGMENT:
2056         case IPPROTO_DSTOPTS:
2057         case IPPROTO_ROUTING:
2058                 min_ulp_header_length = MIN_EHDR_LEN;
2059                 break;
2060         default:
2061                 min_ulp_header_length = 0;
2062                 break;
2063         }
2064         /* Make sure we have the min ULP header length */
2065         len = mp->b_wptr - rptr;
2066         if (len < ip_hdr_length + min_ulp_header_length) {
2067                 if (ira->ira_pktlen < ip_hdr_length + min_ulp_header_length)
2068                         goto pkt_too_short;
2069 
2070                 IP6_STAT(ipst, ip6_recv_pullup);
2071                 ip6h = ip_pullup(mp, ip_hdr_length + min_ulp_header_length,
2072                     ira);
2073                 if (ip6h == NULL)
2074                         goto discard;
2075                 len = mp->b_wptr - rptr;
2076         }
2077 
2078         /*
2079          * If trusted extensions then determine the zoneid and TX specific
2080          * ira_flags.
2081          */
2082         if (iraflags & IRAF_SYSTEM_LABELED) {
2083                 /* This can update ira->ira_flags and ira->ira_zoneid */
2084                 ip_fanout_tx_v6(mp, ip6h, protocol, ip_hdr_length, ira);
2085                 iraflags = ira->ira_flags;
2086         }
2087 
2088 
2089         /* Verify ULP checksum. Handles TCP, UDP, and SCTP */
2090         if (iraflags & IRAF_VERIFY_ULP_CKSUM) {
2091                 if (!ip_input_cksum_v6(iraflags, mp, ip6h, ira)) {
2092                         /* Bad checksum. Stats are already incremented */
2093                         ip_drop_input("Bad ULP checksum", mp, ill);
2094                         freemsg(mp);
2095                         return;
2096                 }
2097                 /* IRAF_SCTP_CSUM_ERR could have been set */
2098                 iraflags = ira->ira_flags;
2099         }
2100         switch (protocol) {
2101         case IPPROTO_TCP:
2102                 /* For TCP, discard multicast packets. */
2103                 if (iraflags & IRAF_MULTIBROADCAST)
2104                         goto discard;
2105 
2106                 /* First mblk contains IP+TCP headers per above check */
2107                 ASSERT(len >= ip_hdr_length + TCP_MIN_HEADER_LENGTH);
2108 
2109                 /* TCP options present? */
2110                 offset = ((uchar_t *)ip6h)[ip_hdr_length + 12] >> 4;
2111                 if (offset != 5) {
2112                         if (offset < 5)
2113                                 goto discard;
2114 
2115                         /*
2116                          * There must be TCP options.
2117                          * Make sure we can grab them.
2118                          */
2119                         offset <<= 2;
2120                         offset += ip_hdr_length;
2121                         if (len < offset) {
2122                                 if (ira->ira_pktlen < offset)
2123                                         goto pkt_too_short;
2124 
2125                                 IP6_STAT(ipst, ip6_recv_pullup);
2126                                 ip6h = ip_pullup(mp, offset, ira);
2127                                 if (ip6h == NULL)
2128                                         goto discard;
2129                                 len = mp->b_wptr - rptr;
2130                         }
2131                 }
2132 
2133                 /*
2134                  * Pass up a squeue hint to tcp.
2135                  * If ira_sqp is already set (this is loopback) we leave it
2136                  * alone.
2137                  */
2138                 if (ira->ira_sqp == NULL) {
2139                         ira->ira_sqp = ip_squeue_get(ira->ira_ring);
2140                 }
2141 
2142                 /* Look for AF_INET or AF_INET6 that matches */
2143                 connp = ipcl_classify_v6(mp, IPPROTO_TCP, ip_hdr_length,
2144                     ira, ipst);
2145                 if (connp == NULL) {
2146                         /* Send the TH_RST */
2147                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2148                         tcp_xmit_listeners_reset(mp, ira, ipst, NULL);
2149                         return;
2150                 }
2151                 if (connp->conn_incoming_ifindex != 0 &&
2152                     connp->conn_incoming_ifindex != ira->ira_ruifindex) {
2153                         CONN_DEC_REF(connp);
2154 
2155                         /* Send the TH_RST */
2156                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2157                         tcp_xmit_listeners_reset(mp, ira, ipst, NULL);
2158                         return;
2159                 }
2160                 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
2161                     (iraflags & IRAF_IPSEC_SECURE)) {
2162                         mp = ipsec_check_inbound_policy(mp, connp,
2163                             NULL, ip6h, ira);
2164                         if (mp == NULL) {
2165                                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2166                                 /* Note that mp is NULL */
2167                                 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2168                                 CONN_DEC_REF(connp);
2169                                 return;
2170                         }
2171                 }
2172                 /* Found a client; up it goes */
2173                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2174                 ira->ira_ill = ira->ira_rill = NULL;
2175                 if (!IPCL_IS_TCP(connp)) {
2176                         /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
2177                         (connp->conn_recv)(connp, mp, NULL, ira);
2178                         CONN_DEC_REF(connp);
2179                         ira->ira_ill = ill;
2180                         ira->ira_rill = rill;
2181                         return;
2182                 }
2183 
2184                 /*
2185                  * We do different processing whether called from
2186                  * ip_accept_tcp and we match the target, don't match
2187                  * the target, and when we are called by ip_input.
2188                  */
2189                 if (iraflags & IRAF_TARGET_SQP) {
2190                         if (ira->ira_target_sqp == connp->conn_sqp) {
2191                                 mblk_t  *attrmp;
2192 
2193                                 attrmp = ip_recv_attr_to_mblk(ira);
2194                                 if (attrmp == NULL) {
2195                                         BUMP_MIB(ill->ill_ip_mib,
2196                                             ipIfStatsInDiscards);
2197                                         ip_drop_input("ipIfStatsInDiscards",
2198                                             mp, ill);
2199                                         freemsg(mp);
2200                                         CONN_DEC_REF(connp);
2201                                 } else {
2202                                         SET_SQUEUE(attrmp, connp->conn_recv,
2203                                             connp);
2204                                         attrmp->b_cont = mp;
2205                                         ASSERT(ira->ira_target_sqp_mp == NULL);
2206                                         ira->ira_target_sqp_mp = attrmp;
2207                                         /*
2208                                          * Conn ref release when drained from
2209                                          * the squeue.
2210                                          */
2211                                 }
2212                         } else {
2213                                 SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
2214                                     connp->conn_recv, connp, ira, SQ_FILL,
2215                                     SQTAG_IP6_TCP_INPUT);
2216                         }
2217                 } else {
2218                         SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv,
2219                             connp, ira, ip_squeue_flag, SQTAG_IP6_TCP_INPUT);
2220                 }
2221                 ira->ira_ill = ill;
2222                 ira->ira_rill = rill;
2223                 return;
2224 
2225         case IPPROTO_SCTP: {
2226                 sctp_hdr_t      *sctph;
2227                 uint32_t        ports;  /* Source and destination ports */
2228                 sctp_stack_t    *sctps = ipst->ips_netstack->netstack_sctp;
2229 
2230                 /* For SCTP, discard multicast packets. */
2231                 if (iraflags & IRAF_MULTIBROADCAST)
2232                         goto discard;
2233 
2234                 /*
2235                  * Since there is no SCTP h/w cksum support yet, just
2236                  * clear the flag.
2237                  */
2238                 DB_CKSUMFLAGS(mp) = 0;
2239 
2240                 /* Length ensured above */
2241                 ASSERT(MBLKL(mp) >= ip_hdr_length + SCTP_COMMON_HDR_LENGTH);
2242                 sctph = (sctp_hdr_t *)(rptr + ip_hdr_length);
2243 
2244                 /* get the ports */
2245                 ports = *(uint32_t *)&sctph->sh_sport;
2246 
2247                 if (iraflags & IRAF_SCTP_CSUM_ERR) {
2248                         /*
2249                          * No potential sctp checksum errors go to the Sun
2250                          * sctp stack however they might be Adler-32 summed
2251                          * packets a userland stack bound to a raw IP socket
2252                          * could reasonably use. Note though that Adler-32 is
2253                          * a long deprecated algorithm and customer sctp
2254                          * networks should eventually migrate to CRC-32 at
2255                          * which time this facility should be removed.
2256                          */
2257                         ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira);
2258                         return;
2259                 }
2260                 connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, ports,
2261                     ira, mp, sctps, sctph);
2262                 if (connp == NULL) {
2263                         /* Check for raw socket or OOTB handling */
2264                         ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira);
2265                         return;
2266                 }
2267                 if (connp->conn_incoming_ifindex != 0 &&
2268                     connp->conn_incoming_ifindex != ira->ira_ruifindex) {
2269                         CONN_DEC_REF(connp);
2270 
2271                         /* Check for raw socket or OOTB handling */
2272                         ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira);
2273                         return;
2274                 }
2275 
2276                 /* Found a client; up it goes */
2277                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2278                 sctp_input(connp, NULL, ip6h, mp, ira);
2279                 /* sctp_input does a rele of the sctp_t */
2280                 return;
2281         }
2282 
2283         case IPPROTO_UDP:
2284                 /* First mblk contains IP+UDP headers as checked above */
2285                 ASSERT(MBLKL(mp) >= ip_hdr_length + UDPH_SIZE);
2286 
2287                 if (iraflags & IRAF_MULTIBROADCAST) {
2288                         uint16_t *up;   /* Pointer to ports in ULP header */
2289 
2290                         up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length);
2291 
2292                         ip_fanout_udp_multi_v6(mp, ip6h, up[1], up[0], ira);
2293                         return;
2294                 }
2295 
2296                 /* Look for AF_INET or AF_INET6 that matches */
2297                 connp = ipcl_classify_v6(mp, IPPROTO_UDP, ip_hdr_length,
2298                     ira, ipst);
2299                 if (connp == NULL) {
2300         no_udp_match:
2301                         if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].
2302                             connf_head != NULL) {
2303                                 ASSERT(ira->ira_protocol == IPPROTO_UDP);
2304                                 ip_fanout_proto_v6(mp, ip6h, ira);
2305                         } else {
2306                                 ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH,
2307                                     ICMP6_DST_UNREACH_NOPORT, ira);
2308                         }
2309                         return;
2310 
2311                 }
2312                 if (connp->conn_incoming_ifindex != 0 &&
2313                     connp->conn_incoming_ifindex != ira->ira_ruifindex) {
2314                         CONN_DEC_REF(connp);
2315                         goto no_udp_match;
2316                 }
2317                 if (IPCL_IS_NONSTR(connp) ? connp->conn_flow_cntrld :
2318                     !canputnext(connp->conn_rq)) {
2319                         CONN_DEC_REF(connp);
2320                         BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows);
2321                         ip_drop_input("udpIfStatsInOverflows", mp, ill);
2322                         freemsg(mp);
2323                         return;
2324                 }
2325                 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
2326                     (iraflags & IRAF_IPSEC_SECURE)) {
2327                         mp = ipsec_check_inbound_policy(mp, connp,
2328                             NULL, ip6h, ira);
2329                         if (mp == NULL) {
2330                                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2331                                 /* Note that mp is NULL */
2332                                 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2333                                 CONN_DEC_REF(connp);
2334                                 return;
2335                         }
2336                 }
2337 
2338                 /* Found a client; up it goes */
2339                 IP6_STAT(ipst, ip6_udp_fannorm);
2340                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2341                 ira->ira_ill = ira->ira_rill = NULL;
2342                 (connp->conn_recv)(connp, mp, NULL, ira);
2343                 CONN_DEC_REF(connp);
2344                 ira->ira_ill = ill;
2345                 ira->ira_rill = rill;
2346                 return;
2347         default:
2348                 break;
2349         }
2350 
2351         /*
2352          * Clear hardware checksumming flag as it is currently only
2353          * used by TCP and UDP.
2354          */
2355         DB_CKSUMFLAGS(mp) = 0;
2356 
2357         switch (protocol) {
2358         case IPPROTO_ICMPV6:
2359                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
2360 
2361                 /* Check variable for testing applications */
2362                 if (ipst->ips_ipv6_drop_inbound_icmpv6) {
2363                         ip_drop_input("ipv6_drop_inbound_icmpv6", mp, ill);
2364                         freemsg(mp);
2365                         return;
2366                 }
2367                 /*
2368                  * We need to accommodate icmp messages coming in clear
2369                  * until we get everything secure from the wire. If
2370                  * icmp_accept_clear_messages is zero we check with
2371                  * the global policy and act accordingly. If it is
2372                  * non-zero, we accept the message without any checks.
2373                  * But *this does not mean* that this will be delivered
2374                  * to RAW socket clients. By accepting we might send
2375                  * replies back, change our MTU value etc.,
2376                  * but delivery to the ULP/clients depends on their
2377                  * policy dispositions.
2378                  */
2379                 if (ipst->ips_icmp_accept_clear_messages == 0) {
2380                         mp = ipsec_check_global_policy(mp, NULL,
2381                             NULL, ip6h, ira, ns);
2382                         if (mp == NULL)
2383                                 return;
2384                 }
2385 
2386                 /*
2387                  * On a labeled system, we have to check whether the zone
2388                  * itself is permitted to receive raw traffic.
2389                  */
2390                 if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
2391                         if (!tsol_can_accept_raw(mp, ira, B_FALSE)) {
2392                                 BUMP_MIB(ill->ill_icmp6_mib,
2393                                     ipv6IfIcmpInErrors);
2394                                 ip_drop_input("tsol_can_accept_raw", mp, ill);
2395                                 freemsg(mp);
2396                                 return;
2397                         }
2398                 }
2399 
2400                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2401                 mp = icmp_inbound_v6(mp, ira);
2402                 if (mp == NULL) {
2403                         /* No need to pass to RAW sockets */
2404                         return;
2405                 }
2406                 break;
2407 
2408         case IPPROTO_DSTOPTS: {
2409                 ip6_dest_t      *desthdr;
2410                 uint_t          ehdrlen;
2411                 uint8_t         *optptr;
2412 
2413                 /* We already check for MIN_EHDR_LEN above */
2414 
2415                 /* Check if AH is present and needs to be processed. */
2416                 mp = ipsec_early_ah_v6(mp, ira);
2417                 if (mp == NULL)
2418                         return;
2419 
2420                 /*
2421                  * Reinitialize pointers, as ipsec_early_ah_v6() does
2422                  * complete pullups.  We don't have to do more pullups
2423                  * as a result.
2424                  */
2425                 ip6h = (ip6_t *)mp->b_rptr;
2426 
2427                 if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN)
2428                         goto pkt_too_short;
2429 
2430                 if (mp->b_cont != NULL &&
2431                     rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) {
2432                         ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira);
2433                         if (ip6h == NULL)
2434                                 goto discard;
2435                 }
2436                 desthdr = (ip6_dest_t *)(rptr + ip_hdr_length);
2437                 ehdrlen = 8 * (desthdr->ip6d_len + 1);
2438                 if (ira->ira_pktlen - ip_hdr_length < ehdrlen)
2439                         goto pkt_too_short;
2440                 if (mp->b_cont != NULL &&
2441                     rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) {
2442                         ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira);
2443                         if (ip6h == NULL)
2444                                 goto discard;
2445 
2446                         desthdr = (ip6_dest_t *)(rptr + ip_hdr_length);
2447                 }
2448                 optptr = (uint8_t *)&desthdr[1];
2449 
2450                 /*
2451                  * Update ira_ip_hdr_length to skip the destination header
2452                  * when we repeat.
2453                  */
2454                 ira->ira_ip_hdr_length += ehdrlen;
2455 
2456                 ira->ira_protocol = desthdr->ip6d_nxt;
2457 
2458                 /*
2459                  * Note: XXX This code does not seem to make
2460                  * distinction between Destination Options Header
2461                  * being before/after Routing Header which can
2462                  * happen if we are at the end of source route.
2463                  * This may become significant in future.
2464                  * (No real significant Destination Options are
2465                  * defined/implemented yet ).
2466                  */
2467                 switch (ip_process_options_v6(mp, ip6h, optptr,
2468                     ehdrlen - 2, IPPROTO_DSTOPTS, ira)) {
2469                 case -1:
2470                         /*
2471                          * Packet has been consumed and any needed
2472                          * ICMP errors sent.
2473                          */
2474                         return;
2475                 case 0:
2476                         /* No action needed; continue */
2477                         break;
2478                 case 1:
2479                         /*
2480                          * Unexpected return value
2481                          * (Router alert is a Hop-by-Hop option)
2482                          */
2483 #ifdef DEBUG
2484                         panic("ip_fanout_v6: router "
2485                             "alert hbh opt indication in dest opt");
2486                         /*NOTREACHED*/
2487 #else
2488                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2489                         ip_drop_input("ipIfStatsInDiscards", mp, ill);
2490                         freemsg(mp);
2491                         return;
2492 #endif
2493                 }
2494                 goto repeat;
2495         }
2496         case IPPROTO_FRAGMENT: {
2497                 ip6_frag_t *fraghdr;
2498 
2499                 if (ira->ira_pktlen - ip_hdr_length < sizeof (ip6_frag_t))
2500                         goto pkt_too_short;
2501 
2502                 if (mp->b_cont != NULL &&
2503                     rptr + ip_hdr_length + sizeof (ip6_frag_t) > mp->b_wptr) {
2504                         ip6h = ip_pullup(mp,
2505                             ip_hdr_length + sizeof (ip6_frag_t), ira);
2506                         if (ip6h == NULL)
2507                                 goto discard;
2508                 }
2509 
2510                 fraghdr = (ip6_frag_t *)(rptr + ip_hdr_length);
2511                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds);
2512 
2513                 /*
2514                  * Invoke the CGTP (multirouting) filtering module to
2515                  * process the incoming packet. Packets identified as
2516                  * duplicates must be discarded. Filtering is active
2517                  * only if the ip_cgtp_filter ndd variable is
2518                  * non-zero.
2519                  */
2520                 if (ipst->ips_ip_cgtp_filter &&
2521                     ipst->ips_ip_cgtp_filter_ops != NULL) {
2522                         int cgtp_flt_pkt;
2523                         netstackid_t stackid;
2524 
2525                         stackid = ipst->ips_netstack->netstack_stackid;
2526 
2527                         /*
2528                          * CGTP and IPMP are mutually exclusive so
2529                          * phyint_ifindex is fine here.
2530                          */
2531                         cgtp_flt_pkt =
2532                             ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6(
2533                             stackid, ill->ill_phyint->phyint_ifindex,
2534                             ip6h, fraghdr);
2535                         if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) {
2536                                 ip_drop_input("CGTP_IP_PKT_DUPLICATE", mp, ill);
2537                                 freemsg(mp);
2538                                 return;
2539                         }
2540                 }
2541 
2542                 /*
2543                  * Update ip_hdr_length to skip the frag header
2544                  * ip_input_fragment_v6 will determine the extension header
2545                  * prior to the fragment header and update its nexthdr value,
2546                  * and also set ira_protocol to the nexthdr that follows the
2547                  * completed fragment.
2548                  */
2549                 ip_hdr_length += sizeof (ip6_frag_t);
2550 
2551                 /*
2552                  * Make sure we have ira_l2src before we lose the original
2553                  * mblk
2554                  */
2555                 if (!(ira->ira_flags & IRAF_L2SRC_SET))
2556                         ip_setl2src(mp, ira, ira->ira_rill);
2557 
2558                 mp = ip_input_fragment_v6(mp, ip6h, fraghdr,
2559                     ira->ira_pktlen - ip_hdr_length, ira);
2560                 if (mp == NULL) {
2561                         /* Reassembly is still pending */
2562                         return;
2563                 }
2564                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs);
2565 
2566                 /*
2567                  * The mblk chain has the frag header removed and
2568                  * ira_protocol, ira_pktlen, ira_ip_hdr_length as well as the
2569                  * IP header has been updated to reflect the result.
2570                  */
2571                 ip6h = (ip6_t *)mp->b_rptr;
2572                 ip_hdr_length = ira->ira_ip_hdr_length;
2573                 goto repeat;
2574         }
2575         case IPPROTO_HOPOPTS:
2576                 /*
2577                  * Illegal header sequence.
2578                  * (Hop-by-hop headers are processed above
2579                  *  and required to immediately follow IPv6 header)
2580                  */
2581                 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
2582                 icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira);
2583                 return;
2584 
2585         case IPPROTO_ROUTING: {
2586                 uint_t ehdrlen;
2587                 ip6_rthdr_t *rthdr;
2588 
2589                 /* Check if AH is present and needs to be processed. */
2590                 mp = ipsec_early_ah_v6(mp, ira);
2591                 if (mp == NULL)
2592                         return;
2593 
2594                 /*
2595                  * Reinitialize pointers, as ipsec_early_ah_v6() does
2596                  * complete pullups.  We don't have to do more pullups
2597                  * as a result.
2598                  */
2599                 ip6h = (ip6_t *)mp->b_rptr;
2600 
2601                 if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN)
2602                         goto pkt_too_short;
2603 
2604                 if (mp->b_cont != NULL &&
2605                     rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) {
2606                         ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira);
2607                         if (ip6h == NULL)
2608                                 goto discard;
2609                 }
2610                 rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length);
2611                 protocol = ira->ira_protocol = rthdr->ip6r_nxt;
2612                 ehdrlen = 8 * (rthdr->ip6r_len + 1);
2613                 if (ira->ira_pktlen - ip_hdr_length < ehdrlen)
2614                         goto pkt_too_short;
2615                 if (mp->b_cont != NULL &&
2616                     rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) {
2617                         ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira);
2618                         if (ip6h == NULL)
2619                                 goto discard;
2620                         rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length);
2621                 }
2622                 if (rthdr->ip6r_segleft != 0) {
2623                         /* Not end of source route */
2624                         if (ira->ira_flags &
2625                             (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
2626                                 BUMP_MIB(ill->ill_ip_mib,
2627                                     ipIfStatsForwProhibits);
2628                                 ip_drop_input("ipIfStatsInForwProhibits",
2629                                     mp, ill);
2630                                 freemsg(mp);
2631                                 return;
2632                         }
2633                         ip_process_rthdr(mp, ip6h, rthdr, ira);
2634                         return;
2635                 }
2636                 ira->ira_ip_hdr_length += ehdrlen;
2637                 goto repeat;
2638         }
2639 
2640         case IPPROTO_AH:
2641         case IPPROTO_ESP: {
2642                 /*
2643                  * Fast path for AH/ESP.
2644                  */
2645                 netstack_t *ns = ipst->ips_netstack;
2646                 ipsec_stack_t *ipss = ns->netstack_ipsec;
2647 
2648                 IP_STAT(ipst, ipsec_proto_ahesp);
2649 
2650                 if (!ipsec_loaded(ipss)) {
2651                         ip_proto_not_sup(mp, ira);
2652                         return;
2653                 }
2654 
2655                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2656                 /* select inbound SA and have IPsec process the pkt */
2657                 if (protocol == IPPROTO_ESP) {
2658                         esph_t *esph;
2659 
2660                         mp = ipsec_inbound_esp_sa(mp, ira, &esph);
2661                         if (mp == NULL)
2662                                 return;
2663 
2664                         ASSERT(esph != NULL);
2665                         ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
2666                         ASSERT(ira->ira_ipsec_esp_sa != NULL);
2667                         ASSERT(ira->ira_ipsec_esp_sa->ipsa_input_func != NULL);
2668 
2669                         mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph,
2670                             ira);
2671                 } else {
2672                         ah_t *ah;
2673 
2674                         mp = ipsec_inbound_ah_sa(mp, ira, &ah);
2675                         if (mp == NULL)
2676                                 return;
2677 
2678                         ASSERT(ah != NULL);
2679                         ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
2680                         ASSERT(ira->ira_ipsec_ah_sa != NULL);
2681                         ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL);
2682                         mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah,
2683                             ira);
2684                 }
2685 
2686                 if (mp == NULL) {
2687                         /*
2688                          * Either it failed or is pending. In the former case
2689                          * ipIfStatsInDiscards was increased.
2690                          */
2691                         return;
2692                 }
2693                 /* we're done with IPsec processing, send it up */
2694                 ip_input_post_ipsec(mp, ira);
2695                 return;
2696         }
2697         case IPPROTO_NONE:
2698                 /* All processing is done. Count as "delivered". */
2699                 freemsg(mp);
2700                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2701                 return;
2702 
2703         case IPPROTO_ENCAP:
2704         case IPPROTO_IPV6:
2705                 /* iptun will verify trusted label */
2706                 connp = ipcl_classify_v6(mp, protocol, ip_hdr_length,
2707                     ira, ipst);
2708                 if (connp != NULL) {
2709                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2710                         ira->ira_ill = ira->ira_rill = NULL;
2711                         connp->conn_recv(connp, mp, NULL, ira);
2712                         CONN_DEC_REF(connp);
2713                         ira->ira_ill = ill;
2714                         ira->ira_rill = rill;
2715                         return;
2716                 }
2717                 /* FALLTHRU */
2718         default:
2719                 /*
2720                  * On a labeled system, we have to check whether the zone
2721                  * itself is permitted to receive raw traffic.
2722                  */
2723                 if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
2724                         if (!tsol_can_accept_raw(mp, ira, B_FALSE)) {
2725                                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2726                                 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2727                                 freemsg(mp);
2728                                 return;
2729                         }
2730                 }
2731                 break;
2732         }
2733 
2734         /*
2735          * The above input functions may have returned the pulled up message.
2736          * So ip6h need to be reinitialized.
2737          */
2738         ip6h = (ip6_t *)mp->b_rptr;
2739         ira->ira_protocol = protocol;
2740         if (ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head == NULL) {
2741                 /* No user-level listener for these packets */
2742                 ip_proto_not_sup(mp, ira);
2743                 return;
2744         }
2745 
2746         /*
2747          * Handle fanout to raw sockets.  There
2748          * can be more than one stream bound to a particular
2749          * protocol.  When this is the case, each one gets a copy
2750          * of any incoming packets.
2751          */
2752         ASSERT(ira->ira_protocol == protocol);
2753         ip_fanout_proto_v6(mp, ip6h, ira);
2754         return;
2755 
2756 pkt_too_short:
2757         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
2758         ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
2759         freemsg(mp);
2760         return;
2761 
2762 discard:
2763         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2764         ip_drop_input("ipIfStatsInDiscards", mp, ill);
2765         freemsg(mp);
2766 #undef rptr
2767 }