Print this page
11493 aggr needs support for multiple pseudo rx groups
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/io/mac/mac.c
          +++ new/usr/src/uts/common/io/mac/mac.c
↓ open down ↓ 1449 lines elided ↑ open up ↑
1450 1450  
1451 1451          for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next)
1452 1452                  ring->mr_flag &= ~flag;
1453 1453  }
1454 1454  
1455 1455  /*
1456 1456   * The following mac_hwrings_xxx() functions are private mac client functions
1457 1457   * used by the aggr driver to access and control the underlying HW Rx group
1458 1458   * and rings. In this case, the aggr driver has exclusive control of the
1459 1459   * underlying HW Rx group/rings, it calls the following functions to
1460      - * start/stop the HW Rx rings, disable/enable polling, add/remove mac'
     1460 + * start/stop the HW Rx rings, disable/enable polling, add/remove MAC
1461 1461   * addresses, or set up the Rx callback.
1462 1462   */
1463 1463  /* ARGSUSED */
1464 1464  static void
1465 1465  mac_hwrings_rx_process(void *arg, mac_resource_handle_t srs,
1466 1466      mblk_t *mp_chain, boolean_t loopback)
1467 1467  {
1468 1468          mac_soft_ring_set_t     *mac_srs = (mac_soft_ring_set_t *)srs;
1469 1469          mac_srs_rx_t            *srs_rx = &mac_srs->srs_rx;
1470 1470          mac_direct_rx_t         proc;
↓ open down ↓ 24 lines elided ↑ open up ↑
1495 1495          int                     cnt = 0;
1496 1496  
1497 1497          if (rtype == MAC_RING_TYPE_RX) {
1498 1498                  grp = flent->fe_rx_ring_group;
1499 1499          } else if (rtype == MAC_RING_TYPE_TX) {
1500 1500                  grp = flent->fe_tx_ring_group;
1501 1501          } else {
1502 1502                  ASSERT(B_FALSE);
1503 1503                  return (-1);
1504 1504          }
     1505 +
1505 1506          /*
1506      -         * The mac client did not reserve any RX group, return directly.
     1507 +         * The MAC client did not reserve an Rx group, return directly.
1507 1508           * This is probably because the underlying MAC does not support
1508 1509           * any groups.
1509 1510           */
1510 1511          if (hwgh != NULL)
1511 1512                  *hwgh = NULL;
1512 1513          if (grp == NULL)
1513 1514                  return (0);
1514 1515          /*
1515      -         * This group must be reserved by this mac client.
     1516 +         * This group must be reserved by this MAC client.
1516 1517           */
1517 1518          ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) &&
1518 1519              (mcip == MAC_GROUP_ONLY_CLIENT(grp)));
1519 1520  
1520 1521          for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next, cnt++) {
1521 1522                  ASSERT(cnt < MAX_RINGS_PER_GROUP);
1522 1523                  hwrh[cnt] = (mac_ring_handle_t)ring;
1523 1524          }
1524 1525          if (hwgh != NULL)
1525 1526                  *hwgh = (mac_group_handle_t)grp;
1526 1527  
1527 1528          return (cnt);
1528 1529  }
1529 1530  
1530 1531  /*
     1532 + * Get the HW ring handles of the given group index. If the MAC
     1533 + * doesn't have a group at this index, or any groups at all, then 0 is
     1534 + * returned and hwgh is set to NULL. This is a private client API. The
     1535 + * MAC perimeter must be held when calling this function.
     1536 + *
     1537 + * mh: A handle to the MAC that owns the group.
     1538 + *
     1539 + * idx: The index of the HW group to be read.
     1540 + *
     1541 + * hwgh: If non-NULL, contains a handle to the HW group on return.
     1542 + *
     1543 + * hwrh: An array of ring handles pointing to the HW rings in the
     1544 + * group. The array must be large enough to hold a handle to each ring
     1545 + * in the group. To be safe, this array should be of size MAX_RINGS_PER_GROUP.
     1546 + *
     1547 + * rtype: Used to determine if we are fetching Rx or Tx rings.
     1548 + *
     1549 + * Returns the number of rings in the group.
     1550 + */
     1551 +uint_t
     1552 +mac_hwrings_idx_get(mac_handle_t mh, uint_t idx, mac_group_handle_t *hwgh,
     1553 +    mac_ring_handle_t *hwrh, mac_ring_type_t rtype)
     1554 +{
     1555 +        mac_impl_t              *mip = (mac_impl_t *)mh;
     1556 +        mac_group_t             *grp;
     1557 +        mac_ring_t              *ring;
     1558 +        uint_t                  cnt = 0;
     1559 +
     1560 +        /*
     1561 +         * The MAC perimeter must be held when accessing the
     1562 +         * mi_{rx,tx}_groups fields.
     1563 +         */
     1564 +        ASSERT(MAC_PERIM_HELD(mh));
     1565 +        ASSERT(rtype == MAC_RING_TYPE_RX || rtype == MAC_RING_TYPE_TX);
     1566 +
     1567 +        if (rtype == MAC_RING_TYPE_RX) {
     1568 +                grp = mip->mi_rx_groups;
     1569 +        } else if (rtype == MAC_RING_TYPE_TX) {
     1570 +                grp = mip->mi_tx_groups;
     1571 +        }
     1572 +
     1573 +        while (grp != NULL && grp->mrg_index != idx)
     1574 +                grp = grp->mrg_next;
     1575 +
     1576 +        /*
     1577 +         * If the MAC doesn't have a group at this index or doesn't
     1578 +         * implement the RINGS capability, then set hwgh to NULL and return 0.
     1579 +         */
     1580 +        if (hwgh != NULL)
     1581 +                *hwgh = NULL;
     1582 +
     1583 +        if (grp == NULL)
     1584 +                return (0);
     1585 +
     1586 +        ASSERT3U(idx, ==, grp->mrg_index);
     1587 +
     1588 +        for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next, cnt++) {
     1589 +                ASSERT3U(cnt, <, MAX_RINGS_PER_GROUP);
     1590 +                hwrh[cnt] = (mac_ring_handle_t)ring;
     1591 +        }
     1592 +
     1593 +        /* A group should always have at least one ring. */
     1594 +        ASSERT3U(cnt, >, 0);
     1595 +
     1596 +        if (hwgh != NULL)
     1597 +                *hwgh = (mac_group_handle_t)grp;
     1598 +
     1599 +        return (cnt);
     1600 +}
     1601 +
     1602 +/*
1531 1603   * This function is called to get info about Tx/Rx rings.
1532 1604   *
1533 1605   * Return value: returns uint_t which will have various bits set
1534 1606   * that indicates different properties of the ring.
1535 1607   */
1536 1608  uint_t
1537 1609  mac_hwring_getinfo(mac_ring_handle_t rh)
1538 1610  {
1539 1611          mac_ring_t *ring = (mac_ring_t *)rh;
1540 1612          mac_ring_info_t *info = &ring->mr_info;
1541 1613  
1542 1614          return (info->mri_flags);
1543 1615  }
1544 1616  
1545 1617  /*
     1618 + * Set the passthru callback on the hardware ring.
     1619 + */
     1620 +void
     1621 +mac_hwring_set_passthru(mac_ring_handle_t hwrh, mac_rx_t fn, void *arg1,
     1622 +    mac_resource_handle_t arg2)
     1623 +{
     1624 +        mac_ring_t *hwring = (mac_ring_t *)hwrh;
     1625 +
     1626 +        ASSERT3S(hwring->mr_type, ==, MAC_RING_TYPE_RX);
     1627 +
     1628 +        hwring->mr_classify_type = MAC_PASSTHRU_CLASSIFIER;
     1629 +
     1630 +        hwring->mr_pt_fn = fn;
     1631 +        hwring->mr_pt_arg1 = arg1;
     1632 +        hwring->mr_pt_arg2 = arg2;
     1633 +}
     1634 +
     1635 +/*
     1636 + * Clear the passthru callback on the hardware ring.
     1637 + */
     1638 +void
     1639 +mac_hwring_clear_passthru(mac_ring_handle_t hwrh)
     1640 +{
     1641 +        mac_ring_t *hwring = (mac_ring_t *)hwrh;
     1642 +
     1643 +        ASSERT3S(hwring->mr_type, ==, MAC_RING_TYPE_RX);
     1644 +
     1645 +        hwring->mr_classify_type = MAC_NO_CLASSIFIER;
     1646 +
     1647 +        hwring->mr_pt_fn = NULL;
     1648 +        hwring->mr_pt_arg1 = NULL;
     1649 +        hwring->mr_pt_arg2 = NULL;
     1650 +}
     1651 +
     1652 +void
     1653 +mac_client_set_flow_cb(mac_client_handle_t mch, mac_rx_t func, void *arg1)
     1654 +{
     1655 +        mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
     1656 +        flow_entry_t            *flent = mcip->mci_flent;
     1657 +
     1658 +        mutex_enter(&flent->fe_lock);
     1659 +        flent->fe_cb_fn = (flow_fn_t)func;
     1660 +        flent->fe_cb_arg1 = arg1;
     1661 +        flent->fe_cb_arg2 = NULL;
     1662 +        flent->fe_flags &= ~FE_MC_NO_DATAPATH;
     1663 +        mutex_exit(&flent->fe_lock);
     1664 +}
     1665 +
     1666 +void
     1667 +mac_client_clear_flow_cb(mac_client_handle_t mch)
     1668 +{
     1669 +        mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
     1670 +        flow_entry_t            *flent = mcip->mci_flent;
     1671 +
     1672 +        mutex_enter(&flent->fe_lock);
     1673 +        flent->fe_cb_fn = (flow_fn_t)mac_pkt_drop;
     1674 +        flent->fe_cb_arg1 = NULL;
     1675 +        flent->fe_cb_arg2 = NULL;
     1676 +        flent->fe_flags |= FE_MC_NO_DATAPATH;
     1677 +        mutex_exit(&flent->fe_lock);
     1678 +}
     1679 +
     1680 +/*
1546 1681   * Export ddi interrupt handles from the HW ring to the pseudo ring and
1547 1682   * setup the RX callback of the mac client which exclusively controls
1548 1683   * HW ring.
1549 1684   */
1550 1685  void
1551 1686  mac_hwring_setup(mac_ring_handle_t hwrh, mac_resource_handle_t prh,
1552 1687      mac_ring_handle_t pseudo_rh)
1553 1688  {
1554 1689          mac_ring_t              *hw_ring = (mac_ring_t *)hwrh;
1555 1690          mac_ring_t              *pseudo_ring;
↓ open down ↓ 51 lines elided ↑ open up ↑
1607 1742  
1608 1743  int
1609 1744  mac_hwring_enable_intr(mac_ring_handle_t rh)
1610 1745  {
1611 1746          mac_ring_t *rr_ring = (mac_ring_t *)rh;
1612 1747          mac_intr_t *intr = &rr_ring->mr_info.mri_intr;
1613 1748  
1614 1749          return (intr->mi_enable(intr->mi_handle));
1615 1750  }
1616 1751  
     1752 +/*
     1753 + * Start the HW ring pointed to by rh.
     1754 + *
     1755 + * This is used by special MAC clients that are MAC themselves and
     1756 + * need to exert control over the underlying HW rings of the NIC.
     1757 + */
1617 1758  int
1618 1759  mac_hwring_start(mac_ring_handle_t rh)
1619 1760  {
1620 1761          mac_ring_t *rr_ring = (mac_ring_t *)rh;
     1762 +        int rv = 0;
1621 1763  
     1764 +        if (rr_ring->mr_state != MR_INUSE)
     1765 +                rv = mac_start_ring(rr_ring);
     1766 +
     1767 +        return (rv);
     1768 +}
     1769 +
     1770 +/*
     1771 + * Stop the HW ring pointed to by rh. Also see mac_hwring_start().
     1772 + */
     1773 +void
     1774 +mac_hwring_stop(mac_ring_handle_t rh)
     1775 +{
     1776 +        mac_ring_t *rr_ring = (mac_ring_t *)rh;
     1777 +
     1778 +        if (rr_ring->mr_state != MR_FREE)
     1779 +                mac_stop_ring(rr_ring);
     1780 +}
     1781 +
     1782 +/*
     1783 + * Remove the quiesced flag from the HW ring pointed to by rh.
     1784 + *
     1785 + * This is used by special MAC clients that are MAC themselves and
     1786 + * need to exert control over the underlying HW rings of the NIC.
     1787 + */
     1788 +int
     1789 +mac_hwring_activate(mac_ring_handle_t rh)
     1790 +{
     1791 +        mac_ring_t *rr_ring = (mac_ring_t *)rh;
     1792 +
1622 1793          MAC_RING_UNMARK(rr_ring, MR_QUIESCE);
1623 1794          return (0);
1624 1795  }
1625 1796  
     1797 +/*
     1798 + * Quiesce the HW ring pointed to by rh. Also see mac_hwring_activate().
     1799 + */
1626 1800  void
1627      -mac_hwring_stop(mac_ring_handle_t rh)
     1801 +mac_hwring_quiesce(mac_ring_handle_t rh)
1628 1802  {
1629 1803          mac_ring_t *rr_ring = (mac_ring_t *)rh;
1630 1804  
1631 1805          mac_rx_ring_quiesce(rr_ring, MR_QUIESCE);
1632 1806  }
1633 1807  
1634 1808  mblk_t *
1635 1809  mac_hwring_poll(mac_ring_handle_t rh, int bytes_to_pickup)
1636 1810  {
1637 1811          mac_ring_t *rr_ring = (mac_ring_t *)rh;
↓ open down ↓ 127 lines elided ↑ open up ↑
1765 1939   */
1766 1940  boolean_t
1767 1941  mac_has_hw_vlan(mac_handle_t mh)
1768 1942  {
1769 1943          mac_impl_t *mip = (mac_impl_t *)mh;
1770 1944  
1771 1945          return (MAC_GROUP_HW_VLAN(mip->mi_rx_groups));
1772 1946  }
1773 1947  
1774 1948  /*
     1949 + * Get the number of Rx HW groups on this MAC.
     1950 + */
     1951 +uint_t
     1952 +mac_get_num_rx_groups(mac_handle_t mh)
     1953 +{
     1954 +        mac_impl_t *mip = (mac_impl_t *)mh;
     1955 +
     1956 +        ASSERT(MAC_PERIM_HELD(mh));
     1957 +        return (mip->mi_rx_group_count);
     1958 +}
     1959 +
     1960 +int
     1961 +mac_set_promisc(mac_handle_t mh, boolean_t value)
     1962 +{
     1963 +        mac_impl_t *mip = (mac_impl_t *)mh;
     1964 +
     1965 +        ASSERT(MAC_PERIM_HELD(mh));
     1966 +        return (i_mac_promisc_set(mip, value));
     1967 +}
     1968 +
     1969 +/*
1775 1970   * Set the RX group to be shared/reserved. Note that the group must be
1776 1971   * started/stopped outside of this function.
1777 1972   */
1778 1973  void
1779 1974  mac_set_group_state(mac_group_t *grp, mac_group_state_t state)
1780 1975  {
1781 1976          /*
1782 1977           * If there is no change in the group state, just return.
1783 1978           */
1784 1979          if (grp->mrg_state == state)
↓ open down ↓ 673 lines elided ↑ open up ↑
2458 2653   * incoming packets to the right flow.
2459 2654   */
2460 2655  /* ARGSUSED */
2461 2656  static mblk_t *
2462 2657  mac_rx_classify(mac_impl_t *mip, mac_resource_handle_t mrh, mblk_t *mp)
2463 2658  {
2464 2659          flow_entry_t    *flent = NULL;
2465 2660          uint_t          flags = FLOW_INBOUND;
2466 2661          int             err;
2467 2662  
2468      -        /*
2469      -         * If the MAC is a port of an aggregation, pass FLOW_IGNORE_VLAN
2470      -         * to mac_flow_lookup() so that the VLAN packets can be successfully
2471      -         * passed to the non-VLAN aggregation flows.
2472      -         *
2473      -         * Note that there is possibly a race between this and
2474      -         * mac_unicast_remove/add() and VLAN packets could be incorrectly
2475      -         * classified to non-VLAN flows of non-aggregation MAC clients. These
2476      -         * VLAN packets will be then filtered out by the MAC module.
2477      -         */
2478      -        if ((mip->mi_state_flags & MIS_EXCLUSIVE) != 0)
2479      -                flags |= FLOW_IGNORE_VLAN;
2480      -
2481 2663          err = mac_flow_lookup(mip->mi_flow_tab, mp, flags, &flent);
2482 2664          if (err != 0) {
2483 2665                  /* no registered receive function */
2484 2666                  return (mp);
2485 2667          } else {
2486 2668                  mac_client_impl_t       *mcip;
2487 2669  
2488 2670                  /*
2489 2671                   * This flent might just be an additional one on the MAC client,
2490 2672                   * i.e. for classification purposes (different fdesc), however
↓ open down ↓ 1313 lines elided ↑ open up ↑
3804 3986  {
3805 3987          mac_ring_t      *ring;
3806 3988          int             rv = 0;
3807 3989  
3808 3990          ASSERT(group->mrg_state == MAC_GROUP_STATE_REGISTERED);
3809 3991          if ((rv = mac_start_group(group)) != 0)
3810 3992                  return (rv);
3811 3993  
3812 3994          for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) {
3813 3995                  ASSERT(ring->mr_state == MR_FREE);
     3996 +
3814 3997                  if ((rv = mac_start_ring(ring)) != 0)
3815 3998                          goto error;
3816      -                ring->mr_classify_type = MAC_SW_CLASSIFIER;
     3999 +
     4000 +                /*
     4001 +                 * When aggr_set_port_sdu() is called, it will remove
     4002 +                 * the port client's unicast address. This will cause
     4003 +                 * MAC to stop the default group's rings on the port
     4004 +                 * MAC. After it modifies the SDU, it will then re-add
     4005 +                 * the unicast address. At which time, this function is
     4006 +                 * called to start the default group's rings. Normally
     4007 +                 * this function would set the classify type to
     4008 +                 * MAC_SW_CLASSIFIER; but that will break aggr which
     4009 +                 * relies on the passthru classify mode being set for
     4010 +                 * correct delivery (see mac_rx_common()). To avoid
     4011 +                 * that, we check for a passthru callback and set the
     4012 +                 * classify type to MAC_PASSTHRU_CLASSIFIER; as it was
     4013 +                 * before the rings were stopped.
     4014 +                 */
     4015 +                ring->mr_classify_type = (ring->mr_pt_fn != NULL) ?
     4016 +                    MAC_PASSTHRU_CLASSIFIER : MAC_SW_CLASSIFIER;
3817 4017          }
3818 4018          return (0);
3819 4019  
3820 4020  error:
3821 4021          mac_stop_group_and_rings(group);
3822 4022          return (rv);
3823 4023  }
3824 4024  
3825 4025  /* Called from mac_stop on the default Rx group */
3826 4026  static void
↓ open down ↓ 4678 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX