diff options
Diffstat (limited to 'drivers/net/ethernet')
408 files changed, 18863 insertions, 12298 deletions
diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c index 3da97996bdf3..8cafd06ff0c4 100644 --- a/drivers/net/ethernet/3com/3c509.c +++ b/drivers/net/ethernet/3com/3c509.c @@ -196,7 +196,7 @@ static struct net_device_stats *el3_get_stats(struct net_device *dev); static int el3_rx(struct net_device *dev); static int el3_close(struct net_device *dev); static void set_multicast_list(struct net_device *dev); -static void el3_tx_timeout (struct net_device *dev); +static void el3_tx_timeout (struct net_device *dev, unsigned int txqueue); static void el3_down(struct net_device *dev); static void el3_up(struct net_device *dev); static const struct ethtool_ops ethtool_ops; @@ -689,7 +689,7 @@ el3_open(struct net_device *dev) } static void -el3_tx_timeout (struct net_device *dev) +el3_tx_timeout (struct net_device *dev, unsigned int txqueue) { int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c index b15752267c8d..1e233e2f0a5a 100644 --- a/drivers/net/ethernet/3com/3c515.c +++ b/drivers/net/ethernet/3com/3c515.c @@ -371,7 +371,7 @@ static void corkscrew_timer(struct timer_list *t); static netdev_tx_t corkscrew_start_xmit(struct sk_buff *skb, struct net_device *dev); static int corkscrew_rx(struct net_device *dev); -static void corkscrew_timeout(struct net_device *dev); +static void corkscrew_timeout(struct net_device *dev, unsigned int txqueue); static int boomerang_rx(struct net_device *dev); static irqreturn_t corkscrew_interrupt(int irq, void *dev_id); static int corkscrew_close(struct net_device *dev); @@ -961,7 +961,7 @@ static void corkscrew_timer(struct timer_list *t) #endif /* AUTOMEDIA */ } -static void corkscrew_timeout(struct net_device *dev) +static void corkscrew_timeout(struct net_device *dev, unsigned int txqueue) { int i; struct corkscrew_private *vp = netdev_priv(dev); diff --git a/drivers/net/ethernet/3com/3c574_cs.c b/drivers/net/ethernet/3com/3c574_cs.c index 
3044a6f35f04..ef1c3151fbb2 100644 --- a/drivers/net/ethernet/3com/3c574_cs.c +++ b/drivers/net/ethernet/3com/3c574_cs.c @@ -234,7 +234,7 @@ static void update_stats(struct net_device *dev); static struct net_device_stats *el3_get_stats(struct net_device *dev); static int el3_rx(struct net_device *dev, int worklimit); static int el3_close(struct net_device *dev); -static void el3_tx_timeout(struct net_device *dev); +static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue); static int el3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static void set_rx_mode(struct net_device *dev); static void set_multicast_list(struct net_device *dev); @@ -690,7 +690,7 @@ static int el3_open(struct net_device *dev) return 0; } -static void el3_tx_timeout(struct net_device *dev) +static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue) { unsigned int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/3com/3c589_cs.c b/drivers/net/ethernet/3com/3c589_cs.c index 2b2695311bda..d47cde6c5f08 100644 --- a/drivers/net/ethernet/3com/3c589_cs.c +++ b/drivers/net/ethernet/3com/3c589_cs.c @@ -173,7 +173,7 @@ static void update_stats(struct net_device *dev); static struct net_device_stats *el3_get_stats(struct net_device *dev); static int el3_rx(struct net_device *dev); static int el3_close(struct net_device *dev); -static void el3_tx_timeout(struct net_device *dev); +static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue); static void set_rx_mode(struct net_device *dev); static void set_multicast_list(struct net_device *dev); static const struct ethtool_ops netdev_ethtool_ops; @@ -526,7 +526,7 @@ static int el3_open(struct net_device *dev) return 0; } -static void el3_tx_timeout(struct net_device *dev) +static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue) { unsigned int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 
8785c2ff3825..a2b7f7ab8170 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -776,7 +776,7 @@ static void set_rx_mode(struct net_device *dev); #ifdef CONFIG_PCI static int vortex_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); #endif -static void vortex_tx_timeout(struct net_device *dev); +static void vortex_tx_timeout(struct net_device *dev, unsigned int txqueue); static void acpi_set_WOL(struct net_device *dev); static const struct ethtool_ops vortex_ethtool_ops; static void set_8021q_mode(struct net_device *dev, int enable); @@ -1548,7 +1548,7 @@ vortex_up(struct net_device *dev) struct vortex_private *vp = netdev_priv(dev); void __iomem *ioaddr = vp->ioaddr; unsigned int config; - int i, mii_reg1, mii_reg5, err = 0; + int i, mii_reg5, err = 0; if (VORTEX_PCI(vp)) { pci_set_power_state(VORTEX_PCI(vp), PCI_D0); /* Go active */ @@ -1605,7 +1605,7 @@ vortex_up(struct net_device *dev) window_write32(vp, config, 3, Wn3_Config); if (dev->if_port == XCVR_MII || dev->if_port == XCVR_NWAY) { - mii_reg1 = mdio_read(dev, vp->phys[0], MII_BMSR); + mdio_read(dev, vp->phys[0], MII_BMSR); mii_reg5 = mdio_read(dev, vp->phys[0], MII_LPA); vp->partner_flow_ctrl = ((mii_reg5 & 0x0400) != 0); vp->mii.full_duplex = vp->full_duplex; @@ -1877,7 +1877,7 @@ leave_media_alone: iowrite16(FakeIntr, ioaddr + EL3_CMD); } -static void vortex_tx_timeout(struct net_device *dev) +static void vortex_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct vortex_private *vp = netdev_priv(dev); void __iomem *ioaddr = vp->ioaddr; diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index be823c186517..14fce6658106 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -2013,7 +2013,7 @@ typhoon_stop_runtime(struct typhoon *tp, int wait_type) } static void -typhoon_tx_timeout(struct net_device *dev) +typhoon_tx_timeout(struct net_device *dev, unsigned int txqueue) { 
struct typhoon *tp = netdev_priv(dev); diff --git a/drivers/net/ethernet/8390/8390.c b/drivers/net/ethernet/8390/8390.c index 78f3e532c600..0e0aa4016858 100644 --- a/drivers/net/ethernet/8390/8390.c +++ b/drivers/net/ethernet/8390/8390.c @@ -36,9 +36,9 @@ void ei_set_multicast_list(struct net_device *dev) } EXPORT_SYMBOL(ei_set_multicast_list); -void ei_tx_timeout(struct net_device *dev) +void ei_tx_timeout(struct net_device *dev, unsigned int txqueue) { - __ei_tx_timeout(dev); + __ei_tx_timeout(dev, txqueue); } EXPORT_SYMBOL(ei_tx_timeout); diff --git a/drivers/net/ethernet/8390/8390.h b/drivers/net/ethernet/8390/8390.h index 3e2f2c2e7b58..529c728f334a 100644 --- a/drivers/net/ethernet/8390/8390.h +++ b/drivers/net/ethernet/8390/8390.h @@ -32,7 +32,7 @@ void NS8390_init(struct net_device *dev, int startp); int ei_open(struct net_device *dev); int ei_close(struct net_device *dev); irqreturn_t ei_interrupt(int irq, void *dev_id); -void ei_tx_timeout(struct net_device *dev); +void ei_tx_timeout(struct net_device *dev, unsigned int txqueue); netdev_tx_t ei_start_xmit(struct sk_buff *skb, struct net_device *dev); void ei_set_multicast_list(struct net_device *dev); struct net_device_stats *ei_get_stats(struct net_device *dev); @@ -50,7 +50,7 @@ void NS8390p_init(struct net_device *dev, int startp); int eip_open(struct net_device *dev); int eip_close(struct net_device *dev); irqreturn_t eip_interrupt(int irq, void *dev_id); -void eip_tx_timeout(struct net_device *dev); +void eip_tx_timeout(struct net_device *dev, unsigned int txqueue); netdev_tx_t eip_start_xmit(struct sk_buff *skb, struct net_device *dev); void eip_set_multicast_list(struct net_device *dev); struct net_device_stats *eip_get_stats(struct net_device *dev); diff --git a/drivers/net/ethernet/8390/8390p.c b/drivers/net/ethernet/8390/8390p.c index 6cf36992a2c6..6834742057b3 100644 --- a/drivers/net/ethernet/8390/8390p.c +++ b/drivers/net/ethernet/8390/8390p.c @@ -41,9 +41,9 @@ void 
eip_set_multicast_list(struct net_device *dev) } EXPORT_SYMBOL(eip_set_multicast_list); -void eip_tx_timeout(struct net_device *dev) +void eip_tx_timeout(struct net_device *dev, unsigned int txqueue) { - __ei_tx_timeout(dev); + __ei_tx_timeout(dev, txqueue); } EXPORT_SYMBOL(eip_tx_timeout); diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c index 0b6bbf63f7ca..aeae7966a082 100644 --- a/drivers/net/ethernet/8390/axnet_cs.c +++ b/drivers/net/ethernet/8390/axnet_cs.c @@ -83,7 +83,7 @@ static netdev_tx_t axnet_start_xmit(struct sk_buff *skb, struct net_device *dev); static struct net_device_stats *get_stats(struct net_device *dev); static void set_multicast_list(struct net_device *dev); -static void axnet_tx_timeout(struct net_device *dev); +static void axnet_tx_timeout(struct net_device *dev, unsigned int txqueue); static irqreturn_t ei_irq_wrapper(int irq, void *dev_id); static void ei_watchdog(struct timer_list *t); static void axnet_reset_8390(struct net_device *dev); @@ -903,7 +903,7 @@ static int ax_close(struct net_device *dev) * completed (or failed) - i.e. never posted a Tx related interrupt. */ -static void axnet_tx_timeout(struct net_device *dev) +static void axnet_tx_timeout(struct net_device *dev, unsigned int txqueue) { long e8390_base = dev->base_addr; struct ei_device *ei_local = netdev_priv(dev); diff --git a/drivers/net/ethernet/8390/lib8390.c b/drivers/net/ethernet/8390/lib8390.c index c9c55c9eab9f..babc92e2692e 100644 --- a/drivers/net/ethernet/8390/lib8390.c +++ b/drivers/net/ethernet/8390/lib8390.c @@ -251,7 +251,7 @@ static int __ei_close(struct net_device *dev) * completed (or failed) - i.e. never posted a Tx related interrupt. 
*/ -static void __ei_tx_timeout(struct net_device *dev) +static void __ei_tx_timeout(struct net_device *dev, unsigned int txqueue) { unsigned long e8390_base = dev->base_addr; struct ei_device *ei_local = netdev_priv(dev); diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index 816540e6beac..165d18405b0c 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -576,7 +576,7 @@ static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *dev, int phy_id, int location, int value); static int netdev_open(struct net_device *dev); static void check_duplex(struct net_device *dev); -static void tx_timeout(struct net_device *dev); +static void tx_timeout(struct net_device *dev, unsigned int txqueue); static void init_ring(struct net_device *dev); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); static irqreturn_t intr_handler(int irq, void *dev_instance); @@ -1105,7 +1105,7 @@ static void check_duplex(struct net_device *dev) } -static void tx_timeout(struct net_device *dev) +static void tx_timeout(struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->base; diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c index 174344c450af..3c51d8c502ed 100644 --- a/drivers/net/ethernet/agere/et131x.c +++ b/drivers/net/ethernet/agere/et131x.c @@ -3811,7 +3811,7 @@ drop_err: * specified by the 'tx_timeo" element in the net_device structure (see * et131x_alloc_device() to see how this value is set). 
*/ -static void et131x_tx_timeout(struct net_device *netdev) +static void et131x_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct et131x_adapter *adapter = netdev_priv(netdev); struct tx_ring *tx_ring = &adapter->tx_ring; diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c index 0537df06a9b5..5ea806423e4c 100644 --- a/drivers/net/ethernet/allwinner/sun4i-emac.c +++ b/drivers/net/ethernet/allwinner/sun4i-emac.c @@ -407,7 +407,7 @@ static void emac_init_device(struct net_device *dev) } /* Our watchdog timed out. Called by the networking layer */ -static void emac_timeout(struct net_device *dev) +static void emac_timeout(struct net_device *dev, unsigned int txqueue) { struct emac_board_info *db = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c index 46b4207d3266..f366faf88eee 100644 --- a/drivers/net/ethernet/alteon/acenic.c +++ b/drivers/net/ethernet/alteon/acenic.c @@ -437,7 +437,7 @@ static const struct ethtool_ops ace_ethtool_ops = { .set_link_ksettings = ace_set_link_ksettings, }; -static void ace_watchdog(struct net_device *dev); +static void ace_watchdog(struct net_device *dev, unsigned int txqueue); static const struct net_device_ops ace_netdev_ops = { .ndo_open = ace_open, @@ -1542,7 +1542,7 @@ static void ace_set_rxtx_parms(struct net_device *dev, int jumbo) } -static void ace_watchdog(struct net_device *data) +static void ace_watchdog(struct net_device *data, unsigned int txqueue) { struct net_device *dev = data; struct ace_private *ap = netdev_priv(dev); diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index fc96c66b44cb..b4e891d49a94 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -740,7 +740,9 @@ static int ena_set_channels(struct net_device *netdev, struct ena_adapter *adapter = 
netdev_priv(netdev); u32 count = channels->combined_count; /* The check for max value is already done in ethtool */ - if (count < ENA_MIN_NUM_IO_QUEUES) + if (count < ENA_MIN_NUM_IO_QUEUES || + (ena_xdp_present(adapter) && + !ena_xdp_legal_queue_count(adapter, channels->combined_count))) return -EINVAL; return ena_update_queue_count(adapter, count); diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 948583fdcc28..894e8c1a8cf1 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -36,7 +36,6 @@ #include <linux/cpu_rmap.h> #endif /* CONFIG_RFS_ACCEL */ #include <linux/ethtool.h> -#include <linux/if_vlan.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/numa.h> @@ -47,6 +46,7 @@ #include <net/ip.h> #include "ena_netdev.h" +#include <linux/bpf_trace.h> #include "ena_pci_id_tbl.h" static char version[] = DEVICE_NAME " v" DRV_MODULE_VERSION "\n"; @@ -78,7 +78,37 @@ static void check_for_admin_com_state(struct ena_adapter *adapter); static void ena_destroy_device(struct ena_adapter *adapter, bool graceful); static int ena_restore_device(struct ena_adapter *adapter); -static void ena_tx_timeout(struct net_device *dev) +static void ena_init_io_rings(struct ena_adapter *adapter, + int first_index, int count); +static void ena_init_napi_in_range(struct ena_adapter *adapter, int first_index, + int count); +static void ena_del_napi_in_range(struct ena_adapter *adapter, int first_index, + int count); +static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid); +static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter, + int first_index, + int count); +static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid); +static void ena_free_tx_resources(struct ena_adapter *adapter, int qid); +static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget); +static void ena_destroy_all_tx_queues(struct 
ena_adapter *adapter); +static void ena_free_all_io_tx_resources(struct ena_adapter *adapter); +static void ena_napi_disable_in_range(struct ena_adapter *adapter, + int first_index, int count); +static void ena_napi_enable_in_range(struct ena_adapter *adapter, + int first_index, int count); +static int ena_up(struct ena_adapter *adapter); +static void ena_down(struct ena_adapter *adapter); +static void ena_unmask_interrupt(struct ena_ring *tx_ring, + struct ena_ring *rx_ring); +static void ena_update_ring_numa_node(struct ena_ring *tx_ring, + struct ena_ring *rx_ring); +static void ena_unmap_tx_buff(struct ena_ring *tx_ring, + struct ena_tx_buffer *tx_info); +static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter, + int first_index, int count); + +static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ena_adapter *adapter = netdev_priv(dev); @@ -123,6 +153,448 @@ static int ena_change_mtu(struct net_device *dev, int new_mtu) return ret; } +static int ena_xmit_common(struct net_device *dev, + struct ena_ring *ring, + struct ena_tx_buffer *tx_info, + struct ena_com_tx_ctx *ena_tx_ctx, + u16 next_to_use, + u32 bytes) +{ + struct ena_adapter *adapter = netdev_priv(dev); + int rc, nb_hw_desc; + + if (unlikely(ena_com_is_doorbell_needed(ring->ena_com_io_sq, + ena_tx_ctx))) { + netif_dbg(adapter, tx_queued, dev, + "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n", + ring->qid); + ena_com_write_sq_doorbell(ring->ena_com_io_sq); + } + + /* prepare the packet's descriptors to dma engine */ + rc = ena_com_prepare_tx(ring->ena_com_io_sq, ena_tx_ctx, + &nb_hw_desc); + + /* In case there isn't enough space in the queue for the packet, + * we simply drop it. All other failure reasons of + * ena_com_prepare_tx() are fatal and therefore require a device reset. 
+ */ + if (unlikely(rc)) { + netif_err(adapter, tx_queued, dev, + "failed to prepare tx bufs\n"); + u64_stats_update_begin(&ring->syncp); + ring->tx_stats.prepare_ctx_err++; + u64_stats_update_end(&ring->syncp); + if (rc != -ENOMEM) { + adapter->reset_reason = + ENA_REGS_RESET_DRIVER_INVALID_STATE; + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + } + return rc; + } + + u64_stats_update_begin(&ring->syncp); + ring->tx_stats.cnt++; + ring->tx_stats.bytes += bytes; + u64_stats_update_end(&ring->syncp); + + tx_info->tx_descs = nb_hw_desc; + tx_info->last_jiffies = jiffies; + tx_info->print_once = 0; + + ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, + ring->ring_size); + return 0; +} + +/* This is the XDP napi callback. XDP queues use a separate napi callback + * than Rx/Tx queues. + */ +static int ena_xdp_io_poll(struct napi_struct *napi, int budget) +{ + struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); + u32 xdp_work_done, xdp_budget; + struct ena_ring *xdp_ring; + int napi_comp_call = 0; + int ret; + + xdp_ring = ena_napi->xdp_ring; + xdp_ring->first_interrupt = ena_napi->first_interrupt; + + xdp_budget = budget; + + if (!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags) || + test_bit(ENA_FLAG_TRIGGER_RESET, &xdp_ring->adapter->flags)) { + napi_complete_done(napi, 0); + return 0; + } + + xdp_work_done = ena_clean_xdp_irq(xdp_ring, xdp_budget); + + /* If the device is about to reset or down, avoid unmask + * the interrupt and return 0 so NAPI won't reschedule + */ + if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags))) { + napi_complete_done(napi, 0); + ret = 0; + } else if (xdp_budget > xdp_work_done) { + napi_comp_call = 1; + if (napi_complete_done(napi, xdp_work_done)) + ena_unmask_interrupt(xdp_ring, NULL); + ena_update_ring_numa_node(xdp_ring, NULL); + ret = xdp_work_done; + } else { + ret = xdp_budget; + } + + u64_stats_update_begin(&xdp_ring->syncp); + xdp_ring->tx_stats.napi_comp += napi_comp_call; + 
xdp_ring->tx_stats.tx_poll++; + u64_stats_update_end(&xdp_ring->syncp); + + return ret; +} + +static int ena_xdp_tx_map_buff(struct ena_ring *xdp_ring, + struct ena_tx_buffer *tx_info, + struct xdp_buff *xdp, + void **push_hdr, + u32 *push_len) +{ + struct ena_adapter *adapter = xdp_ring->adapter; + struct ena_com_buf *ena_buf; + dma_addr_t dma = 0; + u32 size; + + tx_info->xdpf = convert_to_xdp_frame(xdp); + size = tx_info->xdpf->len; + ena_buf = tx_info->bufs; + + /* llq push buffer */ + *push_len = min_t(u32, size, xdp_ring->tx_max_header_size); + *push_hdr = tx_info->xdpf->data; + + if (size - *push_len > 0) { + dma = dma_map_single(xdp_ring->dev, + *push_hdr + *push_len, + size - *push_len, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(xdp_ring->dev, dma))) + goto error_report_dma_error; + + tx_info->map_linear_data = 1; + tx_info->num_of_bufs = 1; + } + + ena_buf->paddr = dma; + ena_buf->len = size; + + return 0; + +error_report_dma_error: + u64_stats_update_begin(&xdp_ring->syncp); + xdp_ring->tx_stats.dma_mapping_err++; + u64_stats_update_end(&xdp_ring->syncp); + netdev_warn(adapter->netdev, "failed to map xdp buff\n"); + + xdp_return_frame_rx_napi(tx_info->xdpf); + tx_info->xdpf = NULL; + tx_info->num_of_bufs = 0; + + return -EINVAL; +} + +static int ena_xdp_xmit_buff(struct net_device *dev, + struct xdp_buff *xdp, + int qid, + struct ena_rx_buffer *rx_info) +{ + struct ena_adapter *adapter = netdev_priv(dev); + struct ena_com_tx_ctx ena_tx_ctx = {0}; + struct ena_tx_buffer *tx_info; + struct ena_ring *xdp_ring; + u16 next_to_use, req_id; + int rc; + void *push_hdr; + u32 push_len; + + xdp_ring = &adapter->tx_ring[qid]; + next_to_use = xdp_ring->next_to_use; + req_id = xdp_ring->free_ids[next_to_use]; + tx_info = &xdp_ring->tx_buffer_info[req_id]; + tx_info->num_of_bufs = 0; + page_ref_inc(rx_info->page); + tx_info->xdp_rx_page = rx_info->page; + + rc = ena_xdp_tx_map_buff(xdp_ring, tx_info, xdp, &push_hdr, &push_len); + if (unlikely(rc)) + goto 
error_drop_packet; + + ena_tx_ctx.ena_bufs = tx_info->bufs; + ena_tx_ctx.push_header = push_hdr; + ena_tx_ctx.num_bufs = tx_info->num_of_bufs; + ena_tx_ctx.req_id = req_id; + ena_tx_ctx.header_len = push_len; + + rc = ena_xmit_common(dev, + xdp_ring, + tx_info, + &ena_tx_ctx, + next_to_use, + xdp->data_end - xdp->data); + if (rc) + goto error_unmap_dma; + /* trigger the dma engine. ena_com_write_sq_doorbell() + * has a mb + */ + ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq); + u64_stats_update_begin(&xdp_ring->syncp); + xdp_ring->tx_stats.doorbells++; + u64_stats_update_end(&xdp_ring->syncp); + + return NETDEV_TX_OK; + +error_unmap_dma: + ena_unmap_tx_buff(xdp_ring, tx_info); + tx_info->xdpf = NULL; +error_drop_packet: + + return NETDEV_TX_OK; +} + +static int ena_xdp_execute(struct ena_ring *rx_ring, + struct xdp_buff *xdp, + struct ena_rx_buffer *rx_info) +{ + struct bpf_prog *xdp_prog; + u32 verdict = XDP_PASS; + + rcu_read_lock(); + xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog); + + if (!xdp_prog) + goto out; + + verdict = bpf_prog_run_xdp(xdp_prog, xdp); + + if (verdict == XDP_TX) + ena_xdp_xmit_buff(rx_ring->netdev, + xdp, + rx_ring->qid + rx_ring->adapter->num_io_queues, + rx_info); + else if (unlikely(verdict == XDP_ABORTED)) + trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); + else if (unlikely(verdict > XDP_TX)) + bpf_warn_invalid_xdp_action(verdict); +out: + rcu_read_unlock(); + return verdict; +} + +static void ena_init_all_xdp_queues(struct ena_adapter *adapter) +{ + adapter->xdp_first_ring = adapter->num_io_queues; + adapter->xdp_num_queues = adapter->num_io_queues; + + ena_init_io_rings(adapter, + adapter->xdp_first_ring, + adapter->xdp_num_queues); +} + +static int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter) +{ + int rc = 0; + + rc = ena_setup_tx_resources_in_range(adapter, adapter->xdp_first_ring, + adapter->xdp_num_queues); + if (rc) + goto setup_err; + + rc = ena_create_io_tx_queues_in_range(adapter, + 
adapter->xdp_first_ring, + adapter->xdp_num_queues); + if (rc) + goto create_err; + + return 0; + +create_err: + ena_free_all_io_tx_resources(adapter); +setup_err: + return rc; +} + +/* Provides a way for both kernel and bpf-prog to know + * more about the RX-queue a given XDP frame arrived on. + */ +static int ena_xdp_register_rxq_info(struct ena_ring *rx_ring) +{ + int rc; + + rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid); + + if (rc) { + netif_err(rx_ring->adapter, ifup, rx_ring->netdev, + "Failed to register xdp rx queue info. RX queue num %d rc: %d\n", + rx_ring->qid, rc); + goto err; + } + + rc = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, + NULL); + + if (rc) { + netif_err(rx_ring->adapter, ifup, rx_ring->netdev, + "Failed to register xdp rx queue info memory model. RX queue num %d rc: %d\n", + rx_ring->qid, rc); + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + } + +err: + return rc; +} + +static void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring) +{ + xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq); + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); +} + +void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter, + struct bpf_prog *prog, + int first, + int count) +{ + struct ena_ring *rx_ring; + int i = 0; + + for (i = first; i < count; i++) { + rx_ring = &adapter->rx_ring[i]; + xchg(&rx_ring->xdp_bpf_prog, prog); + if (prog) { + ena_xdp_register_rxq_info(rx_ring); + rx_ring->rx_headroom = XDP_PACKET_HEADROOM; + } else { + ena_xdp_unregister_rxq_info(rx_ring); + rx_ring->rx_headroom = 0; + } + } +} + +void ena_xdp_exchange_program(struct ena_adapter *adapter, + struct bpf_prog *prog) +{ + struct bpf_prog *old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog); + + ena_xdp_exchange_program_rx_in_range(adapter, + prog, + 0, + adapter->num_io_queues); + + if (old_bpf_prog) + bpf_prog_put(old_bpf_prog); +} + +static int ena_destroy_and_free_all_xdp_queues(struct ena_adapter *adapter) +{ + bool was_up; + int rc; + 
+ was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + + if (was_up) + ena_down(adapter); + + adapter->xdp_first_ring = 0; + adapter->xdp_num_queues = 0; + ena_xdp_exchange_program(adapter, NULL); + if (was_up) { + rc = ena_up(adapter); + if (rc) + return rc; + } + return 0; +} + +static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + struct bpf_prog *prog = bpf->prog; + struct bpf_prog *old_bpf_prog; + int rc, prev_mtu; + bool is_up; + + is_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + rc = ena_xdp_allowed(adapter); + if (rc == ENA_XDP_ALLOWED) { + old_bpf_prog = adapter->xdp_bpf_prog; + if (prog) { + if (!is_up) { + ena_init_all_xdp_queues(adapter); + } else if (!old_bpf_prog) { + ena_down(adapter); + ena_init_all_xdp_queues(adapter); + } + ena_xdp_exchange_program(adapter, prog); + + if (is_up && !old_bpf_prog) { + rc = ena_up(adapter); + if (rc) + return rc; + } + } else if (old_bpf_prog) { + rc = ena_destroy_and_free_all_xdp_queues(adapter); + if (rc) + return rc; + } + + prev_mtu = netdev->max_mtu; + netdev->max_mtu = prog ? ENA_XDP_MAX_MTU : adapter->max_mtu; + + if (!old_bpf_prog) + netif_info(adapter, drv, adapter->netdev, + "xdp program set, changing the max_mtu from %d to %d", + prev_mtu, netdev->max_mtu); + + } else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) { + netif_err(adapter, drv, adapter->netdev, + "Failed to set xdp program, the current MTU (%d) is larger than the maximum allowed MTU (%lu) while xdp is on", + netdev->mtu, ENA_XDP_MAX_MTU); + NL_SET_ERR_MSG_MOD(bpf->extack, + "Failed to set xdp program, the current MTU is larger than the maximum allowed MTU. Check the dmesg for more info"); + return -EINVAL; + } else if (rc == ENA_XDP_NO_ENOUGH_QUEUES) { + netif_err(adapter, drv, adapter->netdev, + "Failed to set xdp program, the Rx/Tx channel count should be at most half of the maximum allowed channel count. 
The current queue count (%d), the maximal queue count (%d)\n", + adapter->num_io_queues, adapter->max_num_io_queues); + NL_SET_ERR_MSG_MOD(bpf->extack, + "Failed to set xdp program, there is no enough space for allocating XDP queues, Check the dmesg for more info"); + return -EINVAL; + } + + return 0; +} + +/* This is the main xdp callback, it's used by the kernel to set/unset the xdp + * program as well as to query the current xdp program id. + */ +static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + switch (bpf->command) { + case XDP_SETUP_PROG: + return ena_xdp_set(netdev, bpf); + case XDP_QUERY_PROG: + bpf->prog_id = adapter->xdp_bpf_prog ? + adapter->xdp_bpf_prog->aux->id : 0; + break; + default: + return -EINVAL; + } + return 0; +} + static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter) { #ifdef CONFIG_RFS_ACCEL @@ -164,7 +636,8 @@ static void ena_init_io_rings_common(struct ena_adapter *adapter, u64_stats_init(&ring->syncp); } -static void ena_init_io_rings(struct ena_adapter *adapter) +static void ena_init_io_rings(struct ena_adapter *adapter, + int first_index, int count) { struct ena_com_dev *ena_dev; struct ena_ring *txr, *rxr; @@ -172,13 +645,12 @@ static void ena_init_io_rings(struct ena_adapter *adapter) ena_dev = adapter->ena_dev; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = first_index; i < first_index + count; i++) { txr = &adapter->tx_ring[i]; rxr = &adapter->rx_ring[i]; - /* TX/RX common ring state */ + /* TX common ring state */ ena_init_io_rings_common(adapter, txr, i); - ena_init_io_rings_common(adapter, rxr, i); /* TX specific ring state */ txr->ring_size = adapter->requested_tx_ring_size; @@ -188,14 +660,20 @@ static void ena_init_io_rings(struct ena_adapter *adapter) txr->smoothed_interval = ena_com_get_nonadaptive_moderation_interval_tx(ena_dev); - /* RX specific ring state */ - rxr->ring_size = adapter->requested_rx_ring_size; - 
rxr->rx_copybreak = adapter->rx_copybreak; - rxr->sgl_size = adapter->max_rx_sgl_size; - rxr->smoothed_interval = - ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); - rxr->empty_rx_queue = 0; - adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + /* Don't init RX queues for xdp queues */ + if (!ENA_IS_XDP_INDEX(adapter, i)) { + /* RX common ring state */ + ena_init_io_rings_common(adapter, rxr, i); + + /* RX specific ring state */ + rxr->ring_size = adapter->requested_rx_ring_size; + rxr->rx_copybreak = adapter->rx_copybreak; + rxr->sgl_size = adapter->max_rx_sgl_size; + rxr->smoothed_interval = + ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); + rxr->empty_rx_queue = 0; + adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + } } } @@ -285,16 +763,13 @@ static void ena_free_tx_resources(struct ena_adapter *adapter, int qid) tx_ring->push_buf_intermediate_buf = NULL; } -/* ena_setup_all_tx_resources - allocate I/O Tx queues resources for All queues - * @adapter: private structure - * - * Return 0 on success, negative on failure - */ -static int ena_setup_all_tx_resources(struct ena_adapter *adapter) +static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter, + int first_index, + int count) { int i, rc = 0; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = first_index; i < first_index + count; i++) { rc = ena_setup_tx_resources(adapter, i); if (rc) goto err_setup_tx; @@ -308,11 +783,20 @@ err_setup_tx: "Tx queue %d: allocation failed\n", i); /* rewind the index freeing the rings as we go */ - while (i--) + while (first_index < i--) ena_free_tx_resources(adapter, i); return rc; } +static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter, + int first_index, int count) +{ + int i; + + for (i = first_index; i < first_index + count; i++) + ena_free_tx_resources(adapter, i); +} + /* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues * @adapter: board private 
structure * @@ -320,10 +804,10 @@ err_setup_tx: */ static void ena_free_all_io_tx_resources(struct ena_adapter *adapter) { - int i; - - for (i = 0; i < adapter->num_io_queues; i++) - ena_free_tx_resources(adapter, i); + ena_free_all_io_tx_resources_in_range(adapter, + 0, + adapter->xdp_num_queues + + adapter->num_io_queues); } static int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id) @@ -495,8 +979,8 @@ static int ena_alloc_rx_page(struct ena_ring *rx_ring, rx_info->page = page; rx_info->page_offset = 0; ena_buf = &rx_info->ena_buf; - ena_buf->paddr = dma; - ena_buf->len = ENA_PAGE_SIZE; + ena_buf->paddr = dma + rx_ring->rx_headroom; + ena_buf->len = ENA_PAGE_SIZE - rx_ring->rx_headroom; return 0; } @@ -513,7 +997,9 @@ static void ena_free_rx_page(struct ena_ring *rx_ring, return; } - dma_unmap_page(rx_ring->dev, ena_buf->paddr, ENA_PAGE_SIZE, + dma_unmap_page(rx_ring->dev, + ena_buf->paddr - rx_ring->rx_headroom, + ENA_PAGE_SIZE, DMA_FROM_DEVICE); __free_page(page); @@ -620,8 +1106,8 @@ static void ena_free_all_rx_bufs(struct ena_adapter *adapter) ena_free_rx_bufs(adapter, i); } -static void ena_unmap_tx_skb(struct ena_ring *tx_ring, - struct ena_tx_buffer *tx_info) +static void ena_unmap_tx_buff(struct ena_ring *tx_ring, + struct ena_tx_buffer *tx_info) { struct ena_com_buf *ena_buf; u32 cnt; @@ -675,7 +1161,7 @@ static void ena_free_tx_bufs(struct ena_ring *tx_ring) tx_ring->qid, i); } - ena_unmap_tx_skb(tx_ring, tx_info); + ena_unmap_tx_buff(tx_ring, tx_info); dev_kfree_skb_any(tx_info->skb); } @@ -688,7 +1174,7 @@ static void ena_free_all_tx_bufs(struct ena_adapter *adapter) struct ena_ring *tx_ring; int i; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) { tx_ring = &adapter->tx_ring[i]; ena_free_tx_bufs(tx_ring); } @@ -699,7 +1185,7 @@ static void ena_destroy_all_tx_queues(struct ena_adapter *adapter) u16 ena_qid; int i; - for (i = 0; i < adapter->num_io_queues; i++) { + for 
(i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) { ena_qid = ENA_IO_TXQ_IDX(i); ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); } @@ -723,6 +1209,32 @@ static void ena_destroy_all_io_queues(struct ena_adapter *adapter) ena_destroy_all_rx_queues(adapter); } +static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id, + struct ena_tx_buffer *tx_info, bool is_xdp) +{ + if (tx_info) + netif_err(ring->adapter, + tx_done, + ring->netdev, + "tx_info doesn't have valid %s", + is_xdp ? "xdp frame" : "skb"); + else + netif_err(ring->adapter, + tx_done, + ring->netdev, + "Invalid req_id: %hu\n", + req_id); + + u64_stats_update_begin(&ring->syncp); + ring->tx_stats.bad_req_id++; + u64_stats_update_end(&ring->syncp); + + /* Trigger device reset */ + ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID; + set_bit(ENA_FLAG_TRIGGER_RESET, &ring->adapter->flags); + return -EFAULT; +} + static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) { struct ena_tx_buffer *tx_info = NULL; @@ -733,21 +1245,20 @@ static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) return 0; } - if (tx_info) - netif_err(tx_ring->adapter, tx_done, tx_ring->netdev, - "tx_info doesn't have valid skb\n"); - else - netif_err(tx_ring->adapter, tx_done, tx_ring->netdev, - "Invalid req_id: %hu\n", req_id); + return handle_invalid_req_id(tx_ring, req_id, tx_info, false); +} - u64_stats_update_begin(&tx_ring->syncp); - tx_ring->tx_stats.bad_req_id++; - u64_stats_update_end(&tx_ring->syncp); +static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id) +{ + struct ena_tx_buffer *tx_info = NULL; - /* Trigger device reset */ - tx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID; - set_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags); - return -EFAULT; + if (likely(req_id < xdp_ring->ring_size)) { + tx_info = &xdp_ring->tx_buffer_info[req_id]; + if (likely(tx_info->xdpf)) + return 0; + } + + return handle_invalid_req_id(xdp_ring, 
req_id, tx_info, true); } static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) @@ -786,7 +1297,7 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) tx_info->skb = NULL; tx_info->last_jiffies = 0; - ena_unmap_tx_skb(tx_ring, tx_info); + ena_unmap_tx_buff(tx_ring, tx_info); netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev, "tx_poll: q %d skb %p completed\n", tx_ring->qid, @@ -1037,6 +1548,33 @@ static void ena_set_rx_hash(struct ena_ring *rx_ring, } } +int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp) +{ + struct ena_rx_buffer *rx_info; + int ret; + + rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]; + xdp->data = page_address(rx_info->page) + + rx_info->page_offset + rx_ring->rx_headroom; + xdp_set_data_meta_invalid(xdp); + xdp->data_hard_start = page_address(rx_info->page); + xdp->data_end = xdp->data + rx_ring->ena_bufs[0].len; + /* If for some reason we received a bigger packet than + * we expect, then we simply drop it + */ + if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU)) + return XDP_DROP; + + ret = ena_xdp_execute(rx_ring, xdp, rx_info); + + /* The xdp program might expand the headers */ + if (ret == XDP_PASS) { + rx_info->page_offset = xdp->data - xdp->data_hard_start; + rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data; + } + + return ret; +} /* ena_clean_rx_irq - Cleanup RX irq * @rx_ring: RX ring to clean * @napi: napi handler @@ -1048,23 +1586,27 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, u32 budget) { u16 next_to_clean = rx_ring->next_to_clean; - u32 res_budget, work_done; - struct ena_com_rx_ctx ena_rx_ctx; struct ena_adapter *adapter; + u32 res_budget, work_done; + int rx_copybreak_pkt = 0; + int refill_threshold; struct sk_buff *skb; int refill_required; - int refill_threshold; - int rc = 0; + struct xdp_buff xdp; int total_len = 0; - int rx_copybreak_pkt = 0; + int xdp_verdict; + int rc = 0; int i; netif_dbg(rx_ring->adapter, 
rx_status, rx_ring->netdev, "%s qid %d\n", __func__, rx_ring->qid); res_budget = budget; + xdp.rxq = &rx_ring->xdp_rxq; do { + xdp_verdict = XDP_PASS; + skb = NULL; ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; ena_rx_ctx.max_bufs = rx_ring->sgl_size; ena_rx_ctx.descs = 0; @@ -1082,12 +1624,22 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto, ena_rx_ctx.l4_proto, ena_rx_ctx.hash); + if (ena_xdp_present_ring(rx_ring)) + xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp); + /* allocate skb and fill it */ - skb = ena_rx_skb(rx_ring, rx_ring->ena_bufs, ena_rx_ctx.descs, - &next_to_clean); + if (xdp_verdict == XDP_PASS) + skb = ena_rx_skb(rx_ring, + rx_ring->ena_bufs, + ena_rx_ctx.descs, + &next_to_clean); - /* exit if we failed to retrieve a buffer */ if (unlikely(!skb)) { + if (xdp_verdict == XDP_TX) { + ena_free_rx_page(rx_ring, + &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]); + res_budget--; + } for (i = 0; i < ena_rx_ctx.descs; i++) { rx_ring->free_ids[next_to_clean] = rx_ring->ena_bufs[i].req_id; @@ -1095,6 +1647,8 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, ENA_RX_RING_IDX_NEXT(next_to_clean, rx_ring->ring_size); } + if (xdp_verdict == XDP_TX || xdp_verdict == XDP_DROP) + continue; break; } @@ -1188,9 +1742,14 @@ static void ena_unmask_interrupt(struct ena_ring *tx_ring, struct ena_ring *rx_ring) { struct ena_eth_io_intr_reg intr_reg; - u32 rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ? - rx_ring->smoothed_interval : - ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev); + u32 rx_interval = 0; + /* Rx ring can be NULL when for XDP tx queues which don't have an + * accompanying rx_ring pair. + */ + if (rx_ring) + rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ? 
+ rx_ring->smoothed_interval : + ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev); /* Update intr register: rx intr delay, * tx intr delay and interrupt unmask @@ -1203,8 +1762,9 @@ static void ena_unmask_interrupt(struct ena_ring *tx_ring, /* It is a shared MSI-X. * Tx and Rx CQ have pointer to it. * So we use one of them to reach the intr reg + * The Tx ring is used because the rx_ring is NULL for XDP queues */ - ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg); + ena_com_unmask_intr(tx_ring->ena_com_io_cq, &intr_reg); } static void ena_update_ring_numa_node(struct ena_ring *tx_ring, @@ -1222,22 +1782,82 @@ static void ena_update_ring_numa_node(struct ena_ring *tx_ring, if (numa_node != NUMA_NO_NODE) { ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node); - ena_com_update_numa_node(rx_ring->ena_com_io_cq, numa_node); + if (rx_ring) + ena_com_update_numa_node(rx_ring->ena_com_io_cq, + numa_node); } tx_ring->cpu = cpu; - rx_ring->cpu = cpu; + if (rx_ring) + rx_ring->cpu = cpu; return; out: put_cpu(); } +static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget) +{ + u32 total_done = 0; + u16 next_to_clean; + u32 tx_bytes = 0; + int tx_pkts = 0; + u16 req_id; + int rc; + + if (unlikely(!xdp_ring)) + return 0; + next_to_clean = xdp_ring->next_to_clean; + + while (tx_pkts < budget) { + struct ena_tx_buffer *tx_info; + struct xdp_frame *xdpf; + + rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq, + &req_id); + if (rc) + break; + + rc = validate_xdp_req_id(xdp_ring, req_id); + if (rc) + break; + + tx_info = &xdp_ring->tx_buffer_info[req_id]; + xdpf = tx_info->xdpf; + + tx_info->xdpf = NULL; + tx_info->last_jiffies = 0; + ena_unmap_tx_buff(xdp_ring, tx_info); + + netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev, + "tx_poll: q %d skb %p completed\n", xdp_ring->qid, + xdpf); + + tx_bytes += xdpf->len; + tx_pkts++; + total_done += tx_info->tx_descs; + + __free_page(tx_info->xdp_rx_page); + xdp_ring->free_ids[next_to_clean] = 
req_id; + next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, + xdp_ring->ring_size); + } + + xdp_ring->next_to_clean = next_to_clean; + ena_com_comp_ack(xdp_ring->ena_com_io_sq, total_done); + ena_com_update_dev_comp_head(xdp_ring->ena_com_io_cq); + + netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev, + "tx_poll: q %d done. total pkts: %d\n", + xdp_ring->qid, tx_pkts); + + return tx_pkts; +} + static int ena_io_poll(struct napi_struct *napi, int budget) { struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); struct ena_ring *tx_ring, *rx_ring; - int tx_work_done; int rx_work_done = 0; int tx_budget; @@ -1247,6 +1867,9 @@ static int ena_io_poll(struct napi_struct *napi, int budget) tx_ring = ena_napi->tx_ring; rx_ring = ena_napi->rx_ring; + tx_ring->first_interrupt = ena_napi->first_interrupt; + rx_ring->first_interrupt = ena_napi->first_interrupt; + tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER; if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) || @@ -1322,8 +1945,7 @@ static irqreturn_t ena_intr_msix_io(int irq, void *data) { struct ena_napi *ena_napi = data; - ena_napi->tx_ring->first_interrupt = true; - ena_napi->rx_ring->first_interrupt = true; + ena_napi->first_interrupt = true; napi_schedule_irqoff(&ena_napi->napi); @@ -1398,10 +2020,12 @@ static void ena_setup_io_intr(struct ena_adapter *adapter) { struct net_device *netdev; int irq_idx, i, cpu; + int io_queue_count; netdev = adapter->netdev; + io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = 0; i < io_queue_count; i++) { irq_idx = ENA_IO_IRQ_IDX(i); cpu = i % num_online_cpus(); @@ -1529,45 +2153,64 @@ static void ena_disable_io_intr_sync(struct ena_adapter *adapter) synchronize_irq(adapter->irq_tbl[i].vector); } -static void ena_del_napi(struct ena_adapter *adapter) +static void ena_del_napi_in_range(struct ena_adapter *adapter, + int first_index, + int count) { int i; - for (i = 0; i < 
adapter->num_io_queues; i++) - netif_napi_del(&adapter->ena_napi[i].napi); + for (i = first_index; i < first_index + count; i++) { + /* Check if napi was initialized before */ + if (!ENA_IS_XDP_INDEX(adapter, i) || + adapter->ena_napi[i].xdp_ring) + netif_napi_del(&adapter->ena_napi[i].napi); + else + WARN_ON(ENA_IS_XDP_INDEX(adapter, i) && + adapter->ena_napi[i].xdp_ring); + } } -static void ena_init_napi(struct ena_adapter *adapter) +static void ena_init_napi_in_range(struct ena_adapter *adapter, + int first_index, int count) { - struct ena_napi *napi; + struct ena_napi *napi = {0}; int i; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = first_index; i < first_index + count; i++) { napi = &adapter->ena_napi[i]; netif_napi_add(adapter->netdev, &adapter->ena_napi[i].napi, - ena_io_poll, + ENA_IS_XDP_INDEX(adapter, i) ? ena_xdp_io_poll : ena_io_poll, ENA_NAPI_BUDGET); - napi->rx_ring = &adapter->rx_ring[i]; - napi->tx_ring = &adapter->tx_ring[i]; + + if (!ENA_IS_XDP_INDEX(adapter, i)) { + napi->rx_ring = &adapter->rx_ring[i]; + napi->tx_ring = &adapter->tx_ring[i]; + } else { + napi->xdp_ring = &adapter->tx_ring[i]; + } napi->qid = i; } } -static void ena_napi_disable_all(struct ena_adapter *adapter) +static void ena_napi_disable_in_range(struct ena_adapter *adapter, + int first_index, + int count) { int i; - for (i = 0; i < adapter->num_io_queues; i++) + for (i = first_index; i < first_index + count; i++) napi_disable(&adapter->ena_napi[i].napi); } -static void ena_napi_enable_all(struct ena_adapter *adapter) +static void ena_napi_enable_in_range(struct ena_adapter *adapter, + int first_index, + int count) { int i; - for (i = 0; i < adapter->num_io_queues; i++) + for (i = first_index; i < first_index + count; i++) napi_enable(&adapter->ena_napi[i].napi); } @@ -1582,7 +2225,7 @@ static int ena_rss_configure(struct ena_adapter *adapter) rc = ena_rss_init_default(adapter); if (rc && (rc != -EOPNOTSUPP)) { netif_err(adapter, ifup, adapter->netdev, - "Failed 
to init RSS rc: %d\n", rc); + "Failed to init RSS rc: %d\n", rc); return rc; } } @@ -1620,7 +2263,9 @@ static int ena_up_complete(struct ena_adapter *adapter) /* enable transmits */ netif_tx_start_all_queues(adapter->netdev); - ena_napi_enable_all(adapter); + ena_napi_enable_in_range(adapter, + 0, + adapter->xdp_num_queues + adapter->num_io_queues); return 0; } @@ -1653,7 +2298,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) if (rc) { netif_err(adapter, ifup, adapter->netdev, "Failed to create I/O TX queue num %d rc: %d\n", - qid, rc); + qid, rc); return rc; } @@ -1672,12 +2317,13 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) return rc; } -static int ena_create_all_io_tx_queues(struct ena_adapter *adapter) +static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter, + int first_index, int count) { struct ena_com_dev *ena_dev = adapter->ena_dev; int rc, i; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = first_index; i < first_index + count; i++) { rc = ena_create_io_tx_queue(adapter, i); if (rc) goto create_err; @@ -1686,7 +2332,7 @@ static int ena_create_all_io_tx_queues(struct ena_adapter *adapter) return 0; create_err: - while (i--) + while (i-- > first_index) ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i)); return rc; @@ -1731,13 +2377,15 @@ static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid) netif_err(adapter, ifup, adapter->netdev, "Failed to get RX queue handlers. 
RX queue num %d rc: %d\n", qid, rc); - ena_com_destroy_io_queue(ena_dev, ena_qid); - return rc; + goto err; } ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node); return rc; +err: + ena_com_destroy_io_queue(ena_dev, ena_qid); + return rc; } static int ena_create_all_io_rx_queues(struct ena_adapter *adapter) @@ -1764,7 +2412,8 @@ create_err: } static void set_io_rings_size(struct ena_adapter *adapter, - int new_tx_size, int new_rx_size) + int new_tx_size, + int new_rx_size) { int i; @@ -1798,14 +2447,24 @@ static int create_queues_with_size_backoff(struct ena_adapter *adapter) * ones due to past queue allocation failures. */ set_io_rings_size(adapter, adapter->requested_tx_ring_size, - adapter->requested_rx_ring_size); + adapter->requested_rx_ring_size); while (1) { - rc = ena_setup_all_tx_resources(adapter); + if (ena_xdp_present(adapter)) { + rc = ena_setup_and_create_all_xdp_queues(adapter); + + if (rc) + goto err_setup_tx; + } + rc = ena_setup_tx_resources_in_range(adapter, + 0, + adapter->num_io_queues); if (rc) goto err_setup_tx; - rc = ena_create_all_io_tx_queues(adapter); + rc = ena_create_io_tx_queues_in_range(adapter, + 0, + adapter->num_io_queues); if (rc) goto err_create_tx_queues; @@ -1829,7 +2488,7 @@ err_setup_tx: if (rc != -ENOMEM) { netif_err(adapter, ifup, adapter->netdev, "Queue creation failed with error code %d\n", - rc); + rc); return rc; } @@ -1852,7 +2511,7 @@ err_setup_tx: new_rx_ring_size = cur_rx_ring_size / 2; if (new_tx_ring_size < ENA_MIN_RING_SIZE || - new_rx_ring_size < ENA_MIN_RING_SIZE) { + new_rx_ring_size < ENA_MIN_RING_SIZE) { netif_err(adapter, ifup, adapter->netdev, "Queue creation failed with the smallest possible queue size of %d for both queues. 
Not retrying with smaller queues\n", ENA_MIN_RING_SIZE); @@ -1871,10 +2530,11 @@ err_setup_tx: static int ena_up(struct ena_adapter *adapter) { - int rc, i; + int io_queue_count, rc, i; netdev_dbg(adapter->netdev, "%s\n", __func__); + io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; ena_setup_io_intr(adapter); /* napi poll functions should be initialized before running @@ -1882,7 +2542,7 @@ static int ena_up(struct ena_adapter *adapter) * interrupt, causing the ISR to fire immediately while the poll * function wasn't set yet, causing a null dereference */ - ena_init_napi(adapter); + ena_init_napi_in_range(adapter, 0, io_queue_count); rc = ena_request_io_irq(adapter); if (rc) @@ -1913,7 +2573,7 @@ static int ena_up(struct ena_adapter *adapter) /* schedule napi in case we had pending packets * from the last time we disable napi */ - for (i = 0; i < adapter->num_io_queues; i++) + for (i = 0; i < io_queue_count; i++) napi_schedule(&adapter->ena_napi[i].napi); return rc; @@ -1926,13 +2586,15 @@ err_up: err_create_queues_with_backoff: ena_free_io_irq(adapter); err_req_irq: - ena_del_napi(adapter); + ena_del_napi_in_range(adapter, 0, io_queue_count); return rc; } static void ena_down(struct ena_adapter *adapter) { + int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; + netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__); clear_bit(ENA_FLAG_DEV_UP, &adapter->flags); @@ -1945,7 +2607,7 @@ static void ena_down(struct ena_adapter *adapter) netif_tx_disable(adapter->netdev); /* After this point the napi handler won't enable the tx queue */ - ena_napi_disable_all(adapter); + ena_napi_disable_in_range(adapter, 0, io_queue_count); /* After destroy the queue there won't be any new interrupts */ @@ -1963,7 +2625,7 @@ static void ena_down(struct ena_adapter *adapter) ena_disable_io_intr_sync(adapter); ena_free_io_irq(adapter); - ena_del_napi(adapter); + ena_del_napi_in_range(adapter, 0, io_queue_count); ena_free_all_tx_bufs(adapter); 
ena_free_all_rx_bufs(adapter); @@ -2053,23 +2715,47 @@ int ena_update_queue_sizes(struct ena_adapter *adapter, ena_close(adapter->netdev); adapter->requested_tx_ring_size = new_tx_size; adapter->requested_rx_ring_size = new_rx_size; - ena_init_io_rings(adapter); + ena_init_io_rings(adapter, + 0, + adapter->xdp_num_queues + + adapter->num_io_queues); return dev_was_up ? ena_up(adapter) : 0; } int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count) { struct ena_com_dev *ena_dev = adapter->ena_dev; + int prev_channel_count; bool dev_was_up; dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); ena_close(adapter->netdev); + prev_channel_count = adapter->num_io_queues; adapter->num_io_queues = new_channel_count; + if (ena_xdp_present(adapter) && + ena_xdp_allowed(adapter) == ENA_XDP_ALLOWED) { + adapter->xdp_first_ring = new_channel_count; + adapter->xdp_num_queues = new_channel_count; + if (prev_channel_count > new_channel_count) + ena_xdp_exchange_program_rx_in_range(adapter, + NULL, + new_channel_count, + prev_channel_count); + else + ena_xdp_exchange_program_rx_in_range(adapter, + adapter->xdp_bpf_prog, + prev_channel_count, + new_channel_count); + } + /* We need to destroy the rss table so that the indirection * table will be reinitialized by ena_up() */ ena_com_rss_destroy(ena_dev); - ena_init_io_rings(adapter); + ena_init_io_rings(adapter, + 0, + adapter->xdp_num_queues + + adapter->num_io_queues); return dev_was_up ? 
ena_open(adapter->netdev) : 0; } @@ -2253,7 +2939,7 @@ error_report_dma_error: tx_info->skb = NULL; tx_info->num_of_bufs += i; - ena_unmap_tx_skb(tx_ring, tx_info); + ena_unmap_tx_buff(tx_ring, tx_info); return -EINVAL; } @@ -2268,7 +2954,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) struct netdev_queue *txq; void *push_hdr; u16 next_to_use, req_id, header_len; - int qid, rc, nb_hw_desc; + int qid, rc; netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb); /* Determine which tx ring we will be placed on */ @@ -2303,50 +2989,17 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) /* set flags and meta data */ ena_tx_csum(&ena_tx_ctx, skb); - if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx))) { - netif_dbg(adapter, tx_queued, dev, - "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n", - qid); - ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); - } - - /* prepare the packet's descriptors to dma engine */ - rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, - &nb_hw_desc); - - /* ena_com_prepare_tx() can't fail due to overflow of tx queue, - * since the number of free descriptors in the queue is checked - * after sending the previous packet. In case there isn't enough - * space in the queue for the next packet, it is stopped - * until there is again enough available space in the queue. - * All other failure reasons of ena_com_prepare_tx() are fatal - * and therefore require a device reset. 
- */ - if (unlikely(rc)) { - netif_err(adapter, tx_queued, dev, - "failed to prepare tx bufs\n"); - u64_stats_update_begin(&tx_ring->syncp); - tx_ring->tx_stats.prepare_ctx_err++; - u64_stats_update_end(&tx_ring->syncp); - adapter->reset_reason = ENA_REGS_RESET_DRIVER_INVALID_STATE; - set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + rc = ena_xmit_common(dev, + tx_ring, + tx_info, + &ena_tx_ctx, + next_to_use, + skb->len); + if (rc) goto error_unmap_dma; - } netdev_tx_sent_queue(txq, skb->len); - u64_stats_update_begin(&tx_ring->syncp); - tx_ring->tx_stats.cnt++; - tx_ring->tx_stats.bytes += skb->len; - u64_stats_update_end(&tx_ring->syncp); - - tx_info->tx_descs = nb_hw_desc; - tx_info->last_jiffies = jiffies; - tx_info->print_once = 0; - - tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, - tx_ring->ring_size); - /* stop the queue when no more space available, the packet can have up * to sgl_size + 2. one for the meta descriptor and one for header * (if the header is larger than tx_max_header_size). 
@@ -2393,7 +3046,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; error_unmap_dma: - ena_unmap_tx_skb(tx_ring, tx_info); + ena_unmap_tx_buff(tx_ring, tx_info); tx_info->skb = NULL; error_drop_packet: @@ -2572,6 +3225,7 @@ static const struct net_device_ops ena_netdev_ops = { .ndo_change_mtu = ena_change_mtu, .ndo_set_mac_address = NULL, .ndo_validate_addr = eth_validate_addr, + .ndo_bpf = ena_xdp, }; static int ena_device_validate_params(struct ena_adapter *adapter, @@ -2951,7 +3605,9 @@ static void check_for_missing_completions(struct ena_adapter *adapter) struct ena_ring *tx_ring; struct ena_ring *rx_ring; int i, budget, rc; + int io_queue_count; + io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues; /* Make sure the driver doesn't turn the device in other process */ smp_rmb(); @@ -2966,7 +3622,7 @@ static void check_for_missing_completions(struct ena_adapter *adapter) budget = ENA_MONITORED_TX_QUEUES; - for (i = adapter->last_monitored_tx_qid; i < adapter->num_io_queues; i++) { + for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) { tx_ring = &adapter->tx_ring[i]; rx_ring = &adapter->rx_ring[i]; @@ -2974,7 +3630,8 @@ static void check_for_missing_completions(struct ena_adapter *adapter) if (unlikely(rc)) return; - rc = check_for_rx_interrupt_queue(adapter, rx_ring); + rc = !ENA_IS_XDP_INDEX(adapter, i) ? 
+ check_for_rx_interrupt_queue(adapter, rx_ring) : 0; if (unlikely(rc)) return; @@ -2983,7 +3640,7 @@ static void check_for_missing_completions(struct ena_adapter *adapter) break; } - adapter->last_monitored_tx_qid = i % adapter->num_io_queues; + adapter->last_monitored_tx_qid = i % io_queue_count; } /* trigger napi schedule after 2 consecutive detections */ @@ -3560,6 +4217,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->num_io_queues = max_num_io_queues; adapter->max_num_io_queues = max_num_io_queues; + adapter->xdp_first_ring = 0; + adapter->xdp_num_queues = 0; + adapter->last_monitored_tx_qid = 0; adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK; @@ -3573,7 +4233,10 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) "Failed to query interrupt moderation feature\n"); goto err_netdev_destroy; } - ena_init_io_rings(adapter); + ena_init_io_rings(adapter, + 0, + adapter->xdp_num_queues + + adapter->num_io_queues); netdev->netdev_ops = &ena_netdev_ops; netdev->watchdog_timeo = TX_TIMEOUT; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index bffd778f2ce3..094324fd0edc 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -36,6 +36,7 @@ #include <linux/bitops.h> #include <linux/dim.h> #include <linux/etherdevice.h> +#include <linux/if_vlan.h> #include <linux/inetdevice.h> #include <linux/interrupt.h> #include <linux/netdevice.h> @@ -142,6 +143,18 @@ #define ENA_MMIO_DISABLE_REG_READ BIT(0) +/* The max MTU size is configured to be the ethernet frame size without + * the overhead of the ethernet header, which can have a VLAN header, and + * a frame check sequence (FCS). 
+ * The buffer size we share with the device is defined to be ENA_PAGE_SIZE + */ + +#define ENA_XDP_MAX_MTU (ENA_PAGE_SIZE - ETH_HLEN - ETH_FCS_LEN - \ + VLAN_HLEN - XDP_PACKET_HEADROOM) + +#define ENA_IS_XDP_INDEX(adapter, index) (((index) >= (adapter)->xdp_first_ring) && \ + ((index) < (adapter)->xdp_first_ring + (adapter)->xdp_num_queues)) + struct ena_irq { irq_handler_t handler; void *data; @@ -155,6 +168,8 @@ struct ena_napi { struct napi_struct napi ____cacheline_aligned; struct ena_ring *tx_ring; struct ena_ring *rx_ring; + struct ena_ring *xdp_ring; + bool first_interrupt; u32 qid; struct dim dim; }; @@ -180,6 +195,17 @@ struct ena_tx_buffer { /* num of buffers used by this skb */ u32 num_of_bufs; + /* XDP buffer structure which is used for sending packets in + * the xdp queues + */ + struct xdp_frame *xdpf; + /* The rx page for the rx buffer that was received in rx and + * re transmitted on xdp tx queues as a result of XDP_TX action. + * We need to free the page once we finished cleaning the buffer in + * clean_xdp_irq() + */ + struct page *xdp_rx_page; + /* Indicate if bufs[0] map the linear data of the skb. 
*/ u8 map_linear_data; @@ -258,10 +284,13 @@ struct ena_ring { struct ena_adapter *adapter; struct ena_com_io_cq *ena_com_io_cq; struct ena_com_io_sq *ena_com_io_sq; + struct bpf_prog *xdp_bpf_prog; + struct xdp_rxq_info xdp_rxq; u16 next_to_use; u16 next_to_clean; u16 rx_copybreak; + u16 rx_headroom; u16 qid; u16 mtu; u16 sgl_size; @@ -379,6 +408,10 @@ struct ena_adapter { u32 last_monitored_tx_qid; enum ena_regs_reset_reason_types reset_reason; + + struct bpf_prog *xdp_bpf_prog; + u32 xdp_first_ring; + u32 xdp_num_queues; }; void ena_set_ethtool_ops(struct net_device *netdev); @@ -390,8 +423,48 @@ void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf); int ena_update_queue_sizes(struct ena_adapter *adapter, u32 new_tx_size, u32 new_rx_size); + int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count); int ena_get_sset_count(struct net_device *netdev, int sset); +enum ena_xdp_errors_t { + ENA_XDP_ALLOWED = 0, + ENA_XDP_CURRENT_MTU_TOO_LARGE, + ENA_XDP_NO_ENOUGH_QUEUES, +}; + +static inline bool ena_xdp_queues_present(struct ena_adapter *adapter) +{ + return adapter->xdp_first_ring != 0; +} + +static inline bool ena_xdp_present(struct ena_adapter *adapter) +{ + return !!adapter->xdp_bpf_prog; +} + +static inline bool ena_xdp_present_ring(struct ena_ring *ring) +{ + return !!ring->xdp_bpf_prog; +} + +static inline int ena_xdp_legal_queue_count(struct ena_adapter *adapter, + u32 queues) +{ + return 2 * queues <= adapter->max_num_io_queues; +} + +static inline enum ena_xdp_errors_t ena_xdp_allowed(struct ena_adapter *adapter) +{ + enum ena_xdp_errors_t rc = ENA_XDP_ALLOWED; + + if (adapter->netdev->mtu > ENA_XDP_MAX_MTU) + rc = ENA_XDP_CURRENT_MTU_TOO_LARGE; + else if (!ena_xdp_legal_queue_count(adapter, adapter->num_io_queues)) + rc = ENA_XDP_NO_ENOUGH_QUEUES; + + return rc; +} + #endif /* !(ENA_H) */ diff --git a/drivers/net/ethernet/amd/7990.c b/drivers/net/ethernet/amd/7990.c index ab30761003da..cf3562e82ca9 100644 --- 
a/drivers/net/ethernet/amd/7990.c +++ b/drivers/net/ethernet/amd/7990.c @@ -527,7 +527,7 @@ int lance_close(struct net_device *dev) } EXPORT_SYMBOL_GPL(lance_close); -void lance_tx_timeout(struct net_device *dev) +void lance_tx_timeout(struct net_device *dev, unsigned int txqueue) { printk("lance_tx_timeout\n"); lance_reset(dev); diff --git a/drivers/net/ethernet/amd/7990.h b/drivers/net/ethernet/amd/7990.h index 741cdc392c6b..8266b3c1fefc 100644 --- a/drivers/net/ethernet/amd/7990.h +++ b/drivers/net/ethernet/amd/7990.h @@ -243,7 +243,7 @@ int lance_open(struct net_device *dev); int lance_close(struct net_device *dev); int lance_start_xmit(struct sk_buff *skb, struct net_device *dev); void lance_set_multicast(struct net_device *dev); -void lance_tx_timeout(struct net_device *dev); +void lance_tx_timeout(struct net_device *dev, unsigned int txqueue); #ifdef CONFIG_NET_POLL_CONTROLLER void lance_poll(struct net_device *dev); #endif diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c index 212fe72a190b..2f808dbc8b0e 100644 --- a/drivers/net/ethernet/amd/a2065.c +++ b/drivers/net/ethernet/amd/a2065.c @@ -118,10 +118,6 @@ struct lance_private { int auto_select; /* cable-selection by carrier */ unsigned short busmaster_regval; -#ifdef CONFIG_SUNLANCE - struct Linux_SBus_DMA *ledma; /* if set this points to ledma and arch=4m */ - int burst_sizes; /* ledma SBus burst sizes */ -#endif struct timer_list multicast_timer; struct net_device *dev; }; @@ -522,7 +518,7 @@ static inline int lance_reset(struct net_device *dev) return status; } -static void lance_tx_timeout(struct net_device *dev) +static void lance_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct lance_private *lp = netdev_priv(dev); volatile struct lance_regs *ll = lp->ll; @@ -551,11 +547,10 @@ static netdev_tx_t lance_start_xmit(struct sk_buff *skb, if (!lance_tx_buffs_avail(lp)) goto out_free; -#ifdef DEBUG /* dump the packet */ - print_hex_dump(KERN_DEBUG, 
"skb->data: ", DUMP_PREFIX_NONE, - 16, 1, skb->data, 64, true); -#endif + print_hex_dump_debug("skb->data: ", DUMP_PREFIX_NONE, 16, 1, skb->data, + 64, true); + entry = lp->tx_new & lp->tx_ring_mod_mask; ib->btx_ring[entry].length = (-skblen) | 0xf000; ib->btx_ring[entry].misc = 0; diff --git a/drivers/net/ethernet/amd/am79c961a.c b/drivers/net/ethernet/amd/am79c961a.c index 0842da492a64..1c53408f5d47 100644 --- a/drivers/net/ethernet/amd/am79c961a.c +++ b/drivers/net/ethernet/amd/am79c961a.c @@ -422,7 +422,7 @@ static void am79c961_setmulticastlist (struct net_device *dev) spin_unlock_irqrestore(&priv->chip_lock, flags); } -static void am79c961_timeout(struct net_device *dev) +static void am79c961_timeout(struct net_device *dev, unsigned int txqueue) { printk(KERN_WARNING "%s: transmit timed out, network cable problem?\n", dev->name); diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 573e88fc8ede..0f3b743425e8 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1569,7 +1569,7 @@ static int amd8111e_enable_link_change(struct amd8111e_priv *lp) * failed or the interface is locked up. This function will reinitialize * the hardware. 
*/ -static void amd8111e_tx_timeout(struct net_device *dev) +static void amd8111e_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct amd8111e_priv *lp = netdev_priv(dev); int err; diff --git a/drivers/net/ethernet/amd/ariadne.c b/drivers/net/ethernet/amd/ariadne.c index 4b6a5cb85dd2..5e0f645f5bde 100644 --- a/drivers/net/ethernet/amd/ariadne.c +++ b/drivers/net/ethernet/amd/ariadne.c @@ -530,7 +530,7 @@ static inline void ariadne_reset(struct net_device *dev) netif_start_queue(dev); } -static void ariadne_tx_timeout(struct net_device *dev) +static void ariadne_tx_timeout(struct net_device *dev, unsigned int txqueue) { volatile struct Am79C960 *lance = (struct Am79C960 *)dev->base_addr; diff --git a/drivers/net/ethernet/amd/atarilance.c b/drivers/net/ethernet/amd/atarilance.c index d3d44e07afbc..4e36122609a3 100644 --- a/drivers/net/ethernet/amd/atarilance.c +++ b/drivers/net/ethernet/amd/atarilance.c @@ -346,7 +346,7 @@ static int lance_rx( struct net_device *dev ); static int lance_close( struct net_device *dev ); static void set_multicast_list( struct net_device *dev ); static int lance_set_mac_address( struct net_device *dev, void *addr ); -static void lance_tx_timeout (struct net_device *dev); +static void lance_tx_timeout (struct net_device *dev, unsigned int txqueue); /************************* End of Prototypes **************************/ @@ -727,7 +727,7 @@ static void lance_init_ring( struct net_device *dev ) /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ -static void lance_tx_timeout (struct net_device *dev) +static void lance_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct lance_private *lp = netdev_priv(dev); struct lance_ioreg *IO = lp->iobase; diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index 1793950f0582..d832c9f4d306 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -1014,7 +1014,7 @@ static netdev_tx_t 
au1000_tx(struct sk_buff *skb, struct net_device *dev) * The Tx ring has been full longer than the watchdog timeout * value. The transmitter must be hung? */ -static void au1000_tx_timeout(struct net_device *dev) +static void au1000_tx_timeout(struct net_device *dev, unsigned int txqueue) { netdev_err(dev, "au1000_tx_timeout: dev=%p\n", dev); au1000_reset_mac(dev); diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index dac4a2fcad6a..6592a2db9efb 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -884,7 +884,7 @@ static inline int lance_reset(struct net_device *dev) return status; } -static void lance_tx_timeout(struct net_device *dev) +static void lance_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct lance_private *lp = netdev_priv(dev); volatile struct lance_regs *ll = lp->ll; diff --git a/drivers/net/ethernet/amd/lance.c b/drivers/net/ethernet/amd/lance.c index f90b454b1642..aff44241988c 100644 --- a/drivers/net/ethernet/amd/lance.c +++ b/drivers/net/ethernet/amd/lance.c @@ -306,7 +306,7 @@ static irqreturn_t lance_interrupt(int irq, void *dev_id); static int lance_close(struct net_device *dev); static struct net_device_stats *lance_get_stats(struct net_device *dev); static void set_multicast_list(struct net_device *dev); -static void lance_tx_timeout (struct net_device *dev); +static void lance_tx_timeout (struct net_device *dev, unsigned int txqueue); @@ -913,7 +913,7 @@ lance_restart(struct net_device *dev, unsigned int csr0_bits, int must_reinit) } -static void lance_tx_timeout (struct net_device *dev) +static void lance_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct lance_private *lp = (struct lance_private *) dev->ml_priv; int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/amd/ni65.c b/drivers/net/ethernet/amd/ni65.c index c6c2a54c1121..c38edf6f03a3 100644 --- a/drivers/net/ethernet/amd/ni65.c +++ 
b/drivers/net/ethernet/amd/ni65.c @@ -254,7 +254,7 @@ static int ni65_lance_reinit(struct net_device *dev); static void ni65_init_lance(struct priv *p,unsigned char*,int,int); static netdev_tx_t ni65_send_packet(struct sk_buff *skb, struct net_device *dev); -static void ni65_timeout(struct net_device *dev); +static void ni65_timeout(struct net_device *dev, unsigned int txqueue); static int ni65_close(struct net_device *dev); static int ni65_alloc_buffer(struct net_device *dev); static void ni65_free_buffer(struct priv *p); @@ -1133,7 +1133,7 @@ static void ni65_recv_intr(struct net_device *dev,int csr0) * kick xmitter .. */ -static void ni65_timeout(struct net_device *dev) +static void ni65_timeout(struct net_device *dev, unsigned int txqueue) { int i; struct priv *p = dev->ml_priv; diff --git a/drivers/net/ethernet/amd/nmclan_cs.c b/drivers/net/ethernet/amd/nmclan_cs.c index 9c152d85840d..023aecf6ab30 100644 --- a/drivers/net/ethernet/amd/nmclan_cs.c +++ b/drivers/net/ethernet/amd/nmclan_cs.c @@ -407,7 +407,7 @@ static int mace_open(struct net_device *dev); static int mace_close(struct net_device *dev); static netdev_tx_t mace_start_xmit(struct sk_buff *skb, struct net_device *dev); -static void mace_tx_timeout(struct net_device *dev); +static void mace_tx_timeout(struct net_device *dev, unsigned int txqueue); static irqreturn_t mace_interrupt(int irq, void *dev_id); static struct net_device_stats *mace_get_stats(struct net_device *dev); static int mace_rx(struct net_device *dev, unsigned char RxCnt); @@ -837,7 +837,7 @@ mace_start_xmit failed, put skb back into a list." 
---------------------------------------------------------------------------- */ -static void mace_tx_timeout(struct net_device *dev) +static void mace_tx_timeout(struct net_device *dev, unsigned int txqueue) { mace_private *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index f5ad12c10934..dc7d88227e76 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -314,7 +314,7 @@ static int pcnet32_open(struct net_device *); static int pcnet32_init_ring(struct net_device *); static netdev_tx_t pcnet32_start_xmit(struct sk_buff *, struct net_device *); -static void pcnet32_tx_timeout(struct net_device *dev); +static void pcnet32_tx_timeout(struct net_device *dev, unsigned int txqueue); static irqreturn_t pcnet32_interrupt(int, void *); static int pcnet32_close(struct net_device *); static struct net_device_stats *pcnet32_get_stats(struct net_device *); @@ -2455,7 +2455,7 @@ static void pcnet32_restart(struct net_device *dev, unsigned int csr0_bits) lp->a->write_csr(ioaddr, CSR0, csr0_bits); } -static void pcnet32_tx_timeout(struct net_device *dev) +static void pcnet32_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct pcnet32_private *lp = netdev_priv(dev); unsigned long ioaddr = dev->base_addr, flags; diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c index ebcbf8ca4829..b00e00881253 100644 --- a/drivers/net/ethernet/amd/sunlance.c +++ b/drivers/net/ethernet/amd/sunlance.c @@ -1097,7 +1097,7 @@ static void lance_piozero(void __iomem *dest, int len) sbus_writeb(0, piobuf); } -static void lance_tx_timeout(struct net_device *dev) +static void lance_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct lance_private *lp = netdev_priv(dev); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 98f8f2033154..b71f9b04a51e 100644 --- 
a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -2152,7 +2152,7 @@ static int xgbe_change_mtu(struct net_device *netdev, int mtu) return 0; } -static void xgbe_tx_timeout(struct net_device *netdev) +static void xgbe_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct xgbe_prv_data *pdata = netdev_priv(netdev); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 128cd648ba99..46c3c1ca38d6 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -1227,7 +1227,7 @@ static bool xgbe_phy_sfp_verify_eeprom(u8 cc_in, u8 *buf, unsigned int len) for (cc = 0; len; buf++, len--) cc += *buf; - return (cc == cc_in) ? true : false; + return cc == cc_in; } static int xgbe_phy_sfp_read_eeprom(struct xgbe_prv_data *pdata) diff --git a/drivers/net/ethernet/apm/xgene-v2/main.c b/drivers/net/ethernet/apm/xgene-v2/main.c index 02b4f3af02b5..c48f60996761 100644 --- a/drivers/net/ethernet/apm/xgene-v2/main.c +++ b/drivers/net/ethernet/apm/xgene-v2/main.c @@ -575,7 +575,7 @@ static void xge_free_pending_skb(struct net_device *ndev) } } -static void xge_timeout(struct net_device *ndev) +static void xge_timeout(struct net_device *ndev, unsigned int txqueue) { struct xge_pdata *pdata = netdev_priv(ndev); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index d8612131c55e..e284b6753725 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -859,7 +859,7 @@ static int xgene_enet_napi(struct napi_struct *napi, const int budget) return processed; } -static void xgene_enet_timeout(struct net_device *ndev) +static void xgene_enet_timeout(struct net_device *ndev, unsigned int txqueue) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); struct netdev_queue *txq; diff --git 
a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c index 8d03578d5e8c..95d3061c61be 100644 --- a/drivers/net/ethernet/apple/macmace.c +++ b/drivers/net/ethernet/apple/macmace.c @@ -91,7 +91,7 @@ static int mace_set_address(struct net_device *dev, void *addr); static void mace_reset(struct net_device *dev); static irqreturn_t mace_interrupt(int irq, void *dev_id); static irqreturn_t mace_dma_intr(int irq, void *dev_id); -static void mace_tx_timeout(struct net_device *dev); +static void mace_tx_timeout(struct net_device *dev, unsigned int txqueue); static void __mace_set_address(struct net_device *dev, void *addr); /* @@ -600,7 +600,7 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static void mace_tx_timeout(struct net_device *dev) +static void mace_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mace_data *mp = netdev_priv(dev); volatile struct mace *mb = mp->mace; diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index 61a334d1b5e6..88e4e1500a8e 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -1409,7 +1409,7 @@ static void ag71xx_oom_timer_handler(struct timer_list *t) napi_schedule(&ag->napi); } -static void ag71xx_tx_timeout(struct net_device *ndev) +static void ag71xx_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct ag71xx *ag = netdev_priv(ndev); diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index d4bbcdfd691a..1dcbc486eca9 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1553,7 +1553,7 @@ static netdev_tx_t alx_start_xmit(struct sk_buff *skb, return alx_start_xmit_ring(skb, alx_tx_queue_mapping(alx, skb)); } -static void alx_tx_timeout(struct net_device *dev) +static void alx_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct alx_priv *alx = 
netdev_priv(dev); diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 2b239ecea05f..4c0b1f8551dd 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -350,7 +350,7 @@ static void atl1c_del_timer(struct atl1c_adapter *adapter) * atl1c_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure */ -static void atl1c_tx_timeout(struct net_device *netdev) +static void atl1c_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct atl1c_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 4f7b65825c15..e0d89942d537 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -251,7 +251,7 @@ static void atl1e_cancel_work(struct atl1e_adapter *adapter) * atl1e_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure */ -static void atl1e_tx_timeout(struct net_device *netdev) +static void atl1e_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct atl1e_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index 3aba38322717..b81a4e0c5b57 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -1001,7 +1001,7 @@ static int atl2_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) * atl2_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure */ -static void atl2_tx_timeout(struct net_device *netdev) +static void atl2_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct atl2_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/atheros/atlx/atlx.c b/drivers/net/ethernet/atheros/atlx/atlx.c index 505a22c703f7..0941d07d0833 
100644 --- a/drivers/net/ethernet/atheros/atlx/atlx.c +++ b/drivers/net/ethernet/atheros/atlx/atlx.c @@ -183,7 +183,7 @@ static void atlx_clear_phy_int(struct atlx_adapter *adapter) * atlx_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure */ -static void atlx_tx_timeout(struct net_device *netdev) +static void atlx_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct atlx_adapter *adapter = netdev_priv(netdev); /* Do the reset outside of interrupt context */ diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index ec25fd81985d..a780b7215021 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -948,7 +948,7 @@ irq_ack: return IRQ_RETVAL(handled); } -static void b44_tx_timeout(struct net_device *dev) +static void b44_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct b44 *bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index d6b1a153f9df..f07ac0e0af59 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1354,7 +1354,7 @@ out: return ret; } -static void bcm_sysport_tx_timeout(struct net_device *dev) +static void bcm_sysport_tx_timeout(struct net_device *dev, unsigned int txqueue) { netdev_warn(dev, "transmit timeout!\n"); @@ -2428,6 +2428,14 @@ static int bcm_sysport_probe(struct platform_device *pdev) if (!of_id || !of_id->data) return -EINVAL; + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + if (ret) + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (ret) { + dev_err(&pdev->dev, "unable to set DMA mask: %d\n", ret); + return ret; + } + /* Fairly quickly we need to know the type of adapter we have */ params = of_id->data; diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index fbc196b480b6..dbb7874607ca 100644 --- 
a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -6575,7 +6575,7 @@ bnx2_dump_state(struct bnx2 *bp) } static void -bnx2_tx_timeout(struct net_device *dev) +bnx2_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct bnx2 *bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 5e037a305b83..ee9e9290f112 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -4970,7 +4970,7 @@ int bnx2x_set_features(struct net_device *dev, netdev_features_t features) return 0; } -void bnx2x_tx_timeout(struct net_device *dev) +void bnx2x_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct bnx2x *bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 3f63ffd7561b..6f1352d51cb2 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -617,7 +617,7 @@ int bnx2x_set_features(struct net_device *dev, netdev_features_t features); * * @dev: net device */ -void bnx2x_tx_timeout(struct net_device *dev); +void bnx2x_tx_timeout(struct net_device *dev, unsigned int txqueue); /** bnx2x_get_c2s_mapping - read inner-to-outer vlan configuration * c2s_map should have BNX2X_MAX_PRIORITY entries. 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index cff64e43bdd8..741d865e4afc 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -15410,6 +15410,7 @@ int bnx2x_configure_ptp_filters(struct bnx2x *bp) REG_WR(bp, rule, BNX2X_PTP_TX_ON_RULE_MASK); break; case HWTSTAMP_TX_ONESTEP_SYNC: + case HWTSTAMP_TX_ONESTEP_P2P: BNX2X_ERR("One-step timestamping is not supported\n"); return -ERANGE; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index e6f18f6070ef..198c69dceeef 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -944,6 +944,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp, dma_addr -= bp->rx_dma_offset; dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir, DMA_ATTR_WEAK_ORDERING); + page_pool_release_page(rxr->page_pool, page); if (unlikely(!payload)) payload = eth_get_headlen(bp->dev, data_ptr, len); @@ -9975,7 +9976,7 @@ static void bnxt_reset_task(struct bnxt *bp, bool silent) } } -static void bnxt_tx_timeout(struct net_device *dev) +static void bnxt_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct bnxt *bp = netdev_priv(dev); @@ -10822,6 +10823,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) smp_mb__before_atomic(); clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); bnxt_ulp_start(bp, rc); + bnxt_dl_health_recovery_done(bp); bnxt_dl_health_status_update(bp, true); rtnl_unlock(); break; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 3eedd4477218..0c3d224637b9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -89,7 +89,7 @@ static int bnxt_fw_reset_recover(struct devlink_health_reporter *reporter, return -EOPNOTSUPP; 
bnxt_fw_reset(bp); - return 0; + return -EINPROGRESS; } static const @@ -116,7 +116,7 @@ static int bnxt_fw_fatal_recover(struct devlink_health_reporter *reporter, else if (event == BNXT_FW_EXCEPTION_SP_EVENT) bnxt_fw_exception(bp); - return 0; + return -EINPROGRESS; } static const @@ -262,6 +262,16 @@ void bnxt_dl_health_status_update(struct bnxt *bp, bool healthy) health->fatal = false; } +void bnxt_dl_health_recovery_done(struct bnxt *bp) +{ + struct bnxt_fw_health *hlth = bp->fw_health; + + if (hlth->fatal) + devlink_health_reporter_recovery_done(hlth->fw_fatal_reporter); + else + devlink_health_reporter_recovery_done(hlth->fw_reset_reporter); +} + static const struct devlink_ops bnxt_dl_ops = { #ifdef CONFIG_BNXT_SRIOV .eswitch_mode_set = bnxt_dl_eswitch_mode_set, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h index 6db6c3dac472..08aaa4441c78 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h @@ -58,6 +58,7 @@ struct bnxt_dl_nvm_param { void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event); void bnxt_dl_health_status_update(struct bnxt *bp, bool healthy); +void bnxt_dl_health_recovery_done(struct bnxt *bp); void bnxt_dl_fw_reporters_create(struct bnxt *bp); void bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all); int bnxt_dl_register(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 120fa05a39ff..3ee7917e3fc0 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2,7 +2,7 @@ /* * Broadcom GENET (Gigabit Ethernet) controller driver * - * Copyright (c) 2014-2017 Broadcom + * Copyright (c) 2014-2019 Broadcom */ #define pr_fmt(fmt) "bcmgenet: " fmt @@ -508,8 +508,8 @@ static int bcmgenet_set_link_ksettings(struct net_device *dev, return 
phy_ethtool_ksettings_set(dev->phydev, cmd); } -static int bcmgenet_set_rx_csum(struct net_device *dev, - netdev_features_t wanted) +static void bcmgenet_set_rx_csum(struct net_device *dev, + netdev_features_t wanted) { struct bcmgenet_priv *priv = netdev_priv(dev); u32 rbuf_chk_ctrl; @@ -521,7 +521,7 @@ static int bcmgenet_set_rx_csum(struct net_device *dev, /* enable rx checksumming */ if (rx_csum_en) - rbuf_chk_ctrl |= RBUF_RXCHK_EN; + rbuf_chk_ctrl |= RBUF_RXCHK_EN | RBUF_L3_PARSE_DIS; else rbuf_chk_ctrl &= ~RBUF_RXCHK_EN; priv->desc_rxchk_en = rx_csum_en; @@ -535,12 +535,10 @@ static int bcmgenet_set_rx_csum(struct net_device *dev, rbuf_chk_ctrl &= ~RBUF_SKIP_FCS; bcmgenet_rbuf_writel(priv, rbuf_chk_ctrl, RBUF_CHK_CTRL); - - return 0; } -static int bcmgenet_set_tx_csum(struct net_device *dev, - netdev_features_t wanted) +static void bcmgenet_set_tx_csum(struct net_device *dev, + netdev_features_t wanted) { struct bcmgenet_priv *priv = netdev_priv(dev); bool desc_64b_en; @@ -549,7 +547,7 @@ static int bcmgenet_set_tx_csum(struct net_device *dev, tbuf_ctrl = bcmgenet_tbuf_ctrl_get(priv); rbuf_ctrl = bcmgenet_rbuf_readl(priv, RBUF_CTRL); - desc_64b_en = !!(wanted & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)); + desc_64b_en = !!(wanted & NETIF_F_HW_CSUM); /* enable 64 bytes descriptor in both directions (RBUF and TBUF) */ if (desc_64b_en) { @@ -563,21 +561,27 @@ static int bcmgenet_set_tx_csum(struct net_device *dev, bcmgenet_tbuf_ctrl_set(priv, tbuf_ctrl); bcmgenet_rbuf_writel(priv, rbuf_ctrl, RBUF_CTRL); - - return 0; } static int bcmgenet_set_features(struct net_device *dev, netdev_features_t features) { - netdev_features_t changed = features ^ dev->features; - netdev_features_t wanted = dev->wanted_features; - int ret = 0; + struct bcmgenet_priv *priv = netdev_priv(dev); + u32 reg; + int ret; - if (changed & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) - ret = bcmgenet_set_tx_csum(dev, wanted); - if (changed & (NETIF_F_RXCSUM)) - ret = bcmgenet_set_rx_csum(dev, wanted); 
+ ret = clk_prepare_enable(priv->clk); + if (ret) + return ret; + + /* Make sure we reflect the value of CRC_CMD_FWD */ + reg = bcmgenet_umac_readl(priv, UMAC_CMD); + priv->crc_fwd_en = !!(reg & CMD_CRC_FWD); + + bcmgenet_set_tx_csum(dev, features); + bcmgenet_set_rx_csum(dev, features); + + clk_disable_unprepare(priv->clk); return ret; } @@ -857,6 +861,9 @@ static const struct bcmgenet_stats bcmgenet_gstrings_stats[] = { STAT_GENET_SOFT_MIB("alloc_rx_buff_failed", mib.alloc_rx_buff_failed), STAT_GENET_SOFT_MIB("rx_dma_failed", mib.rx_dma_failed), STAT_GENET_SOFT_MIB("tx_dma_failed", mib.tx_dma_failed), + STAT_GENET_SOFT_MIB("tx_realloc_tsb", mib.tx_realloc_tsb), + STAT_GENET_SOFT_MIB("tx_realloc_tsb_failed", + mib.tx_realloc_tsb_failed), /* Per TX queues */ STAT_GENET_Q(0), STAT_GENET_Q(1), @@ -1483,6 +1490,7 @@ static void bcmgenet_tx_reclaim_all(struct net_device *dev) static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev, struct sk_buff *skb) { + struct bcmgenet_priv *priv = netdev_priv(dev); struct status_64 *status = NULL; struct sk_buff *new_skb; u16 offset; @@ -1495,12 +1503,15 @@ static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev, * enough headroom for us to insert 64B status block. 
*/ new_skb = skb_realloc_headroom(skb, sizeof(*status)); - dev_kfree_skb(skb); if (!new_skb) { + dev_kfree_skb_any(skb); + priv->mib.tx_realloc_tsb_failed++; dev->stats.tx_dropped++; return NULL; } + dev_consume_skb_any(skb); skb = new_skb; + priv->mib.tx_realloc_tsb++; } skb_push(skb, sizeof(*status)); @@ -1516,24 +1527,19 @@ static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev, ip_proto = ipv6_hdr(skb)->nexthdr; break; default: - return skb; + /* don't use UDP flag */ + ip_proto = 0; + break; } offset = skb_checksum_start_offset(skb) - sizeof(*status); tx_csum_info = (offset << STATUS_TX_CSUM_START_SHIFT) | - (offset + skb->csum_offset); + (offset + skb->csum_offset) | + STATUS_TX_CSUM_LV; - /* Set the length valid bit for TCP and UDP and just set - * the special UDP flag for IPv4, else just set to 0. - */ - if (ip_proto == IPPROTO_TCP || ip_proto == IPPROTO_UDP) { - tx_csum_info |= STATUS_TX_CSUM_LV; - if (ip_proto == IPPROTO_UDP && - ip_ver == htons(ETH_P_IP)) - tx_csum_info |= STATUS_TX_CSUM_PROTO_UDP; - } else { - tx_csum_info = 0; - } + /* Set the special UDP flag for UDP */ + if (ip_proto == IPPROTO_UDP) + tx_csum_info |= STATUS_TX_CSUM_PROTO_UDP; status->tx_csum_info = tx_csum_info; } @@ -1744,7 +1750,6 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, unsigned int bytes_processed = 0; unsigned int p_index, mask; unsigned int discards; - unsigned int chksum_ok = 0; /* Clear status before servicing to reduce spurious interrupts */ if (ring->index == DESC_INDEX) { @@ -1795,9 +1800,15 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, dmadesc_get_length_status(priv, cb->bd_addr); } else { struct status_64 *status; + __be16 rx_csum; status = (struct status_64 *)skb->data; dma_length_status = status->length_status; + rx_csum = (__force __be16)(status->rx_csum & 0xffff); + if (priv->desc_rxchk_en) { + skb->csum = (__force __wsum)ntohs(rx_csum); + skb->ip_summed = CHECKSUM_COMPLETE; + } } /* DMA flags and 
length are still valid no matter how @@ -1840,18 +1851,12 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, goto next; } /* error packet */ - chksum_ok = (dma_flag & priv->dma_rx_chk_bit) && - priv->desc_rxchk_en; - skb_put(skb, len); if (priv->desc_64b_en) { skb_pull(skb, 64); len -= 64; } - if (likely(chksum_ok)) - skb->ip_summed = CHECKSUM_UNNECESSARY; - /* remove hardware 2bytes added for IP alignment */ skb_pull(skb, 2); len -= 2; @@ -2886,9 +2891,10 @@ static int bcmgenet_open(struct net_device *dev) init_umac(priv); - /* Make sure we reflect the value of CRC_CMD_FWD */ - reg = bcmgenet_umac_readl(priv, UMAC_CMD); - priv->crc_fwd_en = !!(reg & CMD_CRC_FWD); + /* Apply features again in case we changed them while interface was + * down + */ + bcmgenet_set_features(dev, dev->features); bcmgenet_set_hw_addr(priv, dev->dev_addr); @@ -3055,7 +3061,7 @@ static void bcmgenet_dump_tx_queue(struct bcmgenet_tx_ring *ring) ring->cb_ptr, ring->end_ptr); } -static void bcmgenet_timeout(struct net_device *dev) +static void bcmgenet_timeout(struct net_device *dev, unsigned int txqueue) { struct bcmgenet_priv *priv = netdev_priv(dev); u32 int0_enable = 0; @@ -3327,19 +3333,15 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv) if (GENET_IS_V5(priv) || GENET_IS_V4(priv)) { bcmgenet_dma_regs = bcmgenet_dma_regs_v3plus; genet_dma_ring_regs = genet_dma_ring_regs_v4; - priv->dma_rx_chk_bit = DMA_RX_CHK_V3PLUS; } else if (GENET_IS_V3(priv)) { bcmgenet_dma_regs = bcmgenet_dma_regs_v3plus; genet_dma_ring_regs = genet_dma_ring_regs_v123; - priv->dma_rx_chk_bit = DMA_RX_CHK_V3PLUS; } else if (GENET_IS_V2(priv)) { bcmgenet_dma_regs = bcmgenet_dma_regs_v2; genet_dma_ring_regs = genet_dma_ring_regs_v123; - priv->dma_rx_chk_bit = DMA_RX_CHK_V12; } else if (GENET_IS_V1(priv)) { bcmgenet_dma_regs = bcmgenet_dma_regs_v1; genet_dma_ring_regs = genet_dma_ring_regs_v123; - priv->dma_rx_chk_bit = DMA_RX_CHK_V12; } /* enum genet_version starts at 1 */ @@ -3535,9 
+3537,11 @@ static int bcmgenet_probe(struct platform_device *pdev) priv->msg_enable = netif_msg_init(-1, GENET_MSG_DEFAULT); - /* Set hardware features */ - dev->hw_features |= NETIF_F_SG | NETIF_F_IP_CSUM | - NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM; + /* Set default features */ + dev->features |= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | + NETIF_F_RXCSUM; + dev->hw_features |= dev->features; + dev->vlan_features |= dev->features; /* Request the WOL interrupt and advertise suspend if available */ priv->wol_irq_disabled = true; @@ -3574,6 +3578,14 @@ static int bcmgenet_probe(struct platform_device *pdev) bcmgenet_set_hw_params(priv); + err = -EIO; + if (priv->hw_params->flags & GENET_HAS_40BITS) + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + if (err) + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) + goto err; + /* Mii wait queue */ init_waitqueue_head(&priv->wq); /* Always use RX_BUF_LENGTH (2KB) buffer for all chips */ @@ -3689,6 +3701,9 @@ static int bcmgenet_resume(struct device *d) genphy_config_aneg(dev->phydev); bcmgenet_mii_config(priv->dev, false); + /* Restore enabled features */ + bcmgenet_set_features(dev, dev->features); + bcmgenet_set_hw_addr(priv, dev->dev_addr); if (priv->internal_phy) { diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index a5659197598f..61a6fe9f4cec 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -144,6 +144,8 @@ struct bcmgenet_mib_counters { u32 alloc_rx_buff_failed; u32 rx_dma_failed; u32 tx_dma_failed; + u32 tx_realloc_tsb; + u32 tx_realloc_tsb_failed; }; #define UMAC_HD_BKP_CTRL 0x004 @@ -251,6 +253,7 @@ struct bcmgenet_mib_counters { #define RBUF_CHK_CTRL 0x14 #define RBUF_RXCHK_EN (1 << 0) #define RBUF_SKIP_FCS (1 << 4) +#define RBUF_L3_PARSE_DIS (1 << 5) #define RBUF_ENERGY_CTRL 0x9c #define RBUF_EEE_EN (1 << 0) @@ -663,7 +666,6 @@ struct 
bcmgenet_priv { bool desc_rxchk_en; bool crc_fwd_en; - unsigned int dma_rx_chk_bit; u32 dma_max_burst_length; u32 msg_enable; diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index 1604ad32e920..80ff52527233 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -294,7 +294,7 @@ static int sbmac_set_duplex(struct sbmac_softc *s, enum sbmac_duplex duplex, enum sbmac_fc fc); static int sbmac_open(struct net_device *dev); -static void sbmac_tx_timeout (struct net_device *dev); +static void sbmac_tx_timeout (struct net_device *dev, unsigned int txqueue); static void sbmac_set_rx_mode(struct net_device *dev); static int sbmac_mii_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static int sbmac_close(struct net_device *dev); @@ -2419,7 +2419,7 @@ static void sbmac_mii_poll(struct net_device *dev) } -static void sbmac_tx_timeout (struct net_device *dev) +static void sbmac_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct sbmac_softc *sc = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index ca3aa1250dd1..88466255bf66 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -7645,7 +7645,7 @@ static void tg3_poll_controller(struct net_device *dev) } #endif -static void tg3_tx_timeout(struct net_device *dev) +static void tg3_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct tg3 *tp = netdev_priv(dev); @@ -7874,8 +7874,8 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *); static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi, struct netdev_queue *txq, struct sk_buff *skb) { - struct sk_buff *segs, *nskb; u32 frag_cnt_est = skb_shinfo(skb)->gso_segs * 3; + struct sk_buff *segs, *seg, *next; /* Estimate the number of fragments in the worst case */ if (unlikely(tg3_tx_avail(tnapi) <= 
frag_cnt_est)) { @@ -7898,12 +7898,10 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi, if (IS_ERR(segs) || !segs) goto tg3_tso_bug_end; - do { - nskb = segs; - segs = segs->next; - nskb->next = NULL; - tg3_start_xmit(nskb, tp->dev); - } while (segs); + skb_list_walk_safe(segs, seg, next) { + skb_mark_not_on_list(seg); + tg3_start_xmit(seg, tp->dev); + } tg3_tso_bug_end: dev_consume_skb_any(skb); diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c index 4042c2185e98..e17bfc87da90 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c +++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c @@ -1124,11 +1124,10 @@ bfa_nw_ioc_sem_release(void __iomem *sem_reg) static void bfa_ioc_fwver_clear(struct bfa_ioc *ioc) { - u32 pgnum, pgoff, loff = 0; + u32 pgnum, loff = 0; int i; pgnum = PSS_SMEM_PGNUM(ioc->ioc_regs.smem_pg0, loff); - pgoff = PSS_SMEM_PGOFF(loff); writel(pgnum, ioc->ioc_regs.host_page_num_fn); for (i = 0; i < (sizeof(struct bfi_ioc_image_hdr) / sizeof(u32)); i++) { diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 19fe4f4867c7..dbf7070fcdba 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -630,10 +630,17 @@ #define GEM_CLK_DIV96 5 /* Constants for MAN register */ -#define MACB_MAN_SOF 1 -#define MACB_MAN_WRITE 1 -#define MACB_MAN_READ 2 -#define MACB_MAN_CODE 2 +#define MACB_MAN_C22_SOF 1 +#define MACB_MAN_C22_WRITE 1 +#define MACB_MAN_C22_READ 2 +#define MACB_MAN_C22_CODE 2 + +#define MACB_MAN_C45_SOF 0 +#define MACB_MAN_C45_ADDR 0 +#define MACB_MAN_C45_WRITE 1 +#define MACB_MAN_C45_POST_READ_INCR 2 +#define MACB_MAN_C45_READ 3 +#define MACB_MAN_C45_CODE 2 /* Capability mask bits */ #define MACB_CAPS_ISR_CLEAR_ON_WRITE 0x00000001 diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index f7d87c71aaa9..7a2fe63d1136 100644 --- a/drivers/net/ethernet/cadence/macb_main.c 
+++ b/drivers/net/ethernet/cadence/macb_main.c @@ -337,11 +337,30 @@ static int macb_mdio_read(struct mii_bus *bus, int mii_id, int regnum) if (status < 0) goto mdio_read_exit; - macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_SOF) - | MACB_BF(RW, MACB_MAN_READ) - | MACB_BF(PHYA, mii_id) - | MACB_BF(REGA, regnum) - | MACB_BF(CODE, MACB_MAN_CODE))); + if (regnum & MII_ADDR_C45) { + macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C45_SOF) + | MACB_BF(RW, MACB_MAN_C45_ADDR) + | MACB_BF(PHYA, mii_id) + | MACB_BF(REGA, (regnum >> 16) & 0x1F) + | MACB_BF(DATA, regnum & 0xFFFF) + | MACB_BF(CODE, MACB_MAN_C45_CODE))); + + status = macb_mdio_wait_for_idle(bp); + if (status < 0) + goto mdio_read_exit; + + macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C45_SOF) + | MACB_BF(RW, MACB_MAN_C45_READ) + | MACB_BF(PHYA, mii_id) + | MACB_BF(REGA, (regnum >> 16) & 0x1F) + | MACB_BF(CODE, MACB_MAN_C45_CODE))); + } else { + macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C22_SOF) + | MACB_BF(RW, MACB_MAN_C22_READ) + | MACB_BF(PHYA, mii_id) + | MACB_BF(REGA, regnum) + | MACB_BF(CODE, MACB_MAN_C22_CODE))); + } status = macb_mdio_wait_for_idle(bp); if (status < 0) @@ -370,12 +389,32 @@ static int macb_mdio_write(struct mii_bus *bus, int mii_id, int regnum, if (status < 0) goto mdio_write_exit; - macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_SOF) - | MACB_BF(RW, MACB_MAN_WRITE) - | MACB_BF(PHYA, mii_id) - | MACB_BF(REGA, regnum) - | MACB_BF(CODE, MACB_MAN_CODE) - | MACB_BF(DATA, value))); + if (regnum & MII_ADDR_C45) { + macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C45_SOF) + | MACB_BF(RW, MACB_MAN_C45_ADDR) + | MACB_BF(PHYA, mii_id) + | MACB_BF(REGA, (regnum >> 16) & 0x1F) + | MACB_BF(DATA, regnum & 0xFFFF) + | MACB_BF(CODE, MACB_MAN_C45_CODE))); + + status = macb_mdio_wait_for_idle(bp); + if (status < 0) + goto mdio_write_exit; + + macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C45_SOF) + | MACB_BF(RW, MACB_MAN_C45_WRITE) + | MACB_BF(PHYA, mii_id) + | MACB_BF(REGA, (regnum >> 16) & 0x1F) + | MACB_BF(CODE, 
MACB_MAN_C45_CODE) + | MACB_BF(DATA, value))); + } else { + macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C22_SOF) + | MACB_BF(RW, MACB_MAN_C22_WRITE) + | MACB_BF(PHYA, mii_id) + | MACB_BF(REGA, regnum) + | MACB_BF(CODE, MACB_MAN_C22_CODE) + | MACB_BF(DATA, value))); + } status = macb_mdio_wait_for_idle(bp); if (status < 0) diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index af04a2c81adb..05a3d067c3fc 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -1251,7 +1251,7 @@ static int xgmac_poll(struct napi_struct *napi, int budget) * netdev structure and arrange for the device to be reset to a sane state * in order to transmit a new packet. */ -static void xgmac_tx_timeout(struct net_device *dev) +static void xgmac_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct xgmac_priv *priv = netdev_priv(dev); schedule_work(&priv->tx_timeout_work); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 7f3b2e3b0868..eab05b5534ea 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -2562,7 +2562,7 @@ lio_xmit_failed: /** \brief Network device Tx timeout * @param netdev pointer to network device */ -static void liquidio_tx_timeout(struct net_device *netdev) +static void liquidio_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct lio *lio; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index 370d76822ee0..7a77544a54f5 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -1628,7 +1628,7 @@ lio_xmit_failed: /** \brief Network device Tx timeout * @param netdev pointer to network device */ -static void liquidio_tx_timeout(struct net_device *netdev) +static void liquidio_tx_timeout(struct net_device 
*netdev, unsigned int txqueue) { struct lio *lio; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c index f3f2e71431ac..600de587d7a9 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c @@ -31,7 +31,7 @@ static int lio_vf_rep_open(struct net_device *ndev); static int lio_vf_rep_stop(struct net_device *ndev); static netdev_tx_t lio_vf_rep_pkt_xmit(struct sk_buff *skb, struct net_device *ndev); -static void lio_vf_rep_tx_timeout(struct net_device *netdev); +static void lio_vf_rep_tx_timeout(struct net_device *netdev, unsigned int txqueue); static int lio_vf_rep_phys_port_name(struct net_device *dev, char *buf, size_t len); static void lio_vf_rep_get_stats64(struct net_device *dev, @@ -172,7 +172,7 @@ lio_vf_rep_stop(struct net_device *ndev) } static void -lio_vf_rep_tx_timeout(struct net_device *ndev) +lio_vf_rep_tx_timeout(struct net_device *ndev, unsigned int txqueue) { netif_trans_update(ndev); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index f28409279ea4..016957285f99 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1741,7 +1741,7 @@ static void nicvf_get_stats64(struct net_device *netdev, } -static void nicvf_tx_timeout(struct net_device *dev) +static void nicvf_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct nicvf *nic = netdev_priv(dev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index becee29f5df7..8b7d156f79d3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -56,6 +56,7 @@ #include <asm/io.h> #include "t4_chip_type.h" #include "cxgb4_uld.h" +#include "t4fw_api.h" #define CH_WARN(adap, fmt, ...) 
dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__) extern struct list_head adapter_list; @@ -68,6 +69,16 @@ extern struct mutex uld_mutex; #define ETHTXQ_STOP_THRES \ (1 + DIV_ROUND_UP((3 * MAX_SKB_FRAGS) / 2 + (MAX_SKB_FRAGS & 1), 8)) +#define FW_PARAM_DEV(param) \ + (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | \ + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_##param)) + +#define FW_PARAM_PFVF(param) \ + (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) | \ + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_##param) | \ + FW_PARAMS_PARAM_Y_V(0) | \ + FW_PARAMS_PARAM_Z_V(0)) + enum { MAX_NPORTS = 4, /* max # of ports */ SERNUM_LEN = 24, /* Serial # length */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index aca9f7a20a2a..ee3aab563b1d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -3175,14 +3175,12 @@ static const struct file_operations mem_debugfs_fops = { static int tid_info_show(struct seq_file *seq, void *v) { - unsigned int tid_start = 0; struct adapter *adap = seq->private; - const struct tid_info *t = &adap->tids; - enum chip_type chip = CHELSIO_CHIP_VERSION(adap->params.chip); - - if (chip > CHELSIO_T5) - tid_start = t4_read_reg(adap, LE_DB_ACTIVE_TABLE_START_INDEX_A); + const struct tid_info *t; + enum chip_type chip; + t = &adap->tids; + chip = CHELSIO_CHIP_VERSION(adap->params.chip); if (t4_read_reg(adap, LE_DB_CONFIG_A) & HASHEN_F) { unsigned int sb; seq_printf(seq, "Connections in use: %u\n", @@ -3194,9 +3192,9 @@ static int tid_info_show(struct seq_file *seq, void *v) sb = t4_read_reg(adap, LE_DB_SRVR_START_INDEX_A); if (sb) { - seq_printf(seq, "TID range: %u..%u/%u..%u", tid_start, + seq_printf(seq, "TID range: %u..%u/%u..%u", t->tid_base, sb - 1, adap->tids.hash_base, - t->ntids - 1); + t->tid_base + t->ntids - 1); seq_printf(seq, ", in use: %u/%u\n", atomic_read(&t->tids_in_use), atomic_read(&t->hash_tids_in_use)); @@ -3205,14 +3203,14 
@@ static int tid_info_show(struct seq_file *seq, void *v) t->aftid_base, t->aftid_end, adap->tids.hash_base, - t->ntids - 1); + t->tid_base + t->ntids - 1); seq_printf(seq, ", in use: %u/%u\n", atomic_read(&t->tids_in_use), atomic_read(&t->hash_tids_in_use)); } else { seq_printf(seq, "TID range: %u..%u", adap->tids.hash_base, - t->ntids - 1); + t->tid_base + t->ntids - 1); seq_printf(seq, ", in use: %u\n", atomic_read(&t->hash_tids_in_use)); } @@ -3220,8 +3218,8 @@ static int tid_info_show(struct seq_file *seq, void *v) seq_printf(seq, "Connections in use: %u\n", atomic_read(&t->conns_in_use)); - seq_printf(seq, "TID range: %u..%u", tid_start, - tid_start + t->ntids - 1); + seq_printf(seq, "TID range: %u..%u", t->tid_base, + t->tid_base + t->ntids - 1); seq_printf(seq, ", in use: %u\n", atomic_read(&t->tids_in_use)); } @@ -3244,6 +3242,9 @@ static int tid_info_show(struct seq_file *seq, void *v) seq_printf(seq, "SFTID range: %u..%u in use: %u\n", t->sftid_base, t->sftid_base + t->nsftids - 2, t->sftids_in_use); + if (t->nhpftids) + seq_printf(seq, "HPFTID range: %u..%u\n", t->hpftid_base, + t->hpftid_base + t->nhpftids - 1); if (t->ntids) seq_printf(seq, "HW TID usage: %u IP users, %u IPv6 users\n", t4_read_reg(adap, LE_DB_ACT_CNT_IPV4_A), diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 1d39fca11810..2a2938bbb93a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -361,20 +361,22 @@ static int get_filter_count(struct adapter *adapter, unsigned int fidx, tcb_base = t4_read_reg(adapter, TP_CMM_TCB_BASE_A); if (is_hashfilter(adapter) && hash) { - if (fidx < adapter->tids.ntids) { - f = adapter->tids.tid_tab[fidx]; - if (!f) - return -EINVAL; - } else { + if (tid_out_of_range(&adapter->tids, fidx)) return -E2BIG; - } + f = adapter->tids.tid_tab[fidx - adapter->tids.tid_base]; + if (!f) + return -EINVAL; } else { - if ((fidx != 
(adapter->tids.nftids + - adapter->tids.nsftids - 1)) && - fidx >= adapter->tids.nftids) + if ((fidx != (adapter->tids.nftids + adapter->tids.nsftids + + adapter->tids.nhpftids - 1)) && + fidx >= (adapter->tids.nftids + adapter->tids.nhpftids)) return -E2BIG; - f = &adapter->tids.ftid_tab[fidx]; + if (fidx < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[fidx]; + else + f = &adapter->tids.ftid_tab[fidx - + adapter->tids.nhpftids]; if (!f->valid) return -EINVAL; } @@ -480,6 +482,7 @@ int cxgb4_get_free_ftid(struct net_device *dev, int family) ftid -= n; } spin_unlock_bh(&t->ftid_lock); + ftid += t->nhpftids; return found ? ftid : -ENOMEM; } @@ -507,6 +510,24 @@ static int cxgb4_set_ftid(struct tid_info *t, int fidx, int family, return 0; } +static int cxgb4_set_hpftid(struct tid_info *t, int fidx, int family) +{ + spin_lock_bh(&t->ftid_lock); + + if (test_bit(fidx, t->hpftid_bmap)) { + spin_unlock_bh(&t->ftid_lock); + return -EBUSY; + } + + if (family == PF_INET) + __set_bit(fidx, t->hpftid_bmap); + else + bitmap_allocate_region(t->hpftid_bmap, fidx, 1); + + spin_unlock_bh(&t->ftid_lock); + return 0; +} + static void cxgb4_clear_ftid(struct tid_info *t, int fidx, int family, unsigned int chip_ver) { @@ -522,33 +543,58 @@ static void cxgb4_clear_ftid(struct tid_info *t, int fidx, int family, spin_unlock_bh(&t->ftid_lock); } +static void cxgb4_clear_hpftid(struct tid_info *t, int fidx, int family) +{ + spin_lock_bh(&t->ftid_lock); + + if (family == PF_INET) + __clear_bit(fidx, t->hpftid_bmap); + else + bitmap_release_region(t->hpftid_bmap, fidx, 1); + + spin_unlock_bh(&t->ftid_lock); +} + bool cxgb4_filter_prio_in_range(struct net_device *dev, u32 idx, u32 prio) { + struct filter_entry *prev_fe, *next_fe, *tab; struct adapter *adap = netdev2adap(dev); - struct filter_entry *prev_fe, *next_fe; + u32 prev_ftid, next_ftid, max_tid; struct tid_info *t = &adap->tids; - u32 prev_ftid, next_ftid; + unsigned long *bmap; bool valid = true; + if (idx < t->nhpftids) { + 
bmap = t->hpftid_bmap; + tab = t->hpftid_tab; + max_tid = t->nhpftids; + } else { + idx -= t->nhpftids; + bmap = t->ftid_bmap; + tab = t->ftid_tab; + max_tid = t->nftids; + } + /* Only insert the rule if both of the following conditions * are met: * 1. The immediate previous rule has priority <= @prio. * 2. The immediate next rule has priority >= @prio. */ spin_lock_bh(&t->ftid_lock); + /* Don't insert if there's a rule already present at @idx. */ - if (test_bit(idx, t->ftid_bmap)) { + if (test_bit(idx, bmap)) { valid = false; goto out_unlock; } - next_ftid = find_next_bit(t->ftid_bmap, t->nftids, idx); - if (next_ftid >= t->nftids) + next_ftid = find_next_bit(bmap, max_tid, idx); + if (next_ftid >= max_tid) next_ftid = idx; - next_fe = &adap->tids.ftid_tab[next_ftid]; + next_fe = &tab[next_ftid]; - prev_ftid = find_last_bit(t->ftid_bmap, idx); + prev_ftid = find_last_bit(bmap, idx); if (prev_ftid >= idx) prev_ftid = idx; @@ -558,13 +604,13 @@ bool cxgb4_filter_prio_in_range(struct net_device *dev, u32 idx, u32 prio) * accordingly. */ if (CHELSIO_CHIP_VERSION(adap->params.chip) < CHELSIO_T6) { - prev_fe = &adap->tids.ftid_tab[prev_ftid & ~0x3]; + prev_fe = &tab[prev_ftid & ~0x3]; if (!prev_fe->fs.type) - prev_fe = &adap->tids.ftid_tab[prev_ftid]; + prev_fe = &tab[prev_ftid]; } else { - prev_fe = &adap->tids.ftid_tab[prev_ftid & ~0x1]; + prev_fe = &tab[prev_ftid & ~0x1]; if (!prev_fe->fs.type) - prev_fe = &adap->tids.ftid_tab[prev_ftid]; + prev_fe = &tab[prev_ftid]; } if ((prev_fe->valid && prio < prev_fe->fs.tc_prio) || @@ -579,11 +625,16 @@ out_unlock: /* Delete the filter at a specified index. 
*/ static int del_filter_wr(struct adapter *adapter, int fidx) { - struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; struct fw_filter_wr *fwr; + struct filter_entry *f; struct sk_buff *skb; unsigned int len; + if (fidx < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[fidx]; + else + f = &adapter->tids.ftid_tab[fidx - adapter->tids.nhpftids]; + len = sizeof(*fwr); skb = alloc_skb(len, GFP_KERNEL); @@ -609,10 +660,15 @@ static int del_filter_wr(struct adapter *adapter, int fidx) */ int set_filter_wr(struct adapter *adapter, int fidx) { - struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; struct fw_filter2_wr *fwr; + struct filter_entry *f; struct sk_buff *skb; + if (fidx < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[fidx]; + else + f = &adapter->tids.ftid_tab[fidx - adapter->tids.nhpftids]; + skb = alloc_skb(sizeof(*fwr), GFP_KERNEL); if (!skb) return -ENOMEM; @@ -762,10 +818,14 @@ int delete_filter(struct adapter *adapter, unsigned int fidx) struct filter_entry *f; int ret; - if (fidx >= adapter->tids.nftids + adapter->tids.nsftids) + if (fidx >= adapter->tids.nftids + adapter->tids.nsftids + + adapter->tids.nhpftids) return -EINVAL; - f = &adapter->tids.ftid_tab[fidx]; + if (fidx < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[fidx]; + else + f = &adapter->tids.ftid_tab[fidx - adapter->tids.nhpftids]; ret = writable_filter(f); if (ret) return ret; @@ -811,12 +871,22 @@ void clear_all_filters(struct adapter *adapter) struct net_device *dev = adapter->port[0]; unsigned int i; + if (adapter->tids.hpftid_tab) { + struct filter_entry *f = &adapter->tids.hpftid_tab[0]; + + for (i = 0; i < adapter->tids.nhpftids; i++, f++) + if (f->valid || f->pending) + cxgb4_del_filter(dev, i, &f->fs); + } + if (adapter->tids.ftid_tab) { struct filter_entry *f = &adapter->tids.ftid_tab[0]; unsigned int max_ftid = adapter->tids.nftids + - adapter->tids.nsftids; + adapter->tids.nsftids + + adapter->tids.nhpftids; + /* Clear all TCAM filters */ - 
for (i = 0; i < max_ftid; i++, f++) + for (i = adapter->tids.nhpftids; i < max_ftid; i++, f++) if (f->valid || f->pending) cxgb4_del_filter(dev, i, &f->fs); } @@ -1319,17 +1389,17 @@ out_err: * filter specification in order to facilitate signaling completion of the * operation. */ -int __cxgb4_set_filter(struct net_device *dev, int filter_id, +int __cxgb4_set_filter(struct net_device *dev, int ftid, struct ch_filter_specification *fs, struct filter_ctx *ctx) { struct adapter *adapter = netdev2adap(dev); - unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); - unsigned int max_fidx, fidx; - struct filter_entry *f; + unsigned int max_fidx, fidx, chip_ver; + int iq, ret, filter_id = ftid; + struct filter_entry *f, *tab; u32 iconf; - int iq, ret; + chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); if (fs->hash) { if (is_hashfilter(adapter)) return cxgb4_set_hash_filter(dev, fs, ctx); @@ -1338,7 +1408,7 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, return -EINVAL; } - max_fidx = adapter->tids.nftids; + max_fidx = adapter->tids.nftids + adapter->tids.nhpftids; if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && filter_id >= max_fidx) return -E2BIG; @@ -1353,6 +1423,13 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, if (iq < 0) return iq; + if (fs->prio) { + tab = &adapter->tids.hpftid_tab[0]; + } else { + tab = &adapter->tids.ftid_tab[0]; + filter_id = ftid - adapter->tids.nhpftids; + } + /* IPv6 filters occupy four slots and must be aligned on * four-slot boundaries. IPv4 filters only occupy a single * slot and have no alignment requirements but writing a new @@ -1373,9 +1450,8 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, else fidx = filter_id & ~0x1; - if (fidx != filter_id && - adapter->tids.ftid_tab[fidx].fs.type) { - f = &adapter->tids.ftid_tab[fidx]; + if (fidx != filter_id && tab[fidx].fs.type) { + f = &tab[fidx]; if (f->valid) { dev_err(adapter->pdev_dev, "Invalid location. 
IPv6 requires 4 slots and is occupying slots %u to %u\n", @@ -1399,7 +1475,7 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, */ for (fidx = filter_id + 1; fidx < filter_id + 4; fidx++) { - f = &adapter->tids.ftid_tab[fidx]; + f = &tab[fidx]; if (f->valid) { dev_err(adapter->pdev_dev, "Invalid location. IPv6 requires 4 slots and an IPv4 filter exists at %u\n", @@ -1415,7 +1491,7 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, return -EINVAL; /* Check overlapping IPv4 filter slot */ fidx = filter_id + 1; - f = &adapter->tids.ftid_tab[fidx]; + f = &tab[fidx]; if (f->valid) { pr_err("%s: IPv6 filter requires 2 indices. IPv4 filter already present at %d. Please remove IPv4 filter first.\n", __func__, fidx); @@ -1427,36 +1503,35 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, /* Check to make sure that provided filter index is not * already in use by someone else */ - f = &adapter->tids.ftid_tab[filter_id]; + f = &tab[filter_id]; if (f->valid) return -EBUSY; - fidx = filter_id + adapter->tids.ftid_base; - ret = cxgb4_set_ftid(&adapter->tids, filter_id, - fs->type ? PF_INET6 : PF_INET, - chip_ver); + if (fs->prio) { + fidx = filter_id + adapter->tids.hpftid_base; + ret = cxgb4_set_hpftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET); + } else { + fidx = filter_id + adapter->tids.ftid_base; + ret = cxgb4_set_ftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET, + chip_ver); + } + if (ret) return ret; /* Check t make sure the filter requested is writable ... */ ret = writable_filter(f); - if (ret) { - /* Clear the bits we have set above */ - cxgb4_clear_ftid(&adapter->tids, filter_id, - fs->type ? 
PF_INET6 : PF_INET, - chip_ver); - return ret; - } + if (ret) + goto free_tid; if (is_t6(adapter->params.chip) && fs->type && ipv6_addr_type((const struct in6_addr *)fs->val.lip) != IPV6_ADDR_ANY) { ret = cxgb4_clip_get(dev, (const u32 *)&fs->val.lip, 1); - if (ret) { - cxgb4_clear_ftid(&adapter->tids, filter_id, PF_INET6, - chip_ver); - return ret; - } + if (ret) + goto free_tid; } /* Convert the filter specification into our internal format. @@ -1487,7 +1562,7 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, f->fs.mask.vni, 0, 1, 1); if (ret < 0) - goto free_clip; + goto free_tid; f->fs.val.ovlan = ret; f->fs.mask.ovlan = 0x1ff; @@ -1501,21 +1576,22 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, */ f->ctx = ctx; f->tid = fidx; /* Save the actual tid */ - ret = set_filter_wr(adapter, filter_id); - if (ret) { + ret = set_filter_wr(adapter, ftid); + if (ret) + goto free_tid; + + return ret; + +free_tid: + if (f->fs.prio) + cxgb4_clear_hpftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET); + else cxgb4_clear_ftid(&adapter->tids, filter_id, fs->type ? PF_INET6 : PF_INET, chip_ver); - clear_filter(adapter, f); - } - - return ret; -free_clip: - if (is_t6(adapter->params.chip) && f->fs.type) - cxgb4_clip_release(f->dev, (const u32 *)&f->fs.val.lip, 1); - cxgb4_clear_ftid(&adapter->tids, filter_id, - fs->type ? 
PF_INET6 : PF_INET, chip_ver); + clear_filter(adapter, f); return ret; } @@ -1537,7 +1613,7 @@ static int cxgb4_del_hash_filter(struct net_device *dev, int filter_id, netdev_dbg(dev, "%s: filter_id = %d ; nftids = %d\n", __func__, filter_id, adapter->tids.nftids); - if (filter_id > adapter->tids.ntids) + if (tid_out_of_range(t, filter_id)) return -E2BIG; f = lookup_tid(t, filter_id); @@ -1590,11 +1666,11 @@ int __cxgb4_del_filter(struct net_device *dev, int filter_id, struct filter_ctx *ctx) { struct adapter *adapter = netdev2adap(dev); - unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); + unsigned int max_fidx, chip_ver; struct filter_entry *f; - unsigned int max_fidx; int ret; + chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); if (fs && fs->hash) { if (is_hashfilter(adapter)) return cxgb4_del_hash_filter(dev, filter_id, ctx); @@ -1603,21 +1679,31 @@ int __cxgb4_del_filter(struct net_device *dev, int filter_id, return -EINVAL; } - max_fidx = adapter->tids.nftids; + max_fidx = adapter->tids.nftids + adapter->tids.nhpftids; if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && filter_id >= max_fidx) return -E2BIG; - f = &adapter->tids.ftid_tab[filter_id]; + if (filter_id < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[filter_id]; + else + f = &adapter->tids.ftid_tab[filter_id - adapter->tids.nhpftids]; + ret = writable_filter(f); if (ret) return ret; if (f->valid) { f->ctx = ctx; - cxgb4_clear_ftid(&adapter->tids, filter_id, - f->fs.type ? PF_INET6 : PF_INET, - chip_ver); + if (f->fs.prio) + cxgb4_clear_hpftid(&adapter->tids, + f->tid - adapter->tids.hpftid_base, + f->fs.type ? PF_INET6 : PF_INET); + else + cxgb4_clear_ftid(&adapter->tids, + f->tid - adapter->tids.ftid_base, + f->fs.type ? 
PF_INET6 : PF_INET, + chip_ver); return del_filter_wr(adapter, filter_id); } @@ -1842,11 +1928,18 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) max_fidx = adap->tids.nftids + adap->tids.nsftids; /* Get the corresponding filter entry for this tid */ if (adap->tids.ftid_tab) { - /* Check this in normal filter region */ - idx = tid - adap->tids.ftid_base; - if (idx >= max_fidx) - return; - f = &adap->tids.ftid_tab[idx]; + idx = tid - adap->tids.hpftid_base; + if (idx < adap->tids.nhpftids) { + f = &adap->tids.hpftid_tab[idx]; + } else { + /* Check this in normal filter region */ + idx = tid - adap->tids.ftid_base; + if (idx >= max_fidx) + return; + f = &adap->tids.ftid_tab[idx]; + idx += adap->tids.nhpftids; + } + if (f->tid != tid) return; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 0dedd3e9c31e..649842a8aa28 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -804,6 +804,26 @@ static int setup_ppod_edram(struct adapter *adap) return 0; } +static void adap_config_hpfilter(struct adapter *adapter) +{ + u32 param, val = 0; + int ret; + + /* Enable HP filter region. Older fw will fail this request and + * it is fine. + */ + param = FW_PARAM_DEV(HPFILTER_REGION_SUPPORT); + ret = t4_set_params(adapter, adapter->mbox, adapter->pf, 0, + 1, &param, &val); + + /* An error means FW doesn't know about HP filter support, + * it's not a problem, don't return an error. 
+ */ + if (ret < 0) + dev_err(adapter->pdev_dev, + "HP filter region isn't supported by FW\n"); +} + /** * cxgb4_write_rss - write the RSS table for a given port * @pi: the port @@ -1427,8 +1447,8 @@ static void mk_tid_release(struct sk_buff *skb, unsigned int chan, static void cxgb4_queue_tid_release(struct tid_info *t, unsigned int chan, unsigned int tid) { - void **p = &t->tid_tab[tid]; struct adapter *adap = container_of(t, struct adapter, tids); + void **p = &t->tid_tab[tid - t->tid_base]; spin_lock_bh(&adap->tid_release_lock); *p = adap->tid_release_head; @@ -1480,13 +1500,13 @@ static void process_tid_release_list(struct work_struct *work) void cxgb4_remove_tid(struct tid_info *t, unsigned int chan, unsigned int tid, unsigned short family) { - struct sk_buff *skb; struct adapter *adap = container_of(t, struct adapter, tids); + struct sk_buff *skb; - WARN_ON(tid >= t->ntids); + WARN_ON(tid_out_of_range(&adap->tids, tid)); - if (t->tid_tab[tid]) { - t->tid_tab[tid] = NULL; + if (t->tid_tab[tid - adap->tids.tid_base]) { + t->tid_tab[tid - adap->tids.tid_base] = NULL; atomic_dec(&t->conns_in_use); if (t->hash_base && (tid >= t->hash_base)) { if (family == AF_INET6) @@ -1518,6 +1538,7 @@ static int tid_init(struct tid_info *t) struct adapter *adap = container_of(t, struct adapter, tids); unsigned int max_ftids = t->nftids + t->nsftids; unsigned int natids = t->natids; + unsigned int hpftid_bmap_size; unsigned int eotid_bmap_size; unsigned int stid_bmap_size; unsigned int ftid_bmap_size; @@ -1525,12 +1546,15 @@ static int tid_init(struct tid_info *t) stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids); ftid_bmap_size = BITS_TO_LONGS(t->nftids); + hpftid_bmap_size = BITS_TO_LONGS(t->nhpftids); eotid_bmap_size = BITS_TO_LONGS(t->neotids); size = t->ntids * sizeof(*t->tid_tab) + natids * sizeof(*t->atid_tab) + t->nstids * sizeof(*t->stid_tab) + t->nsftids * sizeof(*t->stid_tab) + stid_bmap_size * sizeof(long) + + t->nhpftids * sizeof(*t->hpftid_tab) + + 
hpftid_bmap_size * sizeof(long) + max_ftids * sizeof(*t->ftid_tab) + ftid_bmap_size * sizeof(long) + t->neotids * sizeof(*t->eotid_tab) + @@ -1543,7 +1567,9 @@ static int tid_init(struct tid_info *t) t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids]; t->stid_tab = (struct serv_entry *)&t->atid_tab[natids]; t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids]; - t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size]; + t->hpftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size]; + t->hpftid_bmap = (unsigned long *)&t->hpftid_tab[t->nhpftids]; + t->ftid_tab = (struct filter_entry *)&t->hpftid_bmap[hpftid_bmap_size]; t->ftid_bmap = (unsigned long *)&t->ftid_tab[max_ftids]; t->eotid_tab = (struct eotid_entry *)&t->ftid_bmap[ftid_bmap_size]; t->eotid_bmap = (unsigned long *)&t->eotid_tab[t->neotids]; @@ -1578,6 +1604,8 @@ static int tid_init(struct tid_info *t) bitmap_zero(t->eotid_bmap, t->neotids); } + if (t->nhpftids) + bitmap_zero(t->hpftid_bmap, t->nhpftids); bitmap_zero(t->ftid_bmap, t->nftids); return 0; } @@ -4359,6 +4387,7 @@ static int adap_init0_config(struct adapter *adapter, int reset) "HMA configuration failed with error %d\n", ret); if (is_t6(adapter->params.chip)) { + adap_config_hpfilter(adapter); ret = setup_ppod_edram(adapter); if (!ret) dev_info(adapter->pdev_dev, "Successfully enabled " @@ -4668,16 +4697,6 @@ static int adap_init0(struct adapter *adap, int vpd_skip) /* * Grab some of our basic fundamental operating parameters. 
*/ -#define FW_PARAM_DEV(param) \ - (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | \ - FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_##param)) - -#define FW_PARAM_PFVF(param) \ - FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) | \ - FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_##param)| \ - FW_PARAMS_PARAM_Y_V(0) | \ - FW_PARAMS_PARAM_Z_V(0) - params[0] = FW_PARAM_PFVF(EQ_START); params[1] = FW_PARAM_PFVF(L2T_START); params[2] = FW_PARAM_PFVF(L2T_END); @@ -4695,6 +4714,16 @@ static int adap_init0(struct adapter *adap, int vpd_skip) adap->sge.ingr_start = val[5]; if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) { + params[0] = FW_PARAM_PFVF(HPFILTER_START); + params[1] = FW_PARAM_PFVF(HPFILTER_END); + ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2, + params, val); + if (ret < 0) + goto bye; + + adap->tids.hpftid_base = val[0]; + adap->tids.nhpftids = val[1] - val[0] + 1; + /* Read the raw mps entries. In T6, the last 2 tcam entries * are reserved for raw mac addresses (rawf = 2, one per port). */ @@ -4706,6 +4735,9 @@ static int adap_init0(struct adapter *adap, int vpd_skip) adap->rawf_start = val[0]; adap->rawf_cnt = val[1] - val[0] + 1; } + + adap->tids.tid_base = + t4_read_reg(adap, LE_DB_ACTIVE_TABLE_START_INDEX_A); } /* qids (ingress/egress) returned from firmware can be anywhere @@ -5058,8 +5090,6 @@ static int adap_init0(struct adapter *adap, int vpd_skip) } adap->params.crypto = ntohs(caps_cmd.cryptocaps); } -#undef FW_PARAM_PFVF -#undef FW_PARAM_DEV /* The MTU/MSS Table is initialized by now, so load their values. If * we're initializing the adapter, then we'll make any modifications diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 0fa80bef575d..bb5513bdd293 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -672,10 +672,14 @@ int cxgb4_tc_flower_replace(struct net_device *dev, * 0 to driver. 
However, the hardware TCAM index * starts from 0. Hence, the -1 here. */ - if (cls->common.prio <= adap->tids.nftids) + if (cls->common.prio <= (adap->tids.nftids + + adap->tids.nhpftids)) { fidx = cls->common.prio - 1; - else + if (fidx < adap->tids.nhpftids) + fs->prio = 1; + } else { fidx = cxgb4_get_free_ftid(dev, inet_family); + } /* Only insert FLOWER rule if its priority doesn't * conflict with existing rules in the LETCAM. diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c index 6d485803ddbe..1b7681a4eb32 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c @@ -204,7 +204,7 @@ static int cxgb4_matchall_alloc_filter(struct net_device *dev, * -1 here. 1 slot is enough to create a wildcard matchall * VIID rule. */ - if (cls->common.prio <= adap->tids.nftids) + if (cls->common.prio <= (adap->tids.nftids + adap->tids.nhpftids)) fidx = cls->common.prio - 1; else fidx = cxgb4_get_free_ftid(dev, PF_INET); @@ -223,6 +223,8 @@ static int cxgb4_matchall_alloc_filter(struct net_device *dev, fs = &tc_port_matchall->ingress.fs; memset(fs, 0, sizeof(*fs)); + if (fidx < adap->tids.nhpftids) + fs->prio = 1; fs->tc_prio = cls->common.prio; fs->tc_cookie = cls->cookie; fs->hitcnts = 1; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index 133f8623ba86..269b8d9e25e0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -176,7 +176,7 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) /* Only insert U32 rule if its priority doesn't conflict with * existing rules in the LETCAM. 
*/ - if (filter_id >= adapter->tids.nftids || + if (filter_id >= adapter->tids.nftids + adapter->tids.nhpftids || !cxgb4_filter_prio_in_range(dev, filter_id, cls->common.prio)) { NL_SET_ERR_MSG_MOD(extack, "No free LETCAM index available"); @@ -199,6 +199,8 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) memset(&fs, 0, sizeof(fs)); + if (filter_id < adapter->tids.nhpftids) + fs.prio = 1; fs.tc_prio = cls->common.prio; fs.tc_cookie = cls->knode.handle; @@ -355,6 +357,7 @@ int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) unsigned int filter_id, max_tids, i, j; struct cxgb4_link *link = NULL; struct cxgb4_tc_u32_table *t; + struct filter_entry *f; u32 handle, uhtid; int ret; @@ -363,8 +366,15 @@ int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) /* Fetch the location to delete the filter. */ filter_id = TC_U32_NODE(cls->knode.handle) - 1; - if (filter_id >= adapter->tids.nftids || - cls->knode.handle != adapter->tids.ftid_tab[filter_id].fs.tc_cookie) + if (filter_id >= adapter->tids.nftids + adapter->tids.nhpftids) + return -ERANGE; + + if (filter_id < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[filter_id]; + else + f = &adapter->tids.ftid_tab[filter_id - adapter->tids.nhpftids]; + + if (cls->knode.handle != f->fs.tc_cookie) return -ERANGE; t = adapter->tc_u32; @@ -445,7 +455,7 @@ void cxgb4_cleanup_tc_u32(struct adapter *adap) struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap) { - unsigned int max_tids = adap->tids.nftids; + unsigned int max_tids = adap->tids.nftids + adap->tids.nhpftids; struct cxgb4_tc_u32_table *t; unsigned int i; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 861b25d28ed6..d9d27bc1ae67 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -99,6 +99,7 @@ struct eotid_entry { */ struct tid_info { void 
**tid_tab; + unsigned int tid_base; unsigned int ntids; struct serv_entry *stid_tab; @@ -111,6 +112,11 @@ struct tid_info { unsigned int natids; unsigned int atid_base; + struct filter_entry *hpftid_tab; + unsigned long *hpftid_bmap; + unsigned int nhpftids; + unsigned int hpftid_base; + struct filter_entry *ftid_tab; unsigned long *ftid_bmap; unsigned int nftids; @@ -147,9 +153,15 @@ struct tid_info { static inline void *lookup_tid(const struct tid_info *t, unsigned int tid) { + tid -= t->tid_base; return tid < t->ntids ? t->tid_tab[tid] : NULL; } +static inline bool tid_out_of_range(const struct tid_info *t, unsigned int tid) +{ + return ((tid - t->tid_base) >= t->ntids); +} + static inline void *lookup_atid(const struct tid_info *t, unsigned int atid) { return atid < t->natids ? t->atid_tab[atid].data : NULL; @@ -171,7 +183,7 @@ static inline void *lookup_stid(const struct tid_info *t, unsigned int stid) static inline void cxgb4_insert_tid(struct tid_info *t, void *data, unsigned int tid, unsigned short family) { - t->tid_tab[tid] = data; + t->tid_tab[tid - t->tid_base] = data; if (t->hash_base && (tid >= t->hash_base)) { if (family == AF_INET6) atomic_add(2, &t->hash_tids_in_use); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index ac4fb43bdec6..accad1101ad1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1321,6 +1321,7 @@ enum fw_params_param_dev { FW_PARAMS_PARAM_DEV_RDMA_WRITE_WITH_IMM = 0x21, FW_PARAMS_PARAM_DEV_PPOD_EDRAM = 0x23, FW_PARAMS_PARAM_DEV_RI_WRITE_CMPL_WR = 0x24, + FW_PARAMS_PARAM_DEV_HPFILTER_REGION_SUPPORT = 0x26, FW_PARAMS_PARAM_DEV_OPAQUE_VIID_SMT_EXTN = 0x27, FW_PARAMS_PARAM_DEV_HASHFILTER_WITH_OFLD = 0x28, FW_PARAMS_PARAM_DEV_DBQ_TIMER = 0x29, diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c index c9aebcde403a..33ace3307059 100644 --- a/drivers/net/ethernet/cirrus/cs89x0.c 
+++ b/drivers/net/ethernet/cirrus/cs89x0.c @@ -1128,7 +1128,7 @@ net_get_stats(struct net_device *dev) return &dev->stats; } -static void net_timeout(struct net_device *dev) +static void net_timeout(struct net_device *dev, unsigned int txqueue) { /* If we get here, some higher level has decided we are broken. There should really be a "kick me" function call instead. */ diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index acb2856936d2..bbd7b3175f09 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1095,7 +1095,7 @@ static void enic_set_rx_mode(struct net_device *netdev) } /* netif_tx_lock held, BHs disabled */ -static void enic_tx_timeout(struct net_device *netdev) +static void enic_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct enic *enic = netdev_priv(netdev); schedule_work(&enic->tx_hang_reset); diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 2814b96751b4..f30fa8e6ef80 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -1298,7 +1298,7 @@ out_drop: return NETDEV_TX_OK; } -static void gmac_tx_timeout(struct net_device *netdev) +static void gmac_tx_timeout(struct net_device *netdev, unsigned int txqueue) { netdev_err(netdev, "Tx timeout\n"); gmac_dump_dma_state(netdev); diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index cce90b5925d9..1ea3372775e6 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -964,7 +964,7 @@ dm9000_init_dm9000(struct net_device *dev) } /* Our watchdog timed out. 
Called by the networking layer */ -static void dm9000_timeout(struct net_device *dev) +static void dm9000_timeout(struct net_device *dev, unsigned int txqueue) { struct board_info *db = netdev_priv(dev); u8 reg_save; diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c index f1a2da15dd0a..fd3c2abf74b5 100644 --- a/drivers/net/ethernet/dec/tulip/de2104x.c +++ b/drivers/net/ethernet/dec/tulip/de2104x.c @@ -1436,7 +1436,7 @@ static int de_close (struct net_device *dev) return 0; } -static void de_tx_timeout (struct net_device *dev) +static void de_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct de_private *de = netdev_priv(dev); const int irq = de->pdev->irq; diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c index 0efdbd1a4a6f..32d470d4122a 100644 --- a/drivers/net/ethernet/dec/tulip/dmfe.c +++ b/drivers/net/ethernet/dec/tulip/dmfe.c @@ -2214,15 +2214,16 @@ static int __init dmfe_init_module(void) if (cr6set) dmfe_cr6_user_set = cr6set; - switch(mode) { - case DMFE_10MHF: + switch (mode) { + case DMFE_10MHF: case DMFE_100MHF: case DMFE_10MFD: case DMFE_100MFD: case DMFE_1M_HPNA: dmfe_media_mode = mode; break; - default:dmfe_media_mode = DMFE_AUTO; + default: + dmfe_media_mode = DMFE_AUTO; break; } diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index 3e3e08698876..9e9d9eee29d9 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -255,7 +255,7 @@ MODULE_DEVICE_TABLE(pci, tulip_pci_tbl); const char tulip_media_cap[32] = {0,0,0,16, 3,19,16,24, 27,4,7,5, 0,20,23,20, 28,31,0,0, }; -static void tulip_tx_timeout(struct net_device *dev); +static void tulip_tx_timeout(struct net_device *dev, unsigned int txqueue); static void tulip_init_ring(struct net_device *dev); static void tulip_free_ring(struct net_device *dev); static netdev_tx_t tulip_start_xmit(struct 
sk_buff *skb, @@ -534,7 +534,7 @@ free_ring: } -static void tulip_tx_timeout(struct net_device *dev) +static void tulip_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct tulip_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp->base_addr; diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c index b1f30b194300..117ffe08800d 100644 --- a/drivers/net/ethernet/dec/tulip/uli526x.c +++ b/drivers/net/ethernet/dec/tulip/uli526x.c @@ -1809,8 +1809,8 @@ static int __init uli526x_init_module(void) if (cr6set) uli526x_cr6_user_set = cr6set; - switch (mode) { - case ULI526X_10MHF: + switch (mode) { + case ULI526X_10MHF: case ULI526X_100MHF: case ULI526X_10MFD: case ULI526X_100MFD: diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index 70cb2d689c2c..7f136488e67c 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -331,7 +331,7 @@ static void netdev_timer(struct timer_list *t); static void init_rxtx_rings(struct net_device *dev); static void free_rxtx_rings(struct netdev_private *np); static void init_registers(struct net_device *dev); -static void tx_timeout(struct net_device *dev); +static void tx_timeout(struct net_device *dev, unsigned int txqueue); static int alloc_ringdesc(struct net_device *dev); static void free_ringdesc(struct netdev_private *np); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); @@ -921,7 +921,7 @@ static void init_registers(struct net_device *dev) iowrite32(0, ioaddr + RxStartDemand); } -static void tx_timeout(struct net_device *dev) +static void tx_timeout(struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->base_addr; diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index 55e720d2ea0c..26c5da032b1e 100644 --- a/drivers/net/ethernet/dlink/dl2k.c 
+++ b/drivers/net/ethernet/dlink/dl2k.c @@ -66,7 +66,7 @@ static const int multicast_filter_limit = 0x40; static int rio_open (struct net_device *dev); static void rio_timer (struct timer_list *t); -static void rio_tx_timeout (struct net_device *dev); +static void rio_tx_timeout (struct net_device *dev, unsigned int txqueue); static netdev_tx_t start_xmit (struct sk_buff *skb, struct net_device *dev); static irqreturn_t rio_interrupt (int irq, void *dev_instance); static void rio_free_tx (struct net_device *dev, int irq); @@ -696,7 +696,7 @@ rio_timer (struct timer_list *t) } static void -rio_tx_timeout (struct net_device *dev) +rio_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->ioaddr; diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index 4a37a69764ce..b91387c456ba 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -432,7 +432,7 @@ static int mdio_wait_link(struct net_device *dev, int wait); static int netdev_open(struct net_device *dev); static void check_duplex(struct net_device *dev); static void netdev_timer(struct timer_list *t); -static void tx_timeout(struct net_device *dev); +static void tx_timeout(struct net_device *dev, unsigned int txqueue); static void init_ring(struct net_device *dev); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); static int reset_tx (struct net_device *dev); @@ -969,7 +969,7 @@ static void netdev_timer(struct timer_list *t) add_timer(&np->timer); } -static void tx_timeout(struct net_device *dev) +static void tx_timeout(struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->base; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 39eb7d525043..56f59db6ebf2 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c 
+++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1417,7 +1417,7 @@ drop: return NETDEV_TX_OK; } -static void be_tx_timeout(struct net_device *netdev) +static void be_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct be_adapter *adapter = netdev_priv(netdev); struct device *dev = &adapter->pdev->dev; diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index ea4f17f5cce7..66406da16b60 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -869,7 +869,7 @@ static int ethoc_change_mtu(struct net_device *dev, int new_mtu) return -ENOSYS; } -static void ethoc_tx_timeout(struct net_device *dev) +static void ethoc_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ethoc *priv = netdev_priv(dev); u32 pending = ethoc_read(priv, INT_SOURCE); diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 8ed85037f021..48b3b72fe02e 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1545,7 +1545,7 @@ static int ftgmac100_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int return phy_mii_ioctl(netdev->phydev, ifr, cmd); } -static void ftgmac100_tx_timeout(struct net_device *netdev) +static void ftgmac100_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ftgmac100 *priv = netdev_priv(netdev); diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index c24fd56a2c71..84f10970299a 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -428,7 +428,7 @@ static void getlinktype(struct net_device *dev); static void getlinkstatus(struct net_device *dev); static void netdev_timer(struct timer_list *t); static void reset_timer(struct timer_list *t); -static void fealnx_tx_timeout(struct net_device *dev); +static void fealnx_tx_timeout(struct net_device *dev, unsigned int txqueue); static void init_ring(struct net_device *dev); static 
netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); static irqreturn_t intr_handler(int irq, void *dev_instance); @@ -1191,7 +1191,7 @@ static void reset_timer(struct timer_list *t) } -static void fealnx_tx_timeout(struct net_device *dev) +static void fealnx_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->mem; diff --git a/drivers/net/ethernet/freescale/Makefile b/drivers/net/ethernet/freescale/Makefile index 6a93293d31e0..67c436400352 100644 --- a/drivers/net/ethernet/freescale/Makefile +++ b/drivers/net/ethernet/freescale/Makefile @@ -25,4 +25,5 @@ obj-$(CONFIG_FSL_DPAA_ETH) += dpaa/ obj-$(CONFIG_FSL_DPAA2_ETH) += dpaa2/ obj-$(CONFIG_FSL_ENETC) += enetc/ +obj-$(CONFIG_FSL_ENETC_MDIO) += enetc/ obj-$(CONFIG_FSL_ENETC_VF) += enetc/ diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index a301f0095223..09dbcd819d84 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -288,7 +288,7 @@ static int dpaa_stop(struct net_device *net_dev) return err; } -static void dpaa_tx_timeout(struct net_device *net_dev) +static void dpaa_tx_timeout(struct net_device *net_dev, unsigned int txqueue) { struct dpaa_percpu_priv *percpu_priv; const struct dpaa_priv *priv; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c index 6437fe6b9abf..cc1b7f85e433 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c @@ -27,6 +27,20 @@ static int dpaa2_ptp_enable(struct ptp_clock_info *ptp, mc_dev = to_fsl_mc_device(dev); switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + switch (rq->extts.index) { + case 0: + bit = DPRTC_EVENT_ETS1; + break; + case 1: + bit = DPRTC_EVENT_ETS2; + break; + default: + return -EINVAL; + } + if (on) + extts_clean_up(ptp_qoriq, 
rq->extts.index, false); + break; case PTP_CLK_REQ_PPS: bit = DPRTC_EVENT_PPS; break; @@ -96,6 +110,12 @@ static irqreturn_t dpaa2_ptp_irq_handler_thread(int irq, void *priv) ptp_clock_event(ptp_qoriq->clock, &event); } + if (status & DPRTC_EVENT_ETS1) + extts_clean_up(ptp_qoriq, 0, true); + + if (status & DPRTC_EVENT_ETS2) + extts_clean_up(ptp_qoriq, 1, true); + err = dprtc_clear_irq_status(mc_dev->mc_io, 0, mc_dev->mc_handle, DPRTC_IRQ_INDEX, status); if (unlikely(err)) { diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h index 4ac05bfef338..96ffeb948f08 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h +++ b/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h @@ -9,9 +9,11 @@ /* Command versioning */ #define DPRTC_CMD_BASE_VERSION 1 +#define DPRTC_CMD_VERSION_2 2 #define DPRTC_CMD_ID_OFFSET 4 #define DPRTC_CMD(id) (((id) << DPRTC_CMD_ID_OFFSET) | DPRTC_CMD_BASE_VERSION) +#define DPRTC_CMD_V2(id) (((id) << DPRTC_CMD_ID_OFFSET) | DPRTC_CMD_VERSION_2) /* Command IDs */ #define DPRTC_CMDID_CLOSE DPRTC_CMD(0x800) @@ -19,7 +21,7 @@ #define DPRTC_CMDID_SET_IRQ_ENABLE DPRTC_CMD(0x012) #define DPRTC_CMDID_GET_IRQ_ENABLE DPRTC_CMD(0x013) -#define DPRTC_CMDID_SET_IRQ_MASK DPRTC_CMD(0x014) +#define DPRTC_CMDID_SET_IRQ_MASK DPRTC_CMD_V2(0x014) #define DPRTC_CMDID_GET_IRQ_MASK DPRTC_CMD(0x015) #define DPRTC_CMDID_GET_IRQ_STATUS DPRTC_CMD(0x016) #define DPRTC_CMDID_CLEAR_IRQ_STATUS DPRTC_CMD(0x017) diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc.h b/drivers/net/ethernet/freescale/dpaa2/dprtc.h index 311c184e1aef..05c413719e55 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dprtc.h +++ b/drivers/net/ethernet/freescale/dpaa2/dprtc.h @@ -20,6 +20,8 @@ struct fsl_mc_io; #define DPRTC_IRQ_INDEX 0 #define DPRTC_EVENT_PPS 0x08000000 +#define DPRTC_EVENT_ETS1 0x00800000 +#define DPRTC_EVENT_ETS2 0x00400000 int dprtc_open(struct fsl_mc_io *mc_io, u32 cmd_flags, diff --git 
a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig index edad4ca46327..fe942de19597 100644 --- a/drivers/net/ethernet/freescale/enetc/Kconfig +++ b/drivers/net/ethernet/freescale/enetc/Kconfig @@ -2,6 +2,7 @@ config FSL_ENETC tristate "ENETC PF driver" depends on PCI && PCI_MSI && (ARCH_LAYERSCAPE || COMPILE_TEST) + select FSL_ENETC_MDIO select PHYLIB help This driver supports NXP ENETC gigabit ethernet controller PCIe diff --git a/drivers/net/ethernet/freescale/enetc/Makefile b/drivers/net/ethernet/freescale/enetc/Makefile index d0db33e5b6b7..74f7ac253b8b 100644 --- a/drivers/net/ethernet/freescale/enetc/Makefile +++ b/drivers/net/ethernet/freescale/enetc/Makefile @@ -3,7 +3,7 @@ common-objs := enetc.o enetc_cbdr.o enetc_ethtool.o obj-$(CONFIG_FSL_ENETC) += fsl-enetc.o -fsl-enetc-y := enetc_pf.o enetc_mdio.o $(common-objs) +fsl-enetc-y := enetc_pf.o $(common-objs) fsl-enetc-$(CONFIG_PCI_IOV) += enetc_msg.o fsl-enetc-$(CONFIG_FSL_ENETC_QOS) += enetc_qos.o diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 17739906c966..1f79e36116a3 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -149,11 +149,21 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, if (enetc_tx_csum(skb, &temp_bd)) flags |= ENETC_TXBD_FLAGS_CSUM | ENETC_TXBD_FLAGS_L4CS; + else if (tx_ring->tsd_enable) + flags |= ENETC_TXBD_FLAGS_TSE | ENETC_TXBD_FLAGS_TXSTART; /* first BD needs frm_len and offload flags set */ temp_bd.frm_len = cpu_to_le16(skb->len); temp_bd.flags = flags; + if (flags & ENETC_TXBD_FLAGS_TSE) { + u32 temp; + + temp = (skb->skb_mstamp_ns >> 5 & ENETC_TXBD_TXSTART_MASK) + | (flags << ENETC_TXBD_FLAGS_OFFSET); + temp_bd.txstart = cpu_to_le32(temp); + } + if (flags & ENETC_TXBD_FLAGS_EX) { u8 e_flags = 0; *txbd = temp_bd; @@ -227,6 +237,8 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, 
struct sk_buff *skb, enetc_bdr_idx_inc(tx_ring, &i); tx_ring->next_to_use = i; + skb_tx_timestamp(skb); + /* let H/W know BD ring has been updated */ enetc_wr_reg(tx_ring->tpir, i); /* includes wmb() */ @@ -1503,6 +1515,8 @@ int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type, return enetc_setup_tc_taprio(ndev, type_data); case TC_SETUP_QDISC_CBS: return enetc_setup_tc_cbs(ndev, type_data); + case TC_SETUP_QDISC_ETF: + return enetc_setup_tc_txtime(ndev, type_data); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index 7ee0da6d0015..dd4a227ffc7a 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -72,6 +72,7 @@ struct enetc_bdr { struct enetc_ring_stats stats; dma_addr_t bd_dma_base; + u8 tsd_enable; /* Time specific departure */ } ____cacheline_aligned_in_smp; static inline void enetc_bdr_idx_inc(struct enetc_bdr *bdr, int *i) @@ -256,8 +257,10 @@ int enetc_send_cmd(struct enetc_si *si, struct enetc_cbd *cbd); int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data); void enetc_sched_speed_set(struct net_device *ndev); int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data); +int enetc_setup_tc_txtime(struct net_device *ndev, void *type_data); #else #define enetc_setup_tc_taprio(ndev, type_data) -EOPNOTSUPP #define enetc_sched_speed_set(ndev) (void)0 #define enetc_setup_tc_cbs(ndev, type_data) -EOPNOTSUPP +#define enetc_setup_tc_txtime(ndev, type_data) -EOPNOTSUPP #endif diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 880a8ed8bb47..301ee0dde02d 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -579,6 +579,7 @@ static int enetc_get_ts_info(struct net_device *ndev, (1 << HWTSTAMP_FILTER_ALL); #else info->so_timestamping = 
SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE; #endif return 0; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index 51f543ef37a8..62554f28ce07 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -200,6 +200,7 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_PFPMR 0x1900 #define ENETC_PFPMR_PMACE BIT(1) #define ENETC_PFPMR_MWLM BIT(0) +#define ENETC_EMDIO_BASE 0x1c00 #define ENETC_PSIUMHFR0(n, err) (((err) ? 0x1d08 : 0x1d00) + (n) * 0x10) #define ENETC_PSIUMHFR1(n) (0x1d04 + (n) * 0x10) #define ENETC_PSIMMHFR0(n, err) (((err) ? 0x1d00 : 0x1d08) + (n) * 0x10) @@ -358,6 +359,7 @@ union enetc_tx_bd { u8 l4_csoff; u8 flags; }; /* default layout */ + __le32 txstart; __le32 lstatus; }; }; @@ -378,11 +380,14 @@ union enetc_tx_bd { }; #define ENETC_TXBD_FLAGS_L4CS BIT(0) +#define ENETC_TXBD_FLAGS_TSE BIT(1) #define ENETC_TXBD_FLAGS_W BIT(2) #define ENETC_TXBD_FLAGS_CSUM BIT(3) +#define ENETC_TXBD_FLAGS_TXSTART BIT(4) #define ENETC_TXBD_FLAGS_EX BIT(6) #define ENETC_TXBD_FLAGS_F BIT(7) - +#define ENETC_TXBD_TXSTART_MASK GENMASK(24, 0) +#define ENETC_TXBD_FLAGS_OFFSET 24 static inline void enetc_clear_tx_bd(union enetc_tx_bd *txbd) { memset(txbd, 0, sizeof(*txbd)); @@ -615,3 +620,7 @@ struct enetc_cbd { /* Port time gating capability register */ #define ENETC_QBV_PTGCAPR_OFFSET 0x11a08 #define ENETC_QBV_MAX_GCL_LEN_MASK GENMASK(15, 0) + +/* Port time specific departure */ +#define ENETC_PTCTSDR(n) (0x1210 + 4 * (n)) +#define ENETC_TSDE BIT(31) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c index 149883c8f0b8..48c32a171afa 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c @@ -1,41 +1,56 @@ // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) /* Copyright 2019 NXP */ 
+#include <linux/fsl/enetc_mdio.h> #include <linux/mdio.h> #include <linux/of_mdio.h> #include <linux/iopoll.h> #include <linux/of.h> -#include "enetc_mdio.h" +#include "enetc_pf.h" -#define ENETC_MDIO_REG_OFFSET 0x1c00 #define ENETC_MDIO_CFG 0x0 /* MDIO configuration and status */ #define ENETC_MDIO_CTL 0x4 /* MDIO control */ #define ENETC_MDIO_DATA 0x8 /* MDIO data */ #define ENETC_MDIO_ADDR 0xc /* MDIO address */ -#define enetc_mdio_rd(hw, off) \ - enetc_port_rd(hw, ENETC_##off + ENETC_MDIO_REG_OFFSET) -#define enetc_mdio_wr(hw, off, val) \ - enetc_port_wr(hw, ENETC_##off + ENETC_MDIO_REG_OFFSET, val) -#define enetc_mdio_rd_reg(off) enetc_mdio_rd(hw, off) +static inline u32 _enetc_mdio_rd(struct enetc_mdio_priv *mdio_priv, int off) +{ + return enetc_port_rd(mdio_priv->hw, mdio_priv->mdio_base + off); +} + +static inline void _enetc_mdio_wr(struct enetc_mdio_priv *mdio_priv, int off, + u32 val) +{ + enetc_port_wr(mdio_priv->hw, mdio_priv->mdio_base + off, val); +} -#define ENETC_MDC_DIV 258 +#define enetc_mdio_rd(mdio_priv, off) \ + _enetc_mdio_rd(mdio_priv, ENETC_##off) +#define enetc_mdio_wr(mdio_priv, off, val) \ + _enetc_mdio_wr(mdio_priv, ENETC_##off, val) +#define enetc_mdio_rd_reg(off) enetc_mdio_rd(mdio_priv, off) #define MDIO_CFG_CLKDIV(x) ((((x) >> 1) & 0xff) << 8) #define MDIO_CFG_BSY BIT(0) #define MDIO_CFG_RD_ER BIT(1) +#define MDIO_CFG_HOLD(x) (((x) << 2) & GENMASK(4, 2)) #define MDIO_CFG_ENC45 BIT(6) /* external MDIO only - driven on neg MDC edge */ #define MDIO_CFG_NEG BIT(23) +#define ENETC_EMDIO_CFG \ + (MDIO_CFG_HOLD(2) | \ + MDIO_CFG_CLKDIV(258) | \ + MDIO_CFG_NEG) + #define MDIO_CTL_DEV_ADDR(x) ((x) & 0x1f) #define MDIO_CTL_PORT_ADDR(x) (((x) & 0x1f) << 5) #define MDIO_CTL_READ BIT(15) #define MDIO_DATA(x) ((x) & 0xffff) #define TIMEOUT 1000 -static int enetc_mdio_wait_complete(struct enetc_hw *hw) +static int enetc_mdio_wait_complete(struct enetc_mdio_priv *mdio_priv) { u32 val; @@ -46,12 +61,11 @@ static int enetc_mdio_wait_complete(struct 
enetc_hw *hw) int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value) { struct enetc_mdio_priv *mdio_priv = bus->priv; - struct enetc_hw *hw = mdio_priv->hw; u32 mdio_ctl, mdio_cfg; u16 dev_addr; int ret; - mdio_cfg = MDIO_CFG_CLKDIV(ENETC_MDC_DIV) | MDIO_CFG_NEG; + mdio_cfg = ENETC_EMDIO_CFG; if (regnum & MII_ADDR_C45) { dev_addr = (regnum >> 16) & 0x1f; mdio_cfg |= MDIO_CFG_ENC45; @@ -61,44 +75,44 @@ int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value) mdio_cfg &= ~MDIO_CFG_ENC45; } - enetc_mdio_wr(hw, MDIO_CFG, mdio_cfg); + enetc_mdio_wr(mdio_priv, MDIO_CFG, mdio_cfg); - ret = enetc_mdio_wait_complete(hw); + ret = enetc_mdio_wait_complete(mdio_priv); if (ret) return ret; /* set port and dev addr */ mdio_ctl = MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr); - enetc_mdio_wr(hw, MDIO_CTL, mdio_ctl); + enetc_mdio_wr(mdio_priv, MDIO_CTL, mdio_ctl); /* set the register address */ if (regnum & MII_ADDR_C45) { - enetc_mdio_wr(hw, MDIO_ADDR, regnum & 0xffff); + enetc_mdio_wr(mdio_priv, MDIO_ADDR, regnum & 0xffff); - ret = enetc_mdio_wait_complete(hw); + ret = enetc_mdio_wait_complete(mdio_priv); if (ret) return ret; } /* write the value */ - enetc_mdio_wr(hw, MDIO_DATA, MDIO_DATA(value)); + enetc_mdio_wr(mdio_priv, MDIO_DATA, MDIO_DATA(value)); - ret = enetc_mdio_wait_complete(hw); + ret = enetc_mdio_wait_complete(mdio_priv); if (ret) return ret; return 0; } +EXPORT_SYMBOL_GPL(enetc_mdio_write); int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum) { struct enetc_mdio_priv *mdio_priv = bus->priv; - struct enetc_hw *hw = mdio_priv->hw; u32 mdio_ctl, mdio_cfg; u16 dev_addr, value; int ret; - mdio_cfg = MDIO_CFG_CLKDIV(ENETC_MDC_DIV) | MDIO_CFG_NEG; + mdio_cfg = ENETC_EMDIO_CFG; if (regnum & MII_ADDR_C45) { dev_addr = (regnum >> 16) & 0x1f; mdio_cfg |= MDIO_CFG_ENC45; @@ -107,86 +121,56 @@ int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum) mdio_cfg &= ~MDIO_CFG_ENC45; } - enetc_mdio_wr(hw, 
MDIO_CFG, mdio_cfg); + enetc_mdio_wr(mdio_priv, MDIO_CFG, mdio_cfg); - ret = enetc_mdio_wait_complete(hw); + ret = enetc_mdio_wait_complete(mdio_priv); if (ret) return ret; /* set port and device addr */ mdio_ctl = MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr); - enetc_mdio_wr(hw, MDIO_CTL, mdio_ctl); + enetc_mdio_wr(mdio_priv, MDIO_CTL, mdio_ctl); /* set the register address */ if (regnum & MII_ADDR_C45) { - enetc_mdio_wr(hw, MDIO_ADDR, regnum & 0xffff); + enetc_mdio_wr(mdio_priv, MDIO_ADDR, regnum & 0xffff); - ret = enetc_mdio_wait_complete(hw); + ret = enetc_mdio_wait_complete(mdio_priv); if (ret) return ret; } /* initiate the read */ - enetc_mdio_wr(hw, MDIO_CTL, mdio_ctl | MDIO_CTL_READ); + enetc_mdio_wr(mdio_priv, MDIO_CTL, mdio_ctl | MDIO_CTL_READ); - ret = enetc_mdio_wait_complete(hw); + ret = enetc_mdio_wait_complete(mdio_priv); if (ret) return ret; /* return all Fs if nothing was there */ - if (enetc_mdio_rd(hw, MDIO_CFG) & MDIO_CFG_RD_ER) { + if (enetc_mdio_rd(mdio_priv, MDIO_CFG) & MDIO_CFG_RD_ER) { dev_dbg(&bus->dev, "Error while reading PHY%d reg at %d.%hhu\n", phy_id, dev_addr, regnum); return 0xffff; } - value = enetc_mdio_rd(hw, MDIO_DATA) & 0xffff; + value = enetc_mdio_rd(mdio_priv, MDIO_DATA) & 0xffff; return value; } +EXPORT_SYMBOL_GPL(enetc_mdio_read); -int enetc_mdio_probe(struct enetc_pf *pf) +struct enetc_hw *enetc_hw_alloc(struct device *dev, void __iomem *port_regs) { - struct device *dev = &pf->si->pdev->dev; - struct enetc_mdio_priv *mdio_priv; - struct device_node *np; - struct mii_bus *bus; - int err; - - bus = devm_mdiobus_alloc_size(dev, sizeof(*mdio_priv)); - if (!bus) - return -ENOMEM; - - bus->name = "Freescale ENETC MDIO Bus"; - bus->read = enetc_mdio_read; - bus->write = enetc_mdio_write; - bus->parent = dev; - mdio_priv = bus->priv; - mdio_priv->hw = &pf->si->hw; - snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev)); - - np = of_get_child_by_name(dev->of_node, "mdio"); - if (!np) { - dev_err(dev, "MDIO node 
missing\n"); - return -EINVAL; - } - - err = of_mdiobus_register(bus, np); - if (err) { - of_node_put(np); - dev_err(dev, "cannot register MDIO bus\n"); - return err; - } + struct enetc_hw *hw; - of_node_put(np); - pf->mdio = bus; + hw = devm_kzalloc(dev, sizeof(*hw), GFP_KERNEL); + if (!hw) + return ERR_PTR(-ENOMEM); - return 0; -} + hw->port = port_regs; -void enetc_mdio_remove(struct enetc_pf *pf) -{ - if (pf->mdio) - mdiobus_unregister(pf->mdio); + return hw; } +EXPORT_SYMBOL_GPL(enetc_hw_alloc); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_mdio.h b/drivers/net/ethernet/freescale/enetc/enetc_mdio.h deleted file mode 100644 index 60c9a3889824..000000000000 --- a/drivers/net/ethernet/freescale/enetc/enetc_mdio.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ -/* Copyright 2019 NXP */ - -#include <linux/phy.h> -#include "enetc_pf.h" - -struct enetc_mdio_priv { - struct enetc_hw *hw; -}; - -int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value); -int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c b/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c index fbd41ce01f06..ebc635f8a4cc 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) /* Copyright 2019 NXP */ +#include <linux/fsl/enetc_mdio.h> #include <linux/of_mdio.h> -#include "enetc_mdio.h" +#include "enetc_pf.h" #define ENETC_MDIO_DEV_ID 0xee01 #define ENETC_MDIO_DEV_NAME "FSL PCIe IE Central MDIO" @@ -13,17 +14,29 @@ static int enetc_pci_mdio_probe(struct pci_dev *pdev, { struct enetc_mdio_priv *mdio_priv; struct device *dev = &pdev->dev; + void __iomem *port_regs; struct enetc_hw *hw; struct mii_bus *bus; int err; - hw = devm_kzalloc(dev, sizeof(*hw), GFP_KERNEL); - if (!hw) - return -ENOMEM; + port_regs = 
pci_iomap(pdev, 0, 0); + if (!port_regs) { + dev_err(dev, "iomap failed\n"); + err = -ENXIO; + goto err_ioremap; + } + + hw = enetc_hw_alloc(dev, port_regs); + if (IS_ERR(hw)) { + err = PTR_ERR(hw); + goto err_hw_alloc; + } bus = devm_mdiobus_alloc_size(dev, sizeof(*mdio_priv)); - if (!bus) - return -ENOMEM; + if (!bus) { + err = -ENOMEM; + goto err_mdiobus_alloc; + } bus->name = ENETC_MDIO_BUS_NAME; bus->read = enetc_mdio_read; @@ -31,13 +44,14 @@ static int enetc_pci_mdio_probe(struct pci_dev *pdev, bus->parent = dev; mdio_priv = bus->priv; mdio_priv->hw = hw; + mdio_priv->mdio_base = ENETC_EMDIO_BASE; snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev)); pcie_flr(pdev); err = pci_enable_device_mem(pdev); if (err) { dev_err(dev, "device enable failed\n"); - return err; + goto err_pci_enable; } err = pci_request_region(pdev, 0, KBUILD_MODNAME); @@ -46,13 +60,6 @@ static int enetc_pci_mdio_probe(struct pci_dev *pdev, goto err_pci_mem_reg; } - hw->port = pci_iomap(pdev, 0, 0); - if (!hw->port) { - err = -ENXIO; - dev_err(dev, "iomap failed\n"); - goto err_ioremap; - } - err = of_mdiobus_register(bus, dev->of_node); if (err) goto err_mdiobus_reg; @@ -62,12 +69,14 @@ static int enetc_pci_mdio_probe(struct pci_dev *pdev, return 0; err_mdiobus_reg: - iounmap(mdio_priv->hw->port); -err_ioremap: pci_release_mem_regions(pdev); err_pci_mem_reg: pci_disable_device(pdev); - +err_pci_enable: +err_mdiobus_alloc: + iounmap(port_regs); +err_hw_alloc: +err_ioremap: return err; } diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index e7482d483b28..fc0d7d99e9a1 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -2,6 +2,7 @@ /* Copyright 2017-2019 NXP */ #include <linux/module.h> +#include <linux/fsl/enetc_mdio.h> #include <linux/of_mdio.h> #include <linux/of_net.h> #include "enetc_pf.h" @@ -749,6 +750,52 @@ static void enetc_pf_netdev_setup(struct 
enetc_si *si, struct net_device *ndev, enetc_get_primary_mac_addr(&si->hw, ndev->dev_addr); } +static int enetc_mdio_probe(struct enetc_pf *pf) +{ + struct device *dev = &pf->si->pdev->dev; + struct enetc_mdio_priv *mdio_priv; + struct device_node *np; + struct mii_bus *bus; + int err; + + bus = devm_mdiobus_alloc_size(dev, sizeof(*mdio_priv)); + if (!bus) + return -ENOMEM; + + bus->name = "Freescale ENETC MDIO Bus"; + bus->read = enetc_mdio_read; + bus->write = enetc_mdio_write; + bus->parent = dev; + mdio_priv = bus->priv; + mdio_priv->hw = &pf->si->hw; + mdio_priv->mdio_base = ENETC_EMDIO_BASE; + snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev)); + + np = of_get_child_by_name(dev->of_node, "mdio"); + if (!np) { + dev_err(dev, "MDIO node missing\n"); + return -EINVAL; + } + + err = of_mdiobus_register(bus, np); + if (err) { + of_node_put(np); + dev_err(dev, "cannot register MDIO bus\n"); + return err; + } + + of_node_put(np); + pf->mdio = bus; + + return 0; +} + +static void enetc_mdio_remove(struct enetc_pf *pf) +{ + if (pf->mdio) + mdiobus_unregister(pf->mdio); +} + static int enetc_of_get_phy(struct enetc_ndev_priv *priv) { struct enetc_pf *pf = enetc_si_priv(priv->si); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.h b/drivers/net/ethernet/freescale/enetc/enetc_pf.h index 10dd1b53bb08..59e65a6f6c3e 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.h @@ -49,7 +49,3 @@ struct enetc_pf { int enetc_msg_psi_init(struct enetc_pf *pf); void enetc_msg_psi_free(struct enetc_pf *pf); void enetc_msg_handle_rxmsg(struct enetc_pf *pf, int mbox_id, u16 *status); - -/* MDIO */ -int enetc_mdio_probe(struct enetc_pf *pf); -void enetc_mdio_remove(struct enetc_pf *pf); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 2e99438cb1bf..0c6bf3a55a9a 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ 
b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -36,7 +36,6 @@ void enetc_sched_speed_set(struct net_device *ndev) case SPEED_10: default: pspeed = ENETC_PMR_PSPEED_10M; - netdev_err(ndev, "Qbv PSPEED set speed link down.\n"); } priv->speed = speed; @@ -156,6 +155,11 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) int err; int i; + /* TSD and Qbv are mutually exclusive in hardware */ + for (i = 0; i < priv->num_tx_rings; i++) + if (priv->tx_ring[i]->tsd_enable) + return -EBUSY; + for (i = 0; i < priv->num_tx_rings; i++) enetc_set_bdr_prio(&priv->si->hw, priv->tx_ring[i]->index, @@ -192,7 +196,6 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) u32 hi_credit_bit, hi_credit_reg; u32 max_interference_size; u32 port_frame_max_size; - u32 tc_max_sized_frame; u8 tc = cbs->queue; u8 prio_top, prio_next; int bw_sum = 0; @@ -250,7 +253,7 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) return -EINVAL; } - tc_max_sized_frame = enetc_port_rd(&si->hw, ENETC_PTCMSDUR(tc)); + enetc_port_rd(&si->hw, ENETC_PTCMSDUR(tc)); /* For top prio TC, the max_interfrence_size is maxSizedFrame. * @@ -298,3 +301,33 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) return 0; } + +int enetc_setup_tc_txtime(struct net_device *ndev, void *type_data) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct tc_etf_qopt_offload *qopt = type_data; + u8 tc_nums = netdev_get_num_tc(ndev); + int tc; + + if (!tc_nums) + return -EOPNOTSUPP; + + tc = qopt->queue; + + if (tc < 0 || tc >= priv->num_tx_rings) + return -EINVAL; + + /* Do not support TXSTART and TX CSUM offload simutaniously */ + if (ndev->features & NETIF_F_CSUM_MASK) + return -EBUSY; + + /* TSD and Qbv are mutually exclusive in hardware */ + if (enetc_rd(&priv->si->hw, ENETC_QBV_PTGCR_OFFSET) & ENETC_QBV_TGE) + return -EBUSY; + + priv->tx_ring[tc]->tsd_enable = qopt->enable; + enetc_port_wr(&priv->si->hw, ENETC_PTCTSDR(tc), + qopt->enable ? 
ENETC_TSDE : 0); + + return 0; +} diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 9294027e9d90..4432a59904c7 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1141,7 +1141,7 @@ fec_stop(struct net_device *ndev) static void -fec_timeout(struct net_device *ndev) +fec_timeout(struct net_device *ndev, unsigned int txqueue) { struct fec_enet_private *fep = netdev_priv(ndev); diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index 30cdb246d020..de5278485062 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -84,7 +84,7 @@ static int debug = -1; /* the above default */ module_param(debug, int, 0); MODULE_PARM_DESC(debug, "debugging messages level"); -static void mpc52xx_fec_tx_timeout(struct net_device *dev) +static void mpc52xx_fec_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mpc52xx_fec_priv *priv = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index f0806ace1ae2..55f2122c3217 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -692,7 +692,7 @@ static int mac_probe(struct platform_device *_of_dev) mac_dev->res = __devm_request_region(dev, fman_get_mem_region(priv->fman), - res.start, res.end + 1 - res.start, + res.start, resource_size(&res), "mac"); if (!mac_dev->res) { dev_err(dev, "__devm_request_mem_region(mac) failed\n"); @@ -701,7 +701,7 @@ static int mac_probe(struct platform_device *_of_dev) } priv->vaddr = devm_ioremap(dev, mac_dev->res->start, - mac_dev->res->end + 1 - mac_dev->res->start); + resource_size(mac_dev->res)); if (!priv->vaddr) { dev_err(dev, "devm_ioremap() failed\n"); err = -EIO; diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c 
b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 3981c06f082f..80903cd58468 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -641,7 +641,7 @@ static void fs_timeout_work(struct work_struct *work) netif_wake_queue(dev); } -static void fs_timeout(struct net_device *dev) +static void fs_timeout(struct net_device *dev, unsigned int txqueue) { struct fs_enet_private *fep = netdev_priv(dev); diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 72868a28b621..f7e5cafe89a9 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2093,7 +2093,7 @@ static void gfar_reset_task(struct work_struct *work) reset_gfar(priv->ndev); } -static void gfar_timeout(struct net_device *dev) +static void gfar_timeout(struct net_device *dev, unsigned int txqueue) { struct gfar_private *priv = netdev_priv(dev); @@ -2205,13 +2205,17 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) skb_dirtytx = tx_queue->skb_dirtytx; while ((skb = tx_queue->tx_skbuff[skb_dirtytx])) { + bool do_tstamp; + + do_tstamp = (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + priv->hwts_tx_en; frags = skb_shinfo(skb)->nr_frags; /* When time stamping, one additional TxBD must be freed. * Also, we need to dma_unmap_single() the TxPAL. 
*/ - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) + if (unlikely(do_tstamp)) nr_txbds = frags + 2; else nr_txbds = frags + 1; @@ -2225,7 +2229,7 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) (lstatus & BD_LENGTH_MASK)) break; - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { + if (unlikely(do_tstamp)) { next = next_txbd(bdp, base, tx_ring_size); buflen = be16_to_cpu(next->length) + GMAC_FCB_LEN + GMAC_TXPAL_LEN; @@ -2235,7 +2239,7 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) dma_unmap_single(priv->dev, be32_to_cpu(bdp->bufPtr), buflen, DMA_TO_DEVICE); - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { + if (unlikely(do_tstamp)) { struct skb_shared_hwtstamps shhwtstamps; u64 *ns = (u64 *)(((uintptr_t)skb->data + 0x10) & ~0x7UL); diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index f839fa94ebdd..0d101c00286f 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3545,7 +3545,7 @@ static void ucc_geth_timeout_work(struct work_struct *work) * ucc_geth_timeout gets called when a packet has not been * transmitted after a set amount of time. 
*/ -static void ucc_geth_timeout(struct net_device *dev) +static void ucc_geth_timeout(struct net_device *dev, unsigned int txqueue) { struct ucc_geth_private *ugeth = netdev_priv(dev); diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c index 1eca0fdb9933..a7b7a4aace79 100644 --- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c +++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c @@ -93,7 +93,7 @@ static irqreturn_t fjn_interrupt(int irq, void *dev_id); static void fjn_rx(struct net_device *dev); static void fjn_reset(struct net_device *dev); static void set_rx_mode(struct net_device *dev); -static void fjn_tx_timeout(struct net_device *dev); +static void fjn_tx_timeout(struct net_device *dev, unsigned int txqueue); static const struct ethtool_ops netdev_ethtool_ops; /* @@ -774,7 +774,7 @@ static irqreturn_t fjn_interrupt(int dummy, void *dev_id) /*====================================================================*/ -static void fjn_tx_timeout(struct net_device *dev) +static void fjn_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct local_info *lp = netdev_priv(dev); unsigned int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 9b7a8db9860f..e032563ceefd 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -845,7 +845,7 @@ static void gve_turnup(struct gve_priv *priv) gve_set_napi_enabled(priv); } -static void gve_tx_timeout(struct net_device *dev) +static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct gve_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 150a8ccfb8b1..d9718b87279d 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -779,7 +779,7 @@ static int hip04_mac_stop(struct net_device *ndev) return 
0; } -static void hip04_timeout(struct net_device *ndev) +static void hip04_timeout(struct net_device *ndev, unsigned int txqueue) { struct hip04_priv *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c index 247de9105d10..4fb776920a93 100644 --- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c +++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c @@ -893,7 +893,7 @@ static void hix5hd2_tx_timeout_task(struct work_struct *work) hix5hd2_net_open(priv->netdev); } -static void hix5hd2_net_timeout(struct net_device *dev) +static void hix5hd2_net_timeout(struct net_device *dev, unsigned int txqueue) { struct hix5hd2_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index eb69e5c81a4d..2fbb476584bc 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1483,7 +1483,7 @@ static int hns_nic_net_stop(struct net_device *ndev) static void hns_tx_timeout_reset(struct hns_nic_priv *priv); #define HNS_TX_TIMEO_LIMIT (40 * HZ) -static void hns_nic_net_timeout(struct net_device *ndev) +static void hns_nic_net_timeout(struct net_device *ndev, unsigned int txqueue) { struct hns_nic_priv *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/hisilicon/hns3/Makefile b/drivers/net/ethernet/hisilicon/hns3/Makefile index d01bf536eb86..7aa2fac76c5e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/Makefile +++ b/drivers/net/ethernet/hisilicon/hns3/Makefile @@ -3,6 +3,8 @@ # Makefile for the HISILICON network device drivers. 
# +ccflags-y += -I$(srctree)/$(src) + obj-$(CONFIG_HNS3) += hns3pf/ obj-$(CONFIG_HNS3) += hns3vf/ diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 3b5e2d7251e7..a3e4081b84ba 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -164,11 +164,7 @@ enum hnae3_reset_type { HNAE3_IMP_RESET, HNAE3_UNKNOWN_RESET, HNAE3_NONE_RESET, -}; - -enum hnae3_flr_state { - HNAE3_FLR_DOWN, - HNAE3_FLR_DONE, + HNAE3_MAX_RESET, }; enum hnae3_port_base_vlan_state { @@ -575,8 +571,7 @@ struct hnae3_ae_algo { const struct pci_device_id *pdev_id_table; }; -#define HNAE3_INT_NAME_EXT_LEN 32 /* Max extra information length */ -#define HNAE3_INT_NAME_LEN (IFNAMSIZ + HNAE3_INT_NAME_EXT_LEN) +#define HNAE3_INT_NAME_LEN 32 #define HNAE3_ITR_COUNTDOWN_START 100 struct hnae3_tc_info { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index b3deb5e5ce29..914a7630ac48 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -24,6 +24,12 @@ #include "hnae3.h" #include "hns3_enet.h" +/* All hns3 tracepoints are defined by the include below, which + * must be included exactly once across the whole kernel with + * CREATE_TRACE_POINTS defined + */ +#define CREATE_TRACE_POINTS +#include "hns3_trace.h" #define hns3_set_field(origin, shift, val) ((origin) |= ((val) << (shift))) #define hns3_tx_bd_count(S) DIV_ROUND_UP(S, HNS3_MAX_BD_SIZE) @@ -129,18 +135,21 @@ static int hns3_nic_init_irq(struct hns3_nic_priv *priv) continue; if (tqp_vectors->tx_group.ring && tqp_vectors->rx_group.ring) { - snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN - 1, - "%s-%s-%d", priv->netdev->name, "TxRx", - txrx_int_idx++); + snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN, + "%s-%s-%s-%d", hns3_driver_name, + pci_name(priv->ae_handle->pdev), + "TxRx", txrx_int_idx++); txrx_int_idx++; } else if 
(tqp_vectors->rx_group.ring) { - snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN - 1, - "%s-%s-%d", priv->netdev->name, "Rx", - rx_int_idx++); + snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN, + "%s-%s-%s-%d", hns3_driver_name, + pci_name(priv->ae_handle->pdev), + "Rx", rx_int_idx++); } else if (tqp_vectors->tx_group.ring) { - snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN - 1, - "%s-%s-%d", priv->netdev->name, "Tx", - tx_int_idx++); + snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN, + "%s-%s-%s-%d", hns3_driver_name, + pci_name(priv->ae_handle->pdev), + "Tx", tx_int_idx++); } else { /* Skip this unused q_vector */ continue; @@ -157,6 +166,8 @@ static int hns3_nic_init_irq(struct hns3_nic_priv *priv) return ret; } + disable_irq(tqp_vectors->vector_irq); + irq_set_affinity_hint(tqp_vectors->vector_irq, &tqp_vectors->affinity_mask); @@ -175,6 +186,7 @@ static void hns3_mask_vector_irq(struct hns3_enet_tqp_vector *tqp_vector, static void hns3_vector_enable(struct hns3_enet_tqp_vector *tqp_vector) { napi_enable(&tqp_vector->napi); + enable_irq(tqp_vector->vector_irq); /* enable vector */ hns3_mask_vector_irq(tqp_vector, 1); @@ -374,18 +386,6 @@ static int hns3_nic_net_up(struct net_device *netdev) if (ret) return ret; - /* the device can work without cpu rmap, only aRFS needs it */ - ret = hns3_set_rx_cpu_rmap(netdev); - if (ret) - netdev_warn(netdev, "set rx cpu rmap fail, ret=%d!\n", ret); - - /* get irq resource for all vectors */ - ret = hns3_nic_init_irq(priv); - if (ret) { - netdev_err(netdev, "init irq failed! ret=%d\n", ret); - goto free_rmap; - } - clear_bit(HNS3_NIC_STATE_DOWN, &priv->state); /* enable the vectors */ @@ -398,22 +398,15 @@ static int hns3_nic_net_up(struct net_device *netdev) /* start the ae_dev */ ret = h->ae_algo->ops->start ? 
h->ae_algo->ops->start(h) : 0; - if (ret) - goto out_start_err; - - return 0; - -out_start_err: - set_bit(HNS3_NIC_STATE_DOWN, &priv->state); - while (j--) - hns3_tqp_disable(h->kinfo.tqp[j]); + if (ret) { + set_bit(HNS3_NIC_STATE_DOWN, &priv->state); + while (j--) + hns3_tqp_disable(h->kinfo.tqp[j]); - for (j = i - 1; j >= 0; j--) - hns3_vector_disable(&priv->tqp_vector[j]); + for (j = i - 1; j >= 0; j--) + hns3_vector_disable(&priv->tqp_vector[j]); + } - hns3_nic_uninit_irq(priv); -free_rmap: - hns3_free_rx_cpu_rmap(netdev); return ret; } @@ -510,11 +503,6 @@ static void hns3_nic_net_down(struct net_device *netdev) if (ops->stop) ops->stop(priv->ae_handle); - hns3_free_rx_cpu_rmap(netdev); - - /* free irq resources */ - hns3_nic_uninit_irq(priv); - /* delay ring buffer clearing to hns3_reset_notify_uninit_enet * during reset process, because driver may not be able * to disable the ring through firmware when downing the netdev. @@ -736,6 +724,8 @@ static int hns3_set_tso(struct sk_buff *skb, u32 *paylen, /* get MSS for TSO */ *mss = skb_shinfo(skb)->gso_size; + trace_hns3_tso(skb); + return 0; } @@ -1140,6 +1130,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, desc->tx.bdtp_fe_sc_vld_ra_ri = cpu_to_le16(BIT(HNS3_TXD_VLD_B)); + trace_hns3_tx_desc(ring, ring->next_to_use); ring_ptr_move_fw(ring, next_to_use); return HNS3_LIKELY_BD_NUM; } @@ -1163,6 +1154,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, desc->tx.bdtp_fe_sc_vld_ra_ri = cpu_to_le16(BIT(HNS3_TXD_VLD_B)); + trace_hns3_tx_desc(ring, ring->next_to_use); /* move ring pointer to next */ ring_ptr_move_fw(ring, next_to_use); @@ -1288,6 +1280,14 @@ static bool hns3_skb_need_linearized(struct sk_buff *skb, unsigned int *bd_size, return false; } +void hns3_shinfo_pack(struct skb_shared_info *shinfo, __u32 *size) +{ + int i = 0; + + for (i = 0; i < MAX_SKB_FRAGS; i++) + size[i] = skb_frag_size(&shinfo->frags[i]); +} + static int hns3_nic_maybe_stop_tx(struct 
hns3_enet_ring *ring, struct net_device *netdev, struct sk_buff *skb) @@ -1299,8 +1299,10 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring, bd_num = hns3_tx_bd_num(skb, bd_size); if (unlikely(bd_num > HNS3_MAX_NON_TSO_BD_NUM)) { if (bd_num <= HNS3_MAX_TSO_BD_NUM && skb_is_gso(skb) && - !hns3_skb_need_linearized(skb, bd_size, bd_num)) + !hns3_skb_need_linearized(skb, bd_size, bd_num)) { + trace_hns3_over_8bd(skb); goto out; + } if (__skb_linearize(skb)) return -ENOMEM; @@ -1308,8 +1310,10 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring, bd_num = hns3_tx_bd_count(skb->len); if ((skb_is_gso(skb) && bd_num > HNS3_MAX_TSO_BD_NUM) || (!skb_is_gso(skb) && - bd_num > HNS3_MAX_NON_TSO_BD_NUM)) + bd_num > HNS3_MAX_NON_TSO_BD_NUM)) { + trace_hns3_over_8bd(skb); return -ENOMEM; + } u64_stats_update_begin(&ring->syncp); ring->stats.tx_copy++; @@ -1454,6 +1458,7 @@ out: (ring->desc_num - 1); ring->desc[pre_ntu].tx.bdtp_fe_sc_vld_ra_ri |= cpu_to_le16(BIT(HNS3_TXD_FE_B)); + trace_hns3_tx_desc(ring, pre_ntu); /* Complete translate all packets */ dev_queue = netdev_get_tx_queue(netdev, ring->queue_index); @@ -1562,6 +1567,37 @@ static int hns3_nic_set_features(struct net_device *netdev, return 0; } +static netdev_features_t hns3_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ +#define HNS3_MAX_HDR_LEN 480U +#define HNS3_MAX_L4_HDR_LEN 60U + + size_t len; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return features; + + if (skb->encapsulation) + len = skb_inner_transport_header(skb) - skb->data; + else + len = skb_transport_header(skb) - skb->data; + + /* Assume L4 is 60 byte as TCP is the only protocol with a + * a flexible value, and it's max len is 60 bytes. + */ + len += HNS3_MAX_L4_HDR_LEN; + + /* Hardware only supports checksum on the skb with a max header + * len of 480 bytes. 
+ */ + if (len > HNS3_MAX_HDR_LEN) + features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + + return features; +} + static void hns3_nic_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) { @@ -1875,7 +1911,7 @@ static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev) return true; } -static void hns3_nic_net_timeout(struct net_device *ndev) +static void hns3_nic_net_timeout(struct net_device *ndev, unsigned int txqueue) { struct hns3_nic_priv *priv = netdev_priv(ndev); struct hnae3_handle *h = priv->ae_handle; @@ -1976,6 +2012,7 @@ static const struct net_device_ops hns3_nic_netdev_ops = { .ndo_do_ioctl = hns3_nic_do_ioctl, .ndo_change_mtu = hns3_nic_change_mtu, .ndo_set_features = hns3_nic_set_features, + .ndo_features_check = hns3_features_check, .ndo_get_stats64 = hns3_nic_get_stats64, .ndo_setup_tc = hns3_nic_setup_tc, .ndo_set_rx_mode = hns3_nic_set_rx_mode, @@ -2674,6 +2711,9 @@ static int hns3_gro_complete(struct sk_buff *skb, u32 l234info) skb->csum_start = (unsigned char *)th - skb->head; skb->csum_offset = offsetof(struct tcphdr, check); skb->ip_summed = CHECKSUM_PARTIAL; + + trace_hns3_gro(skb); + return 0; } @@ -2794,7 +2834,6 @@ static bool hns3_parse_vlan_tag(struct hns3_enet_ring *ring, static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length, unsigned char *va) { -#define HNS3_NEED_ADD_FRAG 1 struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_clean]; struct net_device *netdev = ring_to_netdev(ring); struct sk_buff *skb; @@ -2811,6 +2850,7 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length, return -ENOMEM; } + trace_hns3_rx_desc(ring); prefetchw(skb->data); ring->pending_buf = 1; @@ -2838,33 +2878,19 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length, desc_cb); ring_ptr_move_fw(ring, next_to_clean); - return HNS3_NEED_ADD_FRAG; + return 0; } -static int hns3_add_frag(struct hns3_enet_ring *ring, struct hns3_desc *desc, - bool pending) 
+static int hns3_add_frag(struct hns3_enet_ring *ring) { struct sk_buff *skb = ring->skb; struct sk_buff *head_skb = skb; struct sk_buff *new_skb; struct hns3_desc_cb *desc_cb; - struct hns3_desc *pre_desc; + struct hns3_desc *desc; u32 bd_base_info; - int pre_bd; - /* if there is pending bd, the SW param next_to_clean has moved - * to next and the next is NULL - */ - if (pending) { - pre_bd = (ring->next_to_clean - 1 + ring->desc_num) % - ring->desc_num; - pre_desc = &ring->desc[pre_bd]; - bd_base_info = le32_to_cpu(pre_desc->rx.bd_base_info); - } else { - bd_base_info = le32_to_cpu(desc->rx.bd_base_info); - } - - while (!(bd_base_info & BIT(HNS3_RXD_FE_B))) { + do { desc = &ring->desc[ring->next_to_clean]; desc_cb = &ring->desc_cb[ring->next_to_clean]; bd_base_info = le32_to_cpu(desc->rx.bd_base_info); @@ -2899,9 +2925,10 @@ static int hns3_add_frag(struct hns3_enet_ring *ring, struct hns3_desc *desc, } hns3_nic_reuse_page(skb, ring->frag_num++, ring, 0, desc_cb); + trace_hns3_rx_desc(ring); ring_ptr_move_fw(ring, next_to_clean); ring->pending_buf++; - } + } while (!(bd_base_info & BIT(HNS3_RXD_FE_B))); return 0; } @@ -3069,28 +3096,23 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring) if (ret < 0) /* alloc buffer fail */ return ret; - if (ret > 0) { /* need add frag */ - ret = hns3_add_frag(ring, desc, false); + if (!(bd_base_info & BIT(HNS3_RXD_FE_B))) { /* need add frag */ + ret = hns3_add_frag(ring); if (ret) return ret; - - /* As the head data may be changed when GRO enable, copy - * the head data in after other data rx completed - */ - memcpy(skb->data, ring->va, - ALIGN(ring->pull_len, sizeof(long))); } } else { - ret = hns3_add_frag(ring, desc, true); + ret = hns3_add_frag(ring); if (ret) return ret; + } - /* As the head data may be changed when GRO enable, copy - * the head data in after other data rx completed - */ + /* As the head data may be changed when GRO enable, copy + * the head data in after other data rx completed + */ + if (skb->len 
> HNS3_RX_HEAD_SIZE) memcpy(skb->data, ring->va, ALIGN(ring->pull_len, sizeof(long))); - } ret = hns3_handle_bdinfo(ring, skb); if (unlikely(ret)) { @@ -3596,26 +3618,25 @@ static void hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv) if (!tqp_vector->rx_group.ring && !tqp_vector->tx_group.ring) continue; - hns3_get_vector_ring_chain(tqp_vector, &vector_ring_chain); + /* Since the mapping can be overwritten, when fail to get the + * chain between vector and ring, we should go on to deal with + * the remaining options. + */ + if (hns3_get_vector_ring_chain(tqp_vector, &vector_ring_chain)) + dev_warn(priv->dev, "failed to get ring chain\n"); h->ae_algo->ops->unmap_ring_from_vector(h, tqp_vector->vector_irq, &vector_ring_chain); hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain); - if (tqp_vector->irq_init_flag == HNS3_VECTOR_INITED) { - irq_set_affinity_hint(tqp_vector->vector_irq, NULL); - free_irq(tqp_vector->vector_irq, tqp_vector); - tqp_vector->irq_init_flag = HNS3_VECTOR_NOT_INITED; - } - hns3_clear_ring_group(&tqp_vector->rx_group); hns3_clear_ring_group(&tqp_vector->tx_group); netif_napi_del(&priv->tqp_vector[i].napi); } } -static int hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv) +static void hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv) { struct hnae3_handle *h = priv->ae_handle; struct pci_dev *pdev = h->pdev; @@ -3627,11 +3648,10 @@ static int hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv) tqp_vector = &priv->tqp_vector[i]; ret = h->ae_algo->ops->put_vector(h, tqp_vector->vector_irq); if (ret) - return ret; + return; } devm_kfree(&pdev->dev, priv->tqp_vector); - return 0; } static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv, @@ -4030,6 +4050,18 @@ static int hns3_client_init(struct hnae3_handle *handle) goto out_reg_netdev_fail; } + /* the device can work without cpu rmap, only aRFS needs it */ + ret = hns3_set_rx_cpu_rmap(netdev); + if (ret) + dev_warn(priv->dev, "set rx cpu rmap 
fail, ret=%d\n", ret); + + ret = hns3_nic_init_irq(priv); + if (ret) { + dev_err(priv->dev, "init irq failed! ret=%d\n", ret); + hns3_free_rx_cpu_rmap(netdev); + goto out_init_irq_fail; + } + ret = hns3_client_start(handle); if (ret) { dev_err(priv->dev, "hns3_client_start fail! ret=%d\n", ret); @@ -4051,6 +4083,9 @@ static int hns3_client_init(struct hnae3_handle *handle) return ret; out_client_start: + hns3_free_rx_cpu_rmap(netdev); + hns3_nic_uninit_irq(priv); +out_init_irq_fail: unregister_netdev(netdev); out_reg_netdev_fail: hns3_uninit_phy(netdev); @@ -4088,15 +4123,17 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset) goto out_netdev_free; } + hns3_free_rx_cpu_rmap(netdev); + + hns3_nic_uninit_irq(priv); + hns3_del_all_fd_rules(netdev, true); hns3_clear_all_ring(handle, true); hns3_nic_uninit_vector_data(priv); - ret = hns3_nic_dealloc_vector_data(priv); - if (ret) - netdev_err(netdev, "dealloc vector error\n"); + hns3_nic_dealloc_vector_data(priv); ret = hns3_uninit_all_ring(priv); if (ret) @@ -4423,17 +4460,32 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle) if (ret) goto err_uninit_vector; + /* the device can work without cpu rmap, only aRFS needs it */ + ret = hns3_set_rx_cpu_rmap(netdev); + if (ret) + dev_warn(priv->dev, "set rx cpu rmap fail, ret=%d\n", ret); + + ret = hns3_nic_init_irq(priv); + if (ret) { + dev_err(priv->dev, "init irq failed! ret=%d\n", ret); + hns3_free_rx_cpu_rmap(netdev); + goto err_init_irq_fail; + } + ret = hns3_client_start(handle); if (ret) { dev_err(priv->dev, "hns3_client_start fail! 
ret=%d\n", ret); - goto err_uninit_ring; + goto err_client_start_fail; } set_bit(HNS3_NIC_STATE_INITED, &priv->state); return ret; -err_uninit_ring: +err_client_start_fail: + hns3_free_rx_cpu_rmap(netdev); + hns3_nic_uninit_irq(priv); +err_init_irq_fail: hns3_uninit_all_ring(priv); err_uninit_vector: hns3_nic_uninit_vector_data(priv); @@ -4483,6 +4535,8 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle) return 0; } + hns3_free_rx_cpu_rmap(netdev); + hns3_nic_uninit_irq(priv); hns3_clear_all_ring(handle, true); hns3_reset_tx_queue(priv->ae_handle); @@ -4490,9 +4544,7 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle) hns3_store_coal(priv); - ret = hns3_nic_dealloc_vector_data(priv); - if (ret) - netdev_err(netdev, "dealloc vector error\n"); + hns3_nic_dealloc_vector_data(priv); ret = hns3_uninit_all_ring(priv); if (ret) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 9d47abd5c37c..abefd7a179f7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -673,4 +673,5 @@ void hns3_dbg_init(struct hnae3_handle *handle); void hns3_dbg_uninit(struct hnae3_handle *handle); void hns3_dbg_register_debugfs(const char *debugfs_dir_name); void hns3_dbg_unregister_debugfs(void); +void hns3_shinfo_pack(struct skb_shared_info *shinfo, __u32 *size); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_trace.h b/drivers/net/ethernet/hisilicon/hns3/hns3_trace.h new file mode 100644 index 000000000000..7bddcca148a5 --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_trace.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2018-2019 Hisilicon Limited. 
*/ + +/* This must be outside ifdef _HNS3_TRACE_H */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hns3 + +#if !defined(_HNS3_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _HNS3_TRACE_H_ + +#include <linux/tracepoint.h> + +#define DESC_NR (sizeof(struct hns3_desc) / sizeof(u32)) + +DECLARE_EVENT_CLASS(hns3_skb_template, + TP_PROTO(struct sk_buff *skb), + TP_ARGS(skb), + + TP_STRUCT__entry( + __field(unsigned int, headlen) + __field(unsigned int, len) + __field(__u8, nr_frags) + __field(__u8, ip_summed) + __field(unsigned int, hdr_len) + __field(unsigned short, gso_size) + __field(unsigned short, gso_segs) + __field(unsigned int, gso_type) + __field(bool, fraglist) + __array(__u32, size, MAX_SKB_FRAGS) + ), + + TP_fast_assign( + __entry->headlen = skb_headlen(skb); + __entry->len = skb->len; + __entry->nr_frags = skb_shinfo(skb)->nr_frags; + __entry->gso_size = skb_shinfo(skb)->gso_size; + __entry->gso_segs = skb_shinfo(skb)->gso_segs; + __entry->gso_type = skb_shinfo(skb)->gso_type; + __entry->hdr_len = skb->encapsulation ? 
+ skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb) : + skb_transport_offset(skb) + tcp_hdrlen(skb); + __entry->ip_summed = skb->ip_summed; + __entry->fraglist = skb_has_frag_list(skb); + hns3_shinfo_pack(skb_shinfo(skb), __entry->size); + ), + + TP_printk( + "len: %u, %u, %u, cs: %u, gso: %u, %u, %x, frag(%d %u): %s", + __entry->headlen, __entry->len, __entry->hdr_len, + __entry->ip_summed, __entry->gso_size, __entry->gso_segs, + __entry->gso_type, __entry->fraglist, __entry->nr_frags, + __print_array(__entry->size, MAX_SKB_FRAGS, sizeof(__u32)) + ) +); + +DEFINE_EVENT(hns3_skb_template, hns3_over_8bd, + TP_PROTO(struct sk_buff *skb), + TP_ARGS(skb)); + +DEFINE_EVENT(hns3_skb_template, hns3_gro, + TP_PROTO(struct sk_buff *skb), + TP_ARGS(skb)); + +DEFINE_EVENT(hns3_skb_template, hns3_tso, + TP_PROTO(struct sk_buff *skb), + TP_ARGS(skb)); + +TRACE_EVENT(hns3_tx_desc, + TP_PROTO(struct hns3_enet_ring *ring, int cur_ntu), + TP_ARGS(ring, cur_ntu), + + TP_STRUCT__entry( + __field(int, index) + __field(int, ntu) + __field(int, ntc) + __field(dma_addr_t, desc_dma) + __array(u32, desc, DESC_NR) + __string(devname, ring->tqp->handle->kinfo.netdev->name) + ), + + TP_fast_assign( + __entry->index = ring->tqp->tqp_index; + __entry->ntu = ring->next_to_use; + __entry->ntc = ring->next_to_clean; + __entry->desc_dma = ring->desc_dma_addr, + memcpy(__entry->desc, &ring->desc[cur_ntu], + sizeof(struct hns3_desc)); + __assign_str(devname, ring->tqp->handle->kinfo.netdev->name); + ), + + TP_printk( + "%s-%d-%d/%d desc(%pad): %s", + __get_str(devname), __entry->index, __entry->ntu, + __entry->ntc, &__entry->desc_dma, + __print_array(__entry->desc, DESC_NR, sizeof(u32)) + ) +); + +TRACE_EVENT(hns3_rx_desc, + TP_PROTO(struct hns3_enet_ring *ring), + TP_ARGS(ring), + + TP_STRUCT__entry( + __field(int, index) + __field(int, ntu) + __field(int, ntc) + __field(dma_addr_t, desc_dma) + __field(dma_addr_t, buf_dma) + __array(u32, desc, DESC_NR) + __string(devname, 
ring->tqp->handle->kinfo.netdev->name) + ), + + TP_fast_assign( + __entry->index = ring->tqp->tqp_index; + __entry->ntu = ring->next_to_use; + __entry->ntc = ring->next_to_clean; + __entry->desc_dma = ring->desc_dma_addr; + __entry->buf_dma = ring->desc_cb[ring->next_to_clean].dma; + memcpy(__entry->desc, &ring->desc[ring->next_to_clean], + sizeof(struct hns3_desc)); + __assign_str(devname, ring->tqp->handle->kinfo.netdev->name); + ), + + TP_printk( + "%s-%d-%d/%d desc(%pad) buf(%pad): %s", + __get_str(devname), __entry->index, __entry->ntu, + __entry->ntc, &__entry->desc_dma, &__entry->buf_dma, + __print_array(__entry->desc, DESC_NR, sizeof(u32)) + ) +); + +#endif /* _HNS3_TRACE_H_ */ + +/* This must be outside ifdef _HNS3_TRACE_H */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE hns3_trace +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 940ead3970d1..7f509eff562e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -479,19 +479,6 @@ static void hclge_cmd_uninit_regs(struct hclge_hw *hw) hclge_write_dev(hw, HCLGE_NIC_CRQ_TAIL_REG, 0); } -static void hclge_destroy_queue(struct hclge_cmq_ring *ring) -{ - spin_lock(&ring->lock); - hclge_free_cmd_desc(ring); - spin_unlock(&ring->lock); -} - -static void hclge_destroy_cmd_queue(struct hclge_hw *hw) -{ - hclge_destroy_queue(&hw->cmq.csq); - hclge_destroy_queue(&hw->cmq.crq); -} - void hclge_cmd_uninit(struct hclge_dev *hdev) { spin_lock_bh(&hdev->hw.cmq.csq.lock); @@ -501,5 +488,6 @@ void hclge_cmd_uninit(struct hclge_dev *hdev) spin_unlock(&hdev->hw.cmq.crq.lock); spin_unlock_bh(&hdev->hw.cmq.csq.lock); - hclge_destroy_cmd_queue(&hdev->hw); + hclge_free_cmd_desc(&hdev->hw.cmq.csq); + hclge_free_cmd_desc(&hdev->hw.cmq.crq); } diff --git 
a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index d97da67f07a1..96498d9b4754 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -6,6 +6,7 @@ #include <linux/types.h> #include <linux/io.h> #include <linux/etherdevice.h> +#include "hnae3.h" #define HCLGE_CMDQ_TX_TIMEOUT 30000 #define HCLGE_DESC_DATA_LEN 6 @@ -63,6 +64,7 @@ enum hclge_cmd_status { struct hclge_misc_vector { u8 __iomem *addr; int vector_irq; + char name[HNAE3_INT_NAME_LEN]; }; struct hclge_cmq { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index 112df34b3869..f3d4cbd28913 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -886,8 +886,8 @@ static void hclge_dbg_dump_mng_table(struct hclge_dev *hdev) } } -static void hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage, - bool sel_x, u32 loc) +static int hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage, + bool sel_x, u32 loc) { struct hclge_fd_tcam_config_1_cmd *req1; struct hclge_fd_tcam_config_2_cmd *req2; @@ -912,7 +912,7 @@ static void hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage, ret = hclge_cmd_send(&hdev->hw, desc, 3); if (ret) - return; + return ret; dev_info(&hdev->pdev->dev, " read result tcam key %s(%u):\n", sel_x ? 
"x" : "y", loc); @@ -931,16 +931,76 @@ static void hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage, req = (u32 *)req3->tcam_data; for (i = 0; i < 5; i++) dev_info(&hdev->pdev->dev, "%08x\n", *req++); + + return ret; +} + +static int hclge_dbg_get_rules_location(struct hclge_dev *hdev, u16 *rule_locs) +{ + struct hclge_fd_rule *rule; + struct hlist_node *node; + int cnt = 0; + + spin_lock_bh(&hdev->fd_rule_lock); + hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { + rule_locs[cnt] = rule->location; + cnt++; + } + spin_unlock_bh(&hdev->fd_rule_lock); + + if (cnt != hdev->hclge_fd_rule_num) + return -EINVAL; + + return cnt; } static void hclge_dbg_fd_tcam(struct hclge_dev *hdev) { - u32 i; + int i, ret, rule_cnt; + u16 *rule_locs; - for (i = 0; i < hdev->fd_cfg.rule_num[0]; i++) { - hclge_dbg_fd_tcam_read(hdev, 0, true, i); - hclge_dbg_fd_tcam_read(hdev, 0, false, i); + if (!hnae3_dev_fd_supported(hdev)) { + dev_err(&hdev->pdev->dev, + "Only FD-supported dev supports dump fd tcam\n"); + return; } + + if (!hdev->hclge_fd_rule_num || + !hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]) + return; + + rule_locs = kcalloc(hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1], + sizeof(u16), GFP_KERNEL); + if (!rule_locs) + return; + + rule_cnt = hclge_dbg_get_rules_location(hdev, rule_locs); + if (rule_cnt <= 0) { + dev_err(&hdev->pdev->dev, + "failed to get rule number, ret = %d\n", rule_cnt); + kfree(rule_locs); + return; + } + + for (i = 0; i < rule_cnt; i++) { + ret = hclge_dbg_fd_tcam_read(hdev, 0, true, rule_locs[i]); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get fd tcam key x, ret = %d\n", ret); + kfree(rule_locs); + return; + } + + ret = hclge_dbg_fd_tcam_read(hdev, 0, false, rule_locs[i]); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get fd tcam key y, ret = %d\n", ret); + kfree(rule_locs); + return; + } + } + + kfree(rule_locs); } void hclge_dbg_dump_rst_info(struct hclge_dev *hdev) @@ -976,6 +1036,14 @@ void 
hclge_dbg_dump_rst_info(struct hclge_dev *hdev) dev_info(&hdev->pdev->dev, "hdev state: 0x%lx\n", hdev->state); } +static void hclge_dbg_dump_serv_info(struct hclge_dev *hdev) +{ + dev_info(&hdev->pdev->dev, "last_serv_processed: %lu\n", + hdev->last_serv_processed); + dev_info(&hdev->pdev->dev, "last_serv_cnt: %lu\n", + hdev->serv_processed_cnt); +} + static void hclge_dbg_get_m7_stats_info(struct hclge_dev *hdev) { struct hclge_desc *desc_src, *desc_tmp; @@ -1227,6 +1295,8 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf) hclge_dbg_dump_reg_cmd(hdev, &cmd_buf[sizeof(DUMP_REG)]); } else if (strncmp(cmd_buf, "dump reset info", 15) == 0) { hclge_dbg_dump_rst_info(hdev); + } else if (strncmp(cmd_buf, "dump serv info", 14) == 0) { + hclge_dbg_dump_serv_info(hdev); } else if (strncmp(cmd_buf, "dump m7 info", 12) == 0) { hclge_dbg_get_m7_stats_info(hdev); } else if (strncmp(cmd_buf, "dump ncl_config", 15) == 0) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index dc66b4e13377..f8127d72d645 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -505,7 +505,7 @@ static const struct hclge_hw_error hclge_ssu_mem_ecc_err_int[] = { static const struct hclge_hw_error hclge_ssu_port_based_err_int[] = { { .int_msk = BIT(0), .msg = "roc_pkt_without_key_port", - .reset_level = HNAE3_GLOBAL_RESET }, + .reset_level = HNAE3_FUNC_RESET }, { .int_msk = BIT(1), .msg = "tpu_pkt_without_key_port", .reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(2), .msg = "igu_pkt_without_key_port", @@ -599,7 +599,7 @@ static const struct hclge_hw_error hclge_ssu_ets_tcg_int[] = { static const struct hclge_hw_error hclge_ssu_port_based_pf_int[] = { { .int_msk = BIT(0), .msg = "roc_pkt_without_key_port", - .reset_level = HNAE3_GLOBAL_RESET }, + .reset_level = HNAE3_FUNC_RESET }, { .int_msk = BIT(9), .msg = 
"low_water_line_err_port", .reset_level = HNAE3_NONE_RESET }, { .int_msk = BIT(10), .msg = "hi_water_line_err_port", diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 13dbd249f35f..76e8aa40e1bc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -72,6 +72,8 @@ static int hclge_set_default_loopback(struct hclge_dev *hdev); static struct hnae3_ae_algo ae_algo; +static struct workqueue_struct *hclge_wq; + static const struct pci_device_id ae_algo_pci_tbl[] = { {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_GE), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE), 0}, @@ -416,7 +418,7 @@ static int hclge_mac_update_stats_defective(struct hclge_dev *hdev) { #define HCLGE_MAC_CMD_NUM 21 - u64 *data = (u64 *)(&hdev->hw_stats.mac_stats); + u64 *data = (u64 *)(&hdev->mac_stats); struct hclge_desc desc[HCLGE_MAC_CMD_NUM]; __le64 *desc_data; int i, k, n; @@ -453,7 +455,7 @@ static int hclge_mac_update_stats_defective(struct hclge_dev *hdev) static int hclge_mac_update_stats_complete(struct hclge_dev *hdev, u32 desc_num) { - u64 *data = (u64 *)(&hdev->hw_stats.mac_stats); + u64 *data = (u64 *)(&hdev->mac_stats); struct hclge_desc *desc; __le64 *desc_data; u16 i, k, n; @@ -802,7 +804,7 @@ static void hclge_get_stats(struct hnae3_handle *handle, u64 *data) struct hclge_dev *hdev = vport->back; u64 *p; - p = hclge_comm_get_stats(&hdev->hw_stats.mac_stats, g_mac_stats_string, + p = hclge_comm_get_stats(&hdev->mac_stats, g_mac_stats_string, ARRAY_SIZE(g_mac_stats_string), data); p = hclge_tqps_get_stats(handle, p); } @@ -815,8 +817,8 @@ static void hclge_get_mac_stat(struct hnae3_handle *handle, hclge_update_stats(handle, NULL); - mac_stats->tx_pause_cnt = hdev->hw_stats.mac_stats.mac_tx_mac_pause_num; - mac_stats->rx_pause_cnt = hdev->hw_stats.mac_stats.mac_rx_mac_pause_num; + mac_stats->tx_pause_cnt = hdev->mac_stats.mac_tx_mac_pause_num; 
+ mac_stats->rx_pause_cnt = hdev->mac_stats.mac_rx_mac_pause_num; } static int hclge_parse_func_status(struct hclge_dev *hdev, @@ -2665,31 +2667,27 @@ static int hclge_mac_init(struct hclge_dev *hdev) static void hclge_mbx_task_schedule(struct hclge_dev *hdev) { - if (!test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state) && + if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && !test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state)) - queue_work_on(cpumask_first(&hdev->affinity_mask), system_wq, - &hdev->mbx_service_task); + mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), + hclge_wq, &hdev->service_task, 0); } static void hclge_reset_task_schedule(struct hclge_dev *hdev) { if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && !test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state)) - queue_work_on(cpumask_first(&hdev->affinity_mask), system_wq, - &hdev->rst_service_task); + mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), + hclge_wq, &hdev->service_task, 0); } void hclge_task_schedule(struct hclge_dev *hdev, unsigned long delay_time) { - if (!test_bit(HCLGE_STATE_DOWN, &hdev->state) && - !test_bit(HCLGE_STATE_REMOVING, &hdev->state) && - !test_and_set_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state)) { - hdev->hw_stats.stats_timer++; - hdev->fd_arfs_expire_timer++; + if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && + !test_bit(HCLGE_STATE_RST_FAIL, &hdev->state)) mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), - system_wq, &hdev->service_task, + hclge_wq, &hdev->service_task, delay_time); - } } static int hclge_get_mac_link_status(struct hclge_dev *hdev) @@ -2748,6 +2746,10 @@ static void hclge_update_link_status(struct hclge_dev *hdev) if (!client) return; + + if (test_and_set_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state)) + return; + state = hclge_get_mac_phy_link(hdev); if (state != hdev->hw.mac.link) { for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { @@ -2761,6 +2763,8 @@ static void hclge_update_link_status(struct hclge_dev 
*hdev) } hdev->hw.mac.link = state; } + + clear_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state); } static void hclge_update_port_capability(struct hclge_mac *mac) @@ -2831,6 +2835,12 @@ static int hclge_get_sfp_info(struct hclge_dev *hdev, struct hclge_mac *mac) return ret; } + /* In some case, mac speed get from IMP may be 0, it shouldn't be + * set to mac->speed. + */ + if (!le32_to_cpu(resp->speed)) + return 0; + mac->speed = le32_to_cpu(resp->speed); /* if resp->speed_ability is 0, it means it's an old version * firmware, do not update these params @@ -2940,6 +2950,9 @@ static int hclge_get_vf_config(struct hnae3_handle *handle, int vf, ivf->trusted = vport->vf_info.trusted; ivf->min_tx_rate = 0; ivf->max_tx_rate = vport->vf_info.max_tx_rate; + ivf->vlan = vport->port_base_vlan_cfg.vlan_info.vlan_tag; + ivf->vlan_proto = htons(vport->port_base_vlan_cfg.vlan_info.vlan_proto); + ivf->qos = vport->port_base_vlan_cfg.vlan_info.qos; ether_addr_copy(ivf->mac, vport->vf_info.mac); return 0; @@ -2998,8 +3011,6 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval) /* check for vector0 msix event source */ if (msix_src_reg & HCLGE_VECTOR0_REG_MSIX_MASK) { - dev_info(&hdev->pdev->dev, "received event 0x%x\n", - msix_src_reg); *clearval = msix_src_reg; return HCLGE_VECTOR0_EVENT_ERR; } @@ -3172,8 +3183,10 @@ static int hclge_misc_irq_init(struct hclge_dev *hdev) hclge_get_misc_vector(hdev); /* this would be explicitly freed in the end */ + snprintf(hdev->misc_vector.name, HNAE3_INT_NAME_LEN, "%s-misc-%s", + HCLGE_NAME, pci_name(hdev->pdev)); ret = request_irq(hdev->misc_vector.vector_irq, hclge_misc_irq_handle, - 0, "hclge_misc", hdev); + 0, hdev->misc_vector.name, hdev); if (ret) { hclge_free_vector(hdev, 0); dev_err(&hdev->pdev->dev, "request misc irq(%d) fail\n", @@ -3247,7 +3260,8 @@ static int hclge_notify_roce_client(struct hclge_dev *hdev, static int hclge_reset_wait(struct hclge_dev *hdev) { #define HCLGE_RESET_WATI_MS 100 -#define 
HCLGE_RESET_WAIT_CNT 200 +#define HCLGE_RESET_WAIT_CNT 350 + u32 val, reg, reg_bit; u32 cnt = 0; @@ -3264,8 +3278,6 @@ static int hclge_reset_wait(struct hclge_dev *hdev) reg = HCLGE_FUN_RST_ING; reg_bit = HCLGE_FUN_RST_ING_B; break; - case HNAE3_FLR_RESET: - break; default: dev_err(&hdev->pdev->dev, "Wait for unsupported reset type: %d\n", @@ -3273,20 +3285,6 @@ static int hclge_reset_wait(struct hclge_dev *hdev) return -EINVAL; } - if (hdev->reset_type == HNAE3_FLR_RESET) { - while (!test_bit(HNAE3_FLR_DONE, &hdev->flr_state) && - cnt++ < HCLGE_RESET_WAIT_CNT) - msleep(HCLGE_RESET_WATI_MS); - - if (!test_bit(HNAE3_FLR_DONE, &hdev->flr_state)) { - dev_err(&hdev->pdev->dev, - "flr wait timeout: %u\n", cnt); - return -EBUSY; - } - - return 0; - } - val = hclge_read_dev(&hdev->hw, reg); while (hnae3_get_bit(val, reg_bit) && cnt < HCLGE_RESET_WAIT_CNT) { msleep(HCLGE_RESET_WATI_MS); @@ -3352,7 +3350,19 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset) return 0; } -static int hclge_func_reset_sync_vf(struct hclge_dev *hdev) +static void hclge_mailbox_service_task(struct hclge_dev *hdev) +{ + if (!test_and_clear_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state) || + test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state) || + test_and_set_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state)) + return; + + hclge_mbx_handler(hdev); + + clear_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state); +} + +static void hclge_func_reset_sync_vf(struct hclge_dev *hdev) { struct hclge_pf_rst_sync_cmd *req; struct hclge_desc desc; @@ -3363,26 +3373,28 @@ static int hclge_func_reset_sync_vf(struct hclge_dev *hdev) hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_VF_RST_RDY, true); do { + /* vf need to down netdev by mbx during PF or FLR reset */ + hclge_mailbox_service_task(hdev); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); /* for compatible with old firmware, wait * 100 ms for VF to stop IO */ if (ret == -EOPNOTSUPP) { msleep(HCLGE_RESET_SYNC_TIME); - return 0; + return; } else if (ret) 
{ - dev_err(&hdev->pdev->dev, "sync with VF fail %d!\n", - ret); - return ret; + dev_warn(&hdev->pdev->dev, "sync with VF fail %d!\n", + ret); + return; } else if (req->all_vf_ready) { - return 0; + return; } msleep(HCLGE_PF_RESET_SYNC_TIME); hclge_cmd_reuse_desc(&desc, true); } while (cnt++ < HCLGE_PF_RESET_SYNC_CNT); - dev_err(&hdev->pdev->dev, "sync with VF timeout!\n"); - return -ETIME; + dev_warn(&hdev->pdev->dev, "sync with VF timeout!\n"); } void hclge_report_hw_error(struct hclge_dev *hdev, @@ -3462,12 +3474,6 @@ static void hclge_do_reset(struct hclge_dev *hdev) set_bit(HNAE3_FUNC_RESET, &hdev->reset_pending); hclge_reset_task_schedule(hdev); break; - case HNAE3_FLR_RESET: - dev_info(&pdev->dev, "FLR requested\n"); - /* schedule again to check later */ - set_bit(HNAE3_FLR_RESET, &hdev->reset_pending); - hclge_reset_task_schedule(hdev); - break; default: dev_warn(&pdev->dev, "Unsupported reset type: %d\n", hdev->reset_type); @@ -3483,10 +3489,15 @@ static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev, /* first, resolve any unknown reset type to the known type(s) */ if (test_bit(HNAE3_UNKNOWN_RESET, addr)) { + u32 msix_sts_reg = hclge_read_dev(&hdev->hw, + HCLGE_VECTOR0_PF_OTHER_INT_STS_REG); /* we will intentionally ignore any errors from this function * as we will end up in *some* reset request in any case */ - hclge_handle_hw_msix_error(hdev, addr); + if (hclge_handle_hw_msix_error(hdev, addr)) + dev_info(&hdev->pdev->dev, "received msix interrupt 0x%x\n", + msix_sts_reg); + clear_bit(HNAE3_UNKNOWN_RESET, addr); /* We defered the clearing of the error event which caused * interrupt since it was not posssible to do that in @@ -3551,23 +3562,6 @@ static void hclge_clear_reset_cause(struct hclge_dev *hdev) hclge_enable_vector(&hdev->misc_vector, true); } -static int hclge_reset_prepare_down(struct hclge_dev *hdev) -{ - int ret = 0; - - switch (hdev->reset_type) { - case HNAE3_FUNC_RESET: - /* fall through */ - case HNAE3_FLR_RESET: 
- ret = hclge_set_all_vf_rst(hdev, true); - break; - default: - break; - } - - return ret; -} - static void hclge_reset_handshake(struct hclge_dev *hdev, bool enable) { u32 reg_val; @@ -3581,6 +3575,19 @@ static void hclge_reset_handshake(struct hclge_dev *hdev, bool enable) hclge_write_dev(&hdev->hw, HCLGE_NIC_CSQ_DEPTH_REG, reg_val); } +static int hclge_func_reset_notify_vf(struct hclge_dev *hdev) +{ + int ret; + + ret = hclge_set_all_vf_rst(hdev, true); + if (ret) + return ret; + + hclge_func_reset_sync_vf(hdev); + + return 0; +} + static int hclge_reset_prepare_wait(struct hclge_dev *hdev) { u32 reg_val; @@ -3588,10 +3595,7 @@ static int hclge_reset_prepare_wait(struct hclge_dev *hdev) switch (hdev->reset_type) { case HNAE3_FUNC_RESET: - /* to confirm whether all running VF is ready - * before request PF reset - */ - ret = hclge_func_reset_sync_vf(hdev); + ret = hclge_func_reset_notify_vf(hdev); if (ret) return ret; @@ -3611,16 +3615,9 @@ static int hclge_reset_prepare_wait(struct hclge_dev *hdev) hdev->rst_stats.pf_rst_cnt++; break; case HNAE3_FLR_RESET: - /* to confirm whether all running VF is ready - * before request PF reset - */ - ret = hclge_func_reset_sync_vf(hdev); + ret = hclge_func_reset_notify_vf(hdev); if (ret) return ret; - - set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); - set_bit(HNAE3_FLR_DOWN, &hdev->flr_state); - hdev->rst_stats.flr_rst_cnt++; break; case HNAE3_IMP_RESET: hclge_handle_imp_error(hdev); @@ -3672,6 +3669,8 @@ static bool hclge_reset_err_handle(struct hclge_dev *hdev) hclge_dbg_dump_rst_info(hdev); + set_bit(HCLGE_STATE_RST_FAIL, &hdev->state); + return false; } @@ -3747,10 +3746,9 @@ static int hclge_reset_stack(struct hclge_dev *hdev) return hclge_notify_client(hdev, HNAE3_RESTORE_CLIENT); } -static void hclge_reset(struct hclge_dev *hdev) +static int hclge_reset_prepare(struct hclge_dev *hdev) { struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); - enum hnae3_reset_type reset_level; int ret; /* Initialize ae_dev reset 
status as well, in case enet layer wants to @@ -3761,45 +3759,41 @@ static void hclge_reset(struct hclge_dev *hdev) /* perform reset of the stack & ae device for a client */ ret = hclge_notify_roce_client(hdev, HNAE3_DOWN_CLIENT); if (ret) - goto err_reset; - - ret = hclge_reset_prepare_down(hdev); - if (ret) - goto err_reset; + return ret; rtnl_lock(); ret = hclge_notify_client(hdev, HNAE3_DOWN_CLIENT); - if (ret) - goto err_reset_lock; - rtnl_unlock(); - - ret = hclge_reset_prepare_wait(hdev); if (ret) - goto err_reset; + return ret; - if (hclge_reset_wait(hdev)) - goto err_reset; + return hclge_reset_prepare_wait(hdev); +} + +static int hclge_reset_rebuild(struct hclge_dev *hdev) +{ + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + enum hnae3_reset_type reset_level; + int ret; hdev->rst_stats.hw_reset_done_cnt++; ret = hclge_notify_roce_client(hdev, HNAE3_UNINIT_CLIENT); if (ret) - goto err_reset; + return ret; rtnl_lock(); - ret = hclge_reset_stack(hdev); + rtnl_unlock(); if (ret) - goto err_reset_lock; + return ret; hclge_clear_reset_cause(hdev); ret = hclge_reset_prepare_up(hdev); if (ret) - goto err_reset_lock; + return ret; - rtnl_unlock(); ret = hclge_notify_roce_client(hdev, HNAE3_INIT_CLIENT); /* ignore RoCE notify error if it fails HCLGE_RESET_MAX_FAIL_CNT - 1 @@ -3807,24 +3801,23 @@ static void hclge_reset(struct hclge_dev *hdev) */ if (ret && hdev->rst_stats.reset_fail_cnt < HCLGE_RESET_MAX_FAIL_CNT - 1) - goto err_reset; + return ret; rtnl_lock(); - ret = hclge_notify_client(hdev, HNAE3_UP_CLIENT); - if (ret) - goto err_reset_lock; - rtnl_unlock(); + if (ret) + return ret; ret = hclge_notify_roce_client(hdev, HNAE3_UP_CLIENT); if (ret) - goto err_reset; + return ret; hdev->last_reset_time = jiffies; hdev->rst_stats.reset_fail_cnt = 0; hdev->rst_stats.reset_done_cnt++; ae_dev->reset_type = HNAE3_NONE_RESET; + clear_bit(HCLGE_STATE_RST_FAIL, &hdev->state); /* if default_reset_request has a higher level reset request, * it should be handled 
as soon as possible. since some errors @@ -3835,10 +3828,22 @@ static void hclge_reset(struct hclge_dev *hdev) if (reset_level != HNAE3_NONE_RESET) set_bit(reset_level, &hdev->reset_request); + return 0; +} + +static void hclge_reset(struct hclge_dev *hdev) +{ + if (hclge_reset_prepare(hdev)) + goto err_reset; + + if (hclge_reset_wait(hdev)) + goto err_reset; + + if (hclge_reset_rebuild(hdev)) + goto err_reset; + return; -err_reset_lock: - rtnl_unlock(); err_reset: if (hclge_reset_err_handle(hdev)) hclge_reset_task_schedule(hdev); @@ -3939,34 +3944,18 @@ static void hclge_reset_subtask(struct hclge_dev *hdev) hdev->reset_type = HNAE3_NONE_RESET; } -static void hclge_reset_service_task(struct work_struct *work) +static void hclge_reset_service_task(struct hclge_dev *hdev) { - struct hclge_dev *hdev = - container_of(work, struct hclge_dev, rst_service_task); - - if (test_and_set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + if (!test_and_clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state)) return; - clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state); + down(&hdev->reset_sem); + set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); hclge_reset_subtask(hdev); clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); -} - -static void hclge_mailbox_service_task(struct work_struct *work) -{ - struct hclge_dev *hdev = - container_of(work, struct hclge_dev, mbx_service_task); - - if (test_and_set_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state)) - return; - - clear_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state); - - hclge_mbx_handler(hdev); - - clear_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state); + up(&hdev->reset_sem); } static void hclge_update_vport_alive(struct hclge_dev *hdev) @@ -3986,29 +3975,62 @@ static void hclge_update_vport_alive(struct hclge_dev *hdev) } } -static void hclge_service_task(struct work_struct *work) +static void hclge_periodic_service_task(struct hclge_dev *hdev) { - struct hclge_dev *hdev = - container_of(work, struct hclge_dev, service_task.work); + 
unsigned long delta = round_jiffies_relative(HZ); - clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state); + /* Always handle the link updating to make sure link state is + * updated when it is triggered by mbx. + */ + hclge_update_link_status(hdev); - if (hdev->hw_stats.stats_timer >= HCLGE_STATS_TIMER_INTERVAL) { - hclge_update_stats_for_all(hdev); - hdev->hw_stats.stats_timer = 0; + if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) { + delta = jiffies - hdev->last_serv_processed; + + if (delta < round_jiffies_relative(HZ)) { + delta = round_jiffies_relative(HZ) - delta; + goto out; + } } - hclge_update_port_info(hdev); - hclge_update_link_status(hdev); + hdev->serv_processed_cnt++; hclge_update_vport_alive(hdev); + + if (test_bit(HCLGE_STATE_DOWN, &hdev->state)) { + hdev->last_serv_processed = jiffies; + goto out; + } + + if (!(hdev->serv_processed_cnt % HCLGE_STATS_TIMER_INTERVAL)) + hclge_update_stats_for_all(hdev); + + hclge_update_port_info(hdev); hclge_sync_vlan_filter(hdev); - if (hdev->fd_arfs_expire_timer >= HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL) { + if (!(hdev->serv_processed_cnt % HCLGE_ARFS_EXPIRE_INTERVAL)) hclge_rfs_filter_expire(hdev); - hdev->fd_arfs_expire_timer = 0; - } - hclge_task_schedule(hdev, round_jiffies_relative(HZ)); + hdev->last_serv_processed = jiffies; + +out: + hclge_task_schedule(hdev, delta); +} + +static void hclge_service_task(struct work_struct *work) +{ + struct hclge_dev *hdev = + container_of(work, struct hclge_dev, service_task.work); + + hclge_reset_service_task(hdev); + hclge_mailbox_service_task(hdev); + hclge_periodic_service_task(hdev); + + /* Handle reset and mbx again in case periodical task delays the + * handling by calling hclge_task_schedule() in + * hclge_periodic_service_task(). 
+ */ + hclge_reset_service_task(hdev); + hclge_mailbox_service_task(hdev); } struct hclge_vport *hclge_get_vport(struct hnae3_handle *handle) @@ -4654,7 +4676,7 @@ static int hclge_map_ring_to_vector(struct hnae3_handle *handle, int vector, vector_id = hclge_get_vector_index(hdev, vector); if (vector_id < 0) { dev_err(&hdev->pdev->dev, - "Get vector index fail. vector_id =%d\n", vector_id); + "failed to get vector index. vector=%d\n", vector); return vector_id; } @@ -6734,6 +6756,19 @@ static void hclge_reset_tqp_stats(struct hnae3_handle *handle) } } +static void hclge_flush_link_update(struct hclge_dev *hdev) +{ +#define HCLGE_FLUSH_LINK_TIMEOUT 100000 + + unsigned long last = hdev->serv_processed_cnt; + int i = 0; + + while (test_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state) && + i++ < HCLGE_FLUSH_LINK_TIMEOUT && + last == hdev->serv_processed_cnt) + usleep_range(1, 1); +} + static void hclge_set_timer_task(struct hnae3_handle *handle, bool enable) { struct hclge_vport *vport = hclge_get_vport(handle); @@ -6742,12 +6777,12 @@ static void hclge_set_timer_task(struct hnae3_handle *handle, bool enable) if (enable) { hclge_task_schedule(hdev, round_jiffies_relative(HZ)); } else { - /* Set the DOWN flag here to disable the service to be - * scheduled again - */ + /* Set the DOWN flag here to disable link updating */ set_bit(HCLGE_STATE_DOWN, &hdev->state); - cancel_delayed_work_sync(&hdev->service_task); - clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state); + + /* flush memory to make sure DOWN is seen by service task */ + smp_mb__before_atomic(); + hclge_flush_link_update(hdev); } } @@ -7483,7 +7518,6 @@ void hclge_uninit_vport_mac_table(struct hclge_dev *hdev) struct hclge_vport *vport; int i; - mutex_lock(&hdev->vport_cfg_mutex); for (i = 0; i < hdev->num_alloc_vport; i++) { vport = &hdev->vport[i]; list_for_each_entry_safe(mac, tmp, &vport->uc_mac_list, node) { @@ -7496,7 +7530,6 @@ void hclge_uninit_vport_mac_table(struct hclge_dev *hdev) kfree(mac); } } - 
mutex_unlock(&hdev->vport_cfg_mutex); } static int hclge_get_mac_ethertype_cmd_status(struct hclge_dev *hdev, @@ -8257,7 +8290,6 @@ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev) struct hclge_vport *vport; int i; - mutex_lock(&hdev->vport_cfg_mutex); for (i = 0; i < hdev->num_alloc_vport; i++) { vport = &hdev->vport[i]; list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { @@ -8265,7 +8297,6 @@ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev) kfree(vlan); } } - mutex_unlock(&hdev->vport_cfg_mutex); } static void hclge_restore_vlan_table(struct hnae3_handle *handle) @@ -8277,7 +8308,6 @@ static void hclge_restore_vlan_table(struct hnae3_handle *handle) u16 state, vlan_id; int i; - mutex_lock(&hdev->vport_cfg_mutex); for (i = 0; i < hdev->num_alloc_vport; i++) { vport = &hdev->vport[i]; vlan_proto = vport->port_base_vlan_cfg.vlan_info.vlan_proto; @@ -8303,8 +8333,6 @@ static void hclge_restore_vlan_table(struct hnae3_handle *handle) break; } } - - mutex_unlock(&hdev->vport_cfg_mutex); } int hclge_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) @@ -9256,6 +9284,7 @@ static void hclge_state_init(struct hclge_dev *hdev) set_bit(HCLGE_STATE_DOWN, &hdev->state); clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state); clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); + clear_bit(HCLGE_STATE_RST_FAIL, &hdev->state); clear_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state); clear_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state); } @@ -9269,38 +9298,57 @@ static void hclge_state_uninit(struct hclge_dev *hdev) del_timer_sync(&hdev->reset_timer); if (hdev->service_task.work.func) cancel_delayed_work_sync(&hdev->service_task); - if (hdev->rst_service_task.func) - cancel_work_sync(&hdev->rst_service_task); - if (hdev->mbx_service_task.func) - cancel_work_sync(&hdev->mbx_service_task); } static void hclge_flr_prepare(struct hnae3_ae_dev *ae_dev) { -#define HCLGE_FLR_WAIT_MS 100 -#define HCLGE_FLR_WAIT_CNT 50 - struct hclge_dev *hdev = 
ae_dev->priv; - int cnt = 0; +#define HCLGE_FLR_RETRY_WAIT_MS 500 +#define HCLGE_FLR_RETRY_CNT 5 - clear_bit(HNAE3_FLR_DOWN, &hdev->flr_state); - clear_bit(HNAE3_FLR_DONE, &hdev->flr_state); - set_bit(HNAE3_FLR_RESET, &hdev->default_reset_request); - hclge_reset_event(hdev->pdev, NULL); + struct hclge_dev *hdev = ae_dev->priv; + int retry_cnt = 0; + int ret; - while (!test_bit(HNAE3_FLR_DOWN, &hdev->flr_state) && - cnt++ < HCLGE_FLR_WAIT_CNT) - msleep(HCLGE_FLR_WAIT_MS); +retry: + down(&hdev->reset_sem); + set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); + hdev->reset_type = HNAE3_FLR_RESET; + ret = hclge_reset_prepare(hdev); + if (ret) { + dev_err(&hdev->pdev->dev, "fail to prepare FLR, ret=%d\n", + ret); + if (hdev->reset_pending || + retry_cnt++ < HCLGE_FLR_RETRY_CNT) { + dev_err(&hdev->pdev->dev, + "reset_pending:0x%lx, retry_cnt:%d\n", + hdev->reset_pending, retry_cnt); + clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); + up(&hdev->reset_sem); + msleep(HCLGE_FLR_RETRY_WAIT_MS); + goto retry; + } + } - if (!test_bit(HNAE3_FLR_DOWN, &hdev->flr_state)) - dev_err(&hdev->pdev->dev, - "flr wait down timeout: %d\n", cnt); + /* disable misc vector before FLR done */ + hclge_enable_vector(&hdev->misc_vector, false); + set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); + hdev->rst_stats.flr_rst_cnt++; } static void hclge_flr_done(struct hnae3_ae_dev *ae_dev) { struct hclge_dev *hdev = ae_dev->priv; + int ret; + + hclge_enable_vector(&hdev->misc_vector, true); - set_bit(HNAE3_FLR_DONE, &hdev->flr_state); + ret = hclge_reset_rebuild(hdev); + if (ret) + dev_err(&hdev->pdev->dev, "fail to rebuild, ret=%d\n", ret); + + hdev->reset_type = HNAE3_NONE_RESET; + clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); + up(&hdev->reset_sem); } static void hclge_clear_resetting_state(struct hclge_dev *hdev) @@ -9342,8 +9390,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) hdev->mps = ETH_FRAME_LEN + ETH_FCS_LEN + 2 * VLAN_HLEN; mutex_init(&hdev->vport_lock); - 
mutex_init(&hdev->vport_cfg_mutex); spin_lock_init(&hdev->fd_rule_lock); + sema_init(&hdev->reset_sem, 1); ret = hclge_pci_init(hdev); if (ret) { @@ -9477,8 +9525,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) timer_setup(&hdev->reset_timer, hclge_reset_timer, 0); INIT_DELAYED_WORK(&hdev->service_task, hclge_service_task); - INIT_WORK(&hdev->rst_service_task, hclge_reset_service_task); - INIT_WORK(&hdev->mbx_service_task, hclge_mailbox_service_task); /* Setup affinity after service timer setup because add_timer_on * is called in affinity notify. @@ -9512,6 +9558,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) dev_info(&hdev->pdev->dev, "%s driver initialization finished.\n", HCLGE_DRIVER_NAME); + hclge_task_schedule(hdev, round_jiffies_relative(HZ)); + return 0; err_mdiobus_unreg: @@ -9534,7 +9582,7 @@ out: static void hclge_stats_clear(struct hclge_dev *hdev) { - memset(&hdev->hw_stats, 0, sizeof(hdev->hw_stats)); + memset(&hdev->mac_stats, 0, sizeof(hdev->mac_stats)); } static int hclge_set_mac_spoofchk(struct hclge_dev *hdev, int vf, bool enable) @@ -9895,7 +9943,6 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) mutex_destroy(&hdev->vport_lock); hclge_uninit_vport_mac_table(hdev); hclge_uninit_vport_vlan_table(hdev); - mutex_destroy(&hdev->vport_cfg_mutex); ae_dev->priv = NULL; } @@ -10611,6 +10658,12 @@ static int hclge_init(void) { pr_info("%s is initializing\n", HCLGE_NAME); + hclge_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, HCLGE_NAME); + if (!hclge_wq) { + pr_err("%s: failed to create workqueue\n", HCLGE_NAME); + return -ENOMEM; + } + hnae3_register_ae_algo(&ae_algo); return 0; @@ -10619,6 +10672,7 @@ static int hclge_init(void) static void hclge_exit(void) { hnae3_unregister_ae_algo(&ae_algo); + destroy_workqueue(hclge_wq); } module_init(hclge_init); module_exit(hclge_exit); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 
ebb4c6e9aed3..1c1d6b358897 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -208,13 +208,14 @@ enum HCLGE_DEV_STATE { HCLGE_STATE_NIC_REGISTERED, HCLGE_STATE_ROCE_REGISTERED, HCLGE_STATE_SERVICE_INITED, - HCLGE_STATE_SERVICE_SCHED, HCLGE_STATE_RST_SERVICE_SCHED, HCLGE_STATE_RST_HANDLING, HCLGE_STATE_MBX_SERVICE_SCHED, HCLGE_STATE_MBX_HANDLING, HCLGE_STATE_STATISTICS_UPDATING, HCLGE_STATE_CMD_DISABLE, + HCLGE_STATE_LINK_UPDATING, + HCLGE_STATE_RST_FAIL, HCLGE_STATE_MAX }; @@ -454,11 +455,7 @@ struct hclge_mac_stats { u64 mac_rx_ctrl_pkt_num; }; -#define HCLGE_STATS_TIMER_INTERVAL (60 * 5) -struct hclge_hw_stats { - struct hclge_mac_stats mac_stats; - u32 stats_timer; -}; +#define HCLGE_STATS_TIMER_INTERVAL 300UL struct hclge_vlan_type_cfg { u16 rx_ot_fst_vlan_type; @@ -549,7 +546,7 @@ struct key_info { /* assigned by firmware, the real filter number for each pf may be less */ #define MAX_FD_FILTER_NUM 4096 -#define HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL 5 +#define HCLGE_ARFS_EXPIRE_INTERVAL 5UL enum HCLGE_FD_ACTIVE_RULE_TYPE { HCLGE_FD_RULE_NONE, @@ -712,7 +709,7 @@ struct hclge_dev { struct hnae3_ae_dev *ae_dev; struct hclge_hw hw; struct hclge_misc_vector misc_vector; - struct hclge_hw_stats hw_stats; + struct hclge_mac_stats mac_stats; unsigned long state; unsigned long flr_state; unsigned long last_reset_time; @@ -723,6 +720,7 @@ struct hclge_dev { unsigned long reset_request; /* reset has been requested */ unsigned long reset_pending; /* client rst is pending to be served */ struct hclge_rst_stats rst_stats; + struct semaphore reset_sem; /* protect reset process */ u32 fw_version; u16 num_vmdq_vport; /* Num vmdq vport this PF has set up */ u16 num_tqps; /* Num task queue pairs of this PF */ @@ -774,8 +772,6 @@ struct hclge_dev { unsigned long service_timer_previous; struct timer_list reset_timer; struct delayed_work service_task; - struct work_struct rst_service_task; - struct 
work_struct mbx_service_task; bool cur_promisc; int num_alloc_vfs; /* Actual number of VFs allocated */ @@ -811,7 +807,8 @@ struct hclge_dev { struct hlist_head fd_rule_list; spinlock_t fd_rule_lock; /* protect fd_rule_list and fd_bmap */ u16 hclge_fd_rule_num; - u16 fd_arfs_expire_timer; + unsigned long serv_processed_cnt; + unsigned long last_serv_processed; unsigned long fd_bmap[BITS_TO_LONGS(MAX_FD_FILTER_NUM)]; enum HCLGE_FD_ACTIVE_RULE_TYPE fd_active_type; u8 fd_en; @@ -825,8 +822,6 @@ struct hclge_dev { u16 share_umv_size; struct mutex umv_mutex; /* protect share_umv_size */ - struct mutex vport_cfg_mutex; /* Protect stored vf table */ - DECLARE_KFIFO(mac_tnl_log, struct hclge_mac_tnl_stats, HCLGE_MAC_TNL_LOG_SIZE); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 0b433ebe6a2d..a3c0822191a9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -86,10 +86,12 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len, int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport) { struct hclge_dev *hdev = vport->back; - enum hnae3_reset_type reset_type; + u16 reset_type; u8 msg_data[2]; u8 dest_vfid; + BUILD_BUG_ON(HNAE3_MAX_RESET > U16_MAX); + dest_vfid = (u8)vport->vport_id; if (hdev->reset_type == HNAE3_FUNC_RESET) @@ -635,7 +637,6 @@ static void hclge_handle_link_change_event(struct hclge_dev *hdev, #define LINK_STATUS_OFFSET 1 #define LINK_FAIL_CODE_OFFSET 2 - clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state); hclge_task_schedule(hdev, 0); if (!req->msg[LINK_STATUS_OFFSET]) @@ -798,13 +799,11 @@ void hclge_mbx_handler(struct hclge_dev *hdev) hclge_get_link_mode(vport, req); break; case HCLGE_MBX_GET_VF_FLR_STATUS: - mutex_lock(&hdev->vport_cfg_mutex); hclge_rm_vport_all_mac_table(vport, true, HCLGE_MAC_ADDR_UC); hclge_rm_vport_all_mac_table(vport, true, HCLGE_MAC_ADDR_MC); 
hclge_rm_vport_all_vlan_table(vport, true); - mutex_unlock(&hdev->vport_cfg_mutex); break; case HCLGE_MBX_GET_MEDIA_TYPE: ret = hclge_get_vf_media_type(vport, req); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c index af2245e3bb95..f38d236ebf4f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c @@ -443,7 +443,7 @@ void hclgevf_cmd_uninit(struct hclgevf_dev *hdev) { spin_lock_bh(&hdev->hw.cmq.csq.lock); spin_lock(&hdev->hw.cmq.crq.lock); - clear_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state); + set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state); hclgevf_cmd_uninit_regs(&hdev->hw); spin_unlock(&hdev->hw.cmq.crq.lock); spin_unlock_bh(&hdev->hw.cmq.csq.lock); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 25d78a5aaa34..b26b8ad170d0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -16,6 +16,8 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev); static struct hnae3_ae_algo ae_algovf; +static struct workqueue_struct *hclgevf_wq; + static const struct pci_device_id ae_algovf_pci_tbl[] = { {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_VF), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_DCB_PFC_VF), 0}, @@ -440,6 +442,9 @@ void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state) struct hnae3_client *rclient; struct hnae3_client *client; + if (test_and_set_bit(HCLGEVF_STATE_LINK_UPDATING, &hdev->state)) + return; + client = handle->client; rclient = hdev->roce_client; @@ -452,6 +457,8 @@ void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state) rclient->ops->link_status_change(rhandle, !!link_state); hdev->hw.mac.link = link_state; } + + clear_bit(HCLGEVF_STATE_LINK_UPDATING, &hdev->state); } static void 
hclgevf_update_link_mode(struct hclgevf_dev *hdev) @@ -1404,32 +1411,6 @@ static int hclgevf_notify_client(struct hclgevf_dev *hdev, return ret; } -static void hclgevf_flr_done(struct hnae3_ae_dev *ae_dev) -{ - struct hclgevf_dev *hdev = ae_dev->priv; - - set_bit(HNAE3_FLR_DONE, &hdev->flr_state); -} - -static int hclgevf_flr_poll_timeout(struct hclgevf_dev *hdev, - unsigned long delay_us, - unsigned long wait_cnt) -{ - unsigned long cnt = 0; - - while (!test_bit(HNAE3_FLR_DONE, &hdev->flr_state) && - cnt++ < wait_cnt) - usleep_range(delay_us, delay_us * 2); - - if (!test_bit(HNAE3_FLR_DONE, &hdev->flr_state)) { - dev_err(&hdev->pdev->dev, - "flr wait timeout\n"); - return -ETIMEDOUT; - } - - return 0; -} - static int hclgevf_reset_wait(struct hclgevf_dev *hdev) { #define HCLGEVF_RESET_WAIT_US 20000 @@ -1440,11 +1421,7 @@ static int hclgevf_reset_wait(struct hclgevf_dev *hdev) u32 val; int ret; - if (hdev->reset_type == HNAE3_FLR_RESET) - return hclgevf_flr_poll_timeout(hdev, - HCLGEVF_RESET_WAIT_US, - HCLGEVF_RESET_WAIT_CNT); - else if (hdev->reset_type == HNAE3_VF_RESET) + if (hdev->reset_type == HNAE3_VF_RESET) ret = readl_poll_timeout(hdev->hw.io_base + HCLGEVF_VF_RST_ING, val, !(val & HCLGEVF_VF_RST_ING_BIT), @@ -1516,7 +1493,8 @@ static int hclgevf_reset_stack(struct hclgevf_dev *hdev) /* clear handshake status with IMP */ hclgevf_reset_handshake(hdev, false); - return 0; + /* bring up the nic to enable TX/RX again */ + return hclgevf_notify_client(hdev, HNAE3_UP_CLIENT); } static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev) @@ -1525,18 +1503,10 @@ static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev) int ret = 0; - switch (hdev->reset_type) { - case HNAE3_VF_FUNC_RESET: + if (hdev->reset_type == HNAE3_VF_FUNC_RESET) { ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_RESET, 0, NULL, 0, true, NULL, sizeof(u8)); hdev->rst_stats.vf_func_rst_cnt++; - break; - case HNAE3_FLR_RESET: - set_bit(HNAE3_FLR_DOWN, &hdev->flr_state); - 
hdev->rst_stats.flr_rst_cnt++; - break; - default: - break; } set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state); @@ -1591,11 +1561,12 @@ static void hclgevf_reset_err_handle(struct hclgevf_dev *hdev) set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state); hclgevf_reset_task_schedule(hdev); } else { + set_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state); hclgevf_dump_rst_info(hdev); } } -static int hclgevf_reset(struct hclgevf_dev *hdev) +static int hclgevf_reset_prepare(struct hclgevf_dev *hdev) { struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); int ret; @@ -1605,61 +1576,64 @@ static int hclgevf_reset(struct hclgevf_dev *hdev) */ ae_dev->reset_type = hdev->reset_type; hdev->rst_stats.rst_cnt++; - rtnl_lock(); + rtnl_lock(); /* bring down the nic to stop any ongoing TX/RX */ ret = hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT); - if (ret) - goto err_reset_lock; - rtnl_unlock(); - - ret = hclgevf_reset_prepare_wait(hdev); if (ret) - goto err_reset; + return ret; - /* check if VF could successfully fetch the hardware reset completion - * status from the hardware - */ - ret = hclgevf_reset_wait(hdev); - if (ret) { - /* can't do much in this situation, will disable VF */ - dev_err(&hdev->pdev->dev, - "VF failed(=%d) to fetch H/W reset completion status\n", - ret); - goto err_reset; - } + return hclgevf_reset_prepare_wait(hdev); +} + +static int hclgevf_reset_rebuild(struct hclgevf_dev *hdev) +{ + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + int ret; hdev->rst_stats.hw_rst_done_cnt++; rtnl_lock(); - /* now, re-initialize the nic client and ae device */ ret = hclgevf_reset_stack(hdev); + rtnl_unlock(); if (ret) { dev_err(&hdev->pdev->dev, "failed to reset VF stack\n"); - goto err_reset_lock; + return ret; } - /* bring up the nic to enable TX/RX again */ - ret = hclgevf_notify_client(hdev, HNAE3_UP_CLIENT); - if (ret) - goto err_reset_lock; - - rtnl_unlock(); - hdev->last_reset_time = jiffies; ae_dev->reset_type = HNAE3_NONE_RESET; 
hdev->rst_stats.rst_done_cnt++; hdev->rst_stats.rst_fail_cnt = 0; + clear_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state); + + return 0; +} + +static void hclgevf_reset(struct hclgevf_dev *hdev) +{ + if (hclgevf_reset_prepare(hdev)) + goto err_reset; + + /* check if VF could successfully fetch the hardware reset completion + * status from the hardware + */ + if (hclgevf_reset_wait(hdev)) { + /* can't do much in this situation, will disable VF */ + dev_err(&hdev->pdev->dev, + "failed to fetch H/W reset completion status\n"); + goto err_reset; + } + + if (hclgevf_reset_rebuild(hdev)) + goto err_reset; + + return; - return ret; -err_reset_lock: - rtnl_unlock(); err_reset: hclgevf_reset_err_handle(hdev); - - return ret; } static enum hnae3_reset_type hclgevf_get_reset_level(struct hclgevf_dev *hdev, @@ -1722,25 +1696,60 @@ static void hclgevf_set_def_reset_request(struct hnae3_ae_dev *ae_dev, set_bit(rst_type, &hdev->default_reset_request); } +static void hclgevf_enable_vector(struct hclgevf_misc_vector *vector, bool en) +{ + writel(en ? 
1 : 0, vector->addr); +} + static void hclgevf_flr_prepare(struct hnae3_ae_dev *ae_dev) { -#define HCLGEVF_FLR_WAIT_MS 100 -#define HCLGEVF_FLR_WAIT_CNT 50 +#define HCLGEVF_FLR_RETRY_WAIT_MS 500 +#define HCLGEVF_FLR_RETRY_CNT 5 + + struct hclgevf_dev *hdev = ae_dev->priv; + int retry_cnt = 0; + int ret; + +retry: + down(&hdev->reset_sem); + set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); + hdev->reset_type = HNAE3_FLR_RESET; + ret = hclgevf_reset_prepare(hdev); + if (ret) { + dev_err(&hdev->pdev->dev, "fail to prepare FLR, ret=%d\n", + ret); + if (hdev->reset_pending || + retry_cnt++ < HCLGEVF_FLR_RETRY_CNT) { + dev_err(&hdev->pdev->dev, + "reset_pending:0x%lx, retry_cnt:%d\n", + hdev->reset_pending, retry_cnt); + clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); + up(&hdev->reset_sem); + msleep(HCLGEVF_FLR_RETRY_WAIT_MS); + goto retry; + } + } + + /* disable misc vector before FLR done */ + hclgevf_enable_vector(&hdev->misc_vector, false); + hdev->rst_stats.flr_rst_cnt++; +} + +static void hclgevf_flr_done(struct hnae3_ae_dev *ae_dev) +{ struct hclgevf_dev *hdev = ae_dev->priv; - int cnt = 0; + int ret; - clear_bit(HNAE3_FLR_DOWN, &hdev->flr_state); - clear_bit(HNAE3_FLR_DONE, &hdev->flr_state); - set_bit(HNAE3_FLR_RESET, &hdev->default_reset_request); - hclgevf_reset_event(hdev->pdev, NULL); + hclgevf_enable_vector(&hdev->misc_vector, true); - while (!test_bit(HNAE3_FLR_DOWN, &hdev->flr_state) && - cnt++ < HCLGEVF_FLR_WAIT_CNT) - msleep(HCLGEVF_FLR_WAIT_MS); + ret = hclgevf_reset_rebuild(hdev); + if (ret) + dev_warn(&hdev->pdev->dev, "fail to rebuild, ret=%d\n", + ret); - if (!test_bit(HNAE3_FLR_DOWN, &hdev->flr_state)) - dev_err(&hdev->pdev->dev, - "flr wait down timeout: %d\n", cnt); + hdev->reset_type = HNAE3_NONE_RESET; + clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); + up(&hdev->reset_sem); } static u32 hclgevf_get_fw_version(struct hnae3_handle *handle) @@ -1767,62 +1776,37 @@ static void hclgevf_get_misc_vector(struct hclgevf_dev *hdev) void 
hclgevf_reset_task_schedule(struct hclgevf_dev *hdev) { - if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state) && - !test_bit(HCLGEVF_STATE_REMOVING, &hdev->state)) { - set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state); - schedule_work(&hdev->rst_service_task); - } + if (!test_bit(HCLGEVF_STATE_REMOVING, &hdev->state) && + !test_and_set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, + &hdev->state)) + mod_delayed_work(hclgevf_wq, &hdev->service_task, 0); } void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev) { - if (!test_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state) && - !test_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state)) { - set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state); - schedule_work(&hdev->mbx_service_task); - } + if (!test_bit(HCLGEVF_STATE_REMOVING, &hdev->state) && + !test_and_set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, + &hdev->state)) + mod_delayed_work(hclgevf_wq, &hdev->service_task, 0); } -static void hclgevf_task_schedule(struct hclgevf_dev *hdev) +static void hclgevf_task_schedule(struct hclgevf_dev *hdev, + unsigned long delay) { - if (!test_bit(HCLGEVF_STATE_DOWN, &hdev->state) && - !test_and_set_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state)) - schedule_work(&hdev->service_task); -} - -static void hclgevf_deferred_task_schedule(struct hclgevf_dev *hdev) -{ - /* if we have any pending mailbox event then schedule the mbx task */ - if (hdev->mbx_event_pending) - hclgevf_mbx_task_schedule(hdev); - - if (test_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state)) - hclgevf_reset_task_schedule(hdev); + if (!test_bit(HCLGEVF_STATE_REMOVING, &hdev->state) && + !test_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state)) + mod_delayed_work(hclgevf_wq, &hdev->service_task, delay); } -static void hclgevf_service_timer(struct timer_list *t) -{ - struct hclgevf_dev *hdev = from_timer(hdev, t, service_timer); - - mod_timer(&hdev->service_timer, jiffies + - HCLGEVF_GENERAL_TASK_INTERVAL * HZ); - - hdev->stats_timer++; - hclgevf_task_schedule(hdev); -} - -static 
void hclgevf_reset_service_task(struct work_struct *work) +static void hclgevf_reset_service_task(struct hclgevf_dev *hdev) { #define HCLGEVF_MAX_RESET_ATTEMPTS_CNT 3 - struct hclgevf_dev *hdev = - container_of(work, struct hclgevf_dev, rst_service_task); - int ret; - - if (test_and_set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) + if (!test_and_clear_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state)) return; - clear_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state); + down(&hdev->reset_sem); + set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); if (test_and_clear_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state)) { @@ -1836,12 +1820,8 @@ static void hclgevf_reset_service_task(struct work_struct *work) hdev->last_reset_time = jiffies; while ((hdev->reset_type = hclgevf_get_reset_level(hdev, &hdev->reset_pending)) - != HNAE3_NONE_RESET) { - ret = hclgevf_reset(hdev); - if (ret) - dev_err(&hdev->pdev->dev, - "VF stack reset failed %d.\n", ret); - } + != HNAE3_NONE_RESET) + hclgevf_reset(hdev); } else if (test_and_clear_bit(HCLGEVF_RESET_REQUESTED, &hdev->reset_state)) { /* we could be here when either of below happens: @@ -1883,41 +1863,27 @@ static void hclgevf_reset_service_task(struct work_struct *work) } clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); + up(&hdev->reset_sem); } -static void hclgevf_mailbox_service_task(struct work_struct *work) +static void hclgevf_mailbox_service_task(struct hclgevf_dev *hdev) { - struct hclgevf_dev *hdev; - - hdev = container_of(work, struct hclgevf_dev, mbx_service_task); + if (!test_and_clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state)) + return; if (test_and_set_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state)) return; - clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state); - hclgevf_mbx_async_handler(hdev); clear_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state); } -static void hclgevf_keep_alive_timer(struct timer_list *t) -{ - struct hclgevf_dev *hdev = from_timer(hdev, t, keep_alive_timer); - - 
schedule_work(&hdev->keep_alive_task); - mod_timer(&hdev->keep_alive_timer, jiffies + - HCLGEVF_KEEP_ALIVE_TASK_INTERVAL * HZ); -} - -static void hclgevf_keep_alive_task(struct work_struct *work) +static void hclgevf_keep_alive(struct hclgevf_dev *hdev) { - struct hclgevf_dev *hdev; u8 respmsg; int ret; - hdev = container_of(work, struct hclgevf_dev, keep_alive_task); - if (test_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state)) return; @@ -1928,19 +1894,32 @@ static void hclgevf_keep_alive_task(struct work_struct *work) "VF sends keep alive cmd failed(=%d)\n", ret); } -static void hclgevf_service_task(struct work_struct *work) +static void hclgevf_periodic_service_task(struct hclgevf_dev *hdev) { - struct hnae3_handle *handle; - struct hclgevf_dev *hdev; + unsigned long delta = round_jiffies_relative(HZ); + struct hnae3_handle *handle = &hdev->nic; - hdev = container_of(work, struct hclgevf_dev, service_task); - handle = &hdev->nic; + if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) { + delta = jiffies - hdev->last_serv_processed; - if (hdev->stats_timer >= HCLGEVF_STATS_TIMER_INTERVAL) { - hclgevf_tqps_update_stats(handle); - hdev->stats_timer = 0; + if (delta < round_jiffies_relative(HZ)) { + delta = round_jiffies_relative(HZ) - delta; + goto out; + } } + hdev->serv_processed_cnt++; + if (!(hdev->serv_processed_cnt % HCLGEVF_KEEP_ALIVE_TASK_INTERVAL)) + hclgevf_keep_alive(hdev); + + if (test_bit(HCLGEVF_STATE_DOWN, &hdev->state)) { + hdev->last_serv_processed = jiffies; + goto out; + } + + if (!(hdev->serv_processed_cnt % HCLGEVF_STATS_TIMER_INTERVAL)) + hclgevf_tqps_update_stats(handle); + /* request the link status from the PF. 
PF would be able to tell VF * about such updates in future so we might remove this later */ @@ -1950,9 +1929,27 @@ static void hclgevf_service_task(struct work_struct *work) hclgevf_sync_vlan_filter(hdev); - hclgevf_deferred_task_schedule(hdev); + hdev->last_serv_processed = jiffies; - clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state); +out: + hclgevf_task_schedule(hdev, delta); +} + +static void hclgevf_service_task(struct work_struct *work) +{ + struct hclgevf_dev *hdev = container_of(work, struct hclgevf_dev, + service_task.work); + + hclgevf_reset_service_task(hdev); + hclgevf_mailbox_service_task(hdev); + hclgevf_periodic_service_task(hdev); + + /* Handle reset and mbx again in case periodical task delays the + * handling by calling hclgevf_task_schedule() in + * hclgevf_periodic_service_task() + */ + hclgevf_reset_service_task(hdev); + hclgevf_mailbox_service_task(hdev); } static void hclgevf_clear_event_cause(struct hclgevf_dev *hdev, u32 regclr) @@ -2010,11 +2007,6 @@ static enum hclgevf_evt_cause hclgevf_check_evt_cause(struct hclgevf_dev *hdev, return HCLGEVF_VECTOR0_EVENT_OTHER; } -static void hclgevf_enable_vector(struct hclgevf_misc_vector *vector, bool en) -{ - writel(en ? 
1 : 0, vector->addr); -} - static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data) { enum hclgevf_evt_cause event_cause; @@ -2189,16 +2181,31 @@ static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev) false); } +static void hclgevf_flush_link_update(struct hclgevf_dev *hdev) +{ +#define HCLGEVF_FLUSH_LINK_TIMEOUT 100000 + + unsigned long last = hdev->serv_processed_cnt; + int i = 0; + + while (test_bit(HCLGEVF_STATE_LINK_UPDATING, &hdev->state) && + i++ < HCLGEVF_FLUSH_LINK_TIMEOUT && + last == hdev->serv_processed_cnt) + usleep_range(1, 1); +} + static void hclgevf_set_timer_task(struct hnae3_handle *handle, bool enable) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); if (enable) { - mod_timer(&hdev->service_timer, jiffies + HZ); + hclgevf_task_schedule(hdev, 0); } else { - del_timer_sync(&hdev->service_timer); - cancel_work_sync(&hdev->service_task); - clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state); + set_bit(HCLGEVF_STATE_DOWN, &hdev->state); + + /* flush memory to make sure DOWN is seen by service task */ + smp_mb__before_atomic(); + hclgevf_flush_link_update(hdev); } } @@ -2245,16 +2252,12 @@ static int hclgevf_set_alive(struct hnae3_handle *handle, bool alive) static int hclgevf_client_start(struct hnae3_handle *handle) { - struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); int ret; ret = hclgevf_set_alive(handle, true); if (ret) return ret; - mod_timer(&hdev->keep_alive_timer, jiffies + - HCLGEVF_KEEP_ALIVE_TASK_INTERVAL * HZ); - return 0; } @@ -2267,27 +2270,18 @@ static void hclgevf_client_stop(struct hnae3_handle *handle) if (ret) dev_warn(&hdev->pdev->dev, "%s failed %d\n", __func__, ret); - - del_timer_sync(&hdev->keep_alive_timer); - cancel_work_sync(&hdev->keep_alive_task); } static void hclgevf_state_init(struct hclgevf_dev *hdev) { - /* setup tasks for the MBX */ - INIT_WORK(&hdev->mbx_service_task, hclgevf_mailbox_service_task); clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state); 
clear_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state); + clear_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state); - /* setup tasks for service timer */ - timer_setup(&hdev->service_timer, hclgevf_service_timer, 0); - - INIT_WORK(&hdev->service_task, hclgevf_service_task); - clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state); - - INIT_WORK(&hdev->rst_service_task, hclgevf_reset_service_task); + INIT_DELAYED_WORK(&hdev->service_task, hclgevf_service_task); mutex_init(&hdev->mbx_resp.mbx_mutex); + sema_init(&hdev->reset_sem, 1); /* bring the device down */ set_bit(HCLGEVF_STATE_DOWN, &hdev->state); @@ -2298,18 +2292,8 @@ static void hclgevf_state_uninit(struct hclgevf_dev *hdev) set_bit(HCLGEVF_STATE_DOWN, &hdev->state); set_bit(HCLGEVF_STATE_REMOVING, &hdev->state); - if (hdev->keep_alive_timer.function) - del_timer_sync(&hdev->keep_alive_timer); - if (hdev->keep_alive_task.func) - cancel_work_sync(&hdev->keep_alive_task); - if (hdev->service_timer.function) - del_timer_sync(&hdev->service_timer); - if (hdev->service_task.func) - cancel_work_sync(&hdev->service_task); - if (hdev->mbx_service_task.func) - cancel_work_sync(&hdev->mbx_service_task); - if (hdev->rst_service_task.func) - cancel_work_sync(&hdev->rst_service_task); + if (hdev->service_task.work.func) + cancel_delayed_work_sync(&hdev->service_task); mutex_destroy(&hdev->mbx_resp.mbx_mutex); } @@ -2383,8 +2367,10 @@ static int hclgevf_misc_irq_init(struct hclgevf_dev *hdev) hclgevf_get_misc_vector(hdev); + snprintf(hdev->misc_vector.name, HNAE3_INT_NAME_LEN, "%s-misc-%s", + HCLGEVF_NAME, pci_name(hdev->pdev)); ret = request_irq(hdev->misc_vector.vector_irq, hclgevf_misc_irq_handle, - 0, "hclgevf_cmd", hdev); + 0, hdev->misc_vector.name, hdev); if (ret) { dev_err(&hdev->pdev->dev, "VF failed to request misc irq(%d)\n", hdev->misc_vector.vector_irq); @@ -2807,6 +2793,8 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) dev_info(&hdev->pdev->dev, "finished initializing %s driver\n", HCLGEVF_DRIVER_NAME); + 
hclgevf_task_schedule(hdev, round_jiffies_relative(HZ)); + return 0; err_config: @@ -2838,7 +2826,6 @@ static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev) static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev) { struct pci_dev *pdev = ae_dev->pdev; - struct hclgevf_dev *hdev; int ret; ret = hclgevf_alloc_hdev(ae_dev); @@ -2853,10 +2840,6 @@ static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev) return ret; } - hdev = ae_dev->priv; - timer_setup(&hdev->keep_alive_timer, hclgevf_keep_alive_timer, 0); - INIT_WORK(&hdev->keep_alive_task, hclgevf_keep_alive_task); - return 0; } @@ -3213,6 +3196,12 @@ static int hclgevf_init(void) { pr_info("%s is initializing\n", HCLGEVF_NAME); + hclgevf_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, HCLGEVF_NAME); + if (!hclgevf_wq) { + pr_err("%s: failed to create workqueue\n", HCLGEVF_NAME); + return -ENOMEM; + } + hnae3_register_ae_algo(&ae_algovf); return 0; @@ -3221,6 +3210,7 @@ static int hclgevf_init(void) static void hclgevf_exit(void) { hnae3_unregister_ae_algo(&ae_algovf); + destroy_workqueue(hclgevf_wq); } module_init(hclgevf_init); module_exit(hclgevf_exit); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 2f4c81bf4169..fee8d97f323c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -142,12 +142,13 @@ enum hclgevf_states { HCLGEVF_STATE_REMOVING, HCLGEVF_STATE_NIC_REGISTERED, /* task states */ - HCLGEVF_STATE_SERVICE_SCHED, HCLGEVF_STATE_RST_SERVICE_SCHED, HCLGEVF_STATE_RST_HANDLING, HCLGEVF_STATE_MBX_SERVICE_SCHED, HCLGEVF_STATE_MBX_HANDLING, HCLGEVF_STATE_CMD_DISABLE, + HCLGEVF_STATE_LINK_UPDATING, + HCLGEVF_STATE_RST_FAIL, }; struct hclgevf_mac { @@ -220,6 +221,7 @@ struct hclgevf_rss_cfg { struct hclgevf_misc_vector { u8 __iomem *addr; int vector_irq; + char name[HNAE3_INT_NAME_LEN]; }; struct hclgevf_rst_stats { @@ -251,6 +253,7 @@ 
struct hclgevf_dev { unsigned long reset_state; /* requested, pending */ struct hclgevf_rst_stats rst_stats; u32 reset_attempts; + struct semaphore reset_sem; /* protect reset process */ u32 fw_version; u16 num_tqps; /* num task queue pairs of this PF */ @@ -283,12 +286,7 @@ struct hclgevf_dev { struct hclgevf_mbx_resp_status mbx_resp; /* mailbox response */ struct hclgevf_mbx_arq_ring arq; /* mailbox async rx queue */ - struct timer_list service_timer; - struct timer_list keep_alive_timer; - struct work_struct service_task; - struct work_struct keep_alive_task; - struct work_struct rst_service_task; - struct work_struct mbx_service_task; + struct delayed_work service_task; struct hclgevf_tqp *htqp; @@ -298,7 +296,8 @@ struct hclgevf_dev { struct hnae3_client *nic_client; struct hnae3_client *roce_client; u32 flag; - u32 stats_timer; + unsigned long serv_processed_cnt; + unsigned long last_serv_processed; }; static inline bool hclgevf_is_reset_pending(struct hclgevf_dev *hdev) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 2411ad270c98..02a14f5e7fe3 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -766,7 +766,7 @@ static void hinic_set_rx_mode(struct net_device *netdev) queue_work(nic_dev->workq, &rx_mode_work->work); } -static void hinic_tx_timeout(struct net_device *netdev) +static void hinic_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct hinic_dev *nic_dev = netdev_priv(netdev); diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c index 92929750f832..bef676d93339 100644 --- a/drivers/net/ethernet/i825xx/82596.c +++ b/drivers/net/ethernet/i825xx/82596.c @@ -363,7 +363,7 @@ static netdev_tx_t i596_start_xmit(struct sk_buff *skb, struct net_device *dev); static irqreturn_t i596_interrupt(int irq, void *dev_id); static int i596_close(struct net_device *dev); static void 
i596_add_cmd(struct net_device *dev, struct i596_cmd *cmd); -static void i596_tx_timeout (struct net_device *dev); +static void i596_tx_timeout (struct net_device *dev, unsigned int txqueue); static void print_eth(unsigned char *buf, char *str); static void set_multicast_list(struct net_device *dev); @@ -1019,7 +1019,7 @@ err_irq_dev: return res; } -static void i596_tx_timeout (struct net_device *dev) +static void i596_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct i596_private *lp = dev->ml_priv; int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/i825xx/ether1.c b/drivers/net/ethernet/i825xx/ether1.c index bb3b8adbe4f0..a0bfb509e002 100644 --- a/drivers/net/ethernet/i825xx/ether1.c +++ b/drivers/net/ethernet/i825xx/ether1.c @@ -66,7 +66,7 @@ static netdev_tx_t ether1_sendpacket(struct sk_buff *skb, static irqreturn_t ether1_interrupt(int irq, void *dev_id); static int ether1_close(struct net_device *dev); static void ether1_setmulticastlist(struct net_device *dev); -static void ether1_timeout(struct net_device *dev); +static void ether1_timeout(struct net_device *dev, unsigned int txqueue); /* ------------------------------------------------------------------------- */ @@ -650,7 +650,7 @@ ether1_open (struct net_device *dev) } static void -ether1_timeout(struct net_device *dev) +ether1_timeout(struct net_device *dev, unsigned int txqueue) { printk(KERN_WARNING "%s: transmit timeout, network cable problem?\n", dev->name); diff --git a/drivers/net/ethernet/i825xx/lib82596.c b/drivers/net/ethernet/i825xx/lib82596.c index f9742af7f142..b03757e169e4 100644 --- a/drivers/net/ethernet/i825xx/lib82596.c +++ b/drivers/net/ethernet/i825xx/lib82596.c @@ -351,7 +351,7 @@ static netdev_tx_t i596_start_xmit(struct sk_buff *skb, struct net_device *dev); static irqreturn_t i596_interrupt(int irq, void *dev_id); static int i596_close(struct net_device *dev); static void i596_add_cmd(struct net_device *dev, struct i596_cmd *cmd); -static void 
i596_tx_timeout (struct net_device *dev); +static void i596_tx_timeout (struct net_device *dev, unsigned int txqueue); static void print_eth(unsigned char *buf, char *str); static void set_multicast_list(struct net_device *dev); static inline void ca(struct net_device *dev); @@ -936,7 +936,7 @@ out_remove_rx_bufs: return -EAGAIN; } -static void i596_tx_timeout (struct net_device *dev) +static void i596_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct i596_private *lp = netdev_priv(dev); diff --git a/drivers/net/ethernet/i825xx/sun3_82586.c b/drivers/net/ethernet/i825xx/sun3_82586.c index 1a86184d44c0..4564ee02c95f 100644 --- a/drivers/net/ethernet/i825xx/sun3_82586.c +++ b/drivers/net/ethernet/i825xx/sun3_82586.c @@ -125,7 +125,7 @@ static netdev_tx_t sun3_82586_send_packet(struct sk_buff *, struct net_device *); static struct net_device_stats *sun3_82586_get_stats(struct net_device *dev); static void set_multicast_list(struct net_device *dev); -static void sun3_82586_timeout(struct net_device *dev); +static void sun3_82586_timeout(struct net_device *dev, unsigned int txqueue); #if 0 static void sun3_82586_dump(struct net_device *,void *); #endif @@ -965,7 +965,7 @@ static void startrecv586(struct net_device *dev) WAIT_4_SCB_CMD_RUC(); /* wait for accept cmd. (no timeout!!) 
*/ } -static void sun3_82586_timeout(struct net_device *dev) +static void sun3_82586_timeout(struct net_device *dev, unsigned int txqueue) { struct priv *p = netdev_priv(dev); #ifndef NO_NOPCOMMANDS diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 13e30eba5349..0273fb7a9d01 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -2786,7 +2786,7 @@ out: return; } -static void ehea_tx_watchdog(struct net_device *dev) +static void ehea_tx_watchdog(struct net_device *dev, unsigned int txqueue) { struct ehea_port *port = netdev_priv(dev); diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 2e40425d8a34..b7fc17756c51 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -776,7 +776,7 @@ static void emac_reset_work(struct work_struct *work) mutex_unlock(&dev->link_lock); } -static void emac_tx_timeout(struct net_device *ndev) +static void emac_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct emac_instance *dev = netdev_priv(ndev); diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 830791ab4619..c75239d8820f 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2282,7 +2282,7 @@ err: return -ret; } -static void ibmvnic_tx_timeout(struct net_device *dev) +static void ibmvnic_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ibmvnic_adapter *adapter = netdev_priv(dev); diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index a65d5a9ba7db..1b8d015ebfb0 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -2316,7 +2316,7 @@ static void e100_down(struct nic *nic) e100_rx_clean_list(nic); } -static void e100_tx_timeout(struct net_device *netdev) +static void e100_tx_timeout(struct net_device *netdev, unsigned int 
txqueue) { struct nic *nic = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index aca97b084003..2bced34c19ba 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -134,7 +134,7 @@ static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd); static void e1000_enter_82542_rst(struct e1000_adapter *adapter); static void e1000_leave_82542_rst(struct e1000_adapter *adapter); -static void e1000_tx_timeout(struct net_device *dev); +static void e1000_tx_timeout(struct net_device *dev, unsigned int txqueue); static void e1000_reset_task(struct work_struct *work); static void e1000_smartspeed(struct e1000_adapter *adapter); static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter, @@ -3488,7 +3488,7 @@ exit: * e1000_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void e1000_tx_timeout(struct net_device *netdev) +static void e1000_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct e1000_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 7c5b18d87b49..db4ea58bac82 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -4721,7 +4721,7 @@ int e1000e_close(struct net_device *netdev) e1000_free_irq(adapter); /* Link status message must follow this format */ - pr_info("%s NIC Link is Down\n", netdev->name); + netdev_info(netdev, "NIC Link is Down\n"); } napi_disable(&adapter->napi); @@ -5071,12 +5071,13 @@ static void e1000_print_link_info(struct e1000_adapter *adapter) u32 ctrl = er32(CTRL); /* Link status message must follow this format for user tools */ - pr_info("%s NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n", - adapter->netdev->name, adapter->link_speed, - adapter->link_duplex == 
FULL_DUPLEX ? "Full" : "Half", - (ctrl & E1000_CTRL_TFCE) && (ctrl & E1000_CTRL_RFCE) ? "Rx/Tx" : - (ctrl & E1000_CTRL_RFCE) ? "Rx" : - (ctrl & E1000_CTRL_TFCE) ? "Tx" : "None"); + netdev_info(adapter->netdev, + "NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n", + adapter->link_speed, + adapter->link_duplex == FULL_DUPLEX ? "Full" : "Half", + (ctrl & E1000_CTRL_TFCE) && (ctrl & E1000_CTRL_RFCE) ? "Rx/Tx" : + (ctrl & E1000_CTRL_RFCE) ? "Rx" : + (ctrl & E1000_CTRL_TFCE) ? "Tx" : "None"); } static bool e1000e_has_link(struct e1000_adapter *adapter) @@ -5319,7 +5320,7 @@ static void e1000_watchdog_task(struct work_struct *work) adapter->link_speed = 0; adapter->link_duplex = 0; /* Link status message must follow this format */ - pr_info("%s NIC Link is Down\n", adapter->netdev->name); + netdev_info(netdev, "NIC Link is Down\n"); netif_carrier_off(netdev); netif_stop_queue(netdev); if (!test_bit(__E1000_DOWN, &adapter->state)) @@ -5940,7 +5941,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, * e1000_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void e1000_tx_timeout(struct net_device *netdev) +static void e1000_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct e1000_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 68baee04dc58..0637ccadee79 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -696,21 +696,24 @@ static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev) /** * fm10k_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure + * @txqueue: the index of the Tx queue that timed out **/ -static void fm10k_tx_timeout(struct net_device *netdev) +static void fm10k_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct fm10k_intfc *interface = 
netdev_priv(netdev); + struct fm10k_ring *tx_ring; bool real_tx_hang = false; - int i; - -#define TX_TIMEO_LIMIT 16000 - for (i = 0; i < interface->num_tx_queues; i++) { - struct fm10k_ring *tx_ring = interface->tx_ring[i]; - if (check_for_tx_hang(tx_ring) && fm10k_check_tx_hang(tx_ring)) - real_tx_hang = true; + if (txqueue >= interface->num_tx_queues) { + WARN(1, "invalid Tx queue index %d", txqueue); + return; } + tx_ring = interface->tx_ring[txqueue]; + if (check_for_tx_hang(tx_ring) && fm10k_check_tx_hang(tx_ring)) + real_tx_hang = true; + +#define TX_TIMEO_LIMIT 16000 if (real_tx_hang) { fm10k_tx_timeout_reset(interface); } else { diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 2c5af6d4a6b1..8c3e753bfb9d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -301,43 +301,24 @@ void i40e_service_event_schedule(struct i40e_pf *pf) * device is munged, not just the one netdev port, so go for the full * reset. 
**/ -static void i40e_tx_timeout(struct net_device *netdev) +static void i40e_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; struct i40e_ring *tx_ring = NULL; - unsigned int i, hung_queue = 0; + unsigned int i; u32 head, val; pf->tx_timeout_count++; - /* find the stopped queue the same way the stack does */ - for (i = 0; i < netdev->num_tx_queues; i++) { - struct netdev_queue *q; - unsigned long trans_start; - - q = netdev_get_tx_queue(netdev, i); - trans_start = q->trans_start; - if (netif_xmit_stopped(q) && - time_after(jiffies, - (trans_start + netdev->watchdog_timeo))) { - hung_queue = i; - break; - } - } - - if (i == netdev->num_tx_queues) { - netdev_info(netdev, "tx_timeout: no netdev hung queue found\n"); - } else { - /* now that we have an index, find the tx_ring struct */ - for (i = 0; i < vsi->num_queue_pairs; i++) { - if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) { - if (hung_queue == - vsi->tx_rings[i]->queue_index) { - tx_ring = vsi->tx_rings[i]; - break; - } + /* with txqueue index, find the tx_ring struct */ + for (i = 0; i < vsi->num_queue_pairs; i++) { + if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) { + if (txqueue == + vsi->tx_rings[i]->queue_index) { + tx_ring = vsi->tx_rings[i]; + break; } } } @@ -363,14 +344,14 @@ static void i40e_tx_timeout(struct net_device *netdev) val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0); netdev_info(netdev, "tx_timeout: VSI_seid: %d, Q %d, NTC: 0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x, INT: 0x%x\n", - vsi->seid, hung_queue, tx_ring->next_to_clean, + vsi->seid, txqueue, tx_ring->next_to_clean, head, tx_ring->next_to_use, readl(tx_ring->tail), val); } pf->tx_timeout_last_recovery = jiffies; - netdev_info(netdev, "tx_timeout recovery level %d, hung_queue %d\n", - pf->tx_timeout_recovery_level, hung_queue); + netdev_info(netdev, "tx_timeout recovery level %d, txqueue %d\n", + 
pf->tx_timeout_recovery_level, txqueue); switch (pf->tx_timeout_recovery_level) { case 1: diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index f73cd917c44f..42058fad6a3c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -269,7 +269,7 @@ static bool i40e_alloc_buffer_zc(struct i40e_ring *rx_ring, bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom); - xsk_umem_discard_addr(umem); + xsk_umem_release_addr(umem); return true; } @@ -306,7 +306,7 @@ static bool i40e_alloc_buffer_slow_zc(struct i40e_ring *rx_ring, bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom); - xsk_umem_discard_addr_rq(umem); + xsk_umem_release_addr_rq(umem); return true; } diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 8e16be960e96..62fe56ddcb6e 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -159,7 +159,7 @@ void iavf_schedule_reset(struct iavf_adapter *adapter) * iavf_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void iavf_tx_timeout(struct net_device *netdev) +static void iavf_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct iavf_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index f972dce8aebb..cb10abb14e11 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -174,6 +174,8 @@ struct ice_sw { struct ice_pf *pf; u16 sw_id; /* switch ID for this switch */ u16 bridge_mode; /* VEB/VEPA/Port Virtualizer */ + struct ice_vsi *dflt_vsi; /* default VSI for this switch */ + u8 dflt_vsi_ena:1; /* true if above dflt_vsi is enabled */ }; enum ice_state { @@ -275,6 +277,7 @@ struct ice_vsi { u8 current_isup:1; /* Sync 'link up' logging */ u8 
stat_offsets_loaded:1; u8 vlan_ena:1; + u16 num_vlan; /* queue information */ u8 tx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ @@ -462,12 +465,13 @@ static inline void ice_set_ring_xdp(struct ice_ring *ring) static inline struct xdp_umem *ice_xsk_umem(struct ice_ring *ring) { struct xdp_umem **umems = ring->vsi->xsk_umems; - int qid = ring->q_index; + u16 qid = ring->q_index; if (ice_ring_is_xdp(ring)) qid -= ring->vsi->num_xdp_txq; - if (!umems || !umems[qid] || !ice_is_xdp_ena_vsi(ring->vsi)) + if (qid >= ring->vsi->num_xsk_umems || !umems || !umems[qid] || + !ice_is_xdp_ena_vsi(ring->vsi)) return NULL; return umems[qid]; diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 77d6a0291e97..d8e975cceb21 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -93,7 +93,8 @@ static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena) * @vsi: the VSI being configured * @v_idx: index of the vector in the VSI struct * - * We allocate one q_vector. If allocation fails we return -ENOMEM. + * We allocate one q_vector and set default value for ITR setting associated + * with this q_vector. If allocation fails we return -ENOMEM. 
*/ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx) { @@ -108,6 +109,8 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx) q_vector->vsi = vsi; q_vector->v_idx = v_idx; + q_vector->tx.itr_setting = ICE_DFLT_TX_ITR; + q_vector->rx.itr_setting = ICE_DFLT_RX_ITR; if (vsi->type == ICE_VSI_VF) goto out; /* only set affinity_mask if the CPU is online */ @@ -299,6 +302,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring) if (ring->vsi->type == ICE_VSI_PF) { if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) + /* coverity[check_return] */ xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->q_index); @@ -323,7 +327,9 @@ int ice_setup_rx_ctx(struct ice_ring *ring) dev_info(&vsi->back->pdev->dev, "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n", ring->q_index); } else { + ring->zca.free = NULL; if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) + /* coverity[check_return] */ xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->q_index); @@ -674,10 +680,6 @@ void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector) if (q_vector->num_ring_rx) { struct ice_ring_container *rc = &q_vector->rx; - /* if this value is set then don't overwrite with default */ - if (!rc->itr_setting) - rc->itr_setting = ICE_DFLT_RX_ITR; - rc->target_itr = ITR_TO_REG(rc->itr_setting); rc->next_update = jiffies + 1; rc->current_itr = rc->target_itr; @@ -688,10 +690,6 @@ void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector) if (q_vector->num_ring_tx) { struct ice_ring_container *rc = &q_vector->tx; - /* if this value is set then don't overwrite with default */ - if (!rc->itr_setting) - rc->itr_setting = ICE_DFLT_TX_ITR; - rc->target_itr = ITR_TO_REG(rc->itr_setting); rc->next_update = jiffies + 1; rc->current_itr = rc->target_itr; diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index fb1d930470c7..a03b4fdc01e6 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ 
b/drivers/net/ethernet/intel/ice/ice_common.c @@ -7,25 +7,6 @@ #define ICE_PF_RESET_WAIT_COUNT 200 -#define ICE_PROG_FLEX_ENTRY(hw, rxdid, mdid, idx) \ - wr32((hw), GLFLXP_RXDID_FLX_WRD_##idx(rxdid), \ - ((ICE_RX_OPC_MDID << \ - GLFLXP_RXDID_FLX_WRD_##idx##_RXDID_OPCODE_S) & \ - GLFLXP_RXDID_FLX_WRD_##idx##_RXDID_OPCODE_M) | \ - (((mdid) << GLFLXP_RXDID_FLX_WRD_##idx##_PROT_MDID_S) & \ - GLFLXP_RXDID_FLX_WRD_##idx##_PROT_MDID_M)) - -#define ICE_PROG_FLG_ENTRY(hw, rxdid, flg_0, flg_1, flg_2, flg_3, idx) \ - wr32((hw), GLFLXP_RXDID_FLAGS(rxdid, idx), \ - (((flg_0) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S) & \ - GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M) | \ - (((flg_1) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_S) & \ - GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_M) | \ - (((flg_2) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_S) & \ - GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_M) | \ - (((flg_3) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_S) & \ - GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_M)) - /** * ice_set_mac_type - Sets MAC type * @hw: pointer to the HW structure @@ -348,88 +329,6 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, } /** - * ice_init_flex_flags - * @hw: pointer to the hardware structure - * @prof_id: Rx Descriptor Builder profile ID - * - * Function to initialize Rx flex flags - */ -static void ice_init_flex_flags(struct ice_hw *hw, enum ice_rxdid prof_id) -{ - u8 idx = 0; - - /* Flex-flag fields (0-2) are programmed with FLG64 bits with layout: - * flexiflags0[5:0] - TCP flags, is_packet_fragmented, is_packet_UDP_GRE - * flexiflags1[3:0] - Not used for flag programming - * flexiflags2[7:0] - Tunnel and VLAN types - * 2 invalid fields in last index - */ - switch (prof_id) { - /* Rx flex flags are currently programmed for the NIC profiles only. - * Different flag bit programming configurations can be added per - * profile as needed. 
- */ - case ICE_RXDID_FLEX_NIC: - case ICE_RXDID_FLEX_NIC_2: - ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_PKT_FRG, - ICE_FLG_UDP_GRE, ICE_FLG_PKT_DSI, - ICE_FLG_FIN, idx++); - /* flex flag 1 is not used for flexi-flag programming, skipping - * these four FLG64 bits. - */ - ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_SYN, ICE_FLG_RST, - ICE_FLG_PKT_DSI, ICE_FLG_PKT_DSI, idx++); - ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_PKT_DSI, - ICE_FLG_PKT_DSI, ICE_FLG_EVLAN_x8100, - ICE_FLG_EVLAN_x9100, idx++); - ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_VLAN_x8100, - ICE_FLG_TNL_VLAN, ICE_FLG_TNL_MAC, - ICE_FLG_TNL0, idx++); - ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_TNL1, ICE_FLG_TNL2, - ICE_FLG_PKT_DSI, ICE_FLG_PKT_DSI, idx); - break; - - default: - ice_debug(hw, ICE_DBG_INIT, - "Flag programming for profile ID %d not supported\n", - prof_id); - } -} - -/** - * ice_init_flex_flds - * @hw: pointer to the hardware structure - * @prof_id: Rx Descriptor Builder profile ID - * - * Function to initialize flex descriptors - */ -static void ice_init_flex_flds(struct ice_hw *hw, enum ice_rxdid prof_id) -{ - enum ice_flex_rx_mdid mdid; - - switch (prof_id) { - case ICE_RXDID_FLEX_NIC: - case ICE_RXDID_FLEX_NIC_2: - ICE_PROG_FLEX_ENTRY(hw, prof_id, ICE_RX_MDID_HASH_LOW, 0); - ICE_PROG_FLEX_ENTRY(hw, prof_id, ICE_RX_MDID_HASH_HIGH, 1); - ICE_PROG_FLEX_ENTRY(hw, prof_id, ICE_RX_MDID_FLOW_ID_LOWER, 2); - - mdid = (prof_id == ICE_RXDID_FLEX_NIC_2) ? 
- ICE_RX_MDID_SRC_VSI : ICE_RX_MDID_FLOW_ID_HIGH; - - ICE_PROG_FLEX_ENTRY(hw, prof_id, mdid, 3); - - ice_init_flex_flags(hw, prof_id); - break; - - default: - ice_debug(hw, ICE_DBG_INIT, - "Field init for profile ID %d not supported\n", - prof_id); - } -} - -/** * ice_init_fltr_mgmt_struct - initializes filter management list and locks * @hw: pointer to the HW struct */ @@ -882,9 +781,6 @@ enum ice_status ice_init_hw(struct ice_hw *hw) if (status) goto err_unroll_fltr_mgmt_struct; - - ice_init_flex_flds(hw, ICE_RXDID_FLEX_NIC); - ice_init_flex_flds(hw, ICE_RXDID_FLEX_NIC_2); status = ice_init_hw_tbls(hw); if (status) goto err_unroll_fltr_mgmt_struct; diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index d3d3ec29def9..0664e5b8d130 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -396,6 +396,12 @@ dcb_error: prev_cfg->etscfg.tcbwtable[0] = ICE_TC_MAX_BW; prev_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS; memcpy(&prev_cfg->etsrec, &prev_cfg->etscfg, sizeof(prev_cfg->etsrec)); + /* Coverity warns the return code of ice_pf_dcb_cfg() is not checked + * here as is done for other calls to that function. That check is + * not necessary since this is in this function's error cleanup path. + * Suppress the Coverity warning with the following comment... 
+ */ + /* coverity[check_return] */ ice_pf_dcb_cfg(pf, prev_cfg, false); kfree(prev_cfg); } diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h index f8d5c661d0ba..ce63017c56c7 100644 --- a/drivers/net/ethernet/intel/ice/ice_devids.h +++ b/drivers/net/ethernet/intel/ice/ice_devids.h @@ -11,5 +11,23 @@ #define ICE_DEV_ID_E810C_QSFP 0x1592 /* Intel(R) Ethernet Controller E810-C for SFP */ #define ICE_DEV_ID_E810C_SFP 0x1593 +/* Intel(R) Ethernet Connection E822-C for backplane */ +#define ICE_DEV_ID_E822C_BACKPLANE 0x1890 +/* Intel(R) Ethernet Connection E822-C for QSFP */ +#define ICE_DEV_ID_E822C_QSFP 0x1891 +/* Intel(R) Ethernet Connection E822-C for SFP */ +#define ICE_DEV_ID_E822C_SFP 0x1892 +/* Intel(R) Ethernet Connection E822-C/X557-AT 10GBASE-T */ +#define ICE_DEV_ID_E822C_10G_BASE_T 0x1893 +/* Intel(R) Ethernet Connection E822-C 1GbE */ +#define ICE_DEV_ID_E822C_SGMII 0x1894 +/* Intel(R) Ethernet Connection E822-X for backplane */ +#define ICE_DEV_ID_E822X_BACKPLANE 0x1897 +/* Intel(R) Ethernet Connection E822-L for SFP */ +#define ICE_DEV_ID_E822L_SFP 0x1898 +/* Intel(R) Ethernet Connection E822-L/X557-AT 10GBASE-T */ +#define ICE_DEV_ID_E822L_10G_BASE_T 0x1899 +/* Intel(R) Ethernet Connection E822-L 1GbE */ +#define ICE_DEV_ID_E822L_SGMII 0x189A #endif /* _ICE_DEVIDS_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 9ebd93e79aeb..f395457b728f 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -283,12 +283,15 @@ out: */ static bool ice_active_vfs(struct ice_pf *pf) { - struct ice_vf *vf = pf->vf; int i; - for (i = 0; i < pf->num_alloc_vfs; i++, vf++) + ice_for_each_vf(pf, i) { + struct ice_vf *vf = &pf->vf[i]; + if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) return true; + } + return false; } @@ -3585,6 +3588,53 @@ ice_set_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce 
*ec, int q_num) } /** + * ice_is_coalesce_param_invalid - check for unsupported coalesce parameters + * @netdev: pointer to the netdev associated with this query + * @ec: ethtool structure to fill with driver's coalesce settings + * + * Print netdev info if driver doesn't support one of the parameters + * and return error. When any parameters will be implemented, remove only + * this parameter from param array. + */ +static int +ice_is_coalesce_param_invalid(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct ice_ethtool_not_used { + u32 value; + const char *name; + } param[] = { + {ec->stats_block_coalesce_usecs, "stats-block-usecs"}, + {ec->rate_sample_interval, "sample-interval"}, + {ec->pkt_rate_low, "pkt-rate-low"}, + {ec->pkt_rate_high, "pkt-rate-high"}, + {ec->rx_max_coalesced_frames, "rx-frames"}, + {ec->rx_coalesce_usecs_irq, "rx-usecs-irq"}, + {ec->rx_max_coalesced_frames_irq, "rx-frames-irq"}, + {ec->tx_max_coalesced_frames, "tx-frames"}, + {ec->tx_coalesce_usecs_irq, "tx-usecs-irq"}, + {ec->tx_max_coalesced_frames_irq, "tx-frames-irq"}, + {ec->rx_coalesce_usecs_low, "rx-usecs-low"}, + {ec->rx_max_coalesced_frames_low, "rx-frames-low"}, + {ec->tx_coalesce_usecs_low, "tx-usecs-low"}, + {ec->tx_max_coalesced_frames_low, "tx-frames-low"}, + {ec->rx_max_coalesced_frames_high, "rx-frames-high"}, + {ec->tx_max_coalesced_frames_high, "tx-frames-high"} + }; + int i; + + for (i = 0; i < ARRAY_SIZE(param); i++) { + if (param[i].value) { + netdev_info(netdev, "Setting %s not supported\n", + param[i].name); + return -EINVAL; + } + } + + return 0; +} + +/** * __ice_set_coalesce - set ITR/INTRL values for the device * @netdev: pointer to the netdev associated with this query * @ec: ethtool structure to fill with driver's coalesce settings @@ -3600,6 +3650,9 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; + if 
(ice_is_coalesce_param_invalid(netdev, ec)) + return -EINVAL; + if (q_num < 0) { int v_idx; diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index e8f32350fed2..f2cababf2561 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -60,15 +60,6 @@ #define PRTDCB_GENS_DCBX_STATUS_M ICE_M(0x7, 0) #define GL_PREEXT_L2_PMASK0(_i) (0x0020F0FC + ((_i) * 4)) #define GL_PREEXT_L2_PMASK1(_i) (0x0020F108 + ((_i) * 4)) -#define GLFLXP_RXDID_FLAGS(_i, _j) (0x0045D000 + ((_i) * 4 + (_j) * 256)) -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S 0 -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M ICE_M(0x3F, 0) -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_S 8 -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_M ICE_M(0x3F, 8) -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_S 16 -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_M ICE_M(0x3F, 16) -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_S 24 -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_M ICE_M(0x3F, 24) #define GLFLXP_RXDID_FLX_WRD_0(_i) (0x0045c800 + ((_i) * 4)) #define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_S 0 #define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_M ICE_M(0xFF, 0) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index e7449248fab4..4cfad81ba496 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -817,12 +817,23 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi) ctxt->info.valid_sections |= cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID); - /* Enable MAC Antispoof with new VSI being initialized or updated */ - if (vsi->type == ICE_VSI_VF && pf->vf[vsi->vf_id].spoofchk) { + /* enable/disable MAC and VLAN anti-spoof when spoofchk is on/off + * respectively + */ + if (vsi->type == ICE_VSI_VF) { ctxt->info.valid_sections |= cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID); - ctxt->info.sec_flags |= - ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF; + if 
(pf->vf[vsi->vf_id].spoofchk) { + ctxt->info.sec_flags |= + ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF | + (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << + ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S); + } else { + ctxt->info.sec_flags &= + ~(ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF | + (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << + ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S)); + } } /* Allow control frames out of main VSI */ @@ -1636,22 +1647,14 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc) ctxt->info = vsi->info; - if (ena) { - ctxt->info.sec_flags |= - ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << - ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S; + if (ena) ctxt->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; - } else { - ctxt->info.sec_flags &= - ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << - ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S); + else ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; - } if (!vlan_promisc) ctxt->info.valid_sections = - cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID | - ICE_AQ_VSI_PROP_SW_VALID); + cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID); status = ice_update_vsi(&pf->hw, vsi->idx, ctxt, NULL); if (status) { @@ -1661,7 +1664,6 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc) goto err_out; } - vsi->info.sec_flags = ctxt->info.sec_flags; vsi->info.sw_flags2 = ctxt->info.sw_flags2; kfree(ctxt); @@ -2402,6 +2404,97 @@ int ice_vsi_release(struct ice_vsi *vsi) } /** + * ice_vsi_rebuild_update_coalesce - set coalesce for a q_vector + * @q_vector: pointer to q_vector which is being updated + * @coalesce: pointer to array of struct with stored coalesce + * + * Set coalesce param in q_vector and update these parameters in HW. 
+ */ +static void +ice_vsi_rebuild_update_coalesce(struct ice_q_vector *q_vector, + struct ice_coalesce_stored *coalesce) +{ + struct ice_ring_container *rx_rc = &q_vector->rx; + struct ice_ring_container *tx_rc = &q_vector->tx; + struct ice_hw *hw = &q_vector->vsi->back->hw; + + tx_rc->itr_setting = coalesce->itr_tx; + rx_rc->itr_setting = coalesce->itr_rx; + + /* dynamic ITR values will be updated during Tx/Rx */ + if (!ITR_IS_DYNAMIC(tx_rc->itr_setting)) + wr32(hw, GLINT_ITR(tx_rc->itr_idx, q_vector->reg_idx), + ITR_REG_ALIGN(tx_rc->itr_setting) >> + ICE_ITR_GRAN_S); + if (!ITR_IS_DYNAMIC(rx_rc->itr_setting)) + wr32(hw, GLINT_ITR(rx_rc->itr_idx, q_vector->reg_idx), + ITR_REG_ALIGN(rx_rc->itr_setting) >> + ICE_ITR_GRAN_S); + + q_vector->intrl = coalesce->intrl; + wr32(hw, GLINT_RATE(q_vector->reg_idx), + ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran)); +} + +/** + * ice_vsi_rebuild_get_coalesce - get coalesce from all q_vectors + * @vsi: VSI connected with q_vectors + * @coalesce: array of struct with stored coalesce + * + * Returns array size. + */ +static int +ice_vsi_rebuild_get_coalesce(struct ice_vsi *vsi, + struct ice_coalesce_stored *coalesce) +{ + int i; + + ice_for_each_q_vector(vsi, i) { + struct ice_q_vector *q_vector = vsi->q_vectors[i]; + + coalesce[i].itr_tx = q_vector->tx.itr_setting; + coalesce[i].itr_rx = q_vector->rx.itr_setting; + coalesce[i].intrl = q_vector->intrl; + } + + return vsi->num_q_vectors; +} + +/** + * ice_vsi_rebuild_set_coalesce - set coalesce from earlier saved arrays + * @vsi: VSI connected with q_vectors + * @coalesce: pointer to array of struct with stored coalesce + * @size: size of coalesce array + * + * Before this function, ice_vsi_rebuild_get_coalesce should be called to save + * ITR params in arrays. If size is 0 or coalesce wasn't stored set coalesce + * to default value. 
+ */ +static void +ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, + struct ice_coalesce_stored *coalesce, int size) +{ + int i; + + if ((size && !coalesce) || !vsi) + return; + + for (i = 0; i < size && i < vsi->num_q_vectors; i++) + ice_vsi_rebuild_update_coalesce(vsi->q_vectors[i], + &coalesce[i]); + + for (; i < vsi->num_q_vectors; i++) { + struct ice_coalesce_stored coalesce_dflt = { + .itr_tx = ICE_DFLT_TX_ITR, + .itr_rx = ICE_DFLT_RX_ITR, + .intrl = 0 + }; + ice_vsi_rebuild_update_coalesce(vsi->q_vectors[i], + &coalesce_dflt); + } +} + +/** * ice_vsi_rebuild - Rebuild VSI after reset * @vsi: VSI to be rebuild * @init_vsi: is this an initialization or a reconfigure of the VSI @@ -2411,6 +2504,8 @@ int ice_vsi_release(struct ice_vsi *vsi) int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; + struct ice_coalesce_stored *coalesce; + int prev_num_q_vectors = 0; struct ice_vf *vf = NULL; enum ice_status status; struct ice_pf *pf; @@ -2423,6 +2518,11 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) if (vsi->type == ICE_VSI_VF) vf = &pf->vf[vsi->vf_id]; + coalesce = kcalloc(vsi->num_q_vectors, + sizeof(struct ice_coalesce_stored), GFP_KERNEL); + if (coalesce) + prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, + coalesce); ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); ice_vsi_free_q_vectors(vsi); @@ -2535,6 +2635,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) return ice_schedule_reset(pf, ICE_RESET_PFR); } } + ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors); + kfree(coalesce); + return 0; err_vectors: @@ -2549,6 +2652,7 @@ err_rings: err_vsi: ice_vsi_clear(vsi); set_bit(__ICE_RESET_FAILED, pf->state); + kfree(coalesce); return ret; } @@ -2740,3 +2844,121 @@ cfg_mac_fltr_exit: ice_free_fltr_list(&vsi->back->pdev->dev, &tmp_add_list); return status; } + +/** + * ice_is_dflt_vsi_in_use - check if the default forwarding VSI is being used + * @sw: switch to check if its 
default forwarding VSI is free + * + * Return true if the default forwarding VSI is already being used, else returns + * false signalling that it's available to use. + */ +bool ice_is_dflt_vsi_in_use(struct ice_sw *sw) +{ + return (sw->dflt_vsi && sw->dflt_vsi_ena); +} + +/** + * ice_is_vsi_dflt_vsi - check if the VSI passed in is the default VSI + * @sw: switch for the default forwarding VSI to compare against + * @vsi: VSI to compare against default forwarding VSI + * + * If this VSI passed in is the default forwarding VSI then return true, else + * return false + */ +bool ice_is_vsi_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi) +{ + return (sw->dflt_vsi == vsi && sw->dflt_vsi_ena); +} + +/** + * ice_set_dflt_vsi - set the default forwarding VSI + * @sw: switch used to assign the default forwarding VSI + * @vsi: VSI getting set as the default forwarding VSI on the switch + * + * If the VSI passed in is already the default VSI and it's enabled just return + * success. + * + * If there is already a default VSI on the switch and it's enabled then return + * -EEXIST since there can only be one default VSI per switch. + * + * Otherwise try to set the VSI passed in as the switch's default VSI and + * return the result. 
+ */ +int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi) +{ + enum ice_status status; + struct device *dev; + + if (!sw || !vsi) + return -EINVAL; + + dev = ice_pf_to_dev(vsi->back); + + /* the VSI passed in is already the default VSI */ + if (ice_is_vsi_dflt_vsi(sw, vsi)) { + dev_dbg(dev, "VSI %d passed in is already the default forwarding VSI, nothing to do\n", + vsi->vsi_num); + return 0; + } + + /* another VSI is already the default VSI for this switch */ + if (ice_is_dflt_vsi_in_use(sw)) { + dev_err(dev, + "Default forwarding VSI %d already in use, disable it and try again\n", + sw->dflt_vsi->vsi_num); + return -EEXIST; + } + + status = ice_cfg_dflt_vsi(&vsi->back->hw, vsi->idx, true, ICE_FLTR_RX); + if (status) { + dev_err(dev, + "Failed to set VSI %d as the default forwarding VSI, error %d\n", + vsi->vsi_num, status); + return -EIO; + } + + sw->dflt_vsi = vsi; + sw->dflt_vsi_ena = true; + + return 0; +} + +/** + * ice_clear_dflt_vsi - clear the default forwarding VSI + * @sw: switch used to clear the default VSI + * + * If the switch has no default VSI or it's not enabled then return error. + * + * Otherwise try to clear the default VSI and return the result. 
+ */ +int ice_clear_dflt_vsi(struct ice_sw *sw) +{ + struct ice_vsi *dflt_vsi; + enum ice_status status; + struct device *dev; + + if (!sw) + return -EINVAL; + + dev = ice_pf_to_dev(sw->pf); + + dflt_vsi = sw->dflt_vsi; + + /* there is no default VSI configured */ + if (!ice_is_dflt_vsi_in_use(sw)) + return -ENODEV; + + status = ice_cfg_dflt_vsi(&dflt_vsi->back->hw, dflt_vsi->idx, false, + ICE_FLTR_RX); + if (status) { + dev_err(dev, + "Failed to clear the default forwarding VSI %d, error %d\n", + dflt_vsi->vsi_num, status); + return -EIO; + } + + sw->dflt_vsi = NULL; + sw->dflt_vsi_ena = false; + + return 0; +} diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 6e31e30aba39..68fd0d4505c2 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -103,4 +103,12 @@ enum ice_status ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set); bool ice_is_safe_mode(struct ice_pf *pf); + +bool ice_is_dflt_vsi_in_use(struct ice_sw *sw); + +bool ice_is_vsi_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi); + +int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi); + +int ice_clear_dflt_vsi(struct ice_sw *sw); #endif /* !_ICE_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 69bff085acf7..eb9d00608e9a 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -379,25 +379,29 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) clear_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags); if (vsi->current_netdev_flags & IFF_PROMISC) { /* Apply Rx filter rule to get traffic from wire */ - status = ice_cfg_dflt_vsi(hw, vsi->idx, true, - ICE_FLTR_RX); - if (status) { - netdev_err(netdev, "Error setting default VSI %i Rx rule\n", - vsi->vsi_num); - vsi->current_netdev_flags &= ~IFF_PROMISC; - err = -EIO; - goto out_promisc; + if 
(!ice_is_dflt_vsi_in_use(pf->first_sw)) { + err = ice_set_dflt_vsi(pf->first_sw, vsi); + if (err && err != -EEXIST) { + netdev_err(netdev, + "Error %d setting default VSI %i Rx rule\n", + err, vsi->vsi_num); + vsi->current_netdev_flags &= + ~IFF_PROMISC; + goto out_promisc; + } } } else { /* Clear Rx filter to remove traffic from wire */ - status = ice_cfg_dflt_vsi(hw, vsi->idx, false, - ICE_FLTR_RX); - if (status) { - netdev_err(netdev, "Error clearing default VSI %i Rx rule\n", - vsi->vsi_num); - vsi->current_netdev_flags |= IFF_PROMISC; - err = -EIO; - goto out_promisc; + if (ice_is_vsi_dflt_vsi(pf->first_sw, vsi)) { + err = ice_clear_dflt_vsi(pf->first_sw); + if (err) { + netdev_err(netdev, + "Error %d clearing default VSI %i Rx rule\n", + err, vsi->vsi_num); + vsi->current_netdev_flags |= + IFF_PROMISC; + goto out_promisc; + } } } } @@ -472,7 +476,7 @@ ice_prepare_for_reset(struct ice_pf *pf) ice_vc_notify_reset(pf); /* Disable VFs until reset is completed */ - for (i = 0; i < pf->num_alloc_vfs; i++) + ice_for_each_vf(pf, i) ice_set_vf_state_qs_dis(&pf->vf[i]); /* clear SW filtering DB */ @@ -840,8 +844,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, ice_vsi_link_event(vsi, link_up); ice_print_link_msg(vsi, link_up); - if (pf->num_alloc_vfs) - ice_vc_notify_link_state(pf); + ice_vc_notify_link_state(pf); return result; } @@ -1291,7 +1294,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) } /* check to see if one of the VFs caused the MDD */ - for (i = 0; i < pf->num_alloc_vfs; i++) { + ice_for_each_vf(pf, i) { struct ice_vf *vf = &pf->vf[i]; bool vf_mdd_detected = false; @@ -2330,7 +2333,8 @@ static void ice_set_netdev_features(struct net_device *netdev) NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; - tso_features = NETIF_F_TSO; + tso_features = NETIF_F_TSO | + NETIF_F_GSO_UDP_L4; /* set features that user can change */ netdev->hw_features = dflt_features | csumo_features | @@ -3568,6 +3572,15 @@ static const struct 
pci_device_id ice_pci_tbl[] = { { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_BACKPLANE), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_QSFP), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_SFP), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_BACKPLANE), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_QSFP), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SFP), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_10G_BASE_T), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SGMII), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822X_BACKPLANE), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SFP), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_10G_BASE_T), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SGMII), 0 }, /* required last entry */ { 0, } }; @@ -4670,6 +4683,13 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) goto err_init_ctrlq; } + if (pf->first_sw->dflt_vsi_ena) + dev_info(dev, + "Clearing default VSI, re-enable after reset completes\n"); + /* clear the default VSI configuration if it exists */ + pf->first_sw->dflt_vsi = NULL; + pf->first_sw->dflt_vsi_ena = false; + ice_clear_pxe_mode(hw); ret = ice_get_caps(hw); @@ -4825,7 +4845,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) } } - netdev_info(netdev, "changed MTU to %d\n", new_mtu); + netdev_dbg(netdev, "changed MTU to %d\n", new_mtu); return 0; } @@ -5060,42 +5080,23 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, * ice_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure */ -static void ice_tx_timeout(struct net_device *netdev) +static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_ring *tx_ring = NULL; struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; - int hung_queue = -1; u32 i; pf->tx_timeout_count++; - /* find the stopped queue the same way dev_watchdog() does */ - for (i = 0; i < netdev->num_tx_queues; i++) { - unsigned long trans_start; - struct 
netdev_queue *q; - - q = netdev_get_tx_queue(netdev, i); - trans_start = q->trans_start; - if (netif_xmit_stopped(q) && - time_after(jiffies, - trans_start + netdev->watchdog_timeo)) { - hung_queue = i; - break; - } - } - - if (i == netdev->num_tx_queues) - netdev_info(netdev, "tx_timeout: no netdev hung queue found\n"); - else - /* now that we have an index, find the tx_ring struct */ - for (i = 0; i < vsi->num_txq; i++) - if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) - if (hung_queue == vsi->tx_rings[i]->q_index) { - tx_ring = vsi->tx_rings[i]; - break; - } + /* now that we have an index, find the tx_ring struct */ + for (i = 0; i < vsi->num_txq; i++) + if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) + if (txqueue == vsi->tx_rings[i]->q_index) { + tx_ring = vsi->tx_rings[i]; + break; + } /* Reset recovery level if enough time has elapsed after last timeout. * Also ensure no new reset action happens before next timeout period. @@ -5110,19 +5111,19 @@ static void ice_tx_timeout(struct net_device *netdev) struct ice_hw *hw = &pf->hw; u32 head, val = 0; - head = (rd32(hw, QTX_COMM_HEAD(vsi->txq_map[hung_queue])) & + head = (rd32(hw, QTX_COMM_HEAD(vsi->txq_map[txqueue])) & QTX_COMM_HEAD_HEAD_M) >> QTX_COMM_HEAD_HEAD_S; /* Read interrupt register */ val = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx)); netdev_info(netdev, "tx_timeout: VSI_num: %d, Q %d, NTC: 0x%x, HW_HEAD: 0x%x, NTU: 0x%x, INT: 0x%x\n", - vsi->vsi_num, hung_queue, tx_ring->next_to_clean, + vsi->vsi_num, txqueue, tx_ring->next_to_clean, head, tx_ring->next_to_use, val); } pf->tx_timeout_last_recovery = jiffies; - netdev_info(netdev, "tx_timeout recovery level %d, hung_queue %d\n", - pf->tx_timeout_recovery_level, hung_queue); + netdev_info(netdev, "tx_timeout recovery level %d, txqueue %d\n", + pf->tx_timeout_recovery_level, txqueue); switch (pf->tx_timeout_recovery_level) { case 1: diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index 
57c73f613f32..7525ac50742e 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -289,6 +289,18 @@ enum ice_status ice_init_nvm(struct ice_hw *hw) nvm->eetrack = (eetrack_hi << 16) | eetrack_lo; + /* the following devices do not have boot_cfg_tlv yet */ + if (hw->device_id == ICE_DEV_ID_E822C_BACKPLANE || + hw->device_id == ICE_DEV_ID_E822C_QSFP || + hw->device_id == ICE_DEV_ID_E822C_10G_BASE_T || + hw->device_id == ICE_DEV_ID_E822C_SGMII || + hw->device_id == ICE_DEV_ID_E822C_SFP || + hw->device_id == ICE_DEV_ID_E822X_BACKPLANE || + hw->device_id == ICE_DEV_ID_E822L_SFP || + hw->device_id == ICE_DEV_ID_E822L_10G_BASE_T || + hw->device_id == ICE_DEV_ID_E822L_SGMII) + return status; + status = ice_get_pfa_module_tlv(hw, &boot_cfg_tlv, &boot_cfg_tlv_len, ICE_SR_BOOT_CFG_PTR); if (status) { diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 2c212f64d99f..fd17ace6b226 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1071,13 +1071,16 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) ice_put_rx_buf(rx_ring, rx_buf); continue; construct_skb: - if (skb) + if (skb) { ice_add_rx_frag(rx_ring, rx_buf, skb, size); - else if (ice_ring_uses_build_skb(rx_ring)) - skb = ice_build_skb(rx_ring, rx_buf, &xdp); - else + } else if (likely(xdp.data)) { + if (ice_ring_uses_build_skb(rx_ring)) + skb = ice_build_skb(rx_ring, rx_buf, &xdp); + else + skb = ice_construct_skb(rx_ring, rx_buf, &xdp); + } else { skb = ice_construct_skb(rx_ring, rx_buf, &xdp); - + } /* exit if we failed to retrieve a buffer */ if (!skb) { rx_ring->rx_stats.alloc_buf_failed++; @@ -1925,6 +1928,7 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off) } ip; union { struct tcphdr *tcp; + struct udphdr *udp; unsigned char *hdr; } l4; u64 cd_mss, cd_tso_len; @@ -1958,10 +1962,18 @@ int ice_tso(struct ice_tx_buf *first, 
struct ice_tx_offload_params *off) /* remove payload length from checksum */ paylen = skb->len - l4_start; - csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen)); - /* compute length of segmentation header */ - off->header_len = (l4.tcp->doff * 4) + l4_start; + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { + csum_replace_by_diff(&l4.udp->check, + (__force __wsum)htonl(paylen)); + /* compute length of UDP segmentation header */ + off->header_len = sizeof(l4.udp) + l4_start; + } else { + csum_replace_by_diff(&l4.tcp->check, + (__force __wsum)htonl(paylen)); + /* compute length of TCP segmentation header */ + off->header_len = (l4.tcp->doff * 4) + l4_start; + } /* update gso_segs and bytecount */ first->gso_segs = skb_shinfo(skb)->gso_segs; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index a84cc0e6dd27..a86270696df1 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -341,6 +341,12 @@ struct ice_ring_container { u16 itr_setting; }; +struct ice_coalesce_stored { + u16 itr_tx; + u16 itr_rx; + u8 intrl; +}; + /* iterator for handling rings in ring container */ #define ice_for_each_ring(pos, head) \ for (pos = (head).ring; pos; pos = pos->next) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index edb374296d1f..82b1e7a4cb92 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -35,37 +35,6 @@ static int ice_check_vf_init(struct ice_pf *pf, struct ice_vf *vf) } /** - * ice_err_to_virt err - translate errors for VF return code - * @ice_err: error return code - */ -static enum virtchnl_status_code ice_err_to_virt_err(enum ice_status ice_err) -{ - switch (ice_err) { - case ICE_SUCCESS: - return VIRTCHNL_STATUS_SUCCESS; - case ICE_ERR_BAD_PTR: - case ICE_ERR_INVAL_SIZE: - case ICE_ERR_DEVICE_NOT_SUPPORTED: - case 
ICE_ERR_PARAM: - case ICE_ERR_CFG: - return VIRTCHNL_STATUS_ERR_PARAM; - case ICE_ERR_NO_MEMORY: - return VIRTCHNL_STATUS_ERR_NO_MEMORY; - case ICE_ERR_NOT_READY: - case ICE_ERR_RESET_FAILED: - case ICE_ERR_FW_API_VER: - case ICE_ERR_AQ_ERROR: - case ICE_ERR_AQ_TIMEOUT: - case ICE_ERR_AQ_FULL: - case ICE_ERR_AQ_NO_WORK: - case ICE_ERR_AQ_EMPTY: - return VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; - default: - return VIRTCHNL_STATUS_ERR_NOT_SUPPORTED; - } -} - -/** * ice_vc_vf_broadcast - Broadcast a message to all VFs on PF * @pf: pointer to the PF structure * @v_opcode: operation code @@ -78,10 +47,11 @@ ice_vc_vf_broadcast(struct ice_pf *pf, enum virtchnl_ops v_opcode, enum virtchnl_status_code v_retval, u8 *msg, u16 msglen) { struct ice_hw *hw = &pf->hw; - struct ice_vf *vf = pf->vf; int i; - for (i = 0; i < pf->num_alloc_vfs; i++, vf++) { + ice_for_each_vf(pf, i) { + struct ice_vf *vf = &pf->vf[i]; + /* Not all vfs are enabled so skip the ones that are not */ if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states) && !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) @@ -121,26 +91,6 @@ ice_set_pfe_link(struct ice_vf *vf, struct virtchnl_pf_event *pfe, } /** - * ice_set_pfe_link_forced - Force the virtchnl_pf_event link speed/status - * @vf: pointer to the VF structure - * @pfe: pointer to the virtchnl_pf_event to set link speed/status for - * @link_up: whether or not to set the link up/down - */ -static void -ice_set_pfe_link_forced(struct ice_vf *vf, struct virtchnl_pf_event *pfe, - bool link_up) -{ - u16 link_speed; - - if (link_up) - link_speed = ICE_AQ_LINK_SPEED_100GB; - else - link_speed = ICE_AQ_LINK_SPEED_UNKNOWN; - - ice_set_pfe_link(vf, pfe, link_speed, link_up); -} - -/** * ice_vc_notify_vf_link_state - Inform a VF of link status * @vf: pointer to the VF structure * @@ -160,13 +110,17 @@ static void ice_vc_notify_vf_link_state(struct ice_vf *vf) pfe.severity = PF_EVENT_SEVERITY_INFO; /* Always report link is down if the VF queues aren't enabled */ - if 
(!vf->num_qs_ena) + if (!vf->num_qs_ena) { ice_set_pfe_link(vf, &pfe, ICE_AQ_LINK_SPEED_UNKNOWN, false); - else if (vf->link_forced) - ice_set_pfe_link_forced(vf, &pfe, vf->link_up); - else - ice_set_pfe_link(vf, &pfe, ls->link_speed, ls->link_info & - ICE_AQ_LINK_UP); + } else if (vf->link_forced) { + u16 link_speed = vf->link_up ? + ls->link_speed : ICE_AQ_LINK_SPEED_UNKNOWN; + + ice_set_pfe_link(vf, &pfe, link_speed, vf->link_up); + } else { + ice_set_pfe_link(vf, &pfe, ls->link_speed, + ls->link_info & ICE_AQ_LINK_UP); + } ice_aq_send_msg_to_vf(hw, vf->vf_id, VIRTCHNL_OP_EVENT, VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe, @@ -331,7 +285,7 @@ void ice_free_vfs(struct ice_pf *pf) usleep_range(1000, 2000); /* Avoid wait time by stopping all VFs at the same time */ - for (i = 0; i < pf->num_alloc_vfs; i++) + ice_for_each_vf(pf, i) if (test_bit(ICE_VF_STATE_QS_ENA, pf->vf[i].vf_states)) ice_dis_vf_qs(&pf->vf[i]); @@ -991,10 +945,17 @@ static void ice_cleanup_and_realloc_vf(struct ice_vf *vf) /* reallocate VF resources to finish resetting the VSI state */ if (!ice_alloc_vf_res(vf)) { + struct ice_vsi *vsi; + ice_ena_vf_mappings(vf); set_bit(ICE_VF_STATE_ACTIVE, vf->vf_states); clear_bit(ICE_VF_STATE_DIS, vf->vf_states); - vf->num_vlan = 0; + + vsi = pf->vsi[vf->lan_vsi_idx]; + if (ice_vsi_add_vlan(vsi, 0)) + dev_warn(ice_pf_to_dev(pf), + "Failed to add VLAN 0 filter for VF %d, MDD events will trigger. Reset the VF, disable spoofchk, or enable 8021q module on the guest", + vf->vf_id); } /* Tell the VF driver the reset is done. 
This needs to be done only @@ -1023,7 +984,7 @@ ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m, struct ice_hw *hw; hw = &pf->hw; - if (vf->num_vlan) { + if (vsi->num_vlan) { status = ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_m, rm_promisc); } else if (vf->port_vlan_id) { @@ -1070,7 +1031,7 @@ static bool ice_config_res_vfs(struct ice_pf *pf) ice_irq_dynamic_ena(hw, NULL, NULL); /* Finish resetting each VF and allocate resources */ - for (v = 0; v < pf->num_alloc_vfs; v++) { + ice_for_each_vf(pf, v) { struct ice_vf *vf = &pf->vf[v]; vf->num_vf_qs = pf->num_vf_qps; @@ -1113,10 +1074,10 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) return false; /* Begin reset on all VFs at once */ - for (v = 0; v < pf->num_alloc_vfs; v++) + ice_for_each_vf(pf, v) ice_trigger_vf_reset(&pf->vf[v], is_vflr, true); - for (v = 0; v < pf->num_alloc_vfs; v++) { + ice_for_each_vf(pf, v) { struct ice_vsi *vsi; vf = &pf->vf[v]; @@ -1161,7 +1122,7 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) dev_warn(dev, "VF reset check timeout\n"); /* free VF resources to begin resetting the VSI state */ - for (v = 0; v < pf->num_alloc_vfs; v++) { + ice_for_each_vf(pf, v) { vf = &pf->vf[v]; ice_free_vf_res(vf); @@ -1273,7 +1234,7 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) */ if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) || test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) { - if (vf->port_vlan_id || vf->num_vlan) + if (vf->port_vlan_id || vsi->num_vlan) promisc_m = ICE_UCAST_VLAN_PROMISC_BITS; else promisc_m = ICE_UCAST_PROMISC_BITS; @@ -1301,7 +1262,7 @@ void ice_vc_notify_link_state(struct ice_pf *pf) { int i; - for (i = 0; i < pf->num_alloc_vfs; i++) + ice_for_each_vf(pf, i) ice_vc_notify_vf_link_state(&pf->vf[i]); } @@ -1385,9 +1346,10 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs) goto err_pci_disable_sriov; } pf->vf = vfs; + pf->num_alloc_vfs = num_alloc_vfs; /* apply default profile */ - for (i = 0; 
i < num_alloc_vfs; i++) { + ice_for_each_vf(pf, i) { vfs[i].pf = pf; vfs[i].vf_sw_id = pf->first_sw; vfs[i].vf_id = i; @@ -1396,7 +1358,6 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs) set_bit(ICE_VIRTCHNL_VF_CAP_L2, &vfs[i].vf_caps); vfs[i].spoofchk = true; } - pf->num_alloc_vfs = num_alloc_vfs; /* VF resources get allocated with initialization */ if (!ice_config_res_vfs(pf)) { @@ -1535,7 +1496,7 @@ void ice_process_vflr_event(struct ice_pf *pf) !pf->num_alloc_vfs) return; - for (vf_id = 0; vf_id < pf->num_alloc_vfs; vf_id++) { + ice_for_each_vf(pf, vf_id) { struct ice_vf *vf = &pf->vf[vf_id]; u32 reg_idx, bit_idx; @@ -1918,6 +1879,89 @@ error_param: } /** + * ice_set_vf_spoofchk + * @netdev: network interface device structure + * @vf_id: VF identifier + * @ena: flag to enable or disable feature + * + * Enable or disable VF spoof checking + */ +int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + struct ice_vsi_ctx *ctx; + struct ice_vsi *vf_vsi; + enum ice_status status; + struct device *dev; + struct ice_vf *vf; + int ret = 0; + + dev = ice_pf_to_dev(pf); + if (ice_validate_vf_id(pf, vf_id)) + return -EINVAL; + + vf = &pf->vf[vf_id]; + + if (ice_check_vf_init(pf, vf)) + return -EBUSY; + + vf_vsi = pf->vsi[vf->lan_vsi_idx]; + if (!vf_vsi) { + netdev_err(netdev, "VSI %d for VF %d is null\n", + vf->lan_vsi_idx, vf->vf_id); + return -EINVAL; + } + + if (vf_vsi->type != ICE_VSI_VF) { + netdev_err(netdev, + "Type %d of VSI %d for VF %d is no ICE_VSI_VF\n", + vf_vsi->type, vf_vsi->vsi_num, vf->vf_id); + return -ENODEV; + } + + if (ena == vf->spoofchk) { + dev_dbg(dev, "VF spoofchk already %s\n", ena ? 
"ON" : "OFF"); + return 0; + } + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->info.sec_flags = vf_vsi->info.sec_flags; + ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID); + if (ena) { + ctx->info.sec_flags |= + ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF | + (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << + ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S); + } else { + ctx->info.sec_flags &= + ~(ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF | + (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << + ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S)); + } + + status = ice_update_vsi(&pf->hw, vf_vsi->idx, ctx, NULL); + if (status) { + dev_err(dev, + "Failed to %sable spoofchk on VF %d VSI %d\n error %d", + ena ? "en" : "dis", vf->vf_id, vf_vsi->vsi_num, status); + ret = -EIO; + goto out; + } + + /* only update spoofchk state and VSI context on success */ + vf_vsi->info.sec_flags = ctx->info.sec_flags; + vf->spoofchk = ena; + +out: + kfree(ctx); + return ret; +} + +/** * ice_vc_get_stats_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -2409,6 +2453,83 @@ static bool ice_can_vf_change_mac(struct ice_vf *vf) } /** + * ice_vc_add_mac_addr - attempt to add the MAC address passed in + * @vf: pointer to the VF info + * @vsi: pointer to the VF's VSI + * @mac_addr: MAC address to add + */ +static int +ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + enum ice_status status; + + /* default unicast MAC already added */ + if (ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr)) + return 0; + + if (is_unicast_ether_addr(mac_addr) && !ice_can_vf_change_mac(vf)) { + dev_err(dev, "VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n"); + return -EPERM; + } + + status = ice_vsi_cfg_mac_fltr(vsi, mac_addr, true); + if (status == ICE_ERR_ALREADY_EXISTS) { + dev_err(dev, "MAC %pM already exists for VF %d\n", mac_addr, + vf->vf_id); + 
return -EEXIST; + } else if (status) { + dev_err(dev, "Failed to add MAC %pM for VF %d\n, error %d\n", + mac_addr, vf->vf_id, status); + return -EIO; + } + + /* only set dflt_lan_addr once */ + if (is_zero_ether_addr(vf->dflt_lan_addr.addr) && + is_unicast_ether_addr(mac_addr)) + ether_addr_copy(vf->dflt_lan_addr.addr, mac_addr); + + vf->num_mac++; + + return 0; +} + +/** + * ice_vc_del_mac_addr - attempt to delete the MAC address passed in + * @vf: pointer to the VF info + * @vsi: pointer to the VF's VSI + * @mac_addr: MAC address to delete + */ +static int +ice_vc_del_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + enum ice_status status; + + if (!ice_can_vf_change_mac(vf) && + ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr)) + return 0; + + status = ice_vsi_cfg_mac_fltr(vsi, mac_addr, false); + if (status == ICE_ERR_DOES_NOT_EXIST) { + dev_err(dev, "MAC %pM does not exist for VF %d\n", mac_addr, + vf->vf_id); + return -ENOENT; + } else if (status) { + dev_err(dev, "Failed to delete MAC %pM for VF %d, error %d\n", + mac_addr, vf->vf_id, status); + return -EIO; + } + + if (ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr)) + eth_zero_addr(vf->dflt_lan_addr.addr); + + vf->num_mac--; + + return 0; +} + +/** * ice_vc_handle_mac_addr_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -2419,23 +2540,23 @@ static bool ice_can_vf_change_mac(struct ice_vf *vf) static int ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) { + int (*ice_vc_cfg_mac) + (struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr); enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct virtchnl_ether_addr_list *al = (struct virtchnl_ether_addr_list *)msg; struct ice_pf *pf = vf->pf; enum virtchnl_ops vc_op; - enum ice_status status; struct ice_vsi *vsi; - struct device *dev; - int mac_count = 0; int i; - dev = ice_pf_to_dev(pf); - - if (set) + if (set) { vc_op = VIRTCHNL_OP_ADD_ETH_ADDR; 
- else + ice_vc_cfg_mac = ice_vc_add_mac_addr; + } else { vc_op = VIRTCHNL_OP_DEL_ETH_ADDR; + ice_vc_cfg_mac = ice_vc_del_mac_addr; + } if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) || !ice_vc_isvalid_vsi_id(vf, al->vsi_id)) { @@ -2443,14 +2564,15 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) goto handle_mac_exit; } + /* If this VF is not privileged, then we can't add more than a + * limited number of addresses. Check to make sure that the + * additions do not push us over the limit. + */ if (set && !ice_is_vf_trusted(vf) && (vf->num_mac + al->num_elements) > ICE_MAX_MACADDR_PER_VF) { - dev_err(dev, + dev_err(ice_pf_to_dev(pf), "Can't add more MAC addresses, because VF-%d is not trusted, switch the VF to trusted mode in order to add more functionalities\n", vf->vf_id); - /* There is no need to let VF know about not being trusted - * to add more MAC addr, so we can just return success message. - */ v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto handle_mac_exit; } @@ -2462,70 +2584,22 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) } for (i = 0; i < al->num_elements; i++) { - u8 *maddr = al->list[i].addr; + u8 *mac_addr = al->list[i].addr; + int result; - if (ether_addr_equal(maddr, vf->dflt_lan_addr.addr) || - is_broadcast_ether_addr(maddr)) { - if (set) { - /* VF is trying to add filters that the PF - * already added. Just continue. 
- */ - dev_info(dev, - "MAC %pM already set for VF %d\n", - maddr, vf->vf_id); - continue; - } else { - /* VF can't remove dflt_lan_addr/bcast MAC */ - dev_err(dev, - "VF can't remove default MAC address or MAC %pM programmed by PF for VF %d\n", - maddr, vf->vf_id); - continue; - } - } - - /* check for the invalid cases and bail if necessary */ - if (is_zero_ether_addr(maddr)) { - dev_err(dev, - "invalid MAC %pM provided for VF %d\n", - maddr, vf->vf_id); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto handle_mac_exit; - } - - if (is_unicast_ether_addr(maddr) && - !ice_can_vf_change_mac(vf)) { - dev_err(dev, - "can't change unicast MAC for untrusted VF %d\n", - vf->vf_id); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto handle_mac_exit; - } + if (is_broadcast_ether_addr(mac_addr) || + is_zero_ether_addr(mac_addr)) + continue; - /* program the updated filter list */ - status = ice_vsi_cfg_mac_fltr(vsi, maddr, set); - if (status == ICE_ERR_DOES_NOT_EXIST || - status == ICE_ERR_ALREADY_EXISTS) { - dev_info(dev, - "can't %s MAC filters %pM for VF %d, error %d\n", - set ? "add" : "remove", maddr, vf->vf_id, - status); - } else if (status) { - dev_err(dev, - "can't %s MAC filters for VF %d, error %d\n", - set ? 
"add" : "remove", vf->vf_id, status); - v_ret = ice_err_to_virt_err(status); + result = ice_vc_cfg_mac(vf, vsi, mac_addr); + if (result == -EEXIST || result == -ENOENT) { + continue; + } else if (result) { + v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; goto handle_mac_exit; } - - mac_count++; } - /* Track number of MAC filters programmed for the VF VSI */ - if (set) - vf->num_mac += mac_count; - else - vf->num_mac -= mac_count; - handle_mac_exit: /* send the response to the VF */ return ice_vc_send_msg_to_vf(vf, vc_op, v_ret, NULL, 0); @@ -2744,17 +2818,6 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) goto error_param; } - if (add_v && !ice_is_vf_trusted(vf) && - vf->num_vlan >= ICE_MAX_VLAN_PER_VF) { - dev_info(dev, - "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n", - vf->vf_id); - /* There is no need to let VF know about being not trusted, - * so we can just return success message here - */ - goto error_param; - } - for (i = 0; i < vfl->num_elements; i++) { if (vfl->vlan_id[i] > ICE_MAX_VLANID) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -2771,6 +2834,17 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) goto error_param; } + if (add_v && !ice_is_vf_trusted(vf) && + vsi->num_vlan >= ICE_MAX_VLAN_PER_VF) { + dev_info(dev, + "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n", + vf->vf_id); + /* There is no need to let VF know about being not trusted, + * so we can just return success message here + */ + goto error_param; + } + if (vsi->info.pvid) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -2785,7 +2859,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) u16 vid = vfl->vlan_id[i]; if (!ice_is_vf_trusted(vf) && - vf->num_vlan >= ICE_MAX_VLAN_PER_VF) { + vsi->num_vlan >= ICE_MAX_VLAN_PER_VF) { dev_info(dev, "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN 
addresses\n", vf->vf_id); @@ -2796,12 +2870,20 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) goto error_param; } - if (ice_vsi_add_vlan(vsi, vid)) { + /* we add VLAN 0 by default for each VF so we can enable + * Tx VLAN anti-spoof without triggering MDD events so + * we don't need to add it again here + */ + if (!vid) + continue; + + status = ice_vsi_add_vlan(vsi, vid); + if (status) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - vf->num_vlan++; + vsi->num_vlan++; /* Enable VLAN pruning when VLAN is added */ if (!vlan_promisc) { status = ice_cfg_vlan_pruning(vsi, true, false); @@ -2837,21 +2919,29 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) */ int num_vf_vlan; - num_vf_vlan = vf->num_vlan; + num_vf_vlan = vsi->num_vlan; for (i = 0; i < vfl->num_elements && i < num_vf_vlan; i++) { u16 vid = vfl->vlan_id[i]; + /* we add VLAN 0 by default for each VF so we can enable + * Tx VLAN anti-spoof without triggering MDD events so + * we don't want a VIRTCHNL request to remove it + */ + if (!vid) + continue; + /* Make sure ice_vsi_kill_vlan is successful before * updating VLAN information */ - if (ice_vsi_kill_vlan(vsi, vid)) { + status = ice_vsi_kill_vlan(vsi, vid); + if (status) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - vf->num_vlan--; + vsi->num_vlan--; /* Disable VLAN pruning when the last VLAN is removed */ - if (!vf->num_vlan) + if (!vsi->num_vlan) ice_cfg_vlan_pruning(vsi, false, false); /* Disable Unicast/Multicast VLAN promiscuous mode */ @@ -3165,65 +3255,6 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi) } /** - * ice_set_vf_spoofchk - * @netdev: network interface device structure - * @vf_id: VF identifier - * @ena: flag to enable or disable feature - * - * Enable or disable VF spoof checking - */ -int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) -{ - struct ice_pf *pf = ice_netdev_to_pf(netdev); - struct ice_vsi *vsi 
= pf->vsi[0]; - struct ice_vsi_ctx *ctx; - enum ice_status status; - struct device *dev; - struct ice_vf *vf; - int ret = 0; - - dev = ice_pf_to_dev(pf); - if (ice_validate_vf_id(pf, vf_id)) - return -EINVAL; - - vf = &pf->vf[vf_id]; - if (ice_check_vf_init(pf, vf)) - return -EBUSY; - - if (ena == vf->spoofchk) { - dev_dbg(dev, "VF spoofchk already %s\n", - ena ? "ON" : "OFF"); - return 0; - } - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; - - ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID); - - if (ena) { - ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF; - ctx->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_M; - } - - status = ice_update_vsi(&pf->hw, vsi->idx, ctx, NULL); - if (status) { - dev_dbg(dev, - "Error %d, failed to update VSI* parameters\n", status); - ret = -EIO; - goto out; - } - - vf->spoofchk = ena; - vsi->info.sec_flags = ctx->info.sec_flags; - vsi->info.sw_flags2 = ctx->info.sw_flags2; -out: - kfree(ctx); - return ret; -} - -/** * ice_wait_on_vf_reset * @vf: The VF being resseting * @@ -3344,28 +3375,18 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) { struct ice_pf *pf = ice_netdev_to_pf(netdev); - struct virtchnl_pf_event pfe = { 0 }; - struct ice_link_status *ls; struct ice_vf *vf; - struct ice_hw *hw; if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; vf = &pf->vf[vf_id]; - hw = &pf->hw; - ls = &pf->hw.port_info->phy.link_info; - if (ice_check_vf_init(pf, vf)) return -EBUSY; - pfe.event = VIRTCHNL_EVENT_LINK_CHANGE; - pfe.severity = PF_EVENT_SEVERITY_INFO; - switch (link_state) { case IFLA_VF_LINK_STATE_AUTO: vf->link_forced = false; - vf->link_up = ls->link_info & ICE_AQ_LINK_UP; break; case IFLA_VF_LINK_STATE_ENABLE: vf->link_forced = true; @@ -3379,15 +3400,7 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) return -EINVAL; } - if 
(vf->link_forced) - ice_set_pfe_link_forced(vf, &pfe, vf->link_up); - else - ice_set_pfe_link(vf, &pfe, ls->link_speed, vf->link_up); - - /* Notify the VF of its new link state */ - ice_aq_send_msg_to_vf(hw, vf->vf_id, VIRTCHNL_OP_EVENT, - VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe, - sizeof(pfe), NULL); + ice_vc_notify_vf_link_state(vf); return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h index 88aa65d5cb31..4647d636ed36 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h @@ -40,6 +40,9 @@ #define ICE_DFLT_INTR_PER_VF (ICE_DFLT_QS_PER_VF + 1) #define ICE_MAX_VF_RESET_WAIT 15 +#define ice_for_each_vf(pf, i) \ + for ((i) = 0; (i) < (pf)->num_alloc_vfs; (i)++) + /* Specific VF states */ enum ice_vf_states { ICE_VF_STATE_INIT = 0, /* PF is initializing VF */ @@ -91,7 +94,6 @@ struct ice_vf { unsigned long vf_caps; /* VF's adv. capabilities */ u8 num_req_qs; /* num of queue pairs requested by VF */ u16 num_mac; - u16 num_vlan; u16 num_vf_qs; /* num of queue configured per VF */ u16 num_qs_ena; /* total num of Tx/Rx queue enabled */ }; diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index cf9b8b22d24f..149dca0012ba 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -414,7 +414,8 @@ ice_xsk_umem_enable(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid) if (vsi->type != ICE_VSI_PF) return -EINVAL; - vsi->num_xsk_umems = min_t(u16, vsi->num_rxq, vsi->num_txq); + if (!vsi->num_xsk_umems) + vsi->num_xsk_umems = min_t(u16, vsi->num_rxq, vsi->num_txq); if (qid >= vsi->num_xsk_umems) return -EINVAL; @@ -555,7 +556,7 @@ ice_alloc_buf_fast_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) rx_buf->handle = handle + umem->headroom; - xsk_umem_discard_addr(umem); + xsk_umem_release_addr(umem); return true; } @@ -591,7 +592,7 @@ 
ice_alloc_buf_slow_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) rx_buf->handle = handle + umem->headroom; - xsk_umem_discard_addr_rq(umem); + xsk_umem_release_addr_rq(umem); return true; } @@ -1019,8 +1020,8 @@ bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget) s16 ntc = xdp_ring->next_to_clean; struct ice_tx_desc *tx_desc; struct ice_tx_buf *tx_buf; - bool xmit_done = true; u32 xsk_frames = 0; + bool xmit_done; tx_desc = ICE_TX_DESC(xdp_ring, ntc); tx_buf = &xdp_ring->tx_buf[ntc]; diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index ca54e268d157..49b5fa9d4783 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -661,6 +661,7 @@ void igb_configure_tx_ring(struct igb_adapter *, struct igb_ring *); void igb_configure_rx_ring(struct igb_adapter *, struct igb_ring *); void igb_setup_tctl(struct igb_adapter *); void igb_setup_rctl(struct igb_adapter *); +void igb_setup_srrctl(struct igb_adapter *, struct igb_ring *); netdev_tx_t igb_xmit_frame_ring(struct sk_buff *, struct igb_ring *); void igb_alloc_rx_buffers(struct igb_ring *, u16); void igb_update_stats(struct igb_adapter *); diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 445fbdce3e25..f96ffa83efbe 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -396,6 +396,7 @@ static int igb_set_pauseparam(struct net_device *netdev, struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; int retval = 0; + int i; /* 100basefx does not support setting link flow control */ if (hw->dev_spec._82575.eth_flags.e100_base_fx) @@ -428,6 +429,13 @@ static int igb_set_pauseparam(struct net_device *netdev, retval = ((hw->phy.media_type == e1000_media_type_copper) ? 
igb_force_mac_fc(hw) : igb_setup_link(hw)); + + /* Make sure SRRCTL considers new fc settings for each ring */ + for (i = 0; i < adapter->num_rx_queues; i++) { + struct igb_ring *ring = adapter->rx_ring[i]; + + igb_setup_srrctl(adapter, ring); + } } clear_bit(__IGB_RESETTING, &adapter->state); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 98346eb064d5..b46bff8fe056 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -146,7 +146,7 @@ static int igb_poll(struct napi_struct *, int); static bool igb_clean_tx_irq(struct igb_q_vector *, int); static int igb_clean_rx_irq(struct igb_q_vector *, int); static int igb_ioctl(struct net_device *, struct ifreq *, int cmd); -static void igb_tx_timeout(struct net_device *); +static void igb_tx_timeout(struct net_device *, unsigned int txqueue); static void igb_reset_task(struct work_struct *); static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features); @@ -4468,6 +4468,37 @@ static inline void igb_set_vmolr(struct igb_adapter *adapter, } /** + * igb_setup_srrctl - configure the split and replication receive control + * registers + * @adapter: Board private structure + * @ring: receive ring to be configured + **/ +void igb_setup_srrctl(struct igb_adapter *adapter, struct igb_ring *ring) +{ + struct e1000_hw *hw = &adapter->hw; + int reg_idx = ring->reg_idx; + u32 srrctl = 0; + + srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; + if (ring_uses_large_buffer(ring)) + srrctl |= IGB_RXBUFFER_3072 >> E1000_SRRCTL_BSIZEPKT_SHIFT; + else + srrctl |= IGB_RXBUFFER_2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; + srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; + if (hw->mac.type >= e1000_82580) + srrctl |= E1000_SRRCTL_TIMESTAMP; + /* Only set Drop Enable if VFs allocated, or we are supporting multiple + * queues and rx flow control is disabled + */ + if (adapter->vfs_allocated_count || + (!(hw->fc.current_mode & 
e1000_fc_rx_pause) && + adapter->num_rx_queues > 1)) + srrctl |= E1000_SRRCTL_DROP_EN; + + wr32(E1000_SRRCTL(reg_idx), srrctl); +} + +/** * igb_configure_rx_ring - Configure a receive ring after Reset * @adapter: board private structure * @ring: receive ring to be configured @@ -4481,7 +4512,7 @@ void igb_configure_rx_ring(struct igb_adapter *adapter, union e1000_adv_rx_desc *rx_desc; u64 rdba = ring->dma; int reg_idx = ring->reg_idx; - u32 srrctl = 0, rxdctl = 0; + u32 rxdctl = 0; /* disable the queue */ wr32(E1000_RXDCTL(reg_idx), 0); @@ -4499,19 +4530,7 @@ void igb_configure_rx_ring(struct igb_adapter *adapter, writel(0, ring->tail); /* set descriptor configuration */ - srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; - if (ring_uses_large_buffer(ring)) - srrctl |= IGB_RXBUFFER_3072 >> E1000_SRRCTL_BSIZEPKT_SHIFT; - else - srrctl |= IGB_RXBUFFER_2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; - srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; - if (hw->mac.type >= e1000_82580) - srrctl |= E1000_SRRCTL_TIMESTAMP; - /* Only set Drop Enable if we are supporting multiple queues */ - if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1) - srrctl |= E1000_SRRCTL_DROP_EN; - - wr32(E1000_SRRCTL(reg_idx), srrctl); + igb_setup_srrctl(adapter, ring); /* set filtering for VMDQ pools */ igb_set_vmolr(adapter, reg_idx & 0x7, true); @@ -6184,7 +6203,7 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, * igb_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void igb_tx_timeout(struct net_device *netdev) +static void igb_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 6003dc3ff5fd..5b1800c3ba82 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2375,7 +2375,7 @@ static 
netdev_tx_t igbvf_xmit_frame(struct sk_buff *skb, * igbvf_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void igbvf_tx_timeout(struct net_device *netdev) +static void igbvf_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct igbvf_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/igc/Makefile b/drivers/net/ethernet/intel/igc/Makefile index 88c6f88baac5..49fb1e1965cd 100644 --- a/drivers/net/ethernet/intel/igc/Makefile +++ b/drivers/net/ethernet/intel/igc/Makefile @@ -8,4 +8,4 @@ obj-$(CONFIG_IGC) += igc.o igc-objs := igc_main.o igc_mac.o igc_i225.o igc_base.o igc_nvm.o igc_phy.o \ -igc_ethtool.o +igc_ethtool.o igc_ptp.o diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 0868677d43ed..52066bdbbad0 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -10,6 +10,9 @@ #include <linux/vmalloc.h> #include <linux/ethtool.h> #include <linux/sctp.h> +#include <linux/ptp_clock_kernel.h> +#include <linux/timecounter.h> +#include <linux/net_tstamp.h> #include "igc_hw.h" @@ -45,11 +48,15 @@ extern char igc_driver_version[]; #define IGC_REGS_LEN 740 #define IGC_RETA_SIZE 128 +/* flags controlling PTP/1588 function */ +#define IGC_PTP_ENABLED BIT(0) + /* Interrupt defines */ #define IGC_START_ITR 648 /* ~6000 ints/sec */ #define IGC_FLAG_HAS_MSI BIT(0) #define IGC_FLAG_QUEUE_PAIRS BIT(3) #define IGC_FLAG_DMAC BIT(4) +#define IGC_FLAG_PTP BIT(8) #define IGC_FLAG_NEED_LINK_UPDATE BIT(9) #define IGC_FLAG_MEDIA_RESET BIT(10) #define IGC_FLAG_MAS_ENABLE BIT(12) @@ -100,6 +107,20 @@ extern char igc_driver_version[]; #define AUTO_ALL_MODES 0 #define IGC_RX_HDR_LEN IGC_RXBUFFER_256 +/* Transmit and receive latency (for PTP timestamps) */ +/* FIXME: These values were estimated using the ones that i210 has as + * basis, they seem to provide good numbers with ptp4l/phc2sys, but we + * need to confirm them. 
+ */ +#define IGC_I225_TX_LATENCY_10 9542 +#define IGC_I225_TX_LATENCY_100 1024 +#define IGC_I225_TX_LATENCY_1000 178 +#define IGC_I225_TX_LATENCY_2500 64 +#define IGC_I225_RX_LATENCY_10 20662 +#define IGC_I225_RX_LATENCY_100 2213 +#define IGC_I225_RX_LATENCY_1000 448 +#define IGC_I225_RX_LATENCY_2500 160 + /* RX and TX descriptor control thresholds. * PTHRESH - MAC will consider prefetch if it has fewer than this number of * descriptors available in its onboard memory. @@ -370,6 +391,8 @@ struct igc_adapter { struct timer_list dma_err_timer; struct timer_list phy_info_timer; + u32 wol; + u32 en_mng_pt; u16 link_speed; u16 link_duplex; @@ -430,6 +453,20 @@ struct igc_adapter { unsigned long link_check_timeout; struct igc_info ei; + + struct ptp_clock *ptp_clock; + struct ptp_clock_info ptp_caps; + struct work_struct ptp_tx_work; + struct sk_buff *ptp_tx_skb; + struct hwtstamp_config tstamp_config; + unsigned long ptp_tx_start; + unsigned long last_rx_ptp_check; + unsigned long last_rx_timestamp; + unsigned int ptp_flags; + /* System time value lock */ + spinlock_t tmreg_lock; + struct cyclecounter cc; + struct timecounter tc; }; /* igc_desc_unused - calculate if we have unused descriptors */ @@ -513,6 +550,16 @@ int igc_add_filter(struct igc_adapter *adapter, int igc_erase_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input); +void igc_ptp_init(struct igc_adapter *adapter); +void igc_ptp_reset(struct igc_adapter *adapter); +void igc_ptp_stop(struct igc_adapter *adapter); +void igc_ptp_rx_rgtstamp(struct igc_q_vector *q_vector, struct sk_buff *skb); +void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, void *va, + struct sk_buff *skb); +int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr); +int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr); +void igc_ptp_tx_hang(struct igc_adapter *adapter); + #define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring)) #define IGC_TXD_DCMD (IGC_ADVTXD_DCMD_EOP | 
IGC_ADVTXD_DCMD_RS) diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c index db289bcce21d..5a506440560a 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.c +++ b/drivers/net/ethernet/intel/igc/igc_base.c @@ -212,6 +212,7 @@ static s32 igc_get_invariants_base(struct igc_hw *hw) case IGC_DEV_ID_I225_I: case IGC_DEV_ID_I220_V: case IGC_DEV_ID_I225_K: + case IGC_DEV_ID_I225_BLANK_NVM: mac->type = igc_i225; break; default: diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index f3788f0b95b4..58efa7a02c68 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -10,6 +10,37 @@ #define IGC_CTRL_EXT_DRV_LOAD 0x10000000 /* Drv loaded bit for FW */ +/* Definitions for power management and wakeup registers */ +/* Wake Up Control */ +#define IGC_WUC_PME_EN 0x00000002 /* PME Enable */ + +/* Wake Up Filter Control */ +#define IGC_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ +#define IGC_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ + +#define IGC_CTRL_ADVD3WUC 0x00100000 /* D3 WUC */ + +/* Wake Up Status */ +#define IGC_WUS_EX 0x00000004 /* Directed Exact */ +#define IGC_WUS_ARPD 0x00000020 /* Directed ARP Request */ +#define IGC_WUS_IPV4 0x00000040 /* Directed IPv4 */ +#define IGC_WUS_IPV6 0x00000080 /* Directed IPv6 */ +#define IGC_WUS_NSD 0x00000400 /* Directed IPv6 Neighbor Solicitation */ + +/* Packet types that are enabled for wake packet delivery */ +#define WAKE_PKT_WUS ( \ + IGC_WUS_EX | \ + IGC_WUS_ARPD | \ + IGC_WUS_IPV4 | \ + IGC_WUS_IPV6 | \ + IGC_WUS_NSD) + +/* Wake Up Packet Length */ +#define IGC_WUPL_MASK 0x00000FFF + +/* Wake Up Packet Memory stores the first 128 bytes of the wake up packet */ +#define IGC_WUPM_BYTES 128 + /* Physical Func Reset Done Indication */ #define IGC_CTRL_EXT_LINK_MODE_MASK 0x00C00000 @@ -187,6 +218,7 @@ #define IGC_ICR_RXDMT0 BIT(4) /* Rx desc min. 
threshold (0) */ #define IGC_ICR_RXO BIT(6) /* Rx overrun */ #define IGC_ICR_RXT0 BIT(7) /* Rx timer intr (ring 0) */ +#define IGC_ICR_TS BIT(19) /* Time Sync Interrupt */ #define IGC_ICR_DRSTA BIT(30) /* Device Reset Asserted */ /* If this bit asserted, the driver should claim the interrupt */ @@ -209,6 +241,7 @@ #define IGC_IMS_DRSTA IGC_ICR_DRSTA /* Device Reset Asserted */ #define IGC_IMS_RXT0 IGC_ICR_RXT0 /* Rx timer intr */ #define IGC_IMS_RXDMT0 IGC_ICR_RXDMT0 /* Rx desc min. threshold */ +#define IGC_IMS_TS IGC_ICR_TS /* Time Sync Interrupt */ #define IGC_QVECTOR_MASK 0x7FFC /* Q-vector mask */ #define IGC_ITR_VAL_MASK 0x04 /* ITR value mask */ @@ -249,6 +282,10 @@ #define IGC_TXD_STAT_TC 0x00000004 /* Tx Underrun */ #define IGC_TXD_EXTCMD_TSTAMP 0x00000010 /* IEEE1588 Timestamp packet */ +/* IPSec Encrypt Enable */ +#define IGC_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ +#define IGC_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ + /* Transmit Control */ #define IGC_TCTL_EN 0x00000002 /* enable Tx */ #define IGC_TCTL_PSP 0x00000008 /* pad short packets */ @@ -281,12 +318,21 @@ #define IGC_RCTL_RDMTS_HALF 0x00000000 /* Rx desc min thresh size */ #define IGC_RCTL_BAM 0x00008000 /* broadcast enable */ +/* Split Replication Receive Control */ +#define IGC_SRRCTL_TIMESTAMP 0x40000000 +#define IGC_SRRCTL_TIMER1SEL(timer) (((timer) & 0x3) << 14) +#define IGC_SRRCTL_TIMER0SEL(timer) (((timer) & 0x3) << 17) + /* Receive Descriptor bit definitions */ #define IGC_RXD_STAT_EOP 0x02 /* End of Packet */ #define IGC_RXD_STAT_IXSM 0x04 /* Ignore checksum */ #define IGC_RXD_STAT_UDPCS 0x10 /* UDP xsum calculated */ #define IGC_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */ +/* Advanced Receive Descriptor bit definitions */ +#define IGC_RXDADV_STAT_TSIP 0x08000 /* timestamp in packet */ +#define IGC_RXDADV_STAT_TS 0x10000 /* Pkt was time stamped */ + #define IGC_RXDEXT_STATERR_CE 0x01000000 #define IGC_RXDEXT_STATERR_SE 0x02000000 #define IGC_RXDEXT_STATERR_SEQ 0x04000000 
@@ -323,6 +369,61 @@ #define I225_RXPBSIZE_DEFAULT 0x000000A2 /* RXPBSIZE default */ #define I225_TXPBSIZE_DEFAULT 0x04000014 /* TXPBSIZE default */ +#define IGC_RXPBS_CFG_TS_EN 0x80000000 /* Timestamp in Rx buffer */ + +/* Time Sync Interrupt Causes */ +#define IGC_TSICR_SYS_WRAP BIT(0) /* SYSTIM Wrap around. */ +#define IGC_TSICR_TXTS BIT(1) /* Transmit Timestamp. */ +#define IGC_TSICR_TT0 BIT(3) /* Target Time 0 Trigger. */ +#define IGC_TSICR_TT1 BIT(4) /* Target Time 1 Trigger. */ +#define IGC_TSICR_AUTT0 BIT(5) /* Auxiliary Timestamp 0 Taken. */ +#define IGC_TSICR_AUTT1 BIT(6) /* Auxiliary Timestamp 1 Taken. */ + +#define IGC_TSICR_INTERRUPTS IGC_TSICR_TXTS + +/* PTP Queue Filter */ +#define IGC_ETQF_1588 BIT(30) + +#define IGC_FTQF_VF_BP 0x00008000 +#define IGC_FTQF_1588_TIME_STAMP 0x08000000 +#define IGC_FTQF_MASK 0xF0000000 +#define IGC_FTQF_MASK_PROTO_BP 0x10000000 + +/* Time Sync Receive Control bit definitions */ +#define IGC_TSYNCRXCTL_VALID 0x00000001 /* Rx timestamp valid */ +#define IGC_TSYNCRXCTL_TYPE_MASK 0x0000000E /* Rx type mask */ +#define IGC_TSYNCRXCTL_TYPE_L2_V2 0x00 +#define IGC_TSYNCRXCTL_TYPE_L4_V1 0x02 +#define IGC_TSYNCRXCTL_TYPE_L2_L4_V2 0x04 +#define IGC_TSYNCRXCTL_TYPE_ALL 0x08 +#define IGC_TSYNCRXCTL_TYPE_EVENT_V2 0x0A +#define IGC_TSYNCRXCTL_ENABLED 0x00000010 /* enable Rx timestamping */ +#define IGC_TSYNCRXCTL_SYSCFI 0x00000020 /* Sys clock frequency */ +#define IGC_TSYNCRXCTL_RXSYNSIG 0x00000400 /* Sample RX tstamp in PHY sop */ + +/* Time Sync Receive Configuration */ +#define IGC_TSYNCRXCFG_PTP_V1_CTRLT_MASK 0x000000FF +#define IGC_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE 0x00 +#define IGC_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE 0x01 + +/* Immediate Interrupt Receive */ +#define IGC_IMIR_CLEAR_MASK 0xF001FFFF /* IMIR Reg Clear Mask */ +#define IGC_IMIR_PORT_BYPASS 0x20000 /* IMIR Port Bypass Bit */ +#define IGC_IMIR_PRIORITY_SHIFT 29 /* IMIR Priority Shift */ +#define IGC_IMIREXT_CLEAR_MASK 0x7FFFF /* IMIREXT Reg Clear Mask */ + +/* 
Immediate Interrupt Receive Extended */ +#define IGC_IMIREXT_CTRL_BP 0x00080000 /* Bypass check of ctrl bits */ +#define IGC_IMIREXT_SIZE_BP 0x00001000 /* Packet size bypass */ + +/* Time Sync Transmit Control bit definitions */ +#define IGC_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */ +#define IGC_TSYNCTXCTL_ENABLED 0x00000010 /* enable Tx timestamping */ +#define IGC_TSYNCTXCTL_MAX_ALLOWED_DLY_MASK 0x0000F000 /* max delay */ +#define IGC_TSYNCTXCTL_SYNC_COMP_ERR 0x20000000 /* sync err */ +#define IGC_TSYNCTXCTL_SYNC_COMP 0x40000000 /* sync complete */ +#define IGC_TSYNCTXCTL_START_SYNC 0x80000000 /* initiate sync */ +#define IGC_TSYNCTXCTL_TXSYNSIG 0x00000020 /* Sample TX tstamp in PHY sop */ /* Receive Checksum Control */ #define IGC_RXCSUM_CRCOFL 0x00000800 /* CRC32 offload enable */ @@ -363,6 +464,7 @@ /* PHY Status Register */ #define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */ #define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */ +#define IGC_PHY_RST_COMP 0x0100 /* Internal PHY reset completion */ /* PHY 1000 MII Register/Bit Definitions */ /* PHY Registers defined by IEEE */ diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 455c1cdceb6e..ee07011e13e9 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1600,6 +1600,39 @@ static int igc_set_channels(struct net_device *netdev, return 0; } +static int igc_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct igc_adapter *adapter = netdev_priv(dev); + + if (adapter->ptp_clock) + info->phc_index = ptp_clock_index(adapter->ptp_clock); + else + info->phc_index = -1; + + switch (adapter->hw.mac.type) { + case igc_i225: + info->so_timestamping = + SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + 
info->tx_types = + BIT(HWTSTAMP_TX_OFF) | + BIT(HWTSTAMP_TX_ON); + + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE); + info->rx_filters |= BIT(HWTSTAMP_FILTER_ALL); + + return 0; + default: + return -EOPNOTSUPP; + } +} + static u32 igc_get_priv_flags(struct net_device *netdev) { struct igc_adapter *adapter = netdev_priv(netdev); @@ -1847,6 +1880,7 @@ static const struct ethtool_ops igc_ethtool_ops = { .get_rxfh_indir_size = igc_get_rxfh_indir_size, .get_rxfh = igc_get_rxfh, .set_rxfh = igc_set_rxfh, + .get_ts_info = igc_get_ts_info, .get_channels = igc_get_channels, .set_channels = igc_set_channels, .get_priv_flags = igc_get_priv_flags, diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h index 20f710645746..90ac0e0144d8 100644 --- a/drivers/net/ethernet/intel/igc/igc_hw.h +++ b/drivers/net/ethernet/intel/igc/igc_hw.h @@ -21,8 +21,7 @@ #define IGC_DEV_ID_I225_I 0x15F8 #define IGC_DEV_ID_I220_V 0x15F7 #define IGC_DEV_ID_I225_K 0x3100 - -#define IGC_FUNC_0 0 +#define IGC_DEV_ID_I225_BLANK_NVM 0x15FD /* Function pointers for the MAC. 
*/ struct igc_mac_operations { diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 9700527dd797..d9d5425fe8d9 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -8,6 +8,7 @@ #include <linux/tcp.h> #include <linux/udp.h> #include <linux/ip.h> +#include <linux/pm_runtime.h> #include <net/ipv6.h> @@ -44,31 +45,13 @@ static const struct pci_device_id igc_pci_tbl[] = { { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_I), board_base }, { PCI_VDEVICE(INTEL, IGC_DEV_ID_I220_V), board_base }, { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K), board_base }, + { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_BLANK_NVM), board_base }, /* required last entry */ {0, } }; MODULE_DEVICE_TABLE(pci, igc_pci_tbl); -/* forward declaration */ -static void igc_clean_tx_ring(struct igc_ring *tx_ring); -static int igc_sw_init(struct igc_adapter *); -static void igc_configure(struct igc_adapter *adapter); -static void igc_power_down_link(struct igc_adapter *adapter); -static void igc_set_default_mac_filter(struct igc_adapter *adapter); -static void igc_set_rx_mode(struct net_device *netdev); -static void igc_write_itr(struct igc_q_vector *q_vector); -static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector); -static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx); -static void igc_set_interrupt_capability(struct igc_adapter *adapter, - bool msix); -static void igc_free_q_vectors(struct igc_adapter *adapter); -static void igc_irq_disable(struct igc_adapter *adapter); -static void igc_irq_enable(struct igc_adapter *adapter); -static void igc_configure_msix(struct igc_adapter *adapter); -static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, - struct igc_rx_buffer *bi); - enum latency_range { lowest_latency = 0, low_latency = 1, @@ -76,6 +59,16 @@ enum latency_range { latency_invalid = 255 }; +/** + * igc_power_down_link - Power down the phy/serdes link + * @adapter: address of 
board private structure + */ +static void igc_power_down_link(struct igc_adapter *adapter) +{ + if (adapter->hw.phy.media_type == igc_media_type_copper) + igc_power_down_phy_copper_base(&adapter->hw); +} + void igc_reset(struct igc_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; @@ -110,11 +103,14 @@ void igc_reset(struct igc_adapter *adapter) if (!netif_running(adapter->netdev)) igc_power_down_link(adapter); + /* Re-enable PTP, where applicable. */ + igc_ptp_reset(adapter); + igc_get_phy_info(hw); } /** - * igc_power_up_link - Power up the phy/serdes link + * igc_power_up_link - Power up the phy link * @adapter: address of board private structure */ static void igc_power_up_link(struct igc_adapter *adapter) @@ -128,16 +124,6 @@ static void igc_power_up_link(struct igc_adapter *adapter) } /** - * igc_power_down_link - Power down the phy/serdes link - * @adapter: address of board private structure - */ -static void igc_power_down_link(struct igc_adapter *adapter) -{ - if (adapter->hw.phy.media_type == igc_media_type_copper) - igc_power_down_phy_copper_base(&adapter->hw); -} - -/** * igc_release_hw_control - release control of the h/w to f/w * @adapter: address of board private structure * @@ -176,43 +162,6 @@ static void igc_get_hw_control(struct igc_adapter *adapter) } /** - * igc_free_tx_resources - Free Tx Resources per Queue - * @tx_ring: Tx descriptor ring for a specific queue - * - * Free all transmit software resources - */ -void igc_free_tx_resources(struct igc_ring *tx_ring) -{ - igc_clean_tx_ring(tx_ring); - - vfree(tx_ring->tx_buffer_info); - tx_ring->tx_buffer_info = NULL; - - /* if not set, then don't free */ - if (!tx_ring->desc) - return; - - dma_free_coherent(tx_ring->dev, tx_ring->size, - tx_ring->desc, tx_ring->dma); - - tx_ring->desc = NULL; -} - -/** - * igc_free_all_tx_resources - Free Tx Resources for All Queues - * @adapter: board private structure - * - * Free all transmit software resources - */ -static void 
igc_free_all_tx_resources(struct igc_adapter *adapter) -{ - int i; - - for (i = 0; i < adapter->num_tx_queues; i++) - igc_free_tx_resources(adapter->tx_ring[i]); -} - -/** * igc_clean_tx_ring - Free Tx Buffers * @tx_ring: ring to be cleaned */ @@ -274,6 +223,43 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring) } /** + * igc_free_tx_resources - Free Tx Resources per Queue + * @tx_ring: Tx descriptor ring for a specific queue + * + * Free all transmit software resources + */ +void igc_free_tx_resources(struct igc_ring *tx_ring) +{ + igc_clean_tx_ring(tx_ring); + + vfree(tx_ring->tx_buffer_info); + tx_ring->tx_buffer_info = NULL; + + /* if not set, then don't free */ + if (!tx_ring->desc) + return; + + dma_free_coherent(tx_ring->dev, tx_ring->size, + tx_ring->desc, tx_ring->dma); + + tx_ring->desc = NULL; +} + +/** + * igc_free_all_tx_resources - Free Tx Resources for All Queues + * @adapter: board private structure + * + * Free all transmit software resources + */ +static void igc_free_all_tx_resources(struct igc_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) + igc_free_tx_resources(adapter->tx_ring[i]); +} + +/** * igc_clean_all_tx_rings - Free Tx Buffers for all queues * @adapter: board private structure */ @@ -771,6 +757,51 @@ static void igc_setup_tctl(struct igc_adapter *adapter) } /** + * igc_rar_set_index - Sync RAL[index] and RAH[index] registers with MAC table + * @adapter: address of board private structure + * @index: Index of the RAR entry which need to be synced with MAC table + */ +static void igc_rar_set_index(struct igc_adapter *adapter, u32 index) +{ + u8 *addr = adapter->mac_table[index].addr; + struct igc_hw *hw = &adapter->hw; + u32 rar_low, rar_high; + + /* HW expects these to be in network order when they are plugged + * into the registers which are little endian. 
In order to guarantee + * that ordering we need to do an leXX_to_cpup here in order to be + * ready for the byteswap that occurs with writel + */ + rar_low = le32_to_cpup((__le32 *)(addr)); + rar_high = le16_to_cpup((__le16 *)(addr + 4)); + + /* Indicate to hardware the Address is Valid. */ + if (adapter->mac_table[index].state & IGC_MAC_STATE_IN_USE) { + if (is_valid_ether_addr(addr)) + rar_high |= IGC_RAH_AV; + + rar_high |= IGC_RAH_POOL_1 << + adapter->mac_table[index].queue; + } + + wr32(IGC_RAL(index), rar_low); + wrfl(); + wr32(IGC_RAH(index), rar_high); + wrfl(); +} + +/* Set default MAC address for the PF in the first RAR entry */ +static void igc_set_default_mac_filter(struct igc_adapter *adapter) +{ + struct igc_mac_addr *mac_table = &adapter->mac_table[0]; + + ether_addr_copy(mac_table->addr, adapter->hw.mac.addr); + mac_table->state = IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE; + + igc_rar_set_index(adapter, 0); +} + +/** * igc_set_mac - Change the Ethernet Address of the NIC * @netdev: network interface device structure * @p: pointer to an address structure @@ -850,7 +881,7 @@ static void igc_tx_ctxtdesc(struct igc_ring *tx_ring, /* set bits to identify this as an advanced context descriptor */ type_tucmd |= IGC_TXD_CMD_DEXT | IGC_ADVTXD_DTYP_CTXT; - /* For 82575, context index must be unique per ring. */ + /* For i225, context index must be unique per ring. */ if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) mss_l4len_idx |= tx_ring->reg_idx << 4; @@ -957,6 +988,11 @@ static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size) return __igc_maybe_stop_tx(tx_ring, size); } +#define IGC_SET_FLAG(_input, _flag, _result) \ + (((_flag) <= (_result)) ? 
\ + ((u32)((_input) & (_flag)) * ((_result) / (_flag))) : \ + ((u32)((_input) & (_flag)) / ((_flag) / (_result)))) + static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) { /* set type for advanced descriptor with frame checksum insertion */ @@ -964,6 +1000,14 @@ static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) IGC_ADVTXD_DCMD_DEXT | IGC_ADVTXD_DCMD_IFCS; + /* set segmentation bits for TSO */ + cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSO, + (IGC_ADVTXD_DCMD_TSE)); + + /* set timestamp bit if present */ + cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP, + (IGC_ADVTXD_MAC_TSTAMP)); + return cmd_type; } @@ -1131,6 +1175,100 @@ dma_error: return -1; } +static int igc_tso(struct igc_ring *tx_ring, + struct igc_tx_buffer *first, + u8 *hdr_len) +{ + u32 vlan_macip_lens, type_tucmd, mss_l4len_idx; + struct sk_buff *skb = first->skb; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; + } l4; + u32 paylen, l4_offset; + int err; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + if (!skb_is_gso(skb)) + return 0; + + err = skb_cow_head(skb, 0); + if (err < 0) + return err; + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_checksum_start(skb); + + /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ + type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP; + + /* initialize outer IP header fields */ + if (ip.v4->version == 4) { + unsigned char *csum_start = skb_checksum_start(skb); + unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4); + + /* IP header will have to cancel out any data that + * is not a part of the outer IP header + */ + ip.v4->check = csum_fold(csum_partial(trans_start, + csum_start - trans_start, + 0)); + type_tucmd |= IGC_ADVTXD_TUCMD_IPV4; + + ip.v4->tot_len = 0; + first->tx_flags |= IGC_TX_FLAGS_TSO | + IGC_TX_FLAGS_CSUM | + IGC_TX_FLAGS_IPV4; + } else { + ip.v6->payload_len = 0; + first->tx_flags |= IGC_TX_FLAGS_TSO | + 
IGC_TX_FLAGS_CSUM; + } + + /* determine offset of inner transport header */ + l4_offset = l4.hdr - skb->data; + + /* remove payload length from inner checksum */ + paylen = skb->len - l4_offset; + if (type_tucmd & IGC_ADVTXD_TUCMD_L4T_TCP) { + /* compute length of segmentation header */ + *hdr_len = (l4.tcp->doff * 4) + l4_offset; + csum_replace_by_diff(&l4.tcp->check, + (__force __wsum)htonl(paylen)); + } else { + /* compute length of segmentation header */ + *hdr_len = sizeof(*l4.udp) + l4_offset; + csum_replace_by_diff(&l4.udp->check, + (__force __wsum)htonl(paylen)); + } + + /* update gso size and bytecount with header size */ + first->gso_segs = skb_shinfo(skb)->gso_segs; + first->bytecount += (first->gso_segs - 1) * *hdr_len; + + /* MSS L4LEN IDX */ + mss_l4len_idx = (*hdr_len - l4_offset) << IGC_ADVTXD_L4LEN_SHIFT; + mss_l4len_idx |= skb_shinfo(skb)->gso_size << IGC_ADVTXD_MSS_SHIFT; + + /* VLAN MACLEN IPLEN */ + vlan_macip_lens = l4.hdr - ip.hdr; + vlan_macip_lens |= (ip.hdr - skb->data) << IGC_ADVTXD_MACLEN_SHIFT; + vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK; + + igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, + type_tucmd, mss_l4len_idx); + + return 1; +} + static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, struct igc_ring *tx_ring) { @@ -1140,6 +1278,7 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, u32 tx_flags = 0; unsigned short f; u8 hdr_len = 0; + int tso = 0; /* need: 1 descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD, * + 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD, @@ -1162,15 +1301,45 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, first->bytecount = skb->len; first->gso_segs = 1; + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); + + /* FIXME: add support for retrieving timestamps from + * the other timer registers before skipping the + * timestamping request. 
+ */ + if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && + !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS, + &adapter->state)) { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + tx_flags |= IGC_TX_FLAGS_TSTAMP; + + adapter->ptp_tx_skb = skb_get(skb); + adapter->ptp_tx_start = jiffies; + } else { + adapter->tx_hwtstamp_skipped++; + } + } + /* record initial flags and protocol */ first->tx_flags = tx_flags; first->protocol = protocol; - igc_tx_csum(tx_ring, first); + tso = igc_tso(tx_ring, first, &hdr_len); + if (tso < 0) + goto out_drop; + else if (!tso) + igc_tx_csum(tx_ring, first); igc_tx_map(tx_ring, first, hdr_len); return NETDEV_TX_OK; + +out_drop: + dev_kfree_skb_any(first->skb); + first->skb = NULL; + + return NETDEV_TX_OK; } static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter, @@ -1269,6 +1438,10 @@ static void igc_process_skb_fields(struct igc_ring *rx_ring, igc_rx_checksum(rx_ring, rx_desc, skb); + if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TS) && + !igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) + igc_ptp_rx_rgtstamp(rx_ring->q_vector, skb); + skb_record_rx_queue(skb, rx_ring->queue_index); skb->protocol = eth_type_trans(skb, rx_ring->netdev); @@ -1388,6 +1561,12 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, if (unlikely(!skb)) return NULL; + if (unlikely(igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP))) { + igc_ptp_rx_pktstamp(rx_ring->q_vector, va, skb); + va += IGC_TS_HDR_LEN; + size -= IGC_TS_HDR_LEN; + } + /* Determine available headroom for copy */ headlen = size; if (headlen > IGC_RX_HDR_LEN) @@ -1485,7 +1664,6 @@ static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer) * igc_is_non_eop - process handling of non-EOP buffers * @rx_ring: Rx ring being processed * @rx_desc: Rx descriptor for current buffer - * @skb: current socket buffer containing buffer in progress * * This function updates next to clean. 
If the buffer is an EOP buffer * this function exits returning false, otherwise it will place the @@ -1565,9 +1743,56 @@ static void igc_put_rx_buffer(struct igc_ring *rx_ring, rx_buffer->page = NULL; } +static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring) +{ + return ring_uses_build_skb(rx_ring) ? IGC_SKB_PAD : 0; +} + +static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, + struct igc_rx_buffer *bi) +{ + struct page *page = bi->page; + dma_addr_t dma; + + /* since we are recycling buffers we should seldom need to alloc */ + if (likely(page)) + return true; + + /* alloc new page for storage */ + page = dev_alloc_pages(igc_rx_pg_order(rx_ring)); + if (unlikely(!page)) { + rx_ring->rx_stats.alloc_failed++; + return false; + } + + /* map page for use */ + dma = dma_map_page_attrs(rx_ring->dev, page, 0, + igc_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IGC_RX_DMA_ATTR); + + /* if mapping failed free memory back to system since + * there isn't much point in holding memory we can't use + */ + if (dma_mapping_error(rx_ring->dev, dma)) { + __free_page(page); + + rx_ring->rx_stats.alloc_failed++; + return false; + } + + bi->dma = dma; + bi->page = page; + bi->page_offset = igc_rx_offset(rx_ring); + bi->pagecnt_bias = 1; + + return true; +} + /** * igc_alloc_rx_buffers - Replace used receive buffers; packet split - * @adapter: address of board private structure + * @rx_ring: rx descriptor ring + * @cleaned_count: number of buffers to clean */ static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count) { @@ -1725,52 +1950,6 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) return total_packets; } -static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring) -{ - return ring_uses_build_skb(rx_ring) ? 
IGC_SKB_PAD : 0; -} - -static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, - struct igc_rx_buffer *bi) -{ - struct page *page = bi->page; - dma_addr_t dma; - - /* since we are recycling buffers we should seldom need to alloc */ - if (likely(page)) - return true; - - /* alloc new page for storage */ - page = dev_alloc_pages(igc_rx_pg_order(rx_ring)); - if (unlikely(!page)) { - rx_ring->rx_stats.alloc_failed++; - return false; - } - - /* map page for use */ - dma = dma_map_page_attrs(rx_ring->dev, page, 0, - igc_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IGC_RX_DMA_ATTR); - - /* if mapping failed free memory back to system since - * there isn't much point in holding memory we can't use - */ - if (dma_mapping_error(rx_ring->dev, dma)) { - __free_page(page); - - rx_ring->rx_stats.alloc_failed++; - return false; - } - - bi->dma = dma; - bi->page = page; - bi->page_offset = igc_rx_offset(rx_ring); - bi->pagecnt_bias = 1; - - return true; -} - /** * igc_clean_tx_irq - Reclaim resources after transmit completes * @q_vector: pointer to q_vector containing needed info @@ -1942,6 +2121,1128 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) return !!budget; } +static void igc_nfc_filter_restore(struct igc_adapter *adapter) +{ + struct igc_nfc_filter *rule; + + spin_lock(&adapter->nfc_lock); + + hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) + igc_add_filter(adapter, rule); + + spin_unlock(&adapter->nfc_lock); +} + +/* If the filter to be added and an already existing filter express + * the same address and address type, it should be possible to only + * override the other configurations, for example the queue to steer + * traffic. 
+ */ +static bool igc_mac_entry_can_be_used(const struct igc_mac_addr *entry, + const u8 *addr, const u8 flags) +{ + if (!(entry->state & IGC_MAC_STATE_IN_USE)) + return true; + + if ((entry->state & IGC_MAC_STATE_SRC_ADDR) != + (flags & IGC_MAC_STATE_SRC_ADDR)) + return false; + + if (!ether_addr_equal(addr, entry->addr)) + return false; + + return true; +} + +/* Add a MAC filter for 'addr' directing matching traffic to 'queue'. + * The match is made on the destination address: entries in use for a + * source address match (IGC_MAC_STATE_SRC_ADDR) are never overridden. + * Returns the index of the entry used or a negative error code. + */ +static int igc_add_mac_filter(struct igc_adapter *adapter, + const u8 *addr, const u8 queue) +{ + struct igc_hw *hw = &adapter->hw; + int rar_entries = hw->mac.rar_entry_count; + int i; + + if (is_zero_ether_addr(addr)) + return -EINVAL; + + /* Search for the first empty entry in the MAC table. + * Do not touch entries at the end of the table reserved for the VF MAC + * addresses. + */ + for (i = 0; i < rar_entries; i++) { + if (!igc_mac_entry_can_be_used(&adapter->mac_table[i], + addr, 0)) + continue; + + ether_addr_copy(adapter->mac_table[i].addr, addr); + adapter->mac_table[i].queue = queue; + adapter->mac_table[i].state |= IGC_MAC_STATE_IN_USE; + + igc_rar_set_index(adapter, i); + return i; + } + + return -ENOSPC; +} + +/* Remove a MAC filter for 'addr' directing matching traffic to + * 'queue'. Only destination address filters are considered, entries + * matching by source address (IGC_MAC_STATE_SRC_ADDR) are left alone. + * A filter for the default address is not cleared, it is returned to + * its initial configuration.
+ */ +static int igc_del_mac_filter(struct igc_adapter *adapter, + const u8 *addr, const u8 queue) +{ + struct igc_hw *hw = &adapter->hw; + int rar_entries = hw->mac.rar_entry_count; + int i; + + if (is_zero_ether_addr(addr)) + return -EINVAL; + + /* Search for matching entry in the MAC table based on given address + * and queue. Do not touch entries at the end of the table reserved + * for the VF MAC addresses. + */ + for (i = 0; i < rar_entries; i++) { + if (!(adapter->mac_table[i].state & IGC_MAC_STATE_IN_USE)) + continue; + if (adapter->mac_table[i].state & IGC_MAC_STATE_SRC_ADDR) + continue; + if (adapter->mac_table[i].queue != queue) + continue; + if (!ether_addr_equal(adapter->mac_table[i].addr, addr)) + continue; + + /* When a filter for the default address is "deleted", + * we return it to its initial configuration + */ + if (adapter->mac_table[i].state & IGC_MAC_STATE_DEFAULT) { + adapter->mac_table[i].state = + IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE; + adapter->mac_table[i].queue = 0; + } else { + adapter->mac_table[i].state = 0; + adapter->mac_table[i].queue = 0; + memset(adapter->mac_table[i].addr, 0, ETH_ALEN); + } + + igc_rar_set_index(adapter, i); + return 0; + } + + return -ENOENT; +} + +static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + int ret; + + ret = igc_add_mac_filter(adapter, addr, adapter->num_rx_queues); + + return min_t(int, ret, 0); +} + +static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + + igc_del_mac_filter(adapter, addr, adapter->num_rx_queues); + + return 0; +} + +/** + * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set + * @netdev: network interface device structure + * + * The set_rx_mode entry point is called whenever the unicast or multicast + * address lists or the network interface flags are updated.
This routine is + * responsible for configuring the hardware for proper unicast, multicast, + * promiscuous mode, and all-multi behavior. + */ +static void igc_set_rx_mode(struct net_device *netdev) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_hw *hw = &adapter->hw; + u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE; + int count; + + /* Check for Promiscuous and All Multicast modes */ + if (netdev->flags & IFF_PROMISC) { + rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE; + } else { + if (netdev->flags & IFF_ALLMULTI) { + rctl |= IGC_RCTL_MPE; + } else { + /* Write addresses to the MTA, if the attempt fails + * then we should just turn on promiscuous mode so + * that we can at least receive multicast traffic + */ + count = igc_write_mc_addr_list(netdev); + if (count < 0) + rctl |= IGC_RCTL_MPE; + } + } + + /* Write addresses to available RAR registers, if there is not + * sufficient space to store all the addresses then enable + * unicast promiscuous mode + */ + if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync)) + rctl |= IGC_RCTL_UPE; + + /* update state of unicast and multicast */ + rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE); + wr32(IGC_RCTL, rctl); + +#if (PAGE_SIZE < 8192) + if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB) + rlpml = IGC_MAX_FRAME_BUILD_SKB; +#endif + wr32(IGC_RLPML, rlpml); +} + +/** + * igc_configure - configure the hardware for RX and TX + * @adapter: private board structure + */ +static void igc_configure(struct igc_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + int i = 0; + + igc_get_hw_control(adapter); + igc_set_rx_mode(netdev); + + igc_setup_tctl(adapter); + igc_setup_mrqc(adapter); + igc_setup_rctl(adapter); + + igc_nfc_filter_restore(adapter); + igc_configure_tx(adapter); + igc_configure_rx(adapter); + + igc_rx_fifo_flush_base(&adapter->hw); + + /* call igc_desc_unused which always leaves + * at least 1 descriptor unused to make sure + * next_to_use != next_to_clean + */ + for (i = 0; 
i < adapter->num_rx_queues; i++) { + struct igc_ring *ring = adapter->rx_ring[i]; + + igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); + } +} + +/** + * igc_write_ivar - configure ivar for given MSI-X vector + * @hw: pointer to the HW structure + * @msix_vector: vector number we are allocating to a given ring + * @index: row index of IVAR register to write within IVAR table + * @offset: column offset of in IVAR, should be multiple of 8 + * + * The IVAR table consists of 2 columns, + * each containing an cause allocation for an Rx and Tx ring, and a + * variable number of rows depending on the number of queues supported. + */ +static void igc_write_ivar(struct igc_hw *hw, int msix_vector, + int index, int offset) +{ + u32 ivar = array_rd32(IGC_IVAR0, index); + + /* clear any bits that are currently set */ + ivar &= ~((u32)0xFF << offset); + + /* write vector and valid bit */ + ivar |= (msix_vector | IGC_IVAR_VALID) << offset; + + array_wr32(IGC_IVAR0, index, ivar); +} + +static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector) +{ + struct igc_adapter *adapter = q_vector->adapter; + struct igc_hw *hw = &adapter->hw; + int rx_queue = IGC_N0_QUEUE; + int tx_queue = IGC_N0_QUEUE; + + if (q_vector->rx.ring) + rx_queue = q_vector->rx.ring->reg_idx; + if (q_vector->tx.ring) + tx_queue = q_vector->tx.ring->reg_idx; + + switch (hw->mac.type) { + case igc_i225: + if (rx_queue > IGC_N0_QUEUE) + igc_write_ivar(hw, msix_vector, + rx_queue >> 1, + (rx_queue & 0x1) << 4); + if (tx_queue > IGC_N0_QUEUE) + igc_write_ivar(hw, msix_vector, + tx_queue >> 1, + ((tx_queue & 0x1) << 4) + 8); + q_vector->eims_value = BIT(msix_vector); + break; + default: + WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n"); + break; + } + + /* add q_vector eims value to global eims_enable_mask */ + adapter->eims_enable_mask |= q_vector->eims_value; + + /* configure q_vector to set itr on first interrupt */ + q_vector->set_itr = 1; +} + +/** + * igc_configure_msix - Configure 
MSI-X hardware + * @adapter: Pointer to adapter structure + * + * igc_configure_msix sets up the hardware to properly + * generate MSI-X interrupts. + */ +static void igc_configure_msix(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + int i, vector = 0; + u32 tmp; + + adapter->eims_enable_mask = 0; + + /* set vector for other causes, i.e. link changes */ + switch (hw->mac.type) { + case igc_i225: + /* Turn on MSI-X capability first, or our settings + * won't stick. And it will take days to debug. + */ + wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE | + IGC_GPIE_PBA | IGC_GPIE_EIAME | + IGC_GPIE_NSICR); + + /* enable msix_other interrupt */ + adapter->eims_other = BIT(vector); + tmp = (vector++ | IGC_IVAR_VALID) << 8; + + wr32(IGC_IVAR_MISC, tmp); + break; + default: + /* do nothing, since nothing else supports MSI-X */ + break; + } /* switch (hw->mac.type) */ + + adapter->eims_enable_mask |= adapter->eims_other; + + for (i = 0; i < adapter->num_q_vectors; i++) + igc_assign_vector(adapter->q_vector[i], vector++); + + wrfl(); +} + +/** + * igc_irq_enable - Enable default interrupt generation settings + * @adapter: board private structure + */ +static void igc_irq_enable(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + + if (adapter->msix_entries) { + u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA; + u32 regval = rd32(IGC_EIAC); + + wr32(IGC_EIAC, regval | adapter->eims_enable_mask); + regval = rd32(IGC_EIAM); + wr32(IGC_EIAM, regval | adapter->eims_enable_mask); + wr32(IGC_EIMS, adapter->eims_enable_mask); + wr32(IGC_IMS, ims); + } else { + wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA); + wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA); + } +} + +/** + * igc_irq_disable - Mask off interrupt generation on the NIC + * @adapter: board private structure + */ +static void igc_irq_disable(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + + if (adapter->msix_entries) { + u32 regval = rd32(IGC_EIAM); + + 
wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask); + wr32(IGC_EIMC, adapter->eims_enable_mask); + regval = rd32(IGC_EIAC); + wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask); + } + + wr32(IGC_IAM, 0); + wr32(IGC_IMC, ~0); + wrfl(); + + if (adapter->msix_entries) { + int vector = 0, i; + + synchronize_irq(adapter->msix_entries[vector++].vector); + + for (i = 0; i < adapter->num_q_vectors; i++) + synchronize_irq(adapter->msix_entries[vector++].vector); + } else { + synchronize_irq(adapter->pdev->irq); + } +} + +void igc_set_flag_queue_pairs(struct igc_adapter *adapter, + const u32 max_rss_queues) +{ + /* Determine if we need to pair queues. */ + /* If rss_queues > half of max_rss_queues, pair the queues in + * order to conserve interrupts due to limited supply. + */ + if (adapter->rss_queues > (max_rss_queues / 2)) + adapter->flags |= IGC_FLAG_QUEUE_PAIRS; + else + adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS; +} + +unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter) +{ + unsigned int max_rss_queues; + + /* Determine the maximum number of RSS queues supported. */ + max_rss_queues = IGC_MAX_RX_QUEUES; + + return max_rss_queues; +} + +static void igc_init_queue_configuration(struct igc_adapter *adapter) +{ + u32 max_rss_queues; + + max_rss_queues = igc_get_max_rss_queues(adapter); + adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus()); + + igc_set_flag_queue_pairs(adapter, max_rss_queues); +} + +/** + * igc_reset_q_vector - Reset config for interrupt vector + * @adapter: board private structure to initialize + * @v_idx: Index of vector to be reset + * + * If NAPI is enabled it will delete any references to the + * NAPI struct. This is preparation for igc_free_q_vector. 
+ */ +static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx) +{ + struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; + + /* if we're coming from igc_set_interrupt_capability, the vectors are + * not yet allocated + */ + if (!q_vector) + return; + + if (q_vector->tx.ring) + adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; + + if (q_vector->rx.ring) + adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL; + + netif_napi_del(&q_vector->napi); +} + +/** + * igc_free_q_vector - Free memory allocated for specific interrupt vector + * @adapter: board private structure to initialize + * @v_idx: Index of vector to be freed + * + * This function frees the memory allocated to the q_vector. + */ +static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx) +{ + struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; + + adapter->q_vector[v_idx] = NULL; + + /* igc_get_stats64() might access the rings on this vector, + * we must wait a grace period before freeing it. + */ + if (q_vector) + kfree_rcu(q_vector, rcu); +} + +/** + * igc_free_q_vectors - Free memory allocated for interrupt vectors + * @adapter: board private structure to initialize + * + * This function frees the memory allocated to the q_vectors. In addition if + * NAPI is enabled it will delete any references to the NAPI struct prior + * to freeing the q_vector. + */ +static void igc_free_q_vectors(struct igc_adapter *adapter) +{ + int v_idx = adapter->num_q_vectors; + + adapter->num_tx_queues = 0; + adapter->num_rx_queues = 0; + adapter->num_q_vectors = 0; + + while (v_idx--) { + igc_reset_q_vector(adapter, v_idx); + igc_free_q_vector(adapter, v_idx); + } +} + +/** + * igc_update_itr - update the dynamic ITR value based on statistics + * @q_vector: pointer to q_vector + * @ring_container: ring info to update the itr for + * + * Stores a new ITR value based on packets and byte + * counts during the last interrupt. 
The advantage of per interrupt + * computation is faster updates and more accurate ITR for the current + * traffic pattern. Constants in this function were computed + * based on theoretical maximum wire speed and thresholds were set based + * on testing data as well as attempting to minimize response time + * while increasing bulk throughput. + * NOTE: These calculations are only valid when operating in a single- + * queue environment. + */ +static void igc_update_itr(struct igc_q_vector *q_vector, + struct igc_ring_container *ring_container) +{ + unsigned int packets = ring_container->total_packets; + unsigned int bytes = ring_container->total_bytes; + u8 itrval = ring_container->itr; + + /* no packets, exit with status unchanged */ + if (packets == 0) + return; + + switch (itrval) { + case lowest_latency: + /* handle TSO and jumbo frames */ + if (bytes / packets > 8000) + itrval = bulk_latency; + else if ((packets < 5) && (bytes > 512)) + itrval = low_latency; + break; + case low_latency: /* 50 usec aka 20000 ints/s */ + if (bytes > 10000) { + /* this if handles the TSO accounting */ + if (bytes / packets > 8000) + itrval = bulk_latency; + else if ((packets < 10) || ((bytes / packets) > 1200)) + itrval = bulk_latency; + else if ((packets > 35)) + itrval = lowest_latency; + } else if (bytes / packets > 2000) { + itrval = bulk_latency; + } else if (packets <= 2 && bytes < 512) { + itrval = lowest_latency; + } + break; + case bulk_latency: /* 250 usec aka 4000 ints/s */ + if (bytes > 25000) { + if (packets > 35) + itrval = low_latency; + } else if (bytes < 1500) { + itrval = low_latency; + } + break; + } + + /* clear work counters since we have the values we need */ + ring_container->total_bytes = 0; + ring_container->total_packets = 0; + + /* write updated itr to ring container */ + ring_container->itr = itrval; +} + +static void igc_set_itr(struct igc_q_vector *q_vector) +{ + struct igc_adapter *adapter = q_vector->adapter; + u32 new_itr = q_vector->itr_val; + u8 
current_itr = 0; + + /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ + switch (adapter->link_speed) { + case SPEED_10: + case SPEED_100: + current_itr = 0; + new_itr = IGC_4K_ITR; + goto set_itr_now; + default: + break; + } + + igc_update_itr(q_vector, &q_vector->tx); + igc_update_itr(q_vector, &q_vector->rx); + + current_itr = max(q_vector->rx.itr, q_vector->tx.itr); + + /* conservative mode (itr 3) eliminates the lowest_latency setting */ + if (current_itr == lowest_latency && + ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || + (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) + current_itr = low_latency; + + switch (current_itr) { + /* counts and packets in update_itr are dependent on these numbers */ + case lowest_latency: + new_itr = IGC_70K_ITR; /* 70,000 ints/sec */ + break; + case low_latency: + new_itr = IGC_20K_ITR; /* 20,000 ints/sec */ + break; + case bulk_latency: + new_itr = IGC_4K_ITR; /* 4,000 ints/sec */ + break; + default: + break; + } + +set_itr_now: + if (new_itr != q_vector->itr_val) { + /* this attempts to bias the interrupt rate towards Bulk + * by adding intermediate steps when interrupt rate is + * increasing + */ + new_itr = new_itr > q_vector->itr_val ? + max((new_itr * q_vector->itr_val) / + (new_itr + (q_vector->itr_val >> 2)), + new_itr) : new_itr; + /* Don't write the value here; it resets the adapter's + * internal timer, and causes us to delay far longer than + * we should between interrupts. Instead, we write the ITR + * value at the beginning of the next interrupt so the timing + * ends up being correct. 
+ */ + q_vector->itr_val = new_itr; + q_vector->set_itr = 1; + } +} + +static void igc_reset_interrupt_capability(struct igc_adapter *adapter) +{ + int v_idx = adapter->num_q_vectors; + + if (adapter->msix_entries) { + pci_disable_msix(adapter->pdev); + kfree(adapter->msix_entries); + adapter->msix_entries = NULL; + } else if (adapter->flags & IGC_FLAG_HAS_MSI) { + pci_disable_msi(adapter->pdev); + } + + while (v_idx--) + igc_reset_q_vector(adapter, v_idx); +} + +/** + * igc_set_interrupt_capability - set MSI or MSI-X if supported + * @adapter: Pointer to adapter structure + * @msix: boolean value for MSI-X capability + * + * Attempt to configure interrupts using the best available + * capabilities of the hardware and kernel. + */ +static void igc_set_interrupt_capability(struct igc_adapter *adapter, + bool msix) +{ + int numvecs, i; + int err; + + if (!msix) + goto msi_only; + adapter->flags |= IGC_FLAG_HAS_MSIX; + + /* Number of supported queues. */ + adapter->num_rx_queues = adapter->rss_queues; + + adapter->num_tx_queues = adapter->rss_queues; + + /* start with one vector for every Rx queue */ + numvecs = adapter->num_rx_queues; + + /* if Tx handler is separate add 1 for every Tx queue */ + if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS)) + numvecs += adapter->num_tx_queues; + + /* store the number of vectors reserved for queues */ + adapter->num_q_vectors = numvecs; + + /* add 1 vector for link status interrupts */ + numvecs++; + + adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry), + GFP_KERNEL); + + if (!adapter->msix_entries) + return; + + /* populate entry values */ + for (i = 0; i < numvecs; i++) + adapter->msix_entries[i].entry = i; + + err = pci_enable_msix_range(adapter->pdev, + adapter->msix_entries, + numvecs, + numvecs); + if (err > 0) + return; + + kfree(adapter->msix_entries); + adapter->msix_entries = NULL; + + igc_reset_interrupt_capability(adapter); + +msi_only: + adapter->flags &= ~IGC_FLAG_HAS_MSIX; + + adapter->rss_queues = 1; + 
adapter->flags |= IGC_FLAG_QUEUE_PAIRS; + adapter->num_rx_queues = 1; + adapter->num_tx_queues = 1; + adapter->num_q_vectors = 1; + if (!pci_enable_msi(adapter->pdev)) + adapter->flags |= IGC_FLAG_HAS_MSI; +} + +/** + * igc_update_ring_itr - update the dynamic ITR value based on packet size + * @q_vector: pointer to q_vector + * + * Stores a new ITR value based on strictly on packet size. This + * algorithm is less sophisticated than that used in igc_update_itr, + * due to the difficulty of synchronizing statistics across multiple + * receive rings. The divisors and thresholds used by this function + * were determined based on theoretical maximum wire speed and testing + * data, in order to minimize response time while increasing bulk + * throughput. + * NOTE: This function is called only when operating in a multiqueue + * receive environment. + */ +static void igc_update_ring_itr(struct igc_q_vector *q_vector) +{ + struct igc_adapter *adapter = q_vector->adapter; + int new_val = q_vector->itr_val; + int avg_wire_size = 0; + unsigned int packets; + + /* For non-gigabit speeds, just fix the interrupt rate at 4000 + * ints/sec - ITR timer value of 120 ticks. 
+ */ + switch (adapter->link_speed) { + case SPEED_10: + case SPEED_100: + new_val = IGC_4K_ITR; + goto set_itr_val; + default: + break; + } + + packets = q_vector->rx.total_packets; + if (packets) + avg_wire_size = q_vector->rx.total_bytes / packets; + + packets = q_vector->tx.total_packets; + if (packets) + avg_wire_size = max_t(u32, avg_wire_size, + q_vector->tx.total_bytes / packets); + + /* if avg_wire_size isn't set no work was done */ + if (!avg_wire_size) + goto clear_counts; + + /* Add 24 bytes to size to account for CRC, preamble, and gap */ + avg_wire_size += 24; + + /* Don't starve jumbo frames */ + avg_wire_size = min(avg_wire_size, 3000); + + /* Give a little boost to mid-size frames */ + if (avg_wire_size > 300 && avg_wire_size < 1200) + new_val = avg_wire_size / 3; + else + new_val = avg_wire_size / 2; + + /* conservative mode (itr 3) eliminates the lowest_latency setting */ + if (new_val < IGC_20K_ITR && + ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || + (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) + new_val = IGC_20K_ITR; + +set_itr_val: + if (new_val != q_vector->itr_val) { + q_vector->itr_val = new_val; + q_vector->set_itr = 1; + } +clear_counts: + q_vector->rx.total_bytes = 0; + q_vector->rx.total_packets = 0; + q_vector->tx.total_bytes = 0; + q_vector->tx.total_packets = 0; +} + +static void igc_ring_irq_enable(struct igc_q_vector *q_vector) +{ + struct igc_adapter *adapter = q_vector->adapter; + struct igc_hw *hw = &adapter->hw; + + if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || + (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { + if (adapter->num_q_vectors == 1) + igc_set_itr(q_vector); + else + igc_update_ring_itr(q_vector); + } + + if (!test_bit(__IGC_DOWN, &adapter->state)) { + if (adapter->msix_entries) + wr32(IGC_EIMS, q_vector->eims_value); + else + igc_irq_enable(adapter); + } +} + +static void igc_add_ring(struct igc_ring *ring, + struct igc_ring_container *head) +{ + head->ring = ring; + 
head->count++; +} + +/** + * igc_cache_ring_register - Descriptor ring to register mapping + * @adapter: board private structure to initialize + * + * Once we know the feature-set enabled for the device, we'll cache + * the register offset the descriptor ring is assigned to. + */ +static void igc_cache_ring_register(struct igc_adapter *adapter) +{ + int i = 0, j = 0; + + switch (adapter->hw.mac.type) { + case igc_i225: + /* Fall through */ + default: + for (; i < adapter->num_rx_queues; i++) + adapter->rx_ring[i]->reg_idx = i; + for (; j < adapter->num_tx_queues; j++) + adapter->tx_ring[j]->reg_idx = j; + break; + } +} + +/** + * igc_poll - NAPI Rx polling callback + * @napi: napi polling structure + * @budget: count of how many packets we should handle + */ +static int igc_poll(struct napi_struct *napi, int budget) +{ + struct igc_q_vector *q_vector = container_of(napi, + struct igc_q_vector, + napi); + bool clean_complete = true; + int work_done = 0; + + if (q_vector->tx.ring) + clean_complete = igc_clean_tx_irq(q_vector, budget); + + if (q_vector->rx.ring) { + int cleaned = igc_clean_rx_irq(q_vector, budget); + + work_done += cleaned; + if (cleaned >= budget) + clean_complete = false; + } + + /* If all work not completed, return budget and keep polling */ + if (!clean_complete) + return budget; + + /* Exit the polling mode, but don't re-enable interrupts if stack might + * poll us due to busy-polling + */ + if (likely(napi_complete_done(napi, work_done))) + igc_ring_irq_enable(q_vector); + + return min(work_done, budget - 1); +} + +/** + * igc_alloc_q_vector - Allocate memory for a single interrupt vector + * @adapter: board private structure to initialize + * @v_count: q_vectors allocated on adapter, used for ring interleaving + * @v_idx: index of vector in adapter struct + * @txr_count: total number of Tx rings to allocate + * @txr_idx: index of first Tx ring to allocate + * @rxr_count: total number of Rx rings to allocate + * @rxr_idx: index of first Rx ring 
to allocate + * + * We allocate one q_vector. If allocation fails we return -ENOMEM. + */ +static int igc_alloc_q_vector(struct igc_adapter *adapter, + unsigned int v_count, unsigned int v_idx, + unsigned int txr_count, unsigned int txr_idx, + unsigned int rxr_count, unsigned int rxr_idx) +{ + struct igc_q_vector *q_vector; + struct igc_ring *ring; + int ring_count; + + /* igc only supports 1 Tx and/or 1 Rx queue per vector */ + if (txr_count > 1 || rxr_count > 1) + return -ENOMEM; + + ring_count = txr_count + rxr_count; + + /* allocate q_vector and rings */ + q_vector = adapter->q_vector[v_idx]; + if (!q_vector) + q_vector = kzalloc(struct_size(q_vector, ring, ring_count), + GFP_KERNEL); + else + memset(q_vector, 0, struct_size(q_vector, ring, ring_count)); + if (!q_vector) + return -ENOMEM; + + /* initialize NAPI */ + netif_napi_add(adapter->netdev, &q_vector->napi, + igc_poll, 64); + + /* tie q_vector and adapter together */ + adapter->q_vector[v_idx] = q_vector; + q_vector->adapter = adapter; + + /* initialize work limits */ + q_vector->tx.work_limit = adapter->tx_work_limit; + + /* initialize ITR configuration */ + q_vector->itr_register = adapter->io_addr + IGC_EITR(0); + q_vector->itr_val = IGC_START_ITR; + + /* initialize pointer to rings */ + ring = q_vector->ring; + + /* initialize ITR */ + if (rxr_count) { + /* rx or rx/tx vector */ + if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) + q_vector->itr_val = adapter->rx_itr_setting; + } else { + /* tx only vector */ + if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3) + q_vector->itr_val = adapter->tx_itr_setting; + } + + if (txr_count) { + /* assign generic ring traits */ + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Tx values */ + igc_add_ring(ring, &q_vector->tx); + + /* apply Tx specific ring traits */ + ring->count = adapter->tx_ring_count; + ring->queue_index = txr_idx; 
+ + /* assign ring to adapter */ + adapter->tx_ring[txr_idx] = ring; + + /* push pointer to next ring */ + ring++; + } + + if (rxr_count) { + /* assign generic ring traits */ + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Rx values */ + igc_add_ring(ring, &q_vector->rx); + + /* apply Rx specific ring traits */ + ring->count = adapter->rx_ring_count; + ring->queue_index = rxr_idx; + + /* assign ring to adapter */ + adapter->rx_ring[rxr_idx] = ring; + } + + return 0; +} + +/** + * igc_alloc_q_vectors - Allocate memory for interrupt vectors + * @adapter: board private structure to initialize + * + * We allocate one q_vector per queue interrupt. If allocation fails we + * return -ENOMEM. + */ +static int igc_alloc_q_vectors(struct igc_adapter *adapter) +{ + int rxr_remaining = adapter->num_rx_queues; + int txr_remaining = adapter->num_tx_queues; + int rxr_idx = 0, txr_idx = 0, v_idx = 0; + int q_vectors = adapter->num_q_vectors; + int err; + + if (q_vectors >= (rxr_remaining + txr_remaining)) { + for (; rxr_remaining; v_idx++) { + err = igc_alloc_q_vector(adapter, q_vectors, v_idx, + 0, 0, 1, rxr_idx); + + if (err) + goto err_out; + + /* update counts and index */ + rxr_remaining--; + rxr_idx++; + } + } + + for (; v_idx < q_vectors; v_idx++) { + int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); + int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); + + err = igc_alloc_q_vector(adapter, q_vectors, v_idx, + tqpv, txr_idx, rqpv, rxr_idx); + + if (err) + goto err_out; + + /* update counts and index */ + rxr_remaining -= rqpv; + txr_remaining -= tqpv; + rxr_idx++; + txr_idx++; + } + + return 0; + +err_out: + adapter->num_tx_queues = 0; + adapter->num_rx_queues = 0; + adapter->num_q_vectors = 0; + + while (v_idx--) + igc_free_q_vector(adapter, v_idx); + + return -ENOMEM; +} + +/** + * igc_init_interrupt_scheme - initialize interrupts, allocate 
queues/vectors + * @adapter: Pointer to adapter structure + * @msix: boolean for MSI-X capability + * + * This function initializes the interrupts and allocates all of the queues. + */ +static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix) +{ + struct pci_dev *pdev = adapter->pdev; + int err = 0; + + igc_set_interrupt_capability(adapter, msix); + + err = igc_alloc_q_vectors(adapter); + if (err) { + dev_err(&pdev->dev, "Unable to allocate memory for vectors\n"); + goto err_alloc_q_vectors; + } + + igc_cache_ring_register(adapter); + + return 0; + +err_alloc_q_vectors: + igc_reset_interrupt_capability(adapter); + return err; +} + +/** + * igc_sw_init - Initialize general software structures (struct igc_adapter) + * @adapter: board private structure to initialize + * + * igc_sw_init initializes the Adapter private data structure. + * Fields are initialized based on PCI device information and + * OS network device settings (MTU size). + */ +static int igc_sw_init(struct igc_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct pci_dev *pdev = adapter->pdev; + struct igc_hw *hw = &adapter->hw; + + int size = sizeof(struct igc_mac_addr) * hw->mac.rar_entry_count; + + pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); + + /* set default ring sizes */ + adapter->tx_ring_count = IGC_DEFAULT_TXD; + adapter->rx_ring_count = IGC_DEFAULT_RXD; + + /* set default ITR values */ + adapter->rx_itr_setting = IGC_DEFAULT_ITR; + adapter->tx_itr_setting = IGC_DEFAULT_ITR; + + /* set default work limits */ + adapter->tx_work_limit = IGC_DEFAULT_TX_WORK; + + /* adjust max frame to be at least the size of a standard frame */ + adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + + VLAN_HLEN; + adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; + + spin_lock_init(&adapter->nfc_lock); + spin_lock_init(&adapter->stats64_lock); + /* Assume MSI-X interrupts, will be checked during IRQ allocation */ + adapter->flags |= 
IGC_FLAG_HAS_MSIX; + + adapter->mac_table = kzalloc(size, GFP_ATOMIC); + if (!adapter->mac_table) + return -ENOMEM; + + igc_init_queue_configuration(adapter); + + /* This call may decrease the number of queues */ + if (igc_init_interrupt_scheme(adapter, true)) { + dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); + return -ENOMEM; + } + + /* Explicitly disable IRQ since the NIC can be in any state. */ + igc_irq_disable(adapter); + + set_bit(__IGC_DOWN, &adapter->state); + + return 0; +} + /** * igc_up - Open the interface and prepare it to handle traffic * @adapter: board private structure @@ -2163,18 +3464,6 @@ static void igc_nfc_filter_exit(struct igc_adapter *adapter) spin_unlock(&adapter->nfc_lock); } -static void igc_nfc_filter_restore(struct igc_adapter *adapter) -{ - struct igc_nfc_filter *rule; - - spin_lock(&adapter->nfc_lock); - - hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) - igc_add_filter(adapter, rule); - - spin_unlock(&adapter->nfc_lock); -} - /** * igc_down - Close the interface * @adapter: board private structure @@ -2398,105 +3687,6 @@ igc_features_check(struct sk_buff *skb, struct net_device *dev, return features; } -/** - * igc_configure - configure the hardware for RX and TX - * @adapter: private board structure - */ -static void igc_configure(struct igc_adapter *adapter) -{ - struct net_device *netdev = adapter->netdev; - int i = 0; - - igc_get_hw_control(adapter); - igc_set_rx_mode(netdev); - - igc_setup_tctl(adapter); - igc_setup_mrqc(adapter); - igc_setup_rctl(adapter); - - igc_nfc_filter_restore(adapter); - igc_configure_tx(adapter); - igc_configure_rx(adapter); - - igc_rx_fifo_flush_base(&adapter->hw); - - /* call igc_desc_unused which always leaves - * at least 1 descriptor unused to make sure - * next_to_use != next_to_clean - */ - for (i = 0; i < adapter->num_rx_queues; i++) { - struct igc_ring *ring = adapter->rx_ring[i]; - - igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); - } -} - -/** - * 
igc_rar_set_index - Sync RAL[index] and RAH[index] registers with MAC table - * @adapter: address of board private structure - * @index: Index of the RAR entry which need to be synced with MAC table - */ -static void igc_rar_set_index(struct igc_adapter *adapter, u32 index) -{ - u8 *addr = adapter->mac_table[index].addr; - struct igc_hw *hw = &adapter->hw; - u32 rar_low, rar_high; - - /* HW expects these to be in network order when they are plugged - * into the registers which are little endian. In order to guarantee - * that ordering we need to do an leXX_to_cpup here in order to be - * ready for the byteswap that occurs with writel - */ - rar_low = le32_to_cpup((__le32 *)(addr)); - rar_high = le16_to_cpup((__le16 *)(addr + 4)); - - /* Indicate to hardware the Address is Valid. */ - if (adapter->mac_table[index].state & IGC_MAC_STATE_IN_USE) { - if (is_valid_ether_addr(addr)) - rar_high |= IGC_RAH_AV; - - rar_high |= IGC_RAH_POOL_1 << - adapter->mac_table[index].queue; - } - - wr32(IGC_RAL(index), rar_low); - wrfl(); - wr32(IGC_RAH(index), rar_high); - wrfl(); -} - -/* Set default MAC address for the PF in the first RAR entry */ -static void igc_set_default_mac_filter(struct igc_adapter *adapter) -{ - struct igc_mac_addr *mac_table = &adapter->mac_table[0]; - - ether_addr_copy(mac_table->addr, adapter->hw.mac.addr); - mac_table->state = IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE; - - igc_rar_set_index(adapter, 0); -} - -/* If the filter to be added and an already existing filter express - * the same address and address type, it should be possible to only - * override the other configurations, for example the queue to steer - * traffic. 
- */ -static bool igc_mac_entry_can_be_used(const struct igc_mac_addr *entry, - const u8 *addr, const u8 flags) -{ - if (!(entry->state & IGC_MAC_STATE_IN_USE)) - return true; - - if ((entry->state & IGC_MAC_STATE_SRC_ADDR) != - (flags & IGC_MAC_STATE_SRC_ADDR)) - return false; - - if (!ether_addr_equal(addr, entry->addr)) - return false; - - return true; -} - /* Add a MAC filter for 'addr' directing matching traffic to 'queue', * 'flags' is used to indicate what kind of match is made, match is by * default for the destination address, if matching by source address @@ -2597,159 +3787,20 @@ int igc_del_mac_steering_filter(struct igc_adapter *adapter, IGC_MAC_STATE_QUEUE_STEERING | flags); } -/* Add a MAC filter for 'addr' directing matching traffic to 'queue', - * 'flags' is used to indicate what kind of match is made, match is by - * default for the destination address, if matching by source address - * is desired the flag IGC_MAC_STATE_SRC_ADDR can be used. - */ -static int igc_add_mac_filter(struct igc_adapter *adapter, - const u8 *addr, const u8 queue) +static void igc_tsync_interrupt(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; - int rar_entries = hw->mac.rar_entry_count; - int i; - - if (is_zero_ether_addr(addr)) - return -EINVAL; - - /* Search for the first empty entry in the MAC table. - * Do not touch entries at the end of the table reserved for the VF MAC - * addresses. 
- */ - for (i = 0; i < rar_entries; i++) { - if (!igc_mac_entry_can_be_used(&adapter->mac_table[i], - addr, 0)) - continue; - - ether_addr_copy(adapter->mac_table[i].addr, addr); - adapter->mac_table[i].queue = queue; - adapter->mac_table[i].state |= IGC_MAC_STATE_IN_USE; - - igc_rar_set_index(adapter, i); - return i; - } - - return -ENOSPC; -} - -/* Remove a MAC filter for 'addr' directing matching traffic to - * 'queue', 'flags' is used to indicate what kind of match need to be - * removed, match is by default for the destination address, if - * matching by source address is to be removed the flag - * IGC_MAC_STATE_SRC_ADDR can be used. - */ -static int igc_del_mac_filter(struct igc_adapter *adapter, - const u8 *addr, const u8 queue) -{ - struct igc_hw *hw = &adapter->hw; - int rar_entries = hw->mac.rar_entry_count; - int i; - - if (is_zero_ether_addr(addr)) - return -EINVAL; - - /* Search for matching entry in the MAC table based on given address - * and queue. Do not touch entries at the end of the table reserved - * for the VF MAC addresses. 
- */ - for (i = 0; i < rar_entries; i++) { - if (!(adapter->mac_table[i].state & IGC_MAC_STATE_IN_USE)) - continue; - if (adapter->mac_table[i].state != 0) - continue; - if (adapter->mac_table[i].queue != queue) - continue; - if (!ether_addr_equal(adapter->mac_table[i].addr, addr)) - continue; - - /* When a filter for the default address is "deleted", - * we return it to its initial configuration - */ - if (adapter->mac_table[i].state & IGC_MAC_STATE_DEFAULT) { - adapter->mac_table[i].state = - IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE; - adapter->mac_table[i].queue = 0; - } else { - adapter->mac_table[i].state = 0; - adapter->mac_table[i].queue = 0; - memset(adapter->mac_table[i].addr, 0, ETH_ALEN); - } + u32 tsicr = rd32(IGC_TSICR); + u32 ack = 0; - igc_rar_set_index(adapter, i); - return 0; + if (tsicr & IGC_TSICR_TXTS) { + /* retrieve hardware timestamp */ + schedule_work(&adapter->ptp_tx_work); + ack |= IGC_TSICR_TXTS; } - return -ENOENT; -} - -static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr) -{ - struct igc_adapter *adapter = netdev_priv(netdev); - int ret; - - ret = igc_add_mac_filter(adapter, addr, adapter->num_rx_queues); - - return min_t(int, ret, 0); -} - -static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr) -{ - struct igc_adapter *adapter = netdev_priv(netdev); - - igc_del_mac_filter(adapter, addr, adapter->num_rx_queues); - - return 0; -} - -/** - * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set - * @netdev: network interface device structure - * - * The set_rx_mode entry point is called whenever the unicast or multicast - * address lists or the network interface flags are updated. This routine is - * responsible for configuring the hardware for proper unicast, multicast, - * promiscuous mode, and all-multi behavior. 
- */ -static void igc_set_rx_mode(struct net_device *netdev) -{ - struct igc_adapter *adapter = netdev_priv(netdev); - struct igc_hw *hw = &adapter->hw; - u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE; - int count; - - /* Check for Promiscuous and All Multicast modes */ - if (netdev->flags & IFF_PROMISC) { - rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE; - } else { - if (netdev->flags & IFF_ALLMULTI) { - rctl |= IGC_RCTL_MPE; - } else { - /* Write addresses to the MTA, if the attempt fails - * then we should just turn on promiscuous mode so - * that we can at least receive multicast traffic - */ - count = igc_write_mc_addr_list(netdev); - if (count < 0) - rctl |= IGC_RCTL_MPE; - } - } - - /* Write addresses to available RAR registers, if there is not - * sufficient space to store all the addresses then enable - * unicast promiscuous mode - */ - if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync)) - rctl |= IGC_RCTL_UPE; - - /* update state of unicast and multicast */ - rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE); - wr32(IGC_RCTL, rctl); - -#if (PAGE_SIZE < 8192) - if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB) - rlpml = IGC_MAX_FRAME_BUILD_SKB; -#endif - wr32(IGC_RLPML, rlpml); + /* acknowledge the interrupts */ + wr32(IGC_TSICR, ack); } /** @@ -2779,114 +3830,28 @@ static irqreturn_t igc_msix_other(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } + if (icr & IGC_ICR_TS) + igc_tsync_interrupt(adapter); + wr32(IGC_EIMS, adapter->eims_other); return IRQ_HANDLED; } -/** - * igc_write_ivar - configure ivar for given MSI-X vector - * @hw: pointer to the HW structure - * @msix_vector: vector number we are allocating to a given ring - * @index: row index of IVAR register to write within IVAR table - * @offset: column offset of in IVAR, should be multiple of 8 - * - * The IVAR table consists of 2 columns, - * each containing an cause allocation for an Rx and Tx ring, and a - * variable number of rows depending on the number of queues 
supported. - */ -static void igc_write_ivar(struct igc_hw *hw, int msix_vector, - int index, int offset) -{ - u32 ivar = array_rd32(IGC_IVAR0, index); - - /* clear any bits that are currently set */ - ivar &= ~((u32)0xFF << offset); - - /* write vector and valid bit */ - ivar |= (msix_vector | IGC_IVAR_VALID) << offset; - - array_wr32(IGC_IVAR0, index, ivar); -} - -static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector) -{ - struct igc_adapter *adapter = q_vector->adapter; - struct igc_hw *hw = &adapter->hw; - int rx_queue = IGC_N0_QUEUE; - int tx_queue = IGC_N0_QUEUE; - - if (q_vector->rx.ring) - rx_queue = q_vector->rx.ring->reg_idx; - if (q_vector->tx.ring) - tx_queue = q_vector->tx.ring->reg_idx; - - switch (hw->mac.type) { - case igc_i225: - if (rx_queue > IGC_N0_QUEUE) - igc_write_ivar(hw, msix_vector, - rx_queue >> 1, - (rx_queue & 0x1) << 4); - if (tx_queue > IGC_N0_QUEUE) - igc_write_ivar(hw, msix_vector, - tx_queue >> 1, - ((tx_queue & 0x1) << 4) + 8); - q_vector->eims_value = BIT(msix_vector); - break; - default: - WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n"); - break; - } - - /* add q_vector eims value to global eims_enable_mask */ - adapter->eims_enable_mask |= q_vector->eims_value; - - /* configure q_vector to set itr on first interrupt */ - q_vector->set_itr = 1; -} - -/** - * igc_configure_msix - Configure MSI-X hardware - * @adapter: Pointer to adapter structure - * - * igc_configure_msix sets up the hardware to properly - * generate MSI-X interrupts. - */ -static void igc_configure_msix(struct igc_adapter *adapter) +static void igc_write_itr(struct igc_q_vector *q_vector) { - struct igc_hw *hw = &adapter->hw; - int i, vector = 0; - u32 tmp; - - adapter->eims_enable_mask = 0; - - /* set vector for other causes, i.e. link changes */ - switch (hw->mac.type) { - case igc_i225: - /* Turn on MSI-X capability first, or our settings - * won't stick. And it will take days to debug. 
- */ - wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE | - IGC_GPIE_PBA | IGC_GPIE_EIAME | - IGC_GPIE_NSICR); - - /* enable msix_other interrupt */ - adapter->eims_other = BIT(vector); - tmp = (vector++ | IGC_IVAR_VALID) << 8; + u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK; - wr32(IGC_IVAR_MISC, tmp); - break; - default: - /* do nothing, since nothing else supports MSI-X */ - break; - } /* switch (hw->mac.type) */ + if (!q_vector->set_itr) + return; - adapter->eims_enable_mask |= adapter->eims_other; + if (!itr_val) + itr_val = IGC_ITR_VAL_MASK; - for (i = 0; i < adapter->num_q_vectors; i++) - igc_assign_vector(adapter->q_vector[i], vector++); + itr_val |= IGC_EITR_CNT_IGNR; - wrfl(); + writel(itr_val, q_vector->itr_register); + q_vector->set_itr = 0; } static irqreturn_t igc_msix_ring(int irq, void *data) @@ -2961,49 +3926,6 @@ err_out: } /** - * igc_reset_q_vector - Reset config for interrupt vector - * @adapter: board private structure to initialize - * @v_idx: Index of vector to be reset - * - * If NAPI is enabled it will delete any references to the - * NAPI struct. This is preparation for igc_free_q_vector. 
- */ -static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx) -{ - struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; - - /* if we're coming from igc_set_interrupt_capability, the vectors are - * not yet allocated - */ - if (!q_vector) - return; - - if (q_vector->tx.ring) - adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; - - if (q_vector->rx.ring) - adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL; - - netif_napi_del(&q_vector->napi); -} - -static void igc_reset_interrupt_capability(struct igc_adapter *adapter) -{ - int v_idx = adapter->num_q_vectors; - - if (adapter->msix_entries) { - pci_disable_msix(adapter->pdev); - kfree(adapter->msix_entries); - adapter->msix_entries = NULL; - } else if (adapter->flags & IGC_FLAG_HAS_MSI) { - pci_disable_msi(adapter->pdev); - } - - while (v_idx--) - igc_reset_q_vector(adapter, v_idx); -} - -/** * igc_clear_interrupt_scheme - reset the device to a state of no interrupts * @adapter: Pointer to adapter structure * @@ -3016,48 +3938,6 @@ static void igc_clear_interrupt_scheme(struct igc_adapter *adapter) igc_reset_interrupt_capability(adapter); } -/** - * igc_free_q_vectors - Free memory allocated for interrupt vectors - * @adapter: board private structure to initialize - * - * This function frees the memory allocated to the q_vectors. In addition if - * NAPI is enabled it will delete any references to the NAPI struct prior - * to freeing the q_vector. - */ -static void igc_free_q_vectors(struct igc_adapter *adapter) -{ - int v_idx = adapter->num_q_vectors; - - adapter->num_tx_queues = 0; - adapter->num_rx_queues = 0; - adapter->num_q_vectors = 0; - - while (v_idx--) { - igc_reset_q_vector(adapter, v_idx); - igc_free_q_vector(adapter, v_idx); - } -} - -/** - * igc_free_q_vector - Free memory allocated for specific interrupt vector - * @adapter: board private structure to initialize - * @v_idx: Index of vector to be freed - * - * This function frees the memory allocated to the q_vector. 
- */ -static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx) -{ - struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; - - adapter->q_vector[v_idx] = NULL; - - /* igc_get_stats64() might access the rings on this vector, - * we must wait a grace period before freeing it. - */ - if (q_vector) - kfree_rcu(q_vector, rcu); -} - /* Need to wait a few seconds after link up to get diagnostic information from * the phy */ @@ -3109,7 +3989,7 @@ bool igc_has_link(struct igc_adapter *adapter) /** * igc_watchdog - Timer Call-back - * @data: pointer to adapter cast into an unsigned long + * @t: timer for the watchdog */ static void igc_watchdog(struct timer_list *t) { @@ -3282,6 +4162,8 @@ no_wait: wr32(IGC_ICS, IGC_ICS_RXDMT0); } + igc_ptp_tx_hang(adapter); + /* Reset the timer */ if (!test_bit(__IGC_DOWN, &adapter->state)) { if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) @@ -3294,149 +4176,6 @@ no_wait: } /** - * igc_update_ring_itr - update the dynamic ITR value based on packet size - * @q_vector: pointer to q_vector - * - * Stores a new ITR value based on strictly on packet size. This - * algorithm is less sophisticated than that used in igc_update_itr, - * due to the difficulty of synchronizing statistics across multiple - * receive rings. The divisors and thresholds used by this function - * were determined based on theoretical maximum wire speed and testing - * data, in order to minimize response time while increasing bulk - * throughput. - * NOTE: This function is called only when operating in a multiqueue - * receive environment. - */ -static void igc_update_ring_itr(struct igc_q_vector *q_vector) -{ - struct igc_adapter *adapter = q_vector->adapter; - int new_val = q_vector->itr_val; - int avg_wire_size = 0; - unsigned int packets; - - /* For non-gigabit speeds, just fix the interrupt rate at 4000 - * ints/sec - ITR timer value of 120 ticks. 
- */ - switch (adapter->link_speed) { - case SPEED_10: - case SPEED_100: - new_val = IGC_4K_ITR; - goto set_itr_val; - default: - break; - } - - packets = q_vector->rx.total_packets; - if (packets) - avg_wire_size = q_vector->rx.total_bytes / packets; - - packets = q_vector->tx.total_packets; - if (packets) - avg_wire_size = max_t(u32, avg_wire_size, - q_vector->tx.total_bytes / packets); - - /* if avg_wire_size isn't set no work was done */ - if (!avg_wire_size) - goto clear_counts; - - /* Add 24 bytes to size to account for CRC, preamble, and gap */ - avg_wire_size += 24; - - /* Don't starve jumbo frames */ - avg_wire_size = min(avg_wire_size, 3000); - - /* Give a little boost to mid-size frames */ - if (avg_wire_size > 300 && avg_wire_size < 1200) - new_val = avg_wire_size / 3; - else - new_val = avg_wire_size / 2; - - /* conservative mode (itr 3) eliminates the lowest_latency setting */ - if (new_val < IGC_20K_ITR && - ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || - (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) - new_val = IGC_20K_ITR; - -set_itr_val: - if (new_val != q_vector->itr_val) { - q_vector->itr_val = new_val; - q_vector->set_itr = 1; - } -clear_counts: - q_vector->rx.total_bytes = 0; - q_vector->rx.total_packets = 0; - q_vector->tx.total_bytes = 0; - q_vector->tx.total_packets = 0; -} - -/** - * igc_update_itr - update the dynamic ITR value based on statistics - * @q_vector: pointer to q_vector - * @ring_container: ring info to update the itr for - * - * Stores a new ITR value based on packets and byte - * counts during the last interrupt. The advantage of per interrupt - * computation is faster updates and more accurate ITR for the current - * traffic pattern. Constants in this function were computed - * based on theoretical maximum wire speed and thresholds were set based - * on testing data as well as attempting to minimize response time - * while increasing bulk throughput. 
- * NOTE: These calculations are only valid when operating in a single- - * queue environment. - */ -static void igc_update_itr(struct igc_q_vector *q_vector, - struct igc_ring_container *ring_container) -{ - unsigned int packets = ring_container->total_packets; - unsigned int bytes = ring_container->total_bytes; - u8 itrval = ring_container->itr; - - /* no packets, exit with status unchanged */ - if (packets == 0) - return; - - switch (itrval) { - case lowest_latency: - /* handle TSO and jumbo frames */ - if (bytes / packets > 8000) - itrval = bulk_latency; - else if ((packets < 5) && (bytes > 512)) - itrval = low_latency; - break; - case low_latency: /* 50 usec aka 20000 ints/s */ - if (bytes > 10000) { - /* this if handles the TSO accounting */ - if (bytes / packets > 8000) - itrval = bulk_latency; - else if ((packets < 10) || ((bytes / packets) > 1200)) - itrval = bulk_latency; - else if ((packets > 35)) - itrval = lowest_latency; - } else if (bytes / packets > 2000) { - itrval = bulk_latency; - } else if (packets <= 2 && bytes < 512) { - itrval = lowest_latency; - } - break; - case bulk_latency: /* 250 usec aka 4000 ints/s */ - if (bytes > 25000) { - if (packets > 35) - itrval = low_latency; - } else if (bytes < 1500) { - itrval = low_latency; - } - break; - } - - /* clear work counters since we have the values we need */ - ring_container->total_bytes = 0; - ring_container->total_packets = 0; - - /* write updated itr to ring container */ - ring_container->itr = itrval; -} - -/** * igc_intr_msi - Interrupt Handler * @irq: interrupt number * @data: pointer to a network interface device structure @@ -3513,424 +4252,6 @@ static irqreturn_t igc_intr(int irq, void *data) return IRQ_HANDLED; } -static void igc_set_itr(struct igc_q_vector *q_vector) -{ - struct igc_adapter *adapter = q_vector->adapter; - u32 new_itr = q_vector->itr_val; - u8 current_itr = 0; - - /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ - switch (adapter->link_speed) { - case 
SPEED_10: - case SPEED_100: - current_itr = 0; - new_itr = IGC_4K_ITR; - goto set_itr_now; - default: - break; - } - - igc_update_itr(q_vector, &q_vector->tx); - igc_update_itr(q_vector, &q_vector->rx); - - current_itr = max(q_vector->rx.itr, q_vector->tx.itr); - - /* conservative mode (itr 3) eliminates the lowest_latency setting */ - if (current_itr == lowest_latency && - ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || - (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) - current_itr = low_latency; - - switch (current_itr) { - /* counts and packets in update_itr are dependent on these numbers */ - case lowest_latency: - new_itr = IGC_70K_ITR; /* 70,000 ints/sec */ - break; - case low_latency: - new_itr = IGC_20K_ITR; /* 20,000 ints/sec */ - break; - case bulk_latency: - new_itr = IGC_4K_ITR; /* 4,000 ints/sec */ - break; - default: - break; - } - -set_itr_now: - if (new_itr != q_vector->itr_val) { - /* this attempts to bias the interrupt rate towards Bulk - * by adding intermediate steps when interrupt rate is - * increasing - */ - new_itr = new_itr > q_vector->itr_val ? - max((new_itr * q_vector->itr_val) / - (new_itr + (q_vector->itr_val >> 2)), - new_itr) : new_itr; - /* Don't write the value here; it resets the adapter's - * internal timer, and causes us to delay far longer than - * we should between interrupts. Instead, we write the ITR - * value at the beginning of the next interrupt so the timing - * ends up being correct. 
- */ - q_vector->itr_val = new_itr; - q_vector->set_itr = 1; - } -} - -static void igc_ring_irq_enable(struct igc_q_vector *q_vector) -{ - struct igc_adapter *adapter = q_vector->adapter; - struct igc_hw *hw = &adapter->hw; - - if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || - (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { - if (adapter->num_q_vectors == 1) - igc_set_itr(q_vector); - else - igc_update_ring_itr(q_vector); - } - - if (!test_bit(__IGC_DOWN, &adapter->state)) { - if (adapter->msix_entries) - wr32(IGC_EIMS, q_vector->eims_value); - else - igc_irq_enable(adapter); - } -} - -/** - * igc_poll - NAPI Rx polling callback - * @napi: napi polling structure - * @budget: count of how many packets we should handle - */ -static int igc_poll(struct napi_struct *napi, int budget) -{ - struct igc_q_vector *q_vector = container_of(napi, - struct igc_q_vector, - napi); - bool clean_complete = true; - int work_done = 0; - - if (q_vector->tx.ring) - clean_complete = igc_clean_tx_irq(q_vector, budget); - - if (q_vector->rx.ring) { - int cleaned = igc_clean_rx_irq(q_vector, budget); - - work_done += cleaned; - if (cleaned >= budget) - clean_complete = false; - } - - /* If all work not completed, return budget and keep polling */ - if (!clean_complete) - return budget; - - /* Exit the polling mode, but don't re-enable interrupts if stack might - * poll us due to busy-polling - */ - if (likely(napi_complete_done(napi, work_done))) - igc_ring_irq_enable(q_vector); - - return min(work_done, budget - 1); -} - -/** - * igc_set_interrupt_capability - set MSI or MSI-X if supported - * @adapter: Pointer to adapter structure - * - * Attempt to configure interrupts using the best available - * capabilities of the hardware and kernel. - */ -static void igc_set_interrupt_capability(struct igc_adapter *adapter, - bool msix) -{ - int numvecs, i; - int err; - - if (!msix) - goto msi_only; - adapter->flags |= IGC_FLAG_HAS_MSIX; - - /* Number of supported queues. 
*/ - adapter->num_rx_queues = adapter->rss_queues; - - adapter->num_tx_queues = adapter->rss_queues; - - /* start with one vector for every Rx queue */ - numvecs = adapter->num_rx_queues; - - /* if Tx handler is separate add 1 for every Tx queue */ - if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS)) - numvecs += adapter->num_tx_queues; - - /* store the number of vectors reserved for queues */ - adapter->num_q_vectors = numvecs; - - /* add 1 vector for link status interrupts */ - numvecs++; - - adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry), - GFP_KERNEL); - - if (!adapter->msix_entries) - return; - - /* populate entry values */ - for (i = 0; i < numvecs; i++) - adapter->msix_entries[i].entry = i; - - err = pci_enable_msix_range(adapter->pdev, - adapter->msix_entries, - numvecs, - numvecs); - if (err > 0) - return; - - kfree(adapter->msix_entries); - adapter->msix_entries = NULL; - - igc_reset_interrupt_capability(adapter); - -msi_only: - adapter->flags &= ~IGC_FLAG_HAS_MSIX; - - adapter->rss_queues = 1; - adapter->flags |= IGC_FLAG_QUEUE_PAIRS; - adapter->num_rx_queues = 1; - adapter->num_tx_queues = 1; - adapter->num_q_vectors = 1; - if (!pci_enable_msi(adapter->pdev)) - adapter->flags |= IGC_FLAG_HAS_MSI; -} - -static void igc_add_ring(struct igc_ring *ring, - struct igc_ring_container *head) -{ - head->ring = ring; - head->count++; -} - -/** - * igc_alloc_q_vector - Allocate memory for a single interrupt vector - * @adapter: board private structure to initialize - * @v_count: q_vectors allocated on adapter, used for ring interleaving - * @v_idx: index of vector in adapter struct - * @txr_count: total number of Tx rings to allocate - * @txr_idx: index of first Tx ring to allocate - * @rxr_count: total number of Rx rings to allocate - * @rxr_idx: index of first Rx ring to allocate - * - * We allocate one q_vector. If allocation fails we return -ENOMEM. 
- */ -static int igc_alloc_q_vector(struct igc_adapter *adapter, - unsigned int v_count, unsigned int v_idx, - unsigned int txr_count, unsigned int txr_idx, - unsigned int rxr_count, unsigned int rxr_idx) -{ - struct igc_q_vector *q_vector; - struct igc_ring *ring; - int ring_count; - - /* igc only supports 1 Tx and/or 1 Rx queue per vector */ - if (txr_count > 1 || rxr_count > 1) - return -ENOMEM; - - ring_count = txr_count + rxr_count; - - /* allocate q_vector and rings */ - q_vector = adapter->q_vector[v_idx]; - if (!q_vector) - q_vector = kzalloc(struct_size(q_vector, ring, ring_count), - GFP_KERNEL); - else - memset(q_vector, 0, struct_size(q_vector, ring, ring_count)); - if (!q_vector) - return -ENOMEM; - - /* initialize NAPI */ - netif_napi_add(adapter->netdev, &q_vector->napi, - igc_poll, 64); - - /* tie q_vector and adapter together */ - adapter->q_vector[v_idx] = q_vector; - q_vector->adapter = adapter; - - /* initialize work limits */ - q_vector->tx.work_limit = adapter->tx_work_limit; - - /* initialize ITR configuration */ - q_vector->itr_register = adapter->io_addr + IGC_EITR(0); - q_vector->itr_val = IGC_START_ITR; - - /* initialize pointer to rings */ - ring = q_vector->ring; - - /* initialize ITR */ - if (rxr_count) { - /* rx or rx/tx vector */ - if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) - q_vector->itr_val = adapter->rx_itr_setting; - } else { - /* tx only vector */ - if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3) - q_vector->itr_val = adapter->tx_itr_setting; - } - - if (txr_count) { - /* assign generic ring traits */ - ring->dev = &adapter->pdev->dev; - ring->netdev = adapter->netdev; - - /* configure backlink on ring */ - ring->q_vector = q_vector; - - /* update q_vector Tx values */ - igc_add_ring(ring, &q_vector->tx); - - /* apply Tx specific ring traits */ - ring->count = adapter->tx_ring_count; - ring->queue_index = txr_idx; - - /* assign ring to adapter */ - adapter->tx_ring[txr_idx] = ring; - - /* push 
pointer to next ring */ - ring++; - } - - if (rxr_count) { - /* assign generic ring traits */ - ring->dev = &adapter->pdev->dev; - ring->netdev = adapter->netdev; - - /* configure backlink on ring */ - ring->q_vector = q_vector; - - /* update q_vector Rx values */ - igc_add_ring(ring, &q_vector->rx); - - /* apply Rx specific ring traits */ - ring->count = adapter->rx_ring_count; - ring->queue_index = rxr_idx; - - /* assign ring to adapter */ - adapter->rx_ring[rxr_idx] = ring; - } - - return 0; -} - -/** - * igc_alloc_q_vectors - Allocate memory for interrupt vectors - * @adapter: board private structure to initialize - * - * We allocate one q_vector per queue interrupt. If allocation fails we - * return -ENOMEM. - */ -static int igc_alloc_q_vectors(struct igc_adapter *adapter) -{ - int rxr_remaining = adapter->num_rx_queues; - int txr_remaining = adapter->num_tx_queues; - int rxr_idx = 0, txr_idx = 0, v_idx = 0; - int q_vectors = adapter->num_q_vectors; - int err; - - if (q_vectors >= (rxr_remaining + txr_remaining)) { - for (; rxr_remaining; v_idx++) { - err = igc_alloc_q_vector(adapter, q_vectors, v_idx, - 0, 0, 1, rxr_idx); - - if (err) - goto err_out; - - /* update counts and index */ - rxr_remaining--; - rxr_idx++; - } - } - - for (; v_idx < q_vectors; v_idx++) { - int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); - int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); - - err = igc_alloc_q_vector(adapter, q_vectors, v_idx, - tqpv, txr_idx, rqpv, rxr_idx); - - if (err) - goto err_out; - - /* update counts and index */ - rxr_remaining -= rqpv; - txr_remaining -= tqpv; - rxr_idx++; - txr_idx++; - } - - return 0; - -err_out: - adapter->num_tx_queues = 0; - adapter->num_rx_queues = 0; - adapter->num_q_vectors = 0; - - while (v_idx--) - igc_free_q_vector(adapter, v_idx); - - return -ENOMEM; -} - -/** - * igc_cache_ring_register - Descriptor ring to register mapping - * @adapter: board private structure to initialize - * - * Once we know the 
feature-set enabled for the device, we'll cache - * the register offset the descriptor ring is assigned to. - */ -static void igc_cache_ring_register(struct igc_adapter *adapter) -{ - int i = 0, j = 0; - - switch (adapter->hw.mac.type) { - case igc_i225: - /* Fall through */ - default: - for (; i < adapter->num_rx_queues; i++) - adapter->rx_ring[i]->reg_idx = i; - for (; j < adapter->num_tx_queues; j++) - adapter->tx_ring[j]->reg_idx = j; - break; - } -} - -/** - * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors - * @adapter: Pointer to adapter structure - * - * This function initializes the interrupts and allocates all of the queues. - */ -static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix) -{ - struct pci_dev *pdev = adapter->pdev; - int err = 0; - - igc_set_interrupt_capability(adapter, msix); - - err = igc_alloc_q_vectors(adapter); - if (err) { - dev_err(&pdev->dev, "Unable to allocate memory for vectors\n"); - goto err_alloc_q_vectors; - } - - igc_cache_ring_register(adapter); - - return 0; - -err_alloc_q_vectors: - igc_reset_interrupt_capability(adapter); - return err; -} - static void igc_free_irq(struct igc_adapter *adapter) { if (adapter->msix_entries) { @@ -3947,62 +4268,6 @@ static void igc_free_irq(struct igc_adapter *adapter) } /** - * igc_irq_disable - Mask off interrupt generation on the NIC - * @adapter: board private structure - */ -static void igc_irq_disable(struct igc_adapter *adapter) -{ - struct igc_hw *hw = &adapter->hw; - - if (adapter->msix_entries) { - u32 regval = rd32(IGC_EIAM); - - wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask); - wr32(IGC_EIMC, adapter->eims_enable_mask); - regval = rd32(IGC_EIAC); - wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask); - } - - wr32(IGC_IAM, 0); - wr32(IGC_IMC, ~0); - wrfl(); - - if (adapter->msix_entries) { - int vector = 0, i; - - synchronize_irq(adapter->msix_entries[vector++].vector); - - for (i = 0; i < adapter->num_q_vectors; i++) - 
synchronize_irq(adapter->msix_entries[vector++].vector); - } else { - synchronize_irq(adapter->pdev->irq); - } -} - -/** - * igc_irq_enable - Enable default interrupt generation settings - * @adapter: board private structure - */ -static void igc_irq_enable(struct igc_adapter *adapter) -{ - struct igc_hw *hw = &adapter->hw; - - if (adapter->msix_entries) { - u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA; - u32 regval = rd32(IGC_EIAC); - - wr32(IGC_EIAC, regval | adapter->eims_enable_mask); - regval = rd32(IGC_EIAM); - wr32(IGC_EIAM, regval | adapter->eims_enable_mask); - wr32(IGC_EIMS, adapter->eims_enable_mask); - wr32(IGC_IMS, ims); - } else { - wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA); - wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA); - } -} - -/** * igc_request_irq - initialize interrupts * @adapter: Pointer to adapter structure * @@ -4056,25 +4321,10 @@ request_done: return err; } -static void igc_write_itr(struct igc_q_vector *q_vector) -{ - u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK; - - if (!q_vector->set_itr) - return; - - if (!itr_val) - itr_val = IGC_ITR_VAL_MASK; - - itr_val |= IGC_EITR_CNT_IGNR; - - writel(itr_val, q_vector->itr_register); - q_vector->set_itr = 0; -} - /** - * igc_open - Called when a network interface is made active + * __igc_open - Called when a network interface is made active * @netdev: network interface device structure + * @resuming: boolean indicating if the device is resuming * * Returns 0 on success, negative value on failure * @@ -4164,8 +4414,9 @@ static int igc_open(struct net_device *netdev) } /** - * igc_close - Disables a network interface + * __igc_close - Disables a network interface * @netdev: network interface device structure + * @suspending: boolean indicating the device is suspending * * Returns 0, this is not allowed to fail * @@ -4199,6 +4450,24 @@ static int igc_close(struct net_device *netdev) return 0; } +/** + * igc_ioctl - Access the hwtstamp interface + * @netdev: network interface 
device structure + * @ifreq: interface request data + * @cmd: ioctl command + **/ +static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +{ + switch (cmd) { + case SIOCGHWTSTAMP: + return igc_ptp_get_ts_config(netdev, ifr); + case SIOCSHWTSTAMP: + return igc_ptp_set_ts_config(netdev, ifr); + default: + return -EOPNOTSUPP; + } +} + static const struct net_device_ops igc_netdev_ops = { .ndo_open = igc_open, .ndo_stop = igc_close, @@ -4210,6 +4479,7 @@ static const struct net_device_ops igc_netdev_ops = { .ndo_fix_features = igc_fix_features, .ndo_set_features = igc_set_features, .ndo_features_check = igc_features_check, + .ndo_do_ioctl = igc_ioctl, }; /* PCIe configuration access */ @@ -4345,32 +4615,26 @@ static int igc_probe(struct pci_dev *pdev, struct net_device *netdev; struct igc_hw *hw; const struct igc_info *ei = igc_info_tbl[ent->driver_data]; - int err; + int err, pci_using_dac; err = pci_enable_device_mem(pdev); if (err) return err; - err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); + pci_using_dac = 0; + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (!err) { - err = dma_set_coherent_mask(&pdev->dev, - DMA_BIT_MASK(64)); + pci_using_dac = 1; } else { - err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (err) { - err = dma_set_coherent_mask(&pdev->dev, - DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, "igc: Wrong DMA config\n"); - goto err_dma; - } + dev_err(&pdev->dev, + "No usable DMA configuration, aborting\n"); + goto err_dma; } } - err = pci_request_selected_regions(pdev, - pci_select_bars(pdev, - IORESOURCE_MEM), - igc_driver_name); + err = pci_request_mem_regions(pdev, igc_driver_name); if (err) goto err_pci_reg; @@ -4433,6 +4697,9 @@ static int igc_probe(struct pci_dev *pdev, goto err_sw_init; /* Add supported features to the features list*/ + netdev->features |= NETIF_F_SG; + netdev->features |= NETIF_F_TSO; + netdev->features |= 
NETIF_F_TSO6; netdev->features |= NETIF_F_RXCSUM; netdev->features |= NETIF_F_HW_CSUM; netdev->features |= NETIF_F_SCTP_CRC; @@ -4446,6 +4713,9 @@ static int igc_probe(struct pci_dev *pdev, netdev->hw_features |= NETIF_F_NTUPLE; netdev->hw_features |= netdev->features; + if (pci_using_dac) + netdev->features |= NETIF_F_HIGHDMA; + /* MTU range: 68 - 9216 */ netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE; @@ -4512,6 +4782,9 @@ static int igc_probe(struct pci_dev *pdev, /* carrier off reporting is important to ethtool even BEFORE open */ netif_carrier_off(netdev); + /* do hw tstamp init after resetting */ + igc_ptp_init(adapter); + /* Check if Media Autosense is enabled */ adapter->ei = *ei; @@ -4532,8 +4805,7 @@ err_sw_init: err_ioremap: free_netdev(netdev); err_alloc_etherdev: - pci_release_selected_regions(pdev, - pci_select_bars(pdev, IORESOURCE_MEM)); + pci_release_mem_regions(pdev); err_pci_reg: err_dma: pci_disable_device(pdev); @@ -4554,6 +4826,8 @@ static void igc_remove(struct pci_dev *pdev) struct net_device *netdev = pci_get_drvdata(pdev); struct igc_adapter *adapter = netdev_priv(netdev); + igc_ptp_stop(adapter); + set_bit(__IGC_DOWN, &adapter->state); del_timer_sync(&adapter->watchdog_timer); @@ -4580,105 +4854,216 @@ static void igc_remove(struct pci_dev *pdev) pci_disable_device(pdev); } -static struct pci_driver igc_driver = { - .name = igc_driver_name, - .id_table = igc_pci_tbl, - .probe = igc_probe, - .remove = igc_remove, -}; - -void igc_set_flag_queue_pairs(struct igc_adapter *adapter, - const u32 max_rss_queues) +static int __igc_shutdown(struct pci_dev *pdev, bool *enable_wake, + bool runtime) { - /* Determine if we need to pair queues. */ - /* If rss_queues > half of max_rss_queues, pair the queues in - * order to conserve interrupts due to limited supply. 
- */ - if (adapter->rss_queues > (max_rss_queues / 2)) - adapter->flags |= IGC_FLAG_QUEUE_PAIRS; + struct net_device *netdev = pci_get_drvdata(pdev); + struct igc_adapter *adapter = netdev_priv(netdev); + u32 wufc = runtime ? IGC_WUFC_LNKC : adapter->wol; + struct igc_hw *hw = &adapter->hw; + u32 ctrl, rctl, status; + bool wake; + + rtnl_lock(); + netif_device_detach(netdev); + + if (netif_running(netdev)) + __igc_close(netdev, true); + + igc_clear_interrupt_scheme(adapter); + rtnl_unlock(); + + status = rd32(IGC_STATUS); + if (status & IGC_STATUS_LU) + wufc &= ~IGC_WUFC_LNKC; + + if (wufc) { + igc_setup_rctl(adapter); + igc_set_rx_mode(netdev); + + /* turn on all-multi mode if wake on multicast is enabled */ + if (wufc & IGC_WUFC_MC) { + rctl = rd32(IGC_RCTL); + rctl |= IGC_RCTL_MPE; + wr32(IGC_RCTL, rctl); + } + + ctrl = rd32(IGC_CTRL); + ctrl |= IGC_CTRL_ADVD3WUC; + wr32(IGC_CTRL, ctrl); + + /* Allow time for pending master requests to run */ + igc_disable_pcie_master(hw); + + wr32(IGC_WUC, IGC_WUC_PME_EN); + wr32(IGC_WUFC, wufc); + } else { + wr32(IGC_WUC, 0); + wr32(IGC_WUFC, 0); + } + + wake = wufc || adapter->en_mng_pt; + if (!wake) + igc_power_down_link(adapter); else - adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS; -} + igc_power_up_link(adapter); -unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter) -{ - unsigned int max_rss_queues; + if (enable_wake) + *enable_wake = wake; - /* Determine the maximum number of RSS queues supported. */ - max_rss_queues = IGC_MAX_RX_QUEUES; + /* Release control of h/w to f/w. If f/w is AMT enabled, this + * would have already happened in close and is redundant. 
+ */ + igc_release_hw_control(adapter); - return max_rss_queues; + pci_disable_device(pdev); + + return 0; } -static void igc_init_queue_configuration(struct igc_adapter *adapter) +#ifdef CONFIG_PM +static int __maybe_unused igc_runtime_suspend(struct device *dev) { - u32 max_rss_queues; - - max_rss_queues = igc_get_max_rss_queues(adapter); - adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus()); - - igc_set_flag_queue_pairs(adapter, max_rss_queues); + return __igc_shutdown(to_pci_dev(dev), NULL, 1); } -/** - * igc_sw_init - Initialize general software structures (struct igc_adapter) - * @adapter: board private structure to initialize - * - * igc_sw_init initializes the Adapter private data structure. - * Fields are initialized based on PCI device information and - * OS network device settings (MTU size). - */ -static int igc_sw_init(struct igc_adapter *adapter) +static void igc_deliver_wake_packet(struct net_device *netdev) { - struct net_device *netdev = adapter->netdev; - struct pci_dev *pdev = adapter->pdev; + struct igc_adapter *adapter = netdev_priv(netdev); struct igc_hw *hw = &adapter->hw; + struct sk_buff *skb; + u32 wupl; - int size = sizeof(struct igc_mac_addr) * hw->mac.rar_entry_count; + wupl = rd32(IGC_WUPL) & IGC_WUPL_MASK; - pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); + /* WUPM stores only the first 128 bytes of the wake packet. + * Read the packet only if we have the whole thing. 
+ */ + if (wupl == 0 || wupl > IGC_WUPM_BYTES) + return; - /* set default ring sizes */ - adapter->tx_ring_count = IGC_DEFAULT_TXD; - adapter->rx_ring_count = IGC_DEFAULT_RXD; + skb = netdev_alloc_skb_ip_align(netdev, IGC_WUPM_BYTES); + if (!skb) + return; - /* set default ITR values */ - adapter->rx_itr_setting = IGC_DEFAULT_ITR; - adapter->tx_itr_setting = IGC_DEFAULT_ITR; + skb_put(skb, wupl); - /* set default work limits */ - adapter->tx_work_limit = IGC_DEFAULT_TX_WORK; + /* Ensure reads are 32-bit aligned */ + wupl = roundup(wupl, 4); - /* adjust max frame to be at least the size of a standard frame */ - adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + - VLAN_HLEN; - adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; + memcpy_fromio(skb->data, hw->hw_addr + IGC_WUPM_REG(0), wupl); - spin_lock_init(&adapter->nfc_lock); - spin_lock_init(&adapter->stats64_lock); - /* Assume MSI-X interrupts, will be checked during IRQ allocation */ - adapter->flags |= IGC_FLAG_HAS_MSIX; + skb->protocol = eth_type_trans(skb, netdev); + netif_rx(skb); +} - adapter->mac_table = kzalloc(size, GFP_ATOMIC); - if (!adapter->mac_table) - return -ENOMEM; +static int __maybe_unused igc_resume(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct net_device *netdev = pci_get_drvdata(pdev); + struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_hw *hw = &adapter->hw; + u32 err, val; - igc_init_queue_configuration(adapter); + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + pci_save_state(pdev); + + if (!pci_device_is_present(pdev)) + return -ENODEV; + err = pci_enable_device_mem(pdev); + if (err) { + dev_err(&pdev->dev, + "igc: Cannot enable PCI device from suspend\n"); + return err; + } + pci_set_master(pdev); + + pci_enable_wake(pdev, PCI_D3hot, 0); + pci_enable_wake(pdev, PCI_D3cold, 0); - /* This call may decrease the number of queues */ if (igc_init_interrupt_scheme(adapter, true)) { dev_err(&pdev->dev, "Unable to allocate 
memory for queues\n"); return -ENOMEM; } - /* Explicitly disable IRQ since the NIC can be in any state. */ - igc_irq_disable(adapter); + igc_reset(adapter); - set_bit(__IGC_DOWN, &adapter->state); + /* let the f/w know that the h/w is now under the control of the + * driver. + */ + igc_get_hw_control(adapter); - return 0; + val = rd32(IGC_WUS); + if (val & WAKE_PKT_WUS) + igc_deliver_wake_packet(netdev); + + wr32(IGC_WUS, ~0); + + rtnl_lock(); + if (!err && netif_running(netdev)) + err = __igc_open(netdev, true); + + if (!err) + netif_device_attach(netdev); + rtnl_unlock(); + + return err; +} + +static int __maybe_unused igc_runtime_resume(struct device *dev) +{ + return igc_resume(dev); +} + +static int __maybe_unused igc_suspend(struct device *dev) +{ + return __igc_shutdown(to_pci_dev(dev), NULL, 0); } +static int __maybe_unused igc_runtime_idle(struct device *dev) +{ + struct net_device *netdev = dev_get_drvdata(dev); + struct igc_adapter *adapter = netdev_priv(netdev); + + if (!igc_has_link(adapter)) + pm_schedule_suspend(dev, MSEC_PER_SEC * 5); + + return -EBUSY; +} +#endif /* CONFIG_PM */ + +static void igc_shutdown(struct pci_dev *pdev) +{ + bool wake; + + __igc_shutdown(pdev, &wake, 0); + + if (system_state == SYSTEM_POWER_OFF) { + pci_wake_from_d3(pdev, wake); + pci_set_power_state(pdev, PCI_D3hot); + } +} + +#ifdef CONFIG_PM +static const struct dev_pm_ops igc_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume) + SET_RUNTIME_PM_OPS(igc_runtime_suspend, igc_runtime_resume, + igc_runtime_idle) +}; +#endif + +static struct pci_driver igc_driver = { + .name = igc_driver_name, + .id_table = igc_pci_tbl, + .probe = igc_probe, + .remove = igc_remove, +#ifdef CONFIG_PM + .driver.pm = &igc_pm_ops, +#endif + .shutdown = igc_shutdown, +}; + /** * igc_reinit_queues - return error * @adapter: pointer to adapter structure diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c index f4b05af0dd2f..8e1799508edc 100644 
--- a/drivers/net/ethernet/intel/igc/igc_phy.c +++ b/drivers/net/ethernet/intel/igc/igc_phy.c @@ -173,6 +173,7 @@ s32 igc_check_downshift(struct igc_hw *hw) s32 igc_phy_hw_reset(struct igc_hw *hw) { struct igc_phy_info *phy = &hw->phy; + u32 phpm = 0, timeout = 10000; s32 ret_val; u32 ctrl; @@ -186,6 +187,8 @@ s32 igc_phy_hw_reset(struct igc_hw *hw) if (ret_val) goto out; + phpm = rd32(IGC_I225_PHPM); + ctrl = rd32(IGC_CTRL); wr32(IGC_CTRL, ctrl | IGC_CTRL_PHY_RST); wrfl(); @@ -195,7 +198,18 @@ s32 igc_phy_hw_reset(struct igc_hw *hw) wr32(IGC_CTRL, ctrl); wrfl(); - usleep_range(1500, 2000); + /* SW should guarantee 100us for the completion of the PHY reset */ + usleep_range(100, 150); + do { + phpm = rd32(IGC_I225_PHPM); + timeout--; + udelay(1); + } while (!(phpm & IGC_PHY_RST_COMP) && timeout); + + if (!timeout) + hw_dbg("Timeout is expired after a phy reset\n"); + + usleep_range(100, 150); phy->ops.release(hw); diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c new file mode 100644 index 000000000000..693506587198 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -0,0 +1,716 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Intel Corporation */ + +#include "igc.h" + +#include <linux/module.h> +#include <linux/device.h> +#include <linux/pci.h> +#include <linux/ptp_classify.h> +#include <linux/clocksource.h> + +#define INCVALUE_MASK 0x7fffffff +#define ISGN 0x80000000 + +#define IGC_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 9) +#define IGC_PTP_TX_TIMEOUT (HZ * 15) + +/* SYSTIM read access for I225 */ +static void igc_ptp_read_i225(struct igc_adapter *adapter, + struct timespec64 *ts) +{ + struct igc_hw *hw = &adapter->hw; + u32 sec, nsec; + + /* The timestamp latches on lowest register read. For I210/I211, the + * lowest register is SYSTIMR. Since we only need to provide nanosecond + * resolution, we can ignore it. 
+ */ + rd32(IGC_SYSTIMR); + nsec = rd32(IGC_SYSTIML); + sec = rd32(IGC_SYSTIMH); + + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} + +static void igc_ptp_write_i225(struct igc_adapter *adapter, + const struct timespec64 *ts) +{ + struct igc_hw *hw = &adapter->hw; + + /* Writing the SYSTIMR register is not necessary as it only + * provides sub-nanosecond resolution. + */ + wr32(IGC_SYSTIML, ts->tv_nsec); + wr32(IGC_SYSTIMH, ts->tv_sec); +} + +static int igc_ptp_adjfine_i225(struct ptp_clock_info *ptp, long scaled_ppm) +{ + struct igc_adapter *igc = container_of(ptp, struct igc_adapter, + ptp_caps); + struct igc_hw *hw = &igc->hw; + int neg_adj = 0; + u64 rate; + u32 inca; + + if (scaled_ppm < 0) { + neg_adj = 1; + scaled_ppm = -scaled_ppm; + } + rate = scaled_ppm; + rate <<= 14; + rate = div_u64(rate, 78125); + + inca = rate & INCVALUE_MASK; + if (neg_adj) + inca |= ISGN; + + wr32(IGC_TIMINCA, inca); + + return 0; +} + +static int igc_ptp_adjtime_i225(struct ptp_clock_info *ptp, s64 delta) +{ + struct igc_adapter *igc = container_of(ptp, struct igc_adapter, + ptp_caps); + struct timespec64 now, then = ns_to_timespec64(delta); + unsigned long flags; + + spin_lock_irqsave(&igc->tmreg_lock, flags); + + igc_ptp_read_i225(igc, &now); + now = timespec64_add(now, then); + igc_ptp_write_i225(igc, (const struct timespec64 *)&now); + + spin_unlock_irqrestore(&igc->tmreg_lock, flags); + + return 0; +} + +static int igc_ptp_gettimex64_i225(struct ptp_clock_info *ptp, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + struct igc_adapter *igc = container_of(ptp, struct igc_adapter, + ptp_caps); + struct igc_hw *hw = &igc->hw; + unsigned long flags; + + spin_lock_irqsave(&igc->tmreg_lock, flags); + + ptp_read_system_prets(sts); + rd32(IGC_SYSTIMR); + ptp_read_system_postts(sts); + ts->tv_nsec = rd32(IGC_SYSTIML); + ts->tv_sec = rd32(IGC_SYSTIMH); + + spin_unlock_irqrestore(&igc->tmreg_lock, flags); + + return 0; +} + +static int igc_ptp_settime_i225(struct 
ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct igc_adapter *igc = container_of(ptp, struct igc_adapter, + ptp_caps); + unsigned long flags; + + spin_lock_irqsave(&igc->tmreg_lock, flags); + + igc_ptp_write_i225(igc, ts); + + spin_unlock_irqrestore(&igc->tmreg_lock, flags); + + return 0; +} + +static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) +{ + return -EOPNOTSUPP; +} + +/** + * igc_ptp_systim_to_hwtstamp - convert system time value to HW timestamp + * @adapter: board private structure + * @hwtstamps: timestamp structure to update + * @systim: unsigned 64bit system time value + * + * We need to convert the system time value stored in the RX/TXSTMP registers + * into a hwtstamp which can be used by the upper level timestamping functions. + **/ +static void igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter, + struct skb_shared_hwtstamps *hwtstamps, + u64 systim) +{ + switch (adapter->hw.mac.type) { + case igc_i225: + memset(hwtstamps, 0, sizeof(*hwtstamps)); + /* Upper 32 bits contain s, lower 32 bits contain ns. */ + hwtstamps->hwtstamp = ktime_set(systim >> 32, + systim & 0xFFFFFFFF); + break; + default: + break; + } +} + +/** + * igc_ptp_rx_pktstamp - retrieve Rx per packet timestamp + * @q_vector: Pointer to interrupt specific structure + * @va: Pointer to address containing Rx buffer + * @skb: Buffer containing timestamp and packet + * + * This function is meant to retrieve the first timestamp from the + * first buffer of an incoming frame. The value is stored in little + * endian format starting on byte 0. There's a second timestamp + * starting on byte 8. + **/ +void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, void *va, + struct sk_buff *skb) +{ + struct igc_adapter *adapter = q_vector->adapter; + __le64 *regval = (__le64 *)va; + int adjust = 0; + + /* The timestamp is recorded in little endian format. 
+ * DWORD: | 0 | 1 | 2 | 3 + * Field: | Timer0 Low | Timer0 High | Timer1 Low | Timer1 High + */ + igc_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), + le64_to_cpu(regval[0])); + + /* adjust timestamp for the RX latency based on link speed */ + if (adapter->hw.mac.type == igc_i225) { + switch (adapter->link_speed) { + case SPEED_10: + adjust = IGC_I225_RX_LATENCY_10; + break; + case SPEED_100: + adjust = IGC_I225_RX_LATENCY_100; + break; + case SPEED_1000: + adjust = IGC_I225_RX_LATENCY_1000; + break; + case SPEED_2500: + adjust = IGC_I225_RX_LATENCY_2500; + break; + } + } + skb_hwtstamps(skb)->hwtstamp = + ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust); +} + +/** + * igc_ptp_rx_rgtstamp - retrieve Rx timestamp stored in register + * @q_vector: Pointer to interrupt specific structure + * @skb: Buffer containing timestamp and packet + * + * This function is meant to retrieve a timestamp from the internal registers + * of the adapter and store it in the skb. + */ +void igc_ptp_rx_rgtstamp(struct igc_q_vector *q_vector, + struct sk_buff *skb) +{ + struct igc_adapter *adapter = q_vector->adapter; + struct igc_hw *hw = &adapter->hw; + u64 regval; + + /* If this bit is set, then the RX registers contain the time + * stamp. No other packet will be time stamped until we read + * these registers, so read the registers to make them + * available again. Because only one packet can be time + * stamped at a time, we know that the register values must + * belong to this one here and therefore we don't need to + * compare any of the additional attributes stored for it. + * + * If nothing went wrong, then it should have a shared + * tx_flags that we can turn into a skb_shared_hwtstamps. 
+ */ + if (!(rd32(IGC_TSYNCRXCTL) & IGC_TSYNCRXCTL_VALID)) + return; + + regval = rd32(IGC_RXSTMPL); + regval |= (u64)rd32(IGC_RXSTMPH) << 32; + + igc_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); + + /* Update the last_rx_timestamp timer in order to enable watchdog check + * for error case of latched timestamp on a dropped packet. + */ + adapter->last_rx_timestamp = jiffies; +} + +/** + * igc_ptp_enable_tstamp_rxqueue - Enable RX timestamp for a queue + * @rx_ring: Pointer to RX queue + * @timer: Index for timer + * + * This function enables RX timestamping for a queue, and selects + * which 1588 timer will provide the timestamp. + */ +static void igc_ptp_enable_tstamp_rxqueue(struct igc_adapter *adapter, + struct igc_ring *rx_ring, u8 timer) +{ + struct igc_hw *hw = &adapter->hw; + int reg_idx = rx_ring->reg_idx; + u32 srrctl = rd32(IGC_SRRCTL(reg_idx)); + + srrctl |= IGC_SRRCTL_TIMESTAMP; + srrctl |= IGC_SRRCTL_TIMER1SEL(timer); + srrctl |= IGC_SRRCTL_TIMER0SEL(timer); + + wr32(IGC_SRRCTL(reg_idx), srrctl); +} + +static void igc_ptp_enable_tstamp_all_rxqueues(struct igc_adapter *adapter, + u8 timer) +{ + int i; + + for (i = 0; i < adapter->num_rx_queues; i++) { + struct igc_ring *ring = adapter->rx_ring[i]; + + igc_ptp_enable_tstamp_rxqueue(adapter, ring, timer); + } +} + +/** + * igc_ptp_set_timestamp_mode - setup hardware for timestamping + * @adapter: networking device structure + * @config: hwtstamp configuration + * + * Outgoing time stamping can be enabled and disabled. Play nice and + * disable it when requested, although it shouldn't case any overhead + * when no packet needs it. At most one packet in the queue may be + * marked for time stamping, otherwise it would be impossible to tell + * for sure to which packet the hardware time stamp belongs. + * + * Incoming time stamping has to be configured via the hardware + * filters. Not all combinations are supported, in particular event + * type has to be specified. 
Matching the kind of event packet is + * not supported, with the exception of "all V2 events regardless of + * level 2 or 4". + * + */ +static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter, + struct hwtstamp_config *config) +{ + u32 tsync_tx_ctl = IGC_TSYNCTXCTL_ENABLED; + u32 tsync_rx_ctl = IGC_TSYNCRXCTL_ENABLED; + struct igc_hw *hw = &adapter->hw; + u32 tsync_rx_cfg = 0; + bool is_l4 = false; + bool is_l2 = false; + u32 regval; + + /* reserved for future extensions */ + if (config->flags) + return -EINVAL; + + switch (config->tx_type) { + case HWTSTAMP_TX_OFF: + tsync_tx_ctl = 0; + case HWTSTAMP_TX_ON: + break; + default: + return -ERANGE; + } + + switch (config->rx_filter) { + case HWTSTAMP_FILTER_NONE: + tsync_rx_ctl = 0; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + tsync_rx_ctl |= IGC_TSYNCRXCTL_TYPE_L4_V1; + tsync_rx_cfg = IGC_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE; + is_l4 = true; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + tsync_rx_ctl |= IGC_TSYNCRXCTL_TYPE_L4_V1; + tsync_rx_cfg = IGC_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE; + is_l4 = true; + break; + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + tsync_rx_ctl |= IGC_TSYNCRXCTL_TYPE_EVENT_V2; + config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + is_l2 = true; + is_l4 = true; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_NTP_ALL: + case HWTSTAMP_FILTER_ALL: + tsync_rx_ctl |= IGC_TSYNCRXCTL_TYPE_ALL; + config->rx_filter = HWTSTAMP_FILTER_ALL; + break; + /* fall through */ + default: + config->rx_filter = HWTSTAMP_FILTER_NONE; + return -ERANGE; + } + + /* Per-packet timestamping only works if all packets are + * timestamped, so enable timestamping in all 
packets as long + * as one Rx filter was configured. + */ + if (tsync_rx_ctl) { + tsync_rx_ctl = IGC_TSYNCRXCTL_ENABLED; + tsync_rx_ctl |= IGC_TSYNCRXCTL_TYPE_ALL; + tsync_rx_ctl |= IGC_TSYNCRXCTL_RXSYNSIG; + config->rx_filter = HWTSTAMP_FILTER_ALL; + is_l2 = true; + is_l4 = true; + + if (hw->mac.type == igc_i225) { + regval = rd32(IGC_RXPBS); + regval |= IGC_RXPBS_CFG_TS_EN; + wr32(IGC_RXPBS, regval); + + /* FIXME: For now, only support retrieving RX + * timestamps from timer 0 + */ + igc_ptp_enable_tstamp_all_rxqueues(adapter, 0); + } + } + + if (tsync_tx_ctl) { + tsync_tx_ctl = IGC_TSYNCTXCTL_ENABLED; + tsync_tx_ctl |= IGC_TSYNCTXCTL_TXSYNSIG; + } + + /* enable/disable TX */ + regval = rd32(IGC_TSYNCTXCTL); + regval &= ~IGC_TSYNCTXCTL_ENABLED; + regval |= tsync_tx_ctl; + wr32(IGC_TSYNCTXCTL, regval); + + /* enable/disable RX */ + regval = rd32(IGC_TSYNCRXCTL); + regval &= ~(IGC_TSYNCRXCTL_ENABLED | IGC_TSYNCRXCTL_TYPE_MASK); + regval |= tsync_rx_ctl; + wr32(IGC_TSYNCRXCTL, regval); + + /* define which PTP packets are time stamped */ + wr32(IGC_TSYNCRXCFG, tsync_rx_cfg); + + /* define ethertype filter for timestamped packets */ + if (is_l2) + wr32(IGC_ETQF(3), + (IGC_ETQF_FILTER_ENABLE | /* enable filter */ + IGC_ETQF_1588 | /* enable timestamping */ + ETH_P_1588)); /* 1588 eth protocol type */ + else + wr32(IGC_ETQF(3), 0); + + /* L4 Queue Filter[3]: filter by destination port and protocol */ + if (is_l4) { + u32 ftqf = (IPPROTO_UDP /* UDP */ + | IGC_FTQF_VF_BP /* VF not compared */ + | IGC_FTQF_1588_TIME_STAMP /* Enable Timestamp */ + | IGC_FTQF_MASK); /* mask all inputs */ + ftqf &= ~IGC_FTQF_MASK_PROTO_BP; /* enable protocol check */ + + wr32(IGC_IMIR(3), htons(PTP_EV_PORT)); + wr32(IGC_IMIREXT(3), + (IGC_IMIREXT_SIZE_BP | IGC_IMIREXT_CTRL_BP)); + wr32(IGC_FTQF(3), ftqf); + } else { + wr32(IGC_FTQF(3), IGC_FTQF_MASK); + } + wrfl(); + + /* clear TX/RX time stamp registers, just to be sure */ + regval = rd32(IGC_TXSTMPL); + regval = rd32(IGC_TXSTMPH); + regval 
= rd32(IGC_RXSTMPL); + regval = rd32(IGC_RXSTMPH); + + return 0; +} + +void igc_ptp_tx_hang(struct igc_adapter *adapter) +{ + bool timeout = time_is_before_jiffies(adapter->ptp_tx_start + + IGC_PTP_TX_TIMEOUT); + struct igc_hw *hw = &adapter->hw; + + if (!adapter->ptp_tx_skb) + return; + + if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state)) + return; + + /* If we haven't received a timestamp within the timeout, it is + * reasonable to assume that it will never occur, so we can unlock the + * timestamp bit when this occurs. + */ + if (timeout) { + cancel_work_sync(&adapter->ptp_tx_work); + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; + clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); + adapter->tx_hwtstamp_timeouts++; + /* Clear the Tx valid bit in TSYNCTXCTL register to enable + * interrupt + */ + rd32(IGC_TXSTMPH); + dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang\n"); + } +} + +/** + * igc_ptp_tx_hwtstamp - utility function which checks for TX time stamp + * @adapter: Board private structure + * + * If we were asked to do hardware stamping and such a time stamp is + * available, then it must have been for this skb here because we only + * allow only one such packet into the queue. + */ +static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) +{ + struct sk_buff *skb = adapter->ptp_tx_skb; + struct skb_shared_hwtstamps shhwtstamps; + struct igc_hw *hw = &adapter->hw; + u64 regval; + + regval = rd32(IGC_TXSTMPL); + regval |= (u64)rd32(IGC_TXSTMPH) << 32; + igc_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); + + /* Clear the lock early before calling skb_tstamp_tx so that + * applications are not woken up before the lock bit is clear. We use + * a copy of the skb pointer to ensure other threads can't change it + * while we're notifying the stack. 
+ */ + adapter->ptp_tx_skb = NULL; + clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); + + /* Notify the stack and free the skb after we've unlocked */ + skb_tstamp_tx(skb, &shhwtstamps); + dev_kfree_skb_any(skb); +} + +/** + * igc_ptp_tx_work + * @work: pointer to work struct + * + * This work function polls the TSYNCTXCTL valid bit to determine when a + * timestamp has been taken for the current stored skb. + */ +void igc_ptp_tx_work(struct work_struct *work) +{ + struct igc_adapter *adapter = container_of(work, struct igc_adapter, + ptp_tx_work); + struct igc_hw *hw = &adapter->hw; + u32 tsynctxctl; + + if (!adapter->ptp_tx_skb) + return; + + if (time_is_before_jiffies(adapter->ptp_tx_start + + IGC_PTP_TX_TIMEOUT)) { + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; + clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); + adapter->tx_hwtstamp_timeouts++; + /* Clear the tx valid bit in TSYNCTXCTL register to enable + * interrupt + */ + rd32(IGC_TXSTMPH); + dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang\n"); + return; + } + + tsynctxctl = rd32(IGC_TSYNCTXCTL); + if (tsynctxctl & IGC_TSYNCTXCTL_VALID) + igc_ptp_tx_hwtstamp(adapter); + else + /* reschedule to check later */ + schedule_work(&adapter->ptp_tx_work); +} + +/** + * igc_ptp_set_ts_config - set hardware time stamping config + * @netdev: network interface device structure + * @ifreq: interface request data + * + **/ +int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct hwtstamp_config config; + int err; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + err = igc_ptp_set_timestamp_mode(adapter, &config); + if (err) + return err; + + /* save these settings for future reference */ + memcpy(&adapter->tstamp_config, &config, + sizeof(adapter->tstamp_config)); + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? 
+ -EFAULT : 0; +} + +/** + * igc_ptp_get_ts_config - get hardware time stamping config + * @netdev: network interface device structure + * @ifreq: interface request data + * + * Get the hwtstamp_config settings to return to the user. Rather than attempt + * to deconstruct the settings from the registers, just return a shadow copy + * of the last known settings. + **/ +int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct hwtstamp_config *config = &adapter->tstamp_config; + + return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ? + -EFAULT : 0; +} + +/** + * igc_ptp_init - Initialize PTP functionality + * @adapter: Board private structure + * + * This function is called at device probe to initialize the PTP + * functionality. + */ +void igc_ptp_init(struct igc_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct igc_hw *hw = &adapter->hw; + + switch (hw->mac.type) { + case igc_i225: + snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); + adapter->ptp_caps.owner = THIS_MODULE; + adapter->ptp_caps.max_adj = 62499999; + adapter->ptp_caps.adjfine = igc_ptp_adjfine_i225; + adapter->ptp_caps.adjtime = igc_ptp_adjtime_i225; + adapter->ptp_caps.gettimex64 = igc_ptp_gettimex64_i225; + adapter->ptp_caps.settime64 = igc_ptp_settime_i225; + adapter->ptp_caps.enable = igc_ptp_feature_enable_i225; + break; + default: + adapter->ptp_clock = NULL; + return; + } + + spin_lock_init(&adapter->tmreg_lock); + INIT_WORK(&adapter->ptp_tx_work, igc_ptp_tx_work); + + adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; + adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF; + + igc_ptp_reset(adapter); + + adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps, + &adapter->pdev->dev); + if (IS_ERR(adapter->ptp_clock)) { + adapter->ptp_clock = NULL; + dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n"); + } else if (adapter->ptp_clock) { + 
dev_info(&adapter->pdev->dev, "added PHC on %s\n", + adapter->netdev->name); + adapter->ptp_flags |= IGC_PTP_ENABLED; + } +} + +/** + * igc_ptp_suspend - Disable PTP work items and prepare for suspend + * @adapter: Board private structure + * + * This function stops the overflow check work and PTP Tx timestamp work, and + * will prepare the device for OS suspend. + */ +void igc_ptp_suspend(struct igc_adapter *adapter) +{ + if (!(adapter->ptp_flags & IGC_PTP_ENABLED)) + return; + + cancel_work_sync(&adapter->ptp_tx_work); + if (adapter->ptp_tx_skb) { + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; + clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); + } +} + +/** + * igc_ptp_stop - Disable PTP device and stop the overflow check. + * @adapter: Board private structure. + * + * This function stops the PTP support and cancels the delayed work. + **/ +void igc_ptp_stop(struct igc_adapter *adapter) +{ + igc_ptp_suspend(adapter); + + if (adapter->ptp_clock) { + ptp_clock_unregister(adapter->ptp_clock); + dev_info(&adapter->pdev->dev, "removed PHC on %s\n", + adapter->netdev->name); + adapter->ptp_flags &= ~IGC_PTP_ENABLED; + } +} + +/** + * igc_ptp_reset - Re-enable the adapter for PTP following a reset. + * @adapter: Board private structure. + * + * This function handles the reset work required to re-enable the PTP device. + **/ +void igc_ptp_reset(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + unsigned long flags; + + /* reset the tstamp_config */ + igc_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config); + + spin_lock_irqsave(&adapter->tmreg_lock, flags); + + switch (adapter->hw.mac.type) { + case igc_i225: + wr32(IGC_TSAUXC, 0x0); + wr32(IGC_TSSDP, 0x0); + wr32(IGC_TSIM, IGC_TSICR_INTERRUPTS); + wr32(IGC_IMS, IGC_IMS_TS); + break; + default: + /* No work to do. */ + goto out; + } + + /* Re-initialize the timer. 
*/ + if (hw->mac.type == igc_i225) { + struct timespec64 ts64 = ktime_to_timespec64(ktime_get_real()); + + igc_ptp_write_i225(adapter, &ts64); + } else { + timecounter_init(&adapter->tc, &adapter->cc, + ktime_to_ns(ktime_get_real())); + } +out: + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + + wrfl(); +} diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index 50d7c04dccf5..c9029b549b90 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -12,6 +12,7 @@ #define IGC_MDIC 0x00020 /* MDI Control - RW */ #define IGC_MDICNFG 0x00E04 /* MDC/MDIO Configuration - RW */ #define IGC_CONNSW 0x00034 /* Copper/Fiber switch control - RW */ +#define IGC_I225_PHPM 0x00E14 /* I225 PHY Power Management */ /* Internal Packet Buffer Size Registers */ #define IGC_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */ @@ -209,12 +210,48 @@ #define IGC_LENERRS 0x04138 /* Length Errors Count */ #define IGC_HRMPC 0x0A018 /* Header Redirection Missed Packet Count */ +/* Time sync registers */ +#define IGC_TSICR 0x0B66C /* Time Sync Interrupt Cause */ +#define IGC_TSIM 0x0B674 /* Time Sync Interrupt Mask Register */ +#define IGC_TSAUXC 0x0B640 /* Timesync Auxiliary Control register */ +#define IGC_TSYNCRXCTL 0x0B620 /* Rx Time Sync Control register - RW */ +#define IGC_TSYNCTXCTL 0x0B614 /* Tx Time Sync Control register - RW */ +#define IGC_TSYNCRXCFG 0x05F50 /* Time Sync Rx Configuration - RW */ +#define IGC_TSSDP 0x0003C /* Time Sync SDP Configuration Register - RW */ + +#define IGC_IMIR(_i) (0x05A80 + ((_i) * 4)) /* Immediate Interrupt */ +#define IGC_IMIREXT(_i) (0x05AA0 + ((_i) * 4)) /* Immediate INTR Ext*/ + +#define IGC_FTQF(_n) (0x059E0 + (4 * (_n))) /* 5-tuple Queue Fltr */ + +#define IGC_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */ + +/* System Time Registers */ +#define IGC_SYSTIML 0x0B600 /* System time register Low - RO */ +#define IGC_SYSTIMH 0x0B604 /* System time register 
High - RO */ +#define IGC_SYSTIMR 0x0B6F8 /* System time register Residue */ +#define IGC_TIMINCA 0x0B608 /* Increment attributes register - RW */ + +#define IGC_RXSTMPL 0x0B624 /* Rx timestamp Low - RO */ +#define IGC_RXSTMPH 0x0B628 /* Rx timestamp High - RO */ +#define IGC_TXSTMPL 0x0B618 /* Tx timestamp value Low - RO */ +#define IGC_TXSTMPH 0x0B61C /* Tx timestamp value High - RO */ + /* Management registers */ #define IGC_MANC 0x05820 /* Management Control - RW */ /* Shadow Ram Write Register - RW */ #define IGC_SRWR 0x12018 +/* Wake Up registers */ +#define IGC_WUC 0x05800 /* Wakeup Control - RW */ +#define IGC_WUFC 0x05808 /* Wakeup Filter Control - RW */ +#define IGC_WUS 0x05810 /* Wakeup Status - R/W1C */ +#define IGC_WUPL 0x05900 /* Wakeup Packet Length - RW */ + +/* Wake Up packet memory */ +#define IGC_WUPM_REG(_i) (0x05A00 + ((_i) * 4)) + /* forward declaration */ struct igc_hw; u32 igc_rd32(struct igc_hw *hw, u32 reg); diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index 3d8c051dd327..b64e91ea3465 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -70,7 +70,7 @@ static int ixgb_clean(struct napi_struct *, int); static bool ixgb_clean_rx_irq(struct ixgb_adapter *, int *, int); static void ixgb_alloc_rx_buffers(struct ixgb_adapter *, int); -static void ixgb_tx_timeout(struct net_device *dev); +static void ixgb_tx_timeout(struct net_device *dev, unsigned int txqueue); static void ixgb_tx_timeout_task(struct work_struct *work); static void ixgb_vlan_strip_enable(struct ixgb_adapter *adapter); @@ -1538,7 +1538,7 @@ ixgb_xmit_frame(struct sk_buff *skb, struct net_device *netdev) **/ static void -ixgb_tx_timeout(struct net_device *netdev) +ixgb_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ixgb_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c index 171cdc552961..5b1cf49df3d3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c @@ -166,7 +166,9 @@ static ssize_t ixgbe_dbg_netdev_ops_write(struct file *filp, ixgbe_dbg_netdev_ops_buf[len] = '\0'; if (strncmp(ixgbe_dbg_netdev_ops_buf, "tx_timeout", 10) == 0) { - adapter->netdev->netdev_ops->ndo_tx_timeout(adapter->netdev); + /* TX Queue number below is wrong, but ixgbe does not use it */ + adapter->netdev->netdev_ops->ndo_tx_timeout(adapter->netdev, + UINT_MAX); e_dev_info("tx_timeout called\n"); } else { e_dev_info("Unknown command: %s\n", ixgbe_dbg_netdev_ops_buf); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index a2b2ad1f60b1..718931d951bc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6175,7 +6175,7 @@ static void ixgbe_set_eee_capable(struct ixgbe_adapter *adapter) * ixgbe_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void ixgbe_tx_timeout(struct net_device *netdev) +static void ixgbe_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ixgbe_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index b43be9f14105..74b540ebb3dc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -277,7 +277,7 @@ static bool ixgbe_alloc_buffer_zc(struct ixgbe_ring *rx_ring, bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom); - xsk_umem_discard_addr(umem); + xsk_umem_release_addr(umem); return true; } @@ -304,7 +304,7 @@ static bool ixgbe_alloc_buffer_slow_zc(struct ixgbe_ring *rx_ring, bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom); - xsk_umem_discard_addr_rq(umem); + 
xsk_umem_release_addr_rq(umem); return true; } diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 64ec0e7c64b4..4622c4ea2e46 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -250,7 +250,7 @@ static void ixgbevf_tx_timeout_reset(struct ixgbevf_adapter *adapter) * ixgbevf_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void ixgbevf_tx_timeout(struct net_device *netdev) +static void ixgbevf_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 25aa400e2e3c..2e4975572e9f 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -2337,7 +2337,7 @@ jme_change_mtu(struct net_device *netdev, int new_mtu) } static void -jme_tx_timeout(struct net_device *netdev) +jme_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct jme_adapter *jme = netdev_priv(netdev); diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index ae195f8adff5..f98d9d627c71 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -917,7 +917,7 @@ static void korina_restart_task(struct work_struct *work) enable_irq(lp->rx_irq); } -static void korina_tx_timeout(struct net_device *dev) +static void korina_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct korina_private *lp = netdev_priv(dev); diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 6e73ffe6f928..028e3e6222e9 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -594,7 +594,7 @@ err_hw: } static void -ltq_etop_tx_timeout(struct net_device *dev) +ltq_etop_tx_timeout(struct net_device *dev, unsigned int txqueue) { int err; diff --git 
a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 65a093216dac..3c8125cbc84d 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2590,7 +2590,7 @@ static void tx_timeout_task(struct work_struct *ugly) } } -static void mv643xx_eth_tx_timeout(struct net_device *dev) +static void mv643xx_eth_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mv643xx_eth_private *mp = netdev_priv(dev); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 67ad8b8b127d..0449d4b28ade 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3072,7 +3072,7 @@ static int mvneta_create_page_pool(struct mvneta_port *pp, .order = 0, .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, .pool_size = size, - .nid = cpu_to_node(0), + .nid = NUMA_NO_NODE, .dev = pp->dev->dev.parent, .dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE, .offset = pp->rx_offset_correction, diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 14e372cda7f4..72133cbe55d4 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -1114,7 +1114,7 @@ mvpp2_shared_interrupt_mask_unmask(struct mvpp2_port *port, bool mask) /* Port configuration routines */ static bool mvpp2_is_xlg(phy_interface_t interface) { - return interface == PHY_INTERFACE_MODE_10GKR || + return interface == PHY_INTERFACE_MODE_10GBASER || interface == PHY_INTERFACE_MODE_XAUI; } @@ -1200,7 +1200,7 @@ static int mvpp22_gop_init(struct mvpp2_port *port) case PHY_INTERFACE_MODE_2500BASEX: mvpp22_gop_init_sgmii(port); break; - case PHY_INTERFACE_MODE_10GKR: + case PHY_INTERFACE_MODE_10GBASER: if (port->gop_id != 0) goto invalid_conf; mvpp22_gop_init_10gkr(port); @@ -1649,7 +1649,7 @@ static void mvpp22_pcs_reset_deassert(struct mvpp2_port 
*port) xpcs = priv->iface_base + MVPP22_XPCS_BASE(port->gop_id); switch (port->phy_interface) { - case PHY_INTERFACE_MODE_10GKR: + case PHY_INTERFACE_MODE_10GBASER: val = readl(mpcs + MVPP22_MPCS_CLK_RESET); val |= MAC_CLK_RESET_MAC | MAC_CLK_RESET_SD_RX | MAC_CLK_RESET_SD_TX; @@ -4758,7 +4758,7 @@ static void mvpp2_phylink_validate(struct phylink_config *config, /* Invalid combinations */ switch (state->interface) { - case PHY_INTERFACE_MODE_10GKR: + case PHY_INTERFACE_MODE_10GBASER: case PHY_INTERFACE_MODE_XAUI: if (port->gop_id != 0) goto empty_set; @@ -4780,7 +4780,7 @@ static void mvpp2_phylink_validate(struct phylink_config *config, phylink_set(mask, Asym_Pause); switch (state->interface) { - case PHY_INTERFACE_MODE_10GKR: + case PHY_INTERFACE_MODE_10GBASER: case PHY_INTERFACE_MODE_XAUI: case PHY_INTERFACE_MODE_NA: if (port->gop_id == 0) { @@ -4792,6 +4792,8 @@ static void mvpp2_phylink_validate(struct phylink_config *config, phylink_set(mask, 10000baseER_Full); phylink_set(mask, 10000baseKR_Full); } + if (state->interface != PHY_INTERFACE_MODE_NA) + break; /* Fall-through */ case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: @@ -4802,13 +4804,23 @@ static void mvpp2_phylink_validate(struct phylink_config *config, phylink_set(mask, 10baseT_Full); phylink_set(mask, 100baseT_Half); phylink_set(mask, 100baseT_Full); + phylink_set(mask, 1000baseT_Full); + phylink_set(mask, 1000baseX_Full); + if (state->interface != PHY_INTERFACE_MODE_NA) + break; /* Fall-through */ case PHY_INTERFACE_MODE_1000BASEX: case PHY_INTERFACE_MODE_2500BASEX: - phylink_set(mask, 1000baseT_Full); - phylink_set(mask, 1000baseX_Full); - phylink_set(mask, 2500baseT_Full); - phylink_set(mask, 2500baseX_Full); + if (port->comphy || + state->interface != PHY_INTERFACE_MODE_2500BASEX) { + phylink_set(mask, 1000baseT_Full); + phylink_set(mask, 1000baseX_Full); + } + if (port->comphy || + state->interface == PHY_INTERFACE_MODE_2500BASEX) { + phylink_set(mask, 2500baseT_Full); + 
phylink_set(mask, 2500baseX_Full); + } break; default: goto empty_set; @@ -4817,6 +4829,8 @@ static void mvpp2_phylink_validate(struct phylink_config *config, bitmap_and(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS); bitmap_and(state->advertising, state->advertising, mask, __ETHTOOL_LINK_MODE_MASK_NBITS); + + phylink_helper_basex_speed(state); return; empty_set: @@ -5233,6 +5247,15 @@ static int mvpp2_port_probe(struct platform_device *pdev, goto err_free_netdev; } + /* + * Rewrite 10GBASE-KR to 10GBASE-R for compatibility with existing DT. + * Existing usage of 10GBASE-KR is not correct; no backplane + * negotiation is done, and this driver does not actually support + * 10GBASE-KR. + */ + if (phy_mode == PHY_INTERFACE_MODE_10GKR) + phy_mode = PHY_INTERFACE_MODE_10GBASER; + if (port_node) { comphy = devm_of_phy_get(&pdev->dev, port_node, NULL); if (IS_ERR(comphy)) { @@ -5411,6 +5434,16 @@ static int mvpp2_port_probe(struct platform_device *pdev, port->phylink = NULL; } + /* Cycle the comphy to power it down, saving 270mW per port - + * don't worry about an error powering it up. When the comphy + * driver does this, we can remove this code. 
+ */ + if (port->comphy) { + err = mvpp22_comphy_init(port); + if (err == 0) + phy_power_off(port->comphy); + } + err = register_netdev(dev); if (err < 0) { dev_err(&pdev->dev, "failed to register netdev\n"); diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index 3fb7ee3d4d13..1a6877902dd6 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -742,7 +742,7 @@ txq_reclaim_end: return released; } -static void pxa168_eth_tx_timeout(struct net_device *dev) +static void pxa168_eth_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct pxa168_eth_private *pep = netdev_priv(dev); diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 095f6c71b4fa..8ca15958e752 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -2884,7 +2884,7 @@ static void skge_tx_clean(struct net_device *dev) skge->tx_ring.to_clean = e; } -static void skge_tx_timeout(struct net_device *dev) +static void skge_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct skge_port *skge = netdev_priv(dev); diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 5f56ee83e3b1..acd1cba987fb 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -2358,7 +2358,7 @@ static void sky2_qlink_intr(struct sky2_hw *hw) /* Transmit timeout is only called if we are running, carrier is up * and tx queue is full (stopped). 
*/ -static void sky2_tx_timeout(struct net_device *dev) +static void sky2_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sky2_port *sky2 = netdev_priv(dev); struct sky2_hw *hw = sky2->hw; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 527ad2aadcca..8c6cfd15481c 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2081,7 +2081,7 @@ static void mtk_dma_free(struct mtk_eth *eth) kfree(eth->scratch_head); } -static void mtk_tx_timeout(struct net_device *dev) +static void mtk_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mtk_mac *mac = netdev_priv(dev); struct mtk_eth *eth = mac->hw; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 7af75b63245f..43dcbd8214c6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1363,24 +1363,18 @@ static void mlx4_en_delete_rss_steer_rules(struct mlx4_en_priv *priv) } } -static void mlx4_en_tx_timeout(struct net_device *dev) +static void mlx4_en_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; - int i; + struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX][txqueue]; if (netif_msg_timer(priv)) en_warn(priv, "Tx timeout called on port:%d\n", priv->port); - for (i = 0; i < priv->tx_ring_num[TX]; i++) { - struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX][i]; - - if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, i))) - continue; - en_warn(priv, "TX timeout on queue: %d, QP: 0x%x, CQ: 0x%x, Cons: 0x%x, Prod: 0x%x\n", - i, tx_ring->qpn, tx_ring->sp_cqn, - tx_ring->cons, tx_ring->prod); - } + en_warn(priv, "TX timeout on queue: %d, QP: 0x%x, CQ: 0x%x, Cons: 0x%x, Prod: 0x%x\n", + txqueue, tx_ring->qpn, tx_ring->sp_cqn, + tx_ring->cons, tx_ring->prod); 
priv->port_stats.tx_timeout++; en_dbg(DRV, priv, "Scheduling watchdog\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index a6f390fdb971..d3e06cec8317 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -42,7 +42,7 @@ mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o # Core extra # mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ - ecpf.o rdma.o + ecpf.o rdma.o eswitch_offloads_chains.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index 549f962cd86e..42198e64a7f4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -71,8 +71,8 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev, return cpu_handle; } -int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, - struct mlx5_frag_buf *buf, int node) +static int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, + struct mlx5_frag_buf *buf, int node) { dma_addr_t t; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 9c8427698238..fc80b59db9a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -135,7 +135,7 @@ struct page_pool; #define MLX5E_LOG_INDIR_RQT_SIZE 0x7 #define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE) #define MLX5E_MIN_NUM_CHANNELS 0x1 -#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE >> 1) +#define MLX5E_MAX_NUM_CHANNELS MLX5E_INDIR_RQT_SIZE #define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 #define MLX5E_TX_XSK_POLL_BUDGET 64 @@ -1175,11 
+1175,11 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv); void mlx5e_detach_netdev(struct mlx5e_priv *priv); void mlx5e_destroy_netdev(struct mlx5e_priv *priv); void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv); -void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, +void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, struct mlx5e_rss_params *rss_params, struct mlx5e_params *params, - u16 max_channels, u16 mtu); + u16 mtu); void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index 475b6bd5d29b..62fc8a128a8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -35,7 +35,7 @@ int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq, */ dma_info->addr = xdp_umem_get_dma(umem, handle); - xsk_umem_discard_addr_rq(umem); + xsk_umem_release_addr_rq(umem); dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE, DMA_BIDIRECTIONAL); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index acd946f2ddbe..3bc2ac3d53fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -58,6 +58,7 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv, struct ethtool_rx_flow_spec *fs, int num_tuples) { + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5e_ethtool_table *eth_ft; struct mlx5_flow_namespace *ns; struct mlx5_flow_table *ft; @@ -102,9 +103,11 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv, table_size = min_t(u32, BIT(MLX5_CAP_FLOWTABLE(priv->mdev, flow_table_properties_nic_receive.log_max_ft_size)), MLX5E_ETHTOOL_NUM_ENTRIES); - ft = 
mlx5_create_auto_grouped_flow_table(ns, prio, - table_size, - MLX5E_ETHTOOL_NUM_GROUPS, 0, 0); + + ft_attr.prio = prio; + ft_attr.max_fte = table_size; + ft_attr.autogroup.max_num_groups = MLX5E_ETHTOOL_NUM_GROUPS; + ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); if (IS_ERR(ft)) return (void *)ft; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 4997b8a51994..78737fd42616 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4325,7 +4325,7 @@ unlock: rtnl_unlock(); } -static void mlx5e_tx_timeout(struct net_device *dev) +static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -4739,17 +4739,19 @@ void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, tirc_default_config[tt].rx_hash_fields; } -void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, +void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, struct mlx5e_rss_params *rss_params, struct mlx5e_params *params, - u16 max_channels, u16 mtu) + u16 mtu) { + struct mlx5_core_dev *mdev = priv->mdev; u8 rx_cq_period_mode; params->sw_mtu = mtu; params->hard_mtu = MLX5E_ETH_HARD_MTU; - params->num_channels = max_channels; + params->num_channels = min_t(unsigned int, MLX5E_MAX_NUM_CHANNELS / 2, + priv->max_nch); params->num_tc = 1; /* SQ */ @@ -4986,8 +4988,8 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, if (err) return err; - mlx5e_build_nic_params(mdev, &priv->xsk, rss, &priv->channels.params, - priv->max_nch, netdev->mtu); + mlx5e_build_nic_params(priv, &priv->xsk, rss, &priv->channels.params, + netdev->mtu); mlx5e_timestamp_init(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index f175cb24bb67..446eb4d6c983 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -41,6 +41,7 @@ #include <net/ipv6_stubs.h> #include "eswitch.h" +#include "eswitch_offloads_chains.h" #include "en.h" #include "en_rep.h" #include "en_tc.h" @@ -1247,8 +1248,7 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { - struct flow_cls_offload *f = type_data; - struct flow_cls_offload cls_flower; + struct flow_cls_offload tmp, *f = type_data; struct mlx5e_priv *priv = cb_priv; struct mlx5_eswitch *esw; unsigned long flags; @@ -1261,16 +1261,30 @@ static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data, switch (type) { case TC_SETUP_CLSFLOWER: - if (!mlx5_eswitch_prios_supported(esw) || f->common.chain_index) + memcpy(&tmp, f, sizeof(*f)); + + if (!mlx5_esw_chains_prios_supported(esw) || + tmp.common.chain_index) return -EOPNOTSUPP; /* Re-use tc offload path by moving the ft flow to the * reserved ft chain. + * + * FT offload can use prio range [0, INT_MAX], so we normalize + * it to range [1, mlx5_esw_chains_get_prio_range(esw)] + * as with tc, where prio 0 isn't supported. + * + * We only support chain 0 of FT offload. 
*/ - memcpy(&cls_flower, f, sizeof(*f)); - cls_flower.common.chain_index = FDB_FT_CHAIN; - err = mlx5e_rep_setup_tc_cls_flower(priv, &cls_flower, flags); - memcpy(&f->stats, &cls_flower.stats, sizeof(f->stats)); + if (tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw)) + return -EOPNOTSUPP; + if (tmp.common.chain_index != 0) + return -EOPNOTSUPP; + + tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw); + tmp.common.prio++; + err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags); + memcpy(&f->stats, &tmp.stats, sizeof(f->stats)); return err; default: return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 9f09253f9f46..4291db78efc9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -297,6 +297,9 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_tls_drop_bypass_req += sq_stats->tls_drop_bypass_req; #endif s->tx_cqes += sq_stats->cqes; + + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ + barrier(); } } } @@ -1130,6 +1133,7 @@ static void mlx5e_grp_per_port_buffer_congest_update_stats(struct mlx5e_priv *pr static const struct counter_desc pport_per_prio_traffic_stats_desc[] = { { "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) }, { "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) }, + { "rx_prio%d_discards", PPORT_PER_PRIO_OFF(rx_discards) }, { "tx_prio%d_bytes", PPORT_PER_PRIO_OFF(tx_octets) }, { "tx_prio%d_packets", PPORT_PER_PRIO_OFF(tx_frames) }, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 024e1cddfd0e..26f559b453dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -51,6 +51,7 @@ #include "en_rep.h" #include "en_tc.h" #include "eswitch.h" +#include "eswitch_offloads_chains.h" #include "fs_core.h" #include "en/port.h" 
#include "en/tc_tun.h" @@ -960,7 +961,8 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, mutex_lock(&priv->fs.tc.t_lock); if (IS_ERR_OR_NULL(priv->fs.tc.t)) { - int tc_grp_size, tc_tbl_size; + struct mlx5_flow_table_attr ft_attr = {}; + int tc_grp_size, tc_tbl_size, tc_num_grps; u32 max_flow_counter; max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | @@ -970,13 +972,15 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS, BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size))); + tc_num_grps = MLX5E_TC_TABLE_NUM_GROUPS; + ft_attr.prio = MLX5E_TC_PRIO; + ft_attr.max_fte = tc_tbl_size; + ft_attr.level = MLX5E_TC_FT_LEVEL; + ft_attr.autogroup.max_num_groups = tc_num_grps; priv->fs.tc.t = mlx5_create_auto_grouped_flow_table(priv->fs.ns, - MLX5E_TC_PRIO, - tc_tbl_size, - MLX5E_TC_TABLE_NUM_GROUPS, - MLX5E_TC_FT_LEVEL, 0); + &ft_attr); if (IS_ERR(priv->fs.tc.t)) { mutex_unlock(&priv->fs.tc.t_lock); NL_SET_ERR_MSG_MOD(extack, @@ -1080,7 +1084,7 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; slow_attr->split_count = 0; - slow_attr->dest_chain = FDB_TC_SLOW_PATH_CHAIN; + slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); if (!IS_ERR(rule)) @@ -1097,7 +1101,7 @@ mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; slow_attr->split_count = 0; - slow_attr->dest_chain = FDB_TC_SLOW_PATH_CHAIN; + slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); flow_flag_clear(flow, SLOW); } @@ -1157,19 +1161,18 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - u32 max_chain = 
mlx5_eswitch_get_chain_range(esw); struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; - u16 max_prio = mlx5_eswitch_get_prio_range(esw); struct net_device *out_dev, *encap_dev = NULL; struct mlx5_fc *counter = NULL; struct mlx5e_rep_priv *rpriv; struct mlx5e_priv *out_priv; bool encap_valid = true; + u32 max_prio, max_chain; int err = 0; int out_index; - if (!mlx5_eswitch_prios_supported(esw) && attr->prio != 1) { + if (!mlx5_esw_chains_prios_supported(esw) && attr->prio != 1) { NL_SET_ERR_MSG(extack, "E-switch priorities unsupported, upgrade FW"); return -EOPNOTSUPP; } @@ -1179,11 +1182,13 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, * FDB_FT_CHAIN which is outside tc range. * See mlx5e_rep_setup_ft_cb(). */ + max_chain = mlx5_esw_chains_get_chain_range(esw); if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) { NL_SET_ERR_MSG(extack, "Requested chain is out of supported range"); return -EOPNOTSUPP; } + max_prio = mlx5_esw_chains_get_prio_range(esw); if (attr->prio > max_prio) { NL_SET_ERR_MSG(extack, "Requested priority is out of supported range"); return -EOPNOTSUPP; @@ -2842,6 +2847,10 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, flow_action_for_each(i, act, flow_action) { switch (act->id) { + case FLOW_ACTION_ACCEPT: + action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + break; case FLOW_ACTION_DROP: action |= MLX5_FLOW_CONTEXT_ACTION_DROP; if (MLX5_CAP_FLOWTABLE(priv->mdev, @@ -3462,7 +3471,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, break; case FLOW_ACTION_GOTO: { u32 dest_chain = act->chain_index; - u32 max_chain = mlx5_eswitch_get_chain_range(esw); + u32 max_chain = mlx5_esw_chains_get_chain_range(esw); if (ft_flow) { NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 580c71cb9dfa..cccea3a8eddd 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -156,7 +156,8 @@ static int mlx5_eq_comp_int(struct notifier_block *nb, cq->comp(cq, eqe); mlx5_cq_put(cq); } else { - mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn); + dev_dbg_ratelimited(eq->dev->device, + "Completion event for bogus CQ 0x%x\n", cqn); } ++eq->cons_index; @@ -563,6 +564,39 @@ static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4]) gather_user_async_events(dev, mask); } +static int +setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq, + struct mlx5_eq_param *param, const char *name) +{ + int err; + + eq->irq_nb.notifier_call = mlx5_eq_async_int; + + err = create_async_eq(dev, &eq->core, param); + if (err) { + mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err); + return err; + } + err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable %s EQ %d\n", name, err); + destroy_async_eq(dev, &eq->core); + } + return err; +} + +static void cleanup_async_eq(struct mlx5_core_dev *dev, + struct mlx5_eq_async *eq, const char *name) +{ + int err; + + mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); + err = destroy_async_eq(dev, &eq->core); + if (err) + mlx5_core_err(dev, "failed to destroy %s eq, err(%d)\n", + name, err); +} + static int create_async_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; @@ -572,77 +606,45 @@ static int create_async_eqs(struct mlx5_core_dev *dev) MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR); mlx5_eq_notifier_register(dev, &table->cq_err_nb); - table->cmd_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { .irq_index = 0, .nent = MLX5_NUM_CMD_EQE, + .mask[0] = 1ull << MLX5_EVENT_TYPE_CMD, }; - - param.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD; - err = create_async_eq(dev, &table->cmd_eq.core, &param); - if (err) { - mlx5_core_warn(dev, "failed to create
cmd EQ %d\n", err); - goto err0; - } - err = mlx5_eq_enable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); - if (err) { - mlx5_core_warn(dev, "failed to enable cmd EQ %d\n", err); + err = setup_async_eq(dev, &table->cmd_eq, &param, "cmd"); + if (err) goto err1; - } + mlx5_cmd_use_events(dev); - table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { .irq_index = 0, .nent = MLX5_NUM_ASYNC_EQE, }; gather_async_events_mask(dev, param.mask); - err = create_async_eq(dev, &table->async_eq.core, &param); - if (err) { - mlx5_core_warn(dev, "failed to create async EQ %d\n", err); + err = setup_async_eq(dev, &table->async_eq, &param, "async"); + if (err) goto err2; - } - err = mlx5_eq_enable(dev, &table->async_eq.core, - &table->async_eq.irq_nb); - if (err) { - mlx5_core_warn(dev, "failed to enable async EQ %d\n", err); - goto err3; - } - table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { .irq_index = 0, .nent = /* TODO: sriov max_vf + */ 1, + .mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST, }; - param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST; - err = create_async_eq(dev, &table->pages_eq.core, &param); - if (err) { - mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); - goto err4; - } - err = mlx5_eq_enable(dev, &table->pages_eq.core, - &table->pages_eq.irq_nb); - if (err) { - mlx5_core_warn(dev, "failed to enable pages EQ %d\n", err); - goto err5; - } + err = setup_async_eq(dev, &table->pages_eq, &param, "pages"); + if (err) + goto err3; - return err; + return 0; -err5: - destroy_async_eq(dev, &table->pages_eq.core); -err4: - mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); err3: - destroy_async_eq(dev, &table->async_eq.core); + cleanup_async_eq(dev, &table->async_eq, "async"); err2: mlx5_cmd_use_polling(dev); - mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); + cleanup_async_eq(dev, &table->cmd_eq, "cmd"); err1: - destroy_async_eq(dev, &table->cmd_eq.core); -err0:
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); return err; } @@ -650,28 +652,11 @@ err0: static void destroy_async_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - int err; - - mlx5_eq_disable(dev, &table->pages_eq.core, &table->pages_eq.irq_nb); - err = destroy_async_eq(dev, &table->pages_eq.core); - if (err) - mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", - err); - - mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); - err = destroy_async_eq(dev, &table->async_eq.core); - if (err) - mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", - err); + cleanup_async_eq(dev, &table->pages_eq, "pages"); + cleanup_async_eq(dev, &table->async_eq, "async"); mlx5_cmd_use_polling(dev); - - mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); - err = destroy_async_eq(dev, &table->cmd_eq.core); - if (err) - mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", - err); - + cleanup_async_eq(dev, &table->cmd_eq, "cmd"); mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 2c965ad0d744..05b13a1e829c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -277,6 +277,7 @@ enum { static int esw_create_legacy_vepa_table(struct mlx5_eswitch *esw) { + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb; @@ -289,8 +290,10 @@ static int esw_create_legacy_vepa_table(struct mlx5_eswitch *esw) } /* num FTE 2, num FG 2 */ - fdb = mlx5_create_auto_grouped_flow_table(root_ns, LEGACY_VEPA_PRIO, - 2, 2, 0, 0); + ft_attr.prio = LEGACY_VEPA_PRIO; + ft_attr.max_fte = 2; + ft_attr.autogroup.max_num_groups = 2; + fdb = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, 
"Failed to create VEPA FDB err %d\n", err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index ffcff3ba3701..4472710ccc9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -157,7 +157,7 @@ enum offloads_fdb_flags { ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED = BIT(0), }; -extern const unsigned int ESW_POOLS[4]; +struct mlx5_esw_chains_priv; struct mlx5_eswitch_fdb { union { @@ -182,14 +182,7 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_handle *miss_rule_multi; int vlan_push_pop_refcount; - struct { - struct mlx5_flow_table *fdb; - u32 num_rules; - } fdb_prio[FDB_NUM_CHAINS][FDB_TC_MAX_PRIO + 1][FDB_TC_LEVELS_PER_PRIO]; - /* Protects fdb_prio table */ - struct mutex fdb_prio_lock; - - int fdb_left[ARRAY_SIZE(ESW_POOLS)]; + struct mlx5_esw_chains_priv *esw_chains_priv; } offloads; }; u32 flags; @@ -355,15 +348,6 @@ mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, struct mlx5_esw_flow_attr *attr); -bool -mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw); - -u16 -mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw); - -u32 -mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw); - struct mlx5_flow_handle * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, struct mlx5_flow_destination *dest); @@ -388,6 +372,11 @@ enum { MLX5_ESW_DEST_ENCAP_VALID = BIT(1), }; +enum { + MLX5_ESW_ATTR_FLAG_VLAN_HANDLED = BIT(0), + MLX5_ESW_ATTR_FLAG_SLOW_PATH = BIT(1), +}; + struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *in_rep; struct mlx5_core_dev *in_mdev; @@ -401,7 +390,6 @@ struct mlx5_esw_flow_attr { u16 vlan_vid[MLX5_FS_VLAN_DEPTH]; u8 vlan_prio[MLX5_FS_VLAN_DEPTH]; u8 total_vlan; - bool vlan_handled; struct { u32 flags; struct mlx5_eswitch_rep *rep; @@ -416,6 +404,7 @@ struct mlx5_esw_flow_attr { u32 chain; u16 prio; u32 dest_chain; + u32 flags; struct mlx5e_tc_flow_parse_attr 
*parse_attr; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 243a5440867e..a6d0b62ef234 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -37,6 +37,7 @@ #include <linux/mlx5/fs.h> #include "mlx5_core.h" #include "eswitch.h" +#include "eswitch_offloads_chains.h" #include "rdma.h" #include "en.h" #include "fs_core.h" @@ -47,10 +48,6 @@ * one for multicast. */ #define MLX5_ESW_MISS_FLOWS (2) - -#define fdb_prio_table(esw, chain, prio, level) \ - (esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)] - #define UPLINK_REP_INDEX 0 static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, @@ -62,32 +59,6 @@ static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, return &esw->offloads.vport_reps[idx]; } -static struct mlx5_flow_table * -esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level); -static void -esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level); - -bool mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw) -{ - return (!!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)); -} - -u32 mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw) -{ - if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED) - return FDB_TC_MAX_CHAIN; - - return 0; -} - -u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw) -{ - if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED) - return FDB_TC_MAX_PRIO; - - return 1; -} - static bool esw_check_ingress_prio_tag_enabled(const struct mlx5_eswitch *esw, const struct mlx5_vport *vport) @@ -175,10 +146,17 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { - if (attr->dest_chain) { - struct mlx5_flow_table *ft; + struct mlx5_flow_table *ft; - ft = 
esw_get_prio_table(esw, attr->dest_chain, 1, 0); + if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) { + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = mlx5_esw_chains_get_tc_end_ft(esw); + i++; + } else if (attr->dest_chain) { + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + ft = mlx5_esw_chains_get_table(esw, attr->dest_chain, + 1, 0); if (IS_ERR(ft)) { rule = ERR_CAST(ft); goto err_create_goto_table; @@ -223,7 +201,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_hdr = attr->modify_hdr; - fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!split); + fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio, + !!split); if (IS_ERR(fdb)) { rule = ERR_CAST(fdb); goto err_esw_get; @@ -242,10 +221,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, return rule; err_add_rule: - esw_put_prio_table(esw, attr->chain, attr->prio, !!split); + mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, !!split); err_esw_get: - if (attr->dest_chain) - esw_put_prio_table(esw, attr->dest_chain, 1, 0); + if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) && attr->dest_chain) + mlx5_esw_chains_put_table(esw, attr->dest_chain, 1, 0); err_create_goto_table: return rule; } @@ -262,13 +241,13 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule; int i; - fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0); + fast_fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio, 0); if (IS_ERR(fast_fdb)) { rule = ERR_CAST(fast_fdb); goto err_get_fast; } - fwd_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 1); + fwd_fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio, 1); if (IS_ERR(fwd_fdb)) { rule = ERR_CAST(fwd_fdb); goto err_get_fwd; @@ -296,6 +275,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, if (attr->outer_match_level != MLX5_MATCH_NONE) 
spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i); if (IS_ERR(rule)) @@ -305,9 +285,9 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, return rule; add_err: - esw_put_prio_table(esw, attr->chain, attr->prio, 1); + mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 1); err_get_fwd: - esw_put_prio_table(esw, attr->chain, attr->prio, 0); + mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0); err_get_fast: return rule; } @@ -332,12 +312,13 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, atomic64_dec(&esw->offloads.num_flows); if (fwd_rule) { - esw_put_prio_table(esw, attr->chain, attr->prio, 1); - esw_put_prio_table(esw, attr->chain, attr->prio, 0); + mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 1); + mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0); } else { - esw_put_prio_table(esw, attr->chain, attr->prio, !!split); + mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, + !!split); if (attr->dest_chain) - esw_put_prio_table(esw, attr->dest_chain, 1, 0); + mlx5_esw_chains_put_table(esw, attr->dest_chain, 1, 0); } } @@ -451,7 +432,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, if (err) goto unlock; - attr->vlan_handled = false; + attr->flags &= ~MLX5_ESW_ATTR_FLAG_VLAN_HANDLED; vport = esw_vlan_action_get_vport(attr, push, pop); @@ -459,7 +440,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, /* tracks VF --> wire rules without vlan push action */ if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) { vport->vlan_refcount++; - attr->vlan_handled = true; + attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED; } goto unlock; @@ -490,7 +471,7 @@ skip_set_push: } out: if (!err) - attr->vlan_handled = true; + attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED; unlock: mutex_unlock(&esw->state_lock); return err; @@ -508,7 +489,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, if 
(mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) return 0; - if (!attr->vlan_handled) + if (!(attr->flags & MLX5_ESW_ATTR_FLAG_VLAN_HANDLED)) return 0; push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); @@ -582,8 +563,8 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport, dest.vport.num = vport; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, - &flow_act, &dest, 1); + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule)); out: @@ -824,8 +805,8 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) dest.vport.num = esw->manager_vport; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, - &flow_act, &dest, 1); + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); esw_warn(esw->dev, "FDB: Failed to add unicast miss flow rule err %d\n", err); @@ -839,8 +820,8 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16); dmac_v[0] = 0x01; - flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, - &flow_act, &dest, 1); + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err); @@ -855,174 +836,6 @@ out: return err; } -#define ESW_OFFLOADS_NUM_GROUPS 4 - -/* Firmware currently has 4 pool of 4 sizes that it supports (ESW_POOLS), - * and a virtual memory region of 16M (ESW_SIZE), this region is duplicated - * for each flow table pool. 
We can allocate up to 16M of each pool, - * and we keep track of how much we used via put/get_sz_to_pool. - * Firmware doesn't report any of this for now. - * ESW_POOL is expected to be sorted from large to small - */ -#define ESW_SIZE (16 * 1024 * 1024) -const unsigned int ESW_POOLS[4] = { 4 * 1024 * 1024, 1 * 1024 * 1024, - 64 * 1024, 4 * 1024 }; - -static int -get_sz_from_pool(struct mlx5_eswitch *esw) -{ - int sz = 0, i; - - for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) { - if (esw->fdb_table.offloads.fdb_left[i]) { - --esw->fdb_table.offloads.fdb_left[i]; - sz = ESW_POOLS[i]; - break; - } - } - - return sz; -} - -static void -put_sz_to_pool(struct mlx5_eswitch *esw, int sz) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) { - if (sz >= ESW_POOLS[i]) { - ++esw->fdb_table.offloads.fdb_left[i]; - break; - } - } -} - -static struct mlx5_flow_table * -create_next_size_table(struct mlx5_eswitch *esw, - struct mlx5_flow_namespace *ns, - u16 table_prio, - int level, - u32 flags) -{ - struct mlx5_flow_table *fdb; - int sz; - - sz = get_sz_from_pool(esw); - if (!sz) - return ERR_PTR(-ENOSPC); - - fdb = mlx5_create_auto_grouped_flow_table(ns, - table_prio, - sz, - ESW_OFFLOADS_NUM_GROUPS, - level, - flags); - if (IS_ERR(fdb)) { - esw_warn(esw->dev, "Failed to create FDB Table err %d (table prio: %d, level: %d, size: %d)\n", - (int)PTR_ERR(fdb), table_prio, level, sz); - put_sz_to_pool(esw, sz); - } - - return fdb; -} - -static struct mlx5_flow_table * -esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level) -{ - struct mlx5_core_dev *dev = esw->dev; - struct mlx5_flow_table *fdb = NULL; - struct mlx5_flow_namespace *ns; - int table_prio, l = 0; - u32 flags = 0; - - if (chain == FDB_TC_SLOW_PATH_CHAIN) - return esw->fdb_table.offloads.slow_fdb; - - mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock); - - fdb = fdb_prio_table(esw, chain, prio, level).fdb; - if (fdb) { - /* take ref on earlier levels as well */ - while (level >= 0) - 
fdb_prio_table(esw, chain, prio, level--).num_rules++; - mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); - return fdb; - } - - ns = mlx5_get_fdb_sub_ns(dev, chain); - if (!ns) { - esw_warn(dev, "Failed to get FDB sub namespace\n"); - mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); - return ERR_PTR(-EOPNOTSUPP); - } - - if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) - flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | - MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); - - table_prio = prio - 1; - - /* create earlier levels for correct fs_core lookup when - * connecting tables - */ - for (l = 0; l <= level; l++) { - if (fdb_prio_table(esw, chain, prio, l).fdb) { - fdb_prio_table(esw, chain, prio, l).num_rules++; - continue; - } - - fdb = create_next_size_table(esw, ns, table_prio, l, flags); - if (IS_ERR(fdb)) { - l--; - goto err_create_fdb; - } - - fdb_prio_table(esw, chain, prio, l).fdb = fdb; - fdb_prio_table(esw, chain, prio, l).num_rules = 1; - } - - mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); - return fdb; - -err_create_fdb: - mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); - if (l >= 0) - esw_put_prio_table(esw, chain, prio, l); - - return fdb; -} - -static void -esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level) -{ - int l; - - if (chain == FDB_TC_SLOW_PATH_CHAIN) - return; - - mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock); - - for (l = level; l >= 0; l--) { - if (--(fdb_prio_table(esw, chain, prio, l).num_rules) > 0) - continue; - - put_sz_to_pool(esw, fdb_prio_table(esw, chain, prio, l).fdb->max_fte); - mlx5_destroy_flow_table(fdb_prio_table(esw, chain, prio, l).fdb); - fdb_prio_table(esw, chain, prio, l).fdb = NULL; - } - - mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); -} - -static void esw_destroy_offloads_fast_fdb_tables(struct mlx5_eswitch *esw) -{ - /* If lazy creation isn't supported, deref the fast path tables */ - if (!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)) { - 
esw_put_prio_table(esw, 0, 1, 1); - esw_put_prio_table(esw, 0, 1, 0); - } -} - #define MAX_PF_SQ 256 #define MAX_SQ_NVPORTS 32 @@ -1055,16 +868,16 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *dev = esw->dev; - u32 *flow_group_in, max_flow_counter; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb = NULL; - int table_size, ix, err = 0, i; + u32 flags = 0, *flow_group_in; + int table_size, ix, err = 0; struct mlx5_flow_group *g; - u32 flags = 0, fdb_max; void *match_criteria; u8 *dmac; esw_debug(esw->dev, "Create offloads FDB Tables\n"); + flow_group_in = kvzalloc(inlen, GFP_KERNEL); if (!flow_group_in) return -ENOMEM; @@ -1083,19 +896,6 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) goto ns_err; } - max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | - MLX5_CAP_GEN(dev, max_flow_counter_15_0); - fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); - - esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d), groups(%d), max flow table size(%d))\n", - MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size), - max_flow_counter, ESW_OFFLOADS_NUM_GROUPS, - fdb_max); - - for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) - esw->fdb_table.offloads.fdb_left[i] = - ESW_POOLS[i] <= fdb_max ? 
ESW_SIZE / ESW_POOLS[i] : 0; - table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + MLX5_ESW_MISS_FLOWS + esw->total_vports; @@ -1118,16 +918,10 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) } esw->fdb_table.offloads.slow_fdb = fdb; - /* If lazy creation isn't supported, open the fast path tables now */ - if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, multi_fdb_encap) && - esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { - esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; - esw_warn(dev, "Lazy creation of flow tables isn't supported, ignoring priorities\n"); - esw_get_prio_table(esw, 0, 1, 0); - esw_get_prio_table(esw, 0, 1, 1); - } else { - esw_debug(dev, "Lazy creation of flow tables supported, deferring table opening\n"); - esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + err = mlx5_esw_chains_create(esw); + if (err) { + esw_warn(dev, "Failed to create fdb chains err(%d)\n", err); + goto fdb_chains_err; } /* create send-to-vport group */ @@ -1218,7 +1012,8 @@ miss_err: peer_miss_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); send_vport_err: - esw_destroy_offloads_fast_fdb_tables(esw); + mlx5_esw_chains_destroy(esw); +fdb_chains_err: mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); slow_fdb_err: /* Holds true only as long as DMFS is the default */ @@ -1240,8 +1035,8 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); + mlx5_esw_chains_destroy(esw); mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); - esw_destroy_offloads_fast_fdb_tables(esw); /* Holds true only as long as DMFS is the default */ mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns, MLX5_FLOW_STEERING_MODE_DMFS); @@ -2111,7 +1906,6 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw) total_vports = num_vfs + 
MLX5_SPECIAL_VPORTS(esw->dev); memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); - mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); err = esw_create_uplink_offloads_acl_tables(esw); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c new file mode 100644 index 000000000000..3a60eb5360bd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c @@ -0,0 +1,758 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2020 Mellanox Technologies. + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/fs.h> + +#include "eswitch_offloads_chains.h" +#include "mlx5_core.h" +#include "fs_core.h" +#include "eswitch.h" +#include "en.h" + +#define esw_chains_priv(esw) ((esw)->fdb_table.offloads.esw_chains_priv) +#define esw_chains_lock(esw) (esw_chains_priv(esw)->lock) +#define esw_chains_ht(esw) (esw_chains_priv(esw)->chains_ht) +#define esw_prios_ht(esw) (esw_chains_priv(esw)->prios_ht) +#define fdb_pool_left(esw) (esw_chains_priv(esw)->fdb_left) +#define tc_slow_fdb(esw) ((esw)->fdb_table.offloads.slow_fdb) +#define tc_end_fdb(esw) (esw_chains_priv(esw)->tc_end_fdb) +#define fdb_ignore_flow_level_supported(esw) \ + (MLX5_CAP_ESW_FLOWTABLE_FDB((esw)->dev, ignore_flow_level)) + +#define ESW_OFFLOADS_NUM_GROUPS 4 + +/* Firmware currently has 4 pool of 4 sizes that it supports (ESW_POOLS), + * and a virtual memory region of 16M (ESW_SIZE), this region is duplicated + * for each flow table pool. We can allocate up to 16M of each pool, + * and we keep track of how much we used via get_next_avail_sz_from_pool. + * Firmware doesn't report any of this for now. + * ESW_POOL is expected to be sorted from large to small and match firmware + * pools. 
+ */ +#define ESW_SIZE (16 * 1024 * 1024) +const unsigned int ESW_POOLS[] = { 4 * 1024 * 1024, + 1 * 1024 * 1024, + 64 * 1024, + 4 * 1024, }; + +struct mlx5_esw_chains_priv { + struct rhashtable chains_ht; + struct rhashtable prios_ht; + /* Protects above chains_ht and prios_ht */ + struct mutex lock; + + struct mlx5_flow_table *tc_end_fdb; + + int fdb_left[ARRAY_SIZE(ESW_POOLS)]; +}; + +struct fdb_chain { + struct rhash_head node; + + u32 chain; + + int ref; + + struct mlx5_eswitch *esw; + struct list_head prios_list; +}; + +struct fdb_prio_key { + u32 chain; + u32 prio; + u32 level; +}; + +struct fdb_prio { + struct rhash_head node; + struct list_head list; + + struct fdb_prio_key key; + + int ref; + + struct fdb_chain *fdb_chain; + struct mlx5_flow_table *fdb; + struct mlx5_flow_table *next_fdb; + struct mlx5_flow_group *miss_group; + struct mlx5_flow_handle *miss_rule; +}; + +static const struct rhashtable_params chain_params = { + .head_offset = offsetof(struct fdb_chain, node), + .key_offset = offsetof(struct fdb_chain, chain), + .key_len = sizeof_field(struct fdb_chain, chain), + .automatic_shrinking = true, +}; + +static const struct rhashtable_params prio_params = { + .head_offset = offsetof(struct fdb_prio, node), + .key_offset = offsetof(struct fdb_prio, key), + .key_len = sizeof_field(struct fdb_prio, key), + .automatic_shrinking = true, +}; + +bool mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw) +{ + return esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; +} + +u32 mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw) +{ + if (!mlx5_esw_chains_prios_supported(esw)) + return 1; + + if (fdb_ignore_flow_level_supported(esw)) + return UINT_MAX - 1; + + return FDB_TC_MAX_CHAIN; +} + +u32 mlx5_esw_chains_get_ft_chain(struct mlx5_eswitch *esw) +{ + return mlx5_esw_chains_get_chain_range(esw) + 1; +} + +u32 mlx5_esw_chains_get_prio_range(struct mlx5_eswitch *esw) +{ + if (!mlx5_esw_chains_prios_supported(esw)) + return 1; + + if 
(fdb_ignore_flow_level_supported(esw)) + return UINT_MAX; + + return FDB_TC_MAX_PRIO; +} + +static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw) +{ + if (fdb_ignore_flow_level_supported(esw)) + return UINT_MAX; + + return FDB_TC_LEVELS_PER_PRIO; +} + +#define POOL_NEXT_SIZE 0 +static int +mlx5_esw_chains_get_avail_sz_from_pool(struct mlx5_eswitch *esw, + int desired_size) +{ + int i, found_i = -1; + + for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) { + if (fdb_pool_left(esw)[i] && ESW_POOLS[i] > desired_size) { + found_i = i; + if (desired_size != POOL_NEXT_SIZE) + break; + } + } + + if (found_i != -1) { + --fdb_pool_left(esw)[found_i]; + return ESW_POOLS[found_i]; + } + + return 0; +} + +static void +mlx5_esw_chains_put_sz_to_pool(struct mlx5_eswitch *esw, int sz) +{ + int i; + + for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) { + if (sz == ESW_POOLS[i]) { + ++fdb_pool_left(esw)[i]; + return; + } + } + + WARN_ONCE(1, "Couldn't find size %d in fdb size pool", sz); +} + +static void +mlx5_esw_chains_init_sz_pool(struct mlx5_eswitch *esw) +{ + u32 fdb_max; + int i; + + fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, log_max_ft_size); + + for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) + fdb_pool_left(esw)[i] = + ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0; +} + +static struct mlx5_flow_table * +mlx5_esw_chains_create_fdb_table(struct mlx5_eswitch *esw, + u32 chain, u32 prio, u32 level) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *fdb; + int sz; + + if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) + ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + + sz = mlx5_esw_chains_get_avail_sz_from_pool(esw, POOL_NEXT_SIZE); + if (!sz) + return ERR_PTR(-ENOSPC); + ft_attr.max_fte = sz; + + /* We use tc_slow_fdb(esw) as the table's next_ft till + * ignore_flow_level is allowed on FT creation and not just for FTEs. 
+ * Instead caller should add an explicit miss rule if needed. + */ + ft_attr.next_ft = tc_slow_fdb(esw); + + /* The root table(chain 0, prio 1, level 0) is required to be + * connected to the previous prio (FDB_BYPASS_PATH if exists). + * We always create it, as a managed table, in order to align with + * fs_core logic. + */ + if (!fdb_ignore_flow_level_supported(esw) || + (chain == 0 && prio == 1 && level == 0)) { + ft_attr.level = level; + ft_attr.prio = prio - 1; + ns = mlx5_get_fdb_sub_ns(esw->dev, chain); + } else { + ft_attr.flags |= MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.prio = FDB_TC_OFFLOAD; + /* Firmware doesn't allow us to create another level 0 table, + * so we create all unmanaged tables as level 1. + * + * To connect them, we use explicit miss rules with + * ignore_flow_level. Caller is responsible to create + * these rules (if needed). + */ + ft_attr.level = 1; + ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB); + } + + ft_attr.autogroup.num_reserved_entries = 2; + ft_attr.autogroup.max_num_groups = ESW_OFFLOADS_NUM_GROUPS; + fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(fdb)) { + esw_warn(esw->dev, + "Failed to create FDB table err %d (chain: %d, prio: %d, level: %d, size: %d)\n", + (int)PTR_ERR(fdb), chain, prio, level, sz); + mlx5_esw_chains_put_sz_to_pool(esw, sz); + return fdb; + } + + return fdb; +} + +static void +mlx5_esw_chains_destroy_fdb_table(struct mlx5_eswitch *esw, + struct mlx5_flow_table *fdb) +{ + mlx5_esw_chains_put_sz_to_pool(esw, fdb->max_fte); + mlx5_destroy_flow_table(fdb); +} + +static struct fdb_chain * +mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain) +{ + struct fdb_chain *fdb_chain = NULL; + int err; + + fdb_chain = kvzalloc(sizeof(*fdb_chain), GFP_KERNEL); + if (!fdb_chain) + return ERR_PTR(-ENOMEM); + + fdb_chain->esw = esw; + fdb_chain->chain = chain; + INIT_LIST_HEAD(&fdb_chain->prios_list); + + err = rhashtable_insert_fast(&esw_chains_ht(esw), 
&fdb_chain->node, + chain_params); + if (err) + goto err_insert; + + return fdb_chain; + +err_insert: + kvfree(fdb_chain); + return ERR_PTR(err); +} + +static void +mlx5_esw_chains_destroy_fdb_chain(struct fdb_chain *fdb_chain) +{ + struct mlx5_eswitch *esw = fdb_chain->esw; + + rhashtable_remove_fast(&esw_chains_ht(esw), &fdb_chain->node, + chain_params); + kvfree(fdb_chain); +} + +static struct fdb_chain * +mlx5_esw_chains_get_fdb_chain(struct mlx5_eswitch *esw, u32 chain) +{ + struct fdb_chain *fdb_chain; + + fdb_chain = rhashtable_lookup_fast(&esw_chains_ht(esw), &chain, + chain_params); + if (!fdb_chain) { + fdb_chain = mlx5_esw_chains_create_fdb_chain(esw, chain); + if (IS_ERR(fdb_chain)) + return fdb_chain; + } + + fdb_chain->ref++; + + return fdb_chain; +} + +static struct mlx5_flow_handle * +mlx5_esw_chains_add_miss_rule(struct mlx5_flow_table *fdb, + struct mlx5_flow_table *next_fdb) +{ + static const struct mlx5_flow_spec spec = {}; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act act = {}; + + act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND; + act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = next_fdb; + + return mlx5_add_flow_rules(fdb, &spec, &act, &dest, 1); +} + +static int +mlx5_esw_chains_update_prio_prevs(struct fdb_prio *fdb_prio, + struct mlx5_flow_table *next_fdb) +{ + struct mlx5_flow_handle *miss_rules[FDB_TC_LEVELS_PER_PRIO + 1] = {}; + struct fdb_chain *fdb_chain = fdb_prio->fdb_chain; + struct fdb_prio *pos; + int n = 0, err; + + if (fdb_prio->key.level) + return 0; + + /* Iterate in reverse order until reaching the level 0 rule of + * the previous priority, adding all the miss rules first, so we can + * revert them if any of them fails. 
+ */ + pos = fdb_prio; + list_for_each_entry_continue_reverse(pos, + &fdb_chain->prios_list, + list) { + miss_rules[n] = mlx5_esw_chains_add_miss_rule(pos->fdb, + next_fdb); + if (IS_ERR(miss_rules[n])) { + err = PTR_ERR(miss_rules[n]); + goto err_prev_rule; + } + + n++; + if (!pos->key.level) + break; + } + + /* Success, delete old miss rules, and update the pointers. */ + n = 0; + pos = fdb_prio; + list_for_each_entry_continue_reverse(pos, + &fdb_chain->prios_list, + list) { + mlx5_del_flow_rules(pos->miss_rule); + + pos->miss_rule = miss_rules[n]; + pos->next_fdb = next_fdb; + + n++; + if (!pos->key.level) + break; + } + + return 0; + +err_prev_rule: + while (--n >= 0) + mlx5_del_flow_rules(miss_rules[n]); + + return err; +} + +static void +mlx5_esw_chains_put_fdb_chain(struct fdb_chain *fdb_chain) +{ + if (--fdb_chain->ref == 0) + mlx5_esw_chains_destroy_fdb_chain(fdb_chain); +} + +static struct fdb_prio * +mlx5_esw_chains_create_fdb_prio(struct mlx5_eswitch *esw, + u32 chain, u32 prio, u32 level) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_handle *miss_rule = NULL; + struct mlx5_flow_group *miss_group; + struct fdb_prio *fdb_prio = NULL; + struct mlx5_flow_table *next_fdb; + struct fdb_chain *fdb_chain; + struct mlx5_flow_table *fdb; + struct list_head *pos; + u32 *flow_group_in; + int err; + + fdb_chain = mlx5_esw_chains_get_fdb_chain(esw, chain); + if (IS_ERR(fdb_chain)) + return ERR_CAST(fdb_chain); + + fdb_prio = kvzalloc(sizeof(*fdb_prio), GFP_KERNEL); + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!fdb_prio || !flow_group_in) { + err = -ENOMEM; + goto err_alloc; + } + + /* Chain's prio list is sorted by prio and level. + * And all levels of some prio point to the next prio's level 0. 
+ * Example list (prio, level): + * (3,0)->(3,1)->(5,0)->(5,1)->(6,1)->(7,0) + * In hardware, we will we have the following pointers: + * (3,0) -> (5,0) -> (7,0) -> Slow path + * (3,1) -> (5,0) + * (5,1) -> (7,0) + * (6,1) -> (7,0) + */ + + /* Default miss for each chain: */ + next_fdb = (chain == mlx5_esw_chains_get_ft_chain(esw)) ? + tc_slow_fdb(esw) : + tc_end_fdb(esw); + list_for_each(pos, &fdb_chain->prios_list) { + struct fdb_prio *p = list_entry(pos, struct fdb_prio, list); + + /* exit on first pos that is larger */ + if (prio < p->key.prio || (prio == p->key.prio && + level < p->key.level)) { + /* Get next level 0 table */ + next_fdb = p->key.level == 0 ? p->fdb : p->next_fdb; + break; + } + } + + fdb = mlx5_esw_chains_create_fdb_table(esw, chain, prio, level); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + goto err_create; + } + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, + fdb->max_fte - 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + fdb->max_fte - 1); + miss_group = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(miss_group)) { + err = PTR_ERR(miss_group); + goto err_group; + } + + /* Add miss rule to next_fdb */ + miss_rule = mlx5_esw_chains_add_miss_rule(fdb, next_fdb); + if (IS_ERR(miss_rule)) { + err = PTR_ERR(miss_rule); + goto err_miss_rule; + } + + fdb_prio->miss_group = miss_group; + fdb_prio->miss_rule = miss_rule; + fdb_prio->next_fdb = next_fdb; + fdb_prio->fdb_chain = fdb_chain; + fdb_prio->key.chain = chain; + fdb_prio->key.prio = prio; + fdb_prio->key.level = level; + fdb_prio->fdb = fdb; + + err = rhashtable_insert_fast(&esw_prios_ht(esw), &fdb_prio->node, + prio_params); + if (err) + goto err_insert; + + list_add(&fdb_prio->list, pos->prev); + + /* Table is ready, connect it */ + err = mlx5_esw_chains_update_prio_prevs(fdb_prio, fdb); + if (err) + goto err_update; + + kvfree(flow_group_in); + return fdb_prio; + +err_update: + list_del(&fdb_prio->list); + 
rhashtable_remove_fast(&esw_prios_ht(esw), &fdb_prio->node, + prio_params); +err_insert: + mlx5_del_flow_rules(miss_rule); +err_miss_rule: + mlx5_destroy_flow_group(miss_group); +err_group: + mlx5_esw_chains_destroy_fdb_table(esw, fdb); +err_create: +err_alloc: + kvfree(fdb_prio); + kvfree(flow_group_in); + mlx5_esw_chains_put_fdb_chain(fdb_chain); + return ERR_PTR(err); +} + +static void +mlx5_esw_chains_destroy_fdb_prio(struct mlx5_eswitch *esw, + struct fdb_prio *fdb_prio) +{ + struct fdb_chain *fdb_chain = fdb_prio->fdb_chain; + + WARN_ON(mlx5_esw_chains_update_prio_prevs(fdb_prio, + fdb_prio->next_fdb)); + + list_del(&fdb_prio->list); + rhashtable_remove_fast(&esw_prios_ht(esw), &fdb_prio->node, + prio_params); + mlx5_del_flow_rules(fdb_prio->miss_rule); + mlx5_destroy_flow_group(fdb_prio->miss_group); + mlx5_esw_chains_destroy_fdb_table(esw, fdb_prio->fdb); + mlx5_esw_chains_put_fdb_chain(fdb_chain); + kvfree(fdb_prio); +} + +struct mlx5_flow_table * +mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, + u32 level) +{ + struct mlx5_flow_table *prev_fts; + struct fdb_prio *fdb_prio; + struct fdb_prio_key key; + int l = 0; + + if ((chain > mlx5_esw_chains_get_chain_range(esw) && + chain != mlx5_esw_chains_get_ft_chain(esw)) || + prio > mlx5_esw_chains_get_prio_range(esw) || + level > mlx5_esw_chains_get_level_range(esw)) + return ERR_PTR(-EOPNOTSUPP); + + /* create earlier levels for correct fs_core lookup when + * connecting tables. 
+ */ + for (l = 0; l < level; l++) { + prev_fts = mlx5_esw_chains_get_table(esw, chain, prio, l); + if (IS_ERR(prev_fts)) { + fdb_prio = ERR_CAST(prev_fts); + goto err_get_prevs; + } + } + + key.chain = chain; + key.prio = prio; + key.level = level; + + mutex_lock(&esw_chains_lock(esw)); + fdb_prio = rhashtable_lookup_fast(&esw_prios_ht(esw), &key, + prio_params); + if (!fdb_prio) { + fdb_prio = mlx5_esw_chains_create_fdb_prio(esw, chain, + prio, level); + if (IS_ERR(fdb_prio)) + goto err_create_prio; + } + + ++fdb_prio->ref; + mutex_unlock(&esw_chains_lock(esw)); + + return fdb_prio->fdb; + +err_create_prio: + mutex_unlock(&esw_chains_lock(esw)); +err_get_prevs: + while (--l >= 0) + mlx5_esw_chains_put_table(esw, chain, prio, l); + return ERR_CAST(fdb_prio); +} + +void +mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, + u32 level) +{ + struct fdb_prio *fdb_prio; + struct fdb_prio_key key; + + key.chain = chain; + key.prio = prio; + key.level = level; + + mutex_lock(&esw_chains_lock(esw)); + fdb_prio = rhashtable_lookup_fast(&esw_prios_ht(esw), &key, + prio_params); + if (!fdb_prio) + goto err_get_prio; + + if (--fdb_prio->ref == 0) + mlx5_esw_chains_destroy_fdb_prio(esw, fdb_prio); + mutex_unlock(&esw_chains_lock(esw)); + + while (level-- > 0) + mlx5_esw_chains_put_table(esw, chain, prio, level); + + return; + +err_get_prio: + mutex_unlock(&esw_chains_lock(esw)); + WARN_ONCE(1, + "Couldn't find table: (chain: %d prio: %d level: %d)", + chain, prio, level); +} + +struct mlx5_flow_table * +mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw) +{ + return tc_end_fdb(esw); +} + +static int +mlx5_esw_chains_init(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_chains_priv *chains_priv; + struct mlx5_core_dev *dev = esw->dev; + u32 max_flow_counter, fdb_max; + int err; + + chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL); + if (!chains_priv) + return -ENOMEM; + esw_chains_priv(esw) = chains_priv; + + max_flow_counter = (MLX5_CAP_GEN(dev, 
max_flow_counter_31_16) << 16) | + MLX5_CAP_GEN(dev, max_flow_counter_15_0); + fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); + + esw_debug(dev, + "Init esw offloads chains, max counters(%d), groups(%d), max flow table size(%d)\n", + max_flow_counter, ESW_OFFLOADS_NUM_GROUPS, fdb_max); + + mlx5_esw_chains_init_sz_pool(esw); + + if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, multi_fdb_encap) && + esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { + esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n"); + } else { + esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + esw_info(dev, "Supported tc offload range - chains: %u, prios: %u\n", + mlx5_esw_chains_get_chain_range(esw), + mlx5_esw_chains_get_prio_range(esw)); + } + + err = rhashtable_init(&esw_chains_ht(esw), &chain_params); + if (err) + goto init_chains_ht_err; + + err = rhashtable_init(&esw_prios_ht(esw), &prio_params); + if (err) + goto init_prios_ht_err; + + mutex_init(&esw_chains_lock(esw)); + + return 0; + +init_prios_ht_err: + rhashtable_destroy(&esw_chains_ht(esw)); +init_chains_ht_err: + kfree(chains_priv); + return err; +} + +static void +mlx5_esw_chains_cleanup(struct mlx5_eswitch *esw) +{ + mutex_destroy(&esw_chains_lock(esw)); + rhashtable_destroy(&esw_prios_ht(esw)); + rhashtable_destroy(&esw_chains_ht(esw)); + + kfree(esw_chains_priv(esw)); +} + +static int +mlx5_esw_chains_open(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_table *ft; + int err; + + /* Create tc_end_fdb(esw) which is the always created ft chain */ + ft = mlx5_esw_chains_get_table(esw, mlx5_esw_chains_get_ft_chain(esw), + 1, 0); + if (IS_ERR(ft)) + return PTR_ERR(ft); + + tc_end_fdb(esw) = ft; + + /* Always open the root for fast path */ + ft = mlx5_esw_chains_get_table(esw, 0, 1, 0); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto level_0_err; + } + + /* Open level 1 for split rules now if prios 
isn't supported */ + if (!mlx5_esw_chains_prios_supported(esw)) { + ft = mlx5_esw_chains_get_table(esw, 0, 1, 1); + + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto level_1_err; + } + } + + return 0; + +level_1_err: + mlx5_esw_chains_put_table(esw, 0, 1, 0); +level_0_err: + mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0); + return err; +} + +static void +mlx5_esw_chains_close(struct mlx5_eswitch *esw) +{ + if (!mlx5_esw_chains_prios_supported(esw)) + mlx5_esw_chains_put_table(esw, 0, 1, 1); + mlx5_esw_chains_put_table(esw, 0, 1, 0); + mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0); +} + +int +mlx5_esw_chains_create(struct mlx5_eswitch *esw) +{ + int err; + + err = mlx5_esw_chains_init(esw); + if (err) + return err; + + err = mlx5_esw_chains_open(esw); + if (err) + goto err_open; + + return 0; + +err_open: + mlx5_esw_chains_cleanup(esw); + return err; +} + +void +mlx5_esw_chains_destroy(struct mlx5_eswitch *esw) +{ + mlx5_esw_chains_close(esw); + mlx5_esw_chains_cleanup(esw); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h new file mode 100644 index 000000000000..2e13097fe348 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies. 
*/ + +#ifndef __ML5_ESW_CHAINS_H__ +#define __ML5_ESW_CHAINS_H__ + +bool +mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw); +u32 +mlx5_esw_chains_get_prio_range(struct mlx5_eswitch *esw); +u32 +mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw); +u32 +mlx5_esw_chains_get_ft_chain(struct mlx5_eswitch *esw); + +struct mlx5_flow_table * +mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, + u32 level); +void +mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, + u32 level); + +struct mlx5_flow_table * +mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw); + +int mlx5_esw_chains_create(struct mlx5_eswitch *esw); +void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw); + +#endif /* __ML5_ESW_CHAINS_H__ */ + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index 366bda1bb1c3..dc08ed9339ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -50,8 +50,8 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, struct mlx5_flow_act *flow_act) { static const struct mlx5_flow_spec spec = {}; + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_namespace *root_ns; - int prio, flags; int err; root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); @@ -63,10 +63,11 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, /* As this is the terminating action then the termination table is the * same prio as the slow path */ - prio = FDB_SLOW_PATH; - flags = MLX5_FLOW_TABLE_TERMINATION; - tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, prio, 1, 1, - 0, flags); + ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION; + ft_attr.prio = FDB_SLOW_PATH; + ft_attr.max_fte = 1; + ft_attr.autogroup.max_num_groups = 1; + tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); if 
(IS_ERR(tt->termtbl)) { esw_warn(dev, "Failed to create termination table\n"); return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 3c816e81f8d9..b25465d9e030 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -432,6 +432,9 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(set_fte_in, in, table_type, ft->type); MLX5_SET(set_fte_in, in, table_id, ft->id); MLX5_SET(set_fte_in, in, flow_index, fte->index); + MLX5_SET(set_fte_in, in, ignore_flow_level, + !!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL)); + if (ft->vport) { MLX5_SET(set_fte_in, in, vport_number, ft->vport); MLX5_SET(set_fte_in, in, other_vport, 1); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 8c5df6c7d7b6..c7a16ae05fa8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -579,7 +579,9 @@ static void del_sw_flow_group(struct fs_node *node) rhashtable_destroy(&fg->ftes_hash); ida_destroy(&fg->fte_allocator); - if (ft->autogroup.active && fg->max_ftes == ft->autogroup.group_size) + if (ft->autogroup.active && + fg->max_ftes == ft->autogroup.group_size && + fg->start_index < ft->autogroup.max_fte) ft->autogroup.num_groups--; err = rhltable_remove(&ft->fgs_hash, &fg->hash, @@ -1006,7 +1008,8 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa u16 vport) { struct mlx5_flow_root_namespace *root = find_root(&ns->node); - struct mlx5_flow_table *next_ft = NULL; + bool unmanaged = ft_attr->flags & MLX5_FLOW_TABLE_UNMANAGED; + struct mlx5_flow_table *next_ft; struct fs_prio *fs_prio = NULL; struct mlx5_flow_table *ft; int log_table_sz; @@ -1023,14 +1026,21 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa err = -EINVAL; goto 
unlock_root; } - if (ft_attr->level >= fs_prio->num_levels) { - err = -ENOSPC; - goto unlock_root; + if (!unmanaged) { + /* The level is related to the + * priority level range. + */ + if (ft_attr->level >= fs_prio->num_levels) { + err = -ENOSPC; + goto unlock_root; + } + + ft_attr->level += fs_prio->start_level; } + /* The level is related to the * priority level range. */ - ft_attr->level += fs_prio->start_level; ft = alloc_flow_table(ft_attr->level, vport, ft_attr->max_fte ? roundup_pow_of_two(ft_attr->max_fte) : 0, @@ -1043,19 +1053,27 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table); log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0; - next_ft = find_next_chained_ft(fs_prio); + next_ft = unmanaged ? ft_attr->next_ft : + find_next_chained_ft(fs_prio); ft->def_miss_action = ns->def_miss_action; err = root->cmds->create_flow_table(root, ft, log_table_sz, next_ft); if (err) goto free_ft; - err = connect_flow_table(root->dev, ft, fs_prio); - if (err) - goto destroy_ft; + if (!unmanaged) { + err = connect_flow_table(root->dev, ft, fs_prio); + if (err) + goto destroy_ft; + } + ft->node.active = true; down_write_ref_node(&fs_prio->node, false); - tree_add_node(&ft->node, &fs_prio->node); - list_add_flow_table(ft, fs_prio); + if (!unmanaged) { + tree_add_node(&ft->node, &fs_prio->node); + list_add_flow_table(ft, fs_prio); + } else { + ft->node.root = fs_prio->node.root; + } fs_prio->num_ft++; up_write_ref_node(&fs_prio->node, false); mutex_unlock(&root->chain_lock); @@ -1103,31 +1121,27 @@ EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table); struct mlx5_flow_table* mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, - int prio, - int num_flow_table_entries, - int max_num_groups, - u32 level, - u32 flags) + struct mlx5_flow_table_attr *ft_attr) { - struct mlx5_flow_table_attr ft_attr = {}; + int num_reserved_entries = ft_attr->autogroup.num_reserved_entries; 
+ int autogroups_max_fte = ft_attr->max_fte - num_reserved_entries; + int max_num_groups = ft_attr->autogroup.max_num_groups; struct mlx5_flow_table *ft; - if (max_num_groups > num_flow_table_entries) + if (max_num_groups > autogroups_max_fte) + return ERR_PTR(-EINVAL); + if (num_reserved_entries > ft_attr->max_fte) return ERR_PTR(-EINVAL); - ft_attr.max_fte = num_flow_table_entries; - ft_attr.prio = prio; - ft_attr.level = level; - ft_attr.flags = flags; - - ft = mlx5_create_flow_table(ns, &ft_attr); + ft = mlx5_create_flow_table(ns, ft_attr); if (IS_ERR(ft)) return ft; ft->autogroup.active = true; ft->autogroup.required_groups = max_num_groups; + ft->autogroup.max_fte = autogroups_max_fte; /* We save place for flow groups in addition to max types */ - ft->autogroup.group_size = ft->max_fte / (max_num_groups + 1); + ft->autogroup.group_size = autogroups_max_fte / (max_num_groups + 1); return ft; } @@ -1149,7 +1163,7 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, struct mlx5_flow_group *fg; int err; - if (ft->autogroup.active) + if (ft->autogroup.active && start_index < ft->autogroup.max_fte) return ERR_PTR(-EPERM); down_write_ref_node(&ft->node, false); @@ -1322,9 +1336,10 @@ static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft const struct mlx5_flow_spec *spec) { struct list_head *prev = &ft->node.children; - struct mlx5_flow_group *fg; + u32 max_fte = ft->autogroup.max_fte; unsigned int candidate_index = 0; unsigned int group_size = 0; + struct mlx5_flow_group *fg; if (!ft->autogroup.active) return ERR_PTR(-ENOENT); @@ -1332,7 +1347,7 @@ static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft if (ft->autogroup.num_groups < ft->autogroup.required_groups) group_size = ft->autogroup.group_size; - /* ft->max_fte == ft->autogroup.max_types */ + /* max_fte == ft->autogroup.max_types */ if (group_size == 0) group_size = 1; @@ -1345,7 +1360,7 @@ static struct mlx5_flow_group 
*alloc_auto_flow_group(struct mlx5_flow_table *ft prev = &fg->node.list; } - if (candidate_index + group_size > ft->max_fte) + if (candidate_index + group_size > max_fte) return ERR_PTR(-ENOSPC); fg = alloc_insert_flow_group(ft, @@ -1529,18 +1544,30 @@ static bool counter_is_valid(u32 action) } static bool dest_is_valid(struct mlx5_flow_destination *dest, - u32 action, + struct mlx5_flow_act *flow_act, struct mlx5_flow_table *ft) { + bool ignore_level = flow_act->flags & FLOW_ACT_IGNORE_FLOW_LEVEL; + u32 action = flow_act->action; + if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)) return counter_is_valid(action); if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) return true; + if (ignore_level) { + if (ft->type != FS_FT_FDB) + return false; + + if (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && + dest->ft->type != FS_FT_FDB) + return false; + } + if (!dest || ((dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) && - (dest->ft->level <= ft->level))) + (dest->ft->level <= ft->level && !ignore_level))) return false; return true; } @@ -1770,7 +1797,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, return ERR_PTR(-EINVAL); for (i = 0; i < dest_num; i++) { - if (!dest_is_valid(&dest[i], flow_act->action, ft)) + if (!dest_is_valid(&dest[i], flow_act, ft)) return ERR_PTR(-EINVAL); } nested_down_read_ref_node(&ft->node, FS_LOCK_GRANDPARENT); @@ -2033,7 +2060,8 @@ int mlx5_destroy_flow_table(struct mlx5_flow_table *ft) int err = 0; mutex_lock(&root->chain_lock); - err = disconnect_flow_table(ft); + if (!(ft->flags & MLX5_FLOW_TABLE_UNMANAGED)) + err = disconnect_flow_table(ft); if (err) { mutex_unlock(&root->chain_lock); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index c2621b911563..be5f5e32c1e8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -164,6 +164,7 @@ struct mlx5_flow_table { 
unsigned int required_groups; unsigned int group_size; unsigned int num_groups; + unsigned int max_fte; } autogroup; /* Protect fwd_rules */ struct mutex lock; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index a19790dee7b2..d89ff1d09119 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -131,11 +131,11 @@ static int mlx5_get_pcam_reg(struct mlx5_core_dev *dev) MLX5_PCAM_REGS_5000_TO_507F); } -static int mlx5_get_mcam_reg(struct mlx5_core_dev *dev) +static int mlx5_get_mcam_access_reg_group(struct mlx5_core_dev *dev, + enum mlx5_mcam_reg_groups group) { - return mlx5_query_mcam_reg(dev, dev->caps.mcam, - MLX5_MCAM_FEATURE_ENHANCED_FEATURES, - MLX5_MCAM_REGS_FIRST_128); + return mlx5_query_mcam_reg(dev, dev->caps.mcam[group], + MLX5_MCAM_FEATURE_ENHANCED_FEATURES, group); } static int mlx5_get_qcam_reg(struct mlx5_core_dev *dev) @@ -221,8 +221,11 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, pcam_reg)) mlx5_get_pcam_reg(dev); - if (MLX5_CAP_GEN(dev, mcam_reg)) - mlx5_get_mcam_reg(dev); + if (MLX5_CAP_GEN(dev, mcam_reg)) { + mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_FIRST_128); + mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_0x9080_0x90FF); + mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_0x9100_0x917F); + } if (MLX5_CAP_GEN(dev, qcam_reg)) mlx5_get_qcam_reg(dev); @@ -245,6 +248,13 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN_64(dev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) { + err = mlx5_core_get_caps(dev, MLX5_CAP_VDPA_EMULATION); + if (err) + return err; + } + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 3ed8ab2d703d..7c87f523e370 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -87,8 +87,8 @@ int mlx5i_init(struct mlx5_core_dev *mdev, mlx5e_set_netdev_mtu_boundaries(priv); netdev->mtu = netdev->max_mtu; - mlx5e_build_nic_params(mdev, NULL, &priv->rss_params, &priv->channels.params, - priv->max_nch, netdev->mtu); + mlx5e_build_nic_params(priv, NULL, &priv->rss_params, &priv->channels.params, + netdev->mtu); mlx5i_build_nic_params(mdev, &priv->channels.params); mlx5e_timestamp_init(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index b70afa310ad2..416676c35b1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -200,8 +200,6 @@ static void mlx5_lag_fib_update(struct work_struct *work) rtnl_lock(); switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: mlx5_lag_fib_route_event(ldev, fib_work->event, fib_work->fen_info.fi); @@ -259,8 +257,6 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, switch (event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: fen_info = container_of(info, struct fib_entry_notifier_info, info); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 004c56c2fc0c..9359eed10889 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -690,9 +690,9 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, /* get the relevant addresses */ if (!action->dest_tbl.fw_tbl.rx_icm_addr) { - ret = mlx5dr_cmd_query_flow_table(action->dest_tbl.fw_tbl.mdev, - 
action->dest_tbl.fw_tbl.ft->type, - action->dest_tbl.fw_tbl.ft->id, + ret = mlx5dr_cmd_query_flow_table(dmn->mdev, + action->dest_tbl.fw_tbl.type, + action->dest_tbl.fw_tbl.id, &output); if (!ret) { action->dest_tbl.fw_tbl.tx_icm_addr = @@ -982,8 +982,106 @@ dec_ref: } struct mlx5dr_action * -mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft, - struct mlx5_core_dev *mdev) +mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, + struct mlx5dr_action_dest *dests, + u32 num_of_dests) +{ + struct mlx5dr_cmd_flow_destination_hw_info *hw_dests; + struct mlx5dr_action **ref_actions; + struct mlx5dr_action *action; + bool reformat_req = false; + u32 num_of_ref = 0; + int ret; + int i; + + if (dmn->type != MLX5DR_DOMAIN_TYPE_FDB) { + mlx5dr_err(dmn, "Multiple destination support is for FDB only\n"); + return NULL; + } + + hw_dests = kzalloc(sizeof(*hw_dests) * num_of_dests, GFP_KERNEL); + if (!hw_dests) + return NULL; + + ref_actions = kzalloc(sizeof(*ref_actions) * num_of_dests * 2, GFP_KERNEL); + if (!ref_actions) + goto free_hw_dests; + + for (i = 0; i < num_of_dests; i++) { + struct mlx5dr_action *reformat_action = dests[i].reformat; + struct mlx5dr_action *dest_action = dests[i].dest; + + ref_actions[num_of_ref++] = dest_action; + + switch (dest_action->action_type) { + case DR_ACTION_TYP_VPORT: + hw_dests[i].vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; + hw_dests[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + hw_dests[i].vport.num = dest_action->vport.caps->num; + hw_dests[i].vport.vhca_id = dest_action->vport.caps->vhca_gvmi; + if (reformat_action) { + reformat_req = true; + hw_dests[i].vport.reformat_id = + reformat_action->reformat.reformat_id; + ref_actions[num_of_ref++] = reformat_action; + hw_dests[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; + } + break; + + case DR_ACTION_TYP_FT: + hw_dests[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + if (dest_action->dest_tbl.is_fw_tbl) + hw_dests[i].ft_id = dest_action->dest_tbl.fw_tbl.id; 
+ else + hw_dests[i].ft_id = dest_action->dest_tbl.tbl->table_id; + break; + + default: + mlx5dr_dbg(dmn, "Invalid multiple destinations action\n"); + goto free_ref_actions; + } + } + + action = dr_action_create_generic(DR_ACTION_TYP_FT); + if (!action) + goto free_ref_actions; + + ret = mlx5dr_fw_create_md_tbl(dmn, + hw_dests, + num_of_dests, + reformat_req, + &action->dest_tbl.fw_tbl.id, + &action->dest_tbl.fw_tbl.group_id); + if (ret) + goto free_action; + + refcount_inc(&dmn->refcount); + + for (i = 0; i < num_of_ref; i++) + refcount_inc(&ref_actions[i]->refcount); + + action->dest_tbl.is_fw_tbl = true; + action->dest_tbl.fw_tbl.dmn = dmn; + action->dest_tbl.fw_tbl.type = FS_FT_FDB; + action->dest_tbl.fw_tbl.ref_actions = ref_actions; + action->dest_tbl.fw_tbl.num_of_ref_actions = num_of_ref; + + kfree(hw_dests); + + return action; + +free_action: + kfree(action); +free_ref_actions: + kfree(ref_actions); +free_hw_dests: + kfree(hw_dests); + return NULL; +} + +struct mlx5dr_action * +mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *dmn, + struct mlx5_flow_table *ft) { struct mlx5dr_action *action; @@ -992,8 +1090,11 @@ mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft, return NULL; action->dest_tbl.is_fw_tbl = 1; - action->dest_tbl.fw_tbl.ft = ft; - action->dest_tbl.fw_tbl.mdev = mdev; + action->dest_tbl.fw_tbl.type = ft->type; + action->dest_tbl.fw_tbl.id = ft->id; + action->dest_tbl.fw_tbl.dmn = dmn; + + refcount_inc(&dmn->refcount); return action; } @@ -1559,8 +1660,26 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action) switch (action->action_type) { case DR_ACTION_TYP_FT: - if (!action->dest_tbl.is_fw_tbl) + if (action->dest_tbl.is_fw_tbl) + refcount_dec(&action->dest_tbl.fw_tbl.dmn->refcount); + else refcount_dec(&action->dest_tbl.tbl->refcount); + + if (action->dest_tbl.is_fw_tbl && + action->dest_tbl.fw_tbl.num_of_ref_actions) { + struct mlx5dr_action **ref_actions; + int i; + + ref_actions = 
action->dest_tbl.fw_tbl.ref_actions; + for (i = 0; i < action->dest_tbl.fw_tbl.num_of_ref_actions; i++) + refcount_dec(&ref_actions[i]->refcount); + + kfree(ref_actions); + + mlx5dr_fw_destroy_md_tbl(action->dest_tbl.fw_tbl.dmn, + action->dest_tbl.fw_tbl.id, + action->dest_tbl.fw_tbl.group_id); + } break; case DR_ACTION_TYP_TNL_L2_TO_L2: refcount_dec(&action->reformat.dmn->refcount); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c index 41662c4e2664..461b39376daf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -320,12 +320,7 @@ int mlx5dr_cmd_destroy_flow_group(struct mlx5_core_dev *mdev, } int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev, - u32 table_type, - u64 icm_addr_rx, - u64 icm_addr_tx, - u8 level, - bool sw_owner, - bool term_tbl, + struct mlx5dr_cmd_create_flow_table_attr *attr, u64 *fdb_rx_icm_addr, u32 *table_id) { @@ -335,37 +330,43 @@ int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev, int err; MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE); - MLX5_SET(create_flow_table_in, in, table_type, table_type); + MLX5_SET(create_flow_table_in, in, table_type, attr->table_type); ft_mdev = MLX5_ADDR_OF(create_flow_table_in, in, flow_table_context); - MLX5_SET(flow_table_context, ft_mdev, termination_table, term_tbl); - MLX5_SET(flow_table_context, ft_mdev, sw_owner, sw_owner); - MLX5_SET(flow_table_context, ft_mdev, level, level); + MLX5_SET(flow_table_context, ft_mdev, termination_table, attr->term_tbl); + MLX5_SET(flow_table_context, ft_mdev, sw_owner, attr->sw_owner); + MLX5_SET(flow_table_context, ft_mdev, level, attr->level); - if (sw_owner) { + if (attr->sw_owner) { /* icm_addr_0 used for FDB RX / NIC TX / NIC_RX * icm_addr_1 used for FDB TX */ - if (table_type == MLX5_FLOW_TABLE_TYPE_NIC_RX) { + if (attr->table_type == 
MLX5_FLOW_TABLE_TYPE_NIC_RX) { MLX5_SET64(flow_table_context, ft_mdev, - sw_owner_icm_root_0, icm_addr_rx); - } else if (table_type == MLX5_FLOW_TABLE_TYPE_NIC_TX) { + sw_owner_icm_root_0, attr->icm_addr_rx); + } else if (attr->table_type == MLX5_FLOW_TABLE_TYPE_NIC_TX) { MLX5_SET64(flow_table_context, ft_mdev, - sw_owner_icm_root_0, icm_addr_tx); - } else if (table_type == MLX5_FLOW_TABLE_TYPE_FDB) { + sw_owner_icm_root_0, attr->icm_addr_tx); + } else if (attr->table_type == MLX5_FLOW_TABLE_TYPE_FDB) { MLX5_SET64(flow_table_context, ft_mdev, - sw_owner_icm_root_0, icm_addr_rx); + sw_owner_icm_root_0, attr->icm_addr_rx); MLX5_SET64(flow_table_context, ft_mdev, - sw_owner_icm_root_1, icm_addr_tx); + sw_owner_icm_root_1, attr->icm_addr_tx); } } + MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en, + attr->decap_en); + MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en, + attr->reformat_en); + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); if (err) return err; *table_id = MLX5_GET(create_flow_table_out, out, table_id); - if (!sw_owner && table_type == MLX5_FLOW_TABLE_TYPE_FDB) + if (!attr->sw_owner && attr->table_type == MLX5_FLOW_TABLE_TYPE_FDB && + fdb_rx_icm_addr) *fdb_rx_icm_addr = (u64)MLX5_GET(create_flow_table_out, out, icm_address_31_0) | (u64)MLX5_GET(create_flow_table_out, out, icm_address_39_32) << 32 | @@ -478,3 +479,208 @@ int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num, return 0; } + +static int mlx5dr_cmd_set_extended_dest(struct mlx5_core_dev *dev, + struct mlx5dr_cmd_fte_info *fte, + bool *extended_dest) +{ + int fw_log_max_fdb_encap_uplink = MLX5_CAP_ESW(dev, log_max_fdb_encap_uplink); + int num_fwd_destinations = 0; + int num_encap = 0; + int i; + + *extended_dest = false; + if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return 0; + for (i = 0; i < fte->dests_size; i++) { + if (fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + if 
(fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + fte->dest_arr[i].vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) + num_encap++; + num_fwd_destinations++; + } + + if (num_fwd_destinations > 1 && num_encap > 0) + *extended_dest = true; + + if (*extended_dest && !fw_log_max_fdb_encap_uplink) { + mlx5_core_warn(dev, "FW does not support extended destination"); + return -EOPNOTSUPP; + } + if (num_encap > (1 << fw_log_max_fdb_encap_uplink)) { + mlx5_core_warn(dev, "FW does not support more than %d encaps", + 1 << fw_log_max_fdb_encap_uplink); + return -EOPNOTSUPP; + } + + return 0; +} + +int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev, + int opmod, int modify_mask, + struct mlx5dr_cmd_ft_info *ft, + u32 group_id, + struct mlx5dr_cmd_fte_info *fte) +{ + u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {}; + void *in_flow_context, *vlan; + bool extended_dest = false; + void *in_match_value; + unsigned int inlen; + int dst_cnt_size; + void *in_dests; + u32 *in; + int err; + int i; + + if (mlx5dr_cmd_set_extended_dest(dev, fte, &extended_dest)) + return -EOPNOTSUPP; + + if (!extended_dest) + dst_cnt_size = MLX5_ST_SZ_BYTES(dest_format_struct); + else + dst_cnt_size = MLX5_ST_SZ_BYTES(extended_dest_format); + + inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->dests_size * dst_cnt_size; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); + MLX5_SET(set_fte_in, in, op_mod, opmod); + MLX5_SET(set_fte_in, in, modify_enable_mask, modify_mask); + MLX5_SET(set_fte_in, in, table_type, ft->type); + MLX5_SET(set_fte_in, in, table_id, ft->id); + MLX5_SET(set_fte_in, in, flow_index, fte->index); + if (ft->vport) { + MLX5_SET(set_fte_in, in, vport_number, ft->vport); + MLX5_SET(set_fte_in, in, other_vport, 1); + } + + in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); + MLX5_SET(flow_context, in_flow_context, group_id, group_id); + + MLX5_SET(flow_context, in_flow_context, flow_tag, + 
fte->flow_context.flow_tag); + MLX5_SET(flow_context, in_flow_context, flow_source, + fte->flow_context.flow_source); + + MLX5_SET(flow_context, in_flow_context, extended_destination, + extended_dest); + if (extended_dest) { + u32 action; + + action = fte->action.action & + ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + MLX5_SET(flow_context, in_flow_context, action, action); + } else { + MLX5_SET(flow_context, in_flow_context, action, + fte->action.action); + if (fte->action.pkt_reformat) + MLX5_SET(flow_context, in_flow_context, packet_reformat_id, + fte->action.pkt_reformat->id); + } + if (fte->action.modify_hdr) + MLX5_SET(flow_context, in_flow_context, modify_header_id, + fte->action.modify_hdr->id); + + vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan); + + MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[0].ethtype); + MLX5_SET(vlan, vlan, vid, fte->action.vlan[0].vid); + MLX5_SET(vlan, vlan, prio, fte->action.vlan[0].prio); + + vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan_2); + + MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[1].ethtype); + MLX5_SET(vlan, vlan, vid, fte->action.vlan[1].vid); + MLX5_SET(vlan, vlan, prio, fte->action.vlan[1].prio); + + in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, + match_value); + memcpy(in_match_value, fte->val, sizeof(u32) * MLX5_ST_SZ_DW_MATCH_PARAM); + + in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + int list_size = 0; + + for (i = 0; i < fte->dests_size; i++) { + unsigned int id, type = fte->dest_arr[i].type; + + if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + switch (type) { + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: + id = fte->dest_arr[i].ft_num; + type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: + id = fte->dest_arr[i].ft_id; + break; + case MLX5_FLOW_DESTINATION_TYPE_VPORT: + id = fte->dest_arr[i].vport.num; + 
MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id_valid, + !!(fte->dest_arr[i].vport.flags & + MLX5_FLOW_DEST_VPORT_VHCA_ID)); + MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id, + fte->dest_arr[i].vport.vhca_id); + if (extended_dest && (fte->dest_arr[i].vport.flags & + MLX5_FLOW_DEST_VPORT_REFORMAT_ID)) { + MLX5_SET(dest_format_struct, in_dests, + packet_reformat, + !!(fte->dest_arr[i].vport.flags & + MLX5_FLOW_DEST_VPORT_REFORMAT_ID)); + MLX5_SET(extended_dest_format, in_dests, + packet_reformat_id, + fte->dest_arr[i].vport.reformat_id); + } + break; + default: + id = fte->dest_arr[i].tir_num; + } + + MLX5_SET(dest_format_struct, in_dests, destination_type, + type); + MLX5_SET(dest_format_struct, in_dests, destination_id, id); + in_dests += dst_cnt_size; + list_size++; + } + + MLX5_SET(flow_context, in_flow_context, destination_list_size, + list_size); + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + int max_list_size = BIT(MLX5_CAP_FLOWTABLE_TYPE(dev, + log_max_flow_counter, + ft->type)); + int list_size = 0; + + for (i = 0; i < fte->dests_size; i++) { + if (fte->dest_arr[i].type != + MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + MLX5_SET(flow_counter_list, in_dests, flow_counter_id, + fte->dest_arr[i].counter_id); + in_dests += dst_cnt_size; + list_size++; + } + if (list_size > max_list_size) { + err = -EINVAL; + goto err_out; + } + + MLX5_SET(flow_context, in_flow_context, flow_counter_list_size, + list_size); + } + + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +err_out: + kvfree(in); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c index 60ef6e6171e3..1fbcd012bb85 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c @@ -7,6 +7,7 @@ struct mlx5dr_fw_recalc_cs_ft * mlx5dr_fw_create_recalc_cs_ft(struct 
mlx5dr_domain *dmn, u32 vport_num) { + struct mlx5dr_cmd_create_flow_table_attr ft_attr = {}; struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft; u32 table_id, group_id, modify_hdr_id; u64 rx_icm_addr, modify_ttl_action; @@ -16,9 +17,14 @@ mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u32 vport_num) if (!recalc_cs_ft) return NULL; - ret = mlx5dr_cmd_create_flow_table(dmn->mdev, MLX5_FLOW_TABLE_TYPE_FDB, - 0, 0, dmn->info.caps.max_ft_level - 1, - false, true, &rx_icm_addr, &table_id); + ft_attr.table_type = MLX5_FLOW_TABLE_TYPE_FDB; + ft_attr.level = dmn->info.caps.max_ft_level - 1; + ft_attr.term_tbl = true; + + ret = mlx5dr_cmd_create_flow_table(dmn->mdev, + &ft_attr, + &rx_icm_addr, + &table_id); if (ret) { mlx5dr_err(dmn, "Failed creating TTL W/A FW flow table %d\n", ret); goto free_ttl_tbl; @@ -91,3 +97,70 @@ void mlx5dr_fw_destroy_recalc_cs_ft(struct mlx5dr_domain *dmn, kfree(recalc_cs_ft); } + +int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn, + struct mlx5dr_cmd_flow_destination_hw_info *dest, + int num_dest, + bool reformat_req, + u32 *tbl_id, + u32 *group_id) +{ + struct mlx5dr_cmd_create_flow_table_attr ft_attr = {}; + struct mlx5dr_cmd_fte_info fte_info = {}; + u32 val[MLX5_ST_SZ_DW_MATCH_PARAM] = {}; + struct mlx5dr_cmd_ft_info ft_info = {}; + int ret; + + ft_attr.table_type = MLX5_FLOW_TABLE_TYPE_FDB; + ft_attr.level = dmn->info.caps.max_ft_level - 2; + ft_attr.reformat_en = reformat_req; + ft_attr.decap_en = reformat_req; + + ret = mlx5dr_cmd_create_flow_table(dmn->mdev, &ft_attr, NULL, tbl_id); + if (ret) { + mlx5dr_err(dmn, "Failed creating multi dest FW flow table %d\n", ret); + return ret; + } + + ret = mlx5dr_cmd_create_empty_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + *tbl_id, group_id); + if (ret) { + mlx5dr_err(dmn, "Failed creating multi dest FW flow group %d\n", ret); + goto free_flow_table; + } + + ft_info.id = *tbl_id; + ft_info.type = FS_FT_FDB; + fte_info.action.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + 
fte_info.dests_size = num_dest; + fte_info.val = val; + fte_info.dest_arr = dest; + + ret = mlx5dr_cmd_set_fte(dmn->mdev, 0, 0, &ft_info, *group_id, &fte_info); + if (ret) { + mlx5dr_err(dmn, "Failed setting fte into table %d\n", ret); + goto free_flow_group; + } + + return 0; + +free_flow_group: + mlx5dr_cmd_destroy_flow_group(dmn->mdev, MLX5_FLOW_TABLE_TYPE_FDB, + *tbl_id, *group_id); +free_flow_table: + mlx5dr_cmd_destroy_flow_table(dmn->mdev, *tbl_id, + MLX5_FLOW_TABLE_TYPE_FDB); + return ret; +} + +void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, + u32 tbl_id, u32 group_id) +{ + mlx5dr_cmd_del_flow_table_entry(dmn->mdev, FS_FT_FDB, tbl_id); + mlx5dr_cmd_destroy_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + tbl_id, group_id); + mlx5dr_cmd_destroy_flow_table(dmn->mdev, tbl_id, + MLX5_FLOW_TABLE_TYPE_FDB); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c index e178d8d3dbc9..14ce2d7dbb66 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c @@ -211,6 +211,9 @@ static int dr_table_destroy_sw_owned_tbl(struct mlx5dr_table *tbl) static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl) { + bool en_encap = !!(tbl->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT); + bool en_decap = !!(tbl->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + struct mlx5dr_cmd_create_flow_table_attr ft_attr = {}; u64 icm_addr_rx = 0; u64 icm_addr_tx = 0; int ret; @@ -221,18 +224,21 @@ static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl) if (tbl->tx.s_anchor) icm_addr_tx = tbl->tx.s_anchor->chunk->icm_addr; - ret = mlx5dr_cmd_create_flow_table(tbl->dmn->mdev, - tbl->table_type, - icm_addr_rx, - icm_addr_tx, - tbl->dmn->info.caps.max_ft_level - 1, - true, false, NULL, - &tbl->table_id); + ft_attr.table_type = tbl->table_type; + ft_attr.icm_addr_rx = icm_addr_rx; + ft_attr.icm_addr_tx = 
icm_addr_tx; + ft_attr.level = tbl->dmn->info.caps.max_ft_level - 1; + ft_attr.sw_owner = true; + ft_attr.decap_en = en_decap; + ft_attr.reformat_en = en_encap; + + ret = mlx5dr_cmd_create_flow_table(tbl->dmn->mdev, &ft_attr, + NULL, &tbl->table_id); return ret; } -struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level) +struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level, u32 flags) { struct mlx5dr_table *tbl; int ret; @@ -245,6 +251,7 @@ struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level) tbl->dmn = dmn; tbl->level = level; + tbl->flags = flags; refcount_set(&tbl->refcount, 1); ret = dr_table_init(tbl); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 3fdf4a5eb031..dffe35145d19 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -681,6 +681,7 @@ struct mlx5dr_table { u32 level; u32 table_type; u32 table_id; + u32 flags; struct list_head matcher_list; struct mlx5dr_action *miss_action; refcount_t refcount; @@ -744,10 +745,14 @@ struct mlx5dr_action { union { struct mlx5dr_table *tbl; struct { - struct mlx5_flow_table *ft; + struct mlx5dr_domain *dmn; + u32 id; + u32 group_id; + enum fs_flow_table_type type; u64 rx_icm_addr; u64 tx_icm_addr; - struct mlx5_core_dev *mdev; + struct mlx5dr_action **ref_actions; + u32 num_of_ref_actions; } fw_tbl; }; } dest_tbl; @@ -869,6 +874,17 @@ struct mlx5dr_cmd_query_flow_table_details { u64 sw_owner_icm_root_0; }; +struct mlx5dr_cmd_create_flow_table_attr { + u32 table_type; + u64 icm_addr_rx; + u64 icm_addr_tx; + u8 level; + bool sw_owner; + bool term_tbl; + bool decap_en; + bool reformat_en; +}; + /* internal API functions */ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, struct mlx5dr_cmd_caps *caps); @@ -906,12 +922,7 @@ int mlx5dr_cmd_destroy_flow_group(struct 
mlx5_core_dev *mdev, u32 table_id, u32 group_id); int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev, - u32 table_type, - u64 icm_addr_rx, - u64 icm_addr_tx, - u8 level, - bool sw_owner, - bool term_tbl, + struct mlx5dr_cmd_create_flow_table_attr *attr, u64 *fdb_rx_icm_addr, u32 *table_id); int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev, @@ -1053,6 +1064,43 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn, int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, struct mlx5dr_action *action); +struct mlx5dr_cmd_ft_info { + u32 id; + u16 vport; + enum fs_flow_table_type type; +}; + +struct mlx5dr_cmd_flow_destination_hw_info { + enum mlx5_flow_destination_type type; + union { + u32 tir_num; + u32 ft_num; + u32 ft_id; + u32 counter_id; + struct { + u16 num; + u16 vhca_id; + u32 reformat_id; + u8 flags; + } vport; + }; +}; + +struct mlx5dr_cmd_fte_info { + u32 dests_size; + u32 index; + struct mlx5_flow_context flow_context; + u32 *val; + struct mlx5_flow_act action; + struct mlx5dr_cmd_flow_destination_hw_info *dest_arr; +}; + +int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev, + int opmod, int modify_mask, + struct mlx5dr_cmd_ft_info *ft, + u32 group_id, + struct mlx5dr_cmd_fte_info *fte); + struct mlx5dr_fw_recalc_cs_ft { u64 rx_icm_addr; u32 table_id; @@ -1067,4 +1115,12 @@ void mlx5dr_fw_destroy_recalc_cs_ft(struct mlx5dr_domain *dmn, int mlx5dr_domain_cache_get_recalc_cs_ft_addr(struct mlx5dr_domain *dmn, u32 vport_num, u64 *rx_icm_addr); +int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn, + struct mlx5dr_cmd_flow_destination_hw_info *dest, + int num_dest, + bool reformat_req, + u32 *tbl_id, + u32 *group_id); +void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, u32 tbl_id, + u32 group_id); #endif /* _DR_TYPES_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 3d587d0bdbbe..b43275cde8bf 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -74,7 +74,7 @@ static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns, next_ft); tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, - ft->level); + ft->level, ft->flags); if (!tbl) { mlx5_core_err(ns->dev, "Failed creating dr flow_table\n"); return -EINVAL; @@ -184,13 +184,13 @@ static struct mlx5dr_action *create_vport_action(struct mlx5dr_domain *domain, dest_attr->vport.vhca_id); } -static struct mlx5dr_action *create_ft_action(struct mlx5_core_dev *dev, +static struct mlx5dr_action *create_ft_action(struct mlx5dr_domain *domain, struct mlx5_flow_rule *dst) { struct mlx5_flow_table *dest_ft = dst->dest_attr.ft; if (mlx5_dr_is_fw_table(dest_ft->flags)) - return mlx5dr_create_action_dest_flow_fw_table(dest_ft, dev); + return mlx5dr_action_create_dest_flow_fw_table(domain, dest_ft); return mlx5dr_action_create_dest_table(dest_ft->fs_dr_table.dr_table); } @@ -206,6 +206,12 @@ static struct mlx5dr_action *create_action_push_vlan(struct mlx5dr_domain *domai return mlx5dr_action_create_push_vlan(domain, htonl(vlan_hdr)); } +static bool contain_vport_reformat_action(struct mlx5_flow_rule *dst) +{ + return dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID; +} + #define MLX5_FLOW_CONTEXT_ACTION_MAX 20 static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, @@ -213,7 +219,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, struct fs_fte *fte) { struct mlx5dr_domain *domain = ns->fs_dr_domain.dr_domain; - struct mlx5dr_action *term_action = NULL; + struct mlx5dr_action_dest *term_actions; struct mlx5dr_match_parameters params; struct mlx5_core_dev *dev = ns->dev; struct mlx5dr_action **fs_dr_actions; @@ -223,6 +229,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, 
struct mlx5dr_rule *rule; struct mlx5_flow_rule *dst; int fs_dr_num_actions = 0; + int num_term_actions = 0; int num_actions = 0; size_t match_sz; int err = 0; @@ -233,18 +240,38 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, sizeof(*actions), GFP_KERNEL); - if (!actions) - return -ENOMEM; + if (!actions) { + err = -ENOMEM; + goto out_err; + } fs_dr_actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, sizeof(*fs_dr_actions), GFP_KERNEL); if (!fs_dr_actions) { - kfree(actions); - return -ENOMEM; + err = -ENOMEM; + goto free_actions_alloc; + } + + term_actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, + sizeof(*term_actions), GFP_KERNEL); + if (!term_actions) { + err = -ENOMEM; + goto free_fs_dr_actions_alloc; } match_sz = sizeof(fte->val); + /* Drop reformat action bit if destination vport set with reformat */ + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + list_for_each_entry(dst, &fte->node.children, node.list) { + if (!contain_vport_reformat_action(dst)) + continue; + + fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + break; + } + } + /* The order of the actions are must to be keep, only the following * order is supported by SW steering: * TX: push vlan -> modify header -> encap @@ -335,7 +362,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, goto free_actions; } fs_dr_actions[fs_dr_num_actions++] = tmp_action; - term_action = tmp_action; + term_actions[num_term_actions++].dest = tmp_action; } if (fte->flow_context.flow_tag) { @@ -354,7 +381,8 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, enum mlx5_flow_destination_type type = dst->dest_attr.type; u32 id; - if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || + num_term_actions >= MLX5_FLOW_CONTEXT_ACTION_MAX) { err = -ENOSPC; goto free_actions; } @@ -373,13 +401,13 @@ static int 
mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, actions[num_actions++] = tmp_action; break; case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: - tmp_action = create_ft_action(dev, dst); + tmp_action = create_ft_action(domain, dst); if (!tmp_action) { err = -ENOMEM; goto free_actions; } fs_dr_actions[fs_dr_num_actions++] = tmp_action; - term_action = tmp_action; + term_actions[num_term_actions++].dest = tmp_action; break; case MLX5_FLOW_DESTINATION_TYPE_VPORT: tmp_action = create_vport_action(domain, dst); @@ -388,7 +416,14 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, goto free_actions; } fs_dr_actions[fs_dr_num_actions++] = tmp_action; - term_action = tmp_action; + term_actions[num_term_actions].dest = tmp_action; + + if (dst->dest_attr.vport.flags & + MLX5_FLOW_DEST_VPORT_REFORMAT_ID) + term_actions[num_term_actions].reformat = + dst->dest_attr.vport.pkt_reformat->action.dr_action; + + num_term_actions++; break; default: err = -EOPNOTSUPP; @@ -399,9 +434,22 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, params.match_sz = match_sz; params.match_buf = (u64 *)fte->val; - - if (term_action) - actions[num_actions++] = term_action; + if (num_term_actions == 1) { + if (term_actions->reformat) + actions[num_actions++] = term_actions->reformat; + + actions[num_actions++] = term_actions->dest; + } else if (num_term_actions > 1) { + tmp_action = mlx5dr_action_create_mult_dest_tbl(domain, + term_actions, + num_term_actions); + if (!tmp_action) { + err = -EOPNOTSUPP; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } rule = mlx5dr_rule_create(group->fs_dr_matcher.dr_matcher, ¶ms, @@ -412,7 +460,9 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, goto free_actions; } + kfree(term_actions); kfree(actions); + fte->fs_dr_rule.dr_rule = rule; fte->fs_dr_rule.num_actions = fs_dr_num_actions; fte->fs_dr_rule.dr_actions = 
fs_dr_actions; @@ -420,13 +470,18 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, return 0; free_actions: - for (i = 0; i < fs_dr_num_actions; i++) + /* Free in reverse order to handle action dependencies */ + for (i = fs_dr_num_actions - 1; i >= 0; i--) if (!IS_ERR_OR_NULL(fs_dr_actions[i])) mlx5dr_action_destroy(fs_dr_actions[i]); - mlx5_core_err(dev, "Failed to create dr rule err(%d)\n", err); - kfree(actions); + kfree(term_actions); +free_fs_dr_actions_alloc: kfree(fs_dr_actions); +free_actions_alloc: + kfree(actions); +out_err: + mlx5_core_err(dev, "Failed to create dr rule err(%d)\n", err); return err; } @@ -533,7 +588,8 @@ static int mlx5_cmd_dr_delete_fte(struct mlx5_flow_root_namespace *ns, if (err) return err; - for (i = 0; i < rule->num_actions; i++) + /* Free in reverse order to handle action dependencies */ + for (i = rule->num_actions - 1; i >= 0; i--) if (!IS_ERR_OR_NULL(rule->dr_actions[i])) mlx5dr_action_destroy(rule->dr_actions[i]); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index adda9cbfba45..e1edc9c247b7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -33,6 +33,11 @@ struct mlx5dr_match_parameters { u64 *match_buf; /* Device spec format */ }; +struct mlx5dr_action_dest { + struct mlx5dr_action *dest; + struct mlx5dr_action *reformat; +}; + #ifdef CONFIG_MLX5_SW_STEERING struct mlx5dr_domain * @@ -46,7 +51,7 @@ void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, struct mlx5dr_domain *peer_dmn); struct mlx5dr_table * -mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level); +mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags); int mlx5dr_table_destroy(struct mlx5dr_table *table); @@ -75,14 +80,19 @@ struct mlx5dr_action * mlx5dr_action_create_dest_table(struct mlx5dr_table *table); struct mlx5dr_action * 
-mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft, - struct mlx5_core_dev *mdev); +mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *domain, + struct mlx5_flow_table *ft); struct mlx5dr_action * mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain, u32 vport, u8 vhca_id_valid, u16 vhca_id); +struct mlx5dr_action * +mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, + struct mlx5dr_action_dest *dests, + u32 num_of_dests); + struct mlx5dr_action *mlx5dr_action_create_drop(void); struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value); @@ -131,7 +141,7 @@ mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, struct mlx5dr_domain *peer_dmn) { } static inline struct mlx5dr_table * -mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level) { return NULL; } +mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags) { return NULL; } static inline int mlx5dr_table_destroy(struct mlx5dr_table *table) { return 0; } @@ -165,8 +175,8 @@ static inline struct mlx5dr_action * mlx5dr_action_create_dest_table(struct mlx5dr_table *table) { return NULL; } static inline struct mlx5dr_action * -mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft, - struct mlx5_core_dev *mdev) { return NULL; } +mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *domain, + struct mlx5_flow_table *ft) { return NULL; } static inline struct mlx5dr_action * mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain, @@ -174,6 +184,11 @@ mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain, u16 vhca_id) { return NULL; } static inline struct mlx5dr_action * +mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, + struct mlx5dr_action_dest *dests, + u32 num_of_dests) { return NULL; } + +static inline struct mlx5dr_action * mlx5dr_action_create_drop(void) { return NULL; } static inline struct mlx5dr_action * diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h index af30e8a76682..0b80e75e87c3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3477,10 +3477,10 @@ MLXSW_REG_DEFINE(qeec, MLXSW_REG_QEEC_ID, MLXSW_REG_QEEC_LEN); MLXSW_ITEM32(reg, qeec, local_port, 0x00, 16, 8); enum mlxsw_reg_qeec_hr { - MLXSW_REG_QEEC_HIERARCY_PORT, - MLXSW_REG_QEEC_HIERARCY_GROUP, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_PORT, + MLXSW_REG_QEEC_HR_GROUP, + MLXSW_REG_QEEC_HR_SUBGROUP, + MLXSW_REG_QEEC_HR_TC, }; /* reg_qeec_element_hierarchy @@ -3618,8 +3618,7 @@ static inline void mlxsw_reg_qeec_ptps_pack(char *payload, u8 local_port, { MLXSW_REG_ZERO(qeec, payload); mlxsw_reg_qeec_local_port_set(payload, local_port); - mlxsw_reg_qeec_element_hierarchy_set(payload, - MLXSW_REG_QEEC_HIERARCY_PORT); + mlxsw_reg_qeec_element_hierarchy_set(payload, MLXSW_REG_QEEC_HR_PORT); mlxsw_reg_qeec_ptps_set(payload, ptps); } @@ -3749,6 +3748,38 @@ mlxsw_reg_qpdsm_prio_pack(char *payload, unsigned short prio, u8 dscp) mlxsw_reg_qpdsm_prio_entry_color2_dscp_set(payload, prio, dscp); } +/* QPDP - QoS Port DSCP to Priority Mapping Register + * ------------------------------------------------- + * This register controls the port default Switch Priority and Color. The + * default Switch Priority and Color are used for frames where the trust state + * uses default values. All member ports of a LAG should be configured with the + * same default values. + */ +#define MLXSW_REG_QPDP_ID 0x4007 +#define MLXSW_REG_QPDP_LEN 0x8 + +MLXSW_REG_DEFINE(qpdp, MLXSW_REG_QPDP_ID, MLXSW_REG_QPDP_LEN); + +/* reg_qpdp_local_port + * Local Port. Supported for data packets from CPU port. 
+ * Access: Index + */ +MLXSW_ITEM32(reg, qpdp, local_port, 0x00, 16, 8); + +/* reg_qpdp_switch_prio + * Default port Switch Priority (default 0) + * Access: RW + */ +MLXSW_ITEM32(reg, qpdp, switch_prio, 0x04, 0, 4); + +static inline void mlxsw_reg_qpdp_pack(char *payload, u8 local_port, + u8 switch_prio) +{ + MLXSW_REG_ZERO(qpdp, payload); + mlxsw_reg_qpdp_local_port_set(payload, local_port); + mlxsw_reg_qpdp_switch_prio_set(payload, switch_prio); +} + /* QPDPM - QoS Port DSCP to Priority Mapping Register * -------------------------------------------------- * This register controls the mapping from DSCP field to @@ -5482,6 +5513,7 @@ enum mlxsw_reg_htgt_discard_trap_group { MLXSW_REG_HTGT_DISCARD_TRAP_GROUP_BASE = MLXSW_REG_HTGT_TRAP_GROUP_MAX, MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS, MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_DISCARDS, + MLXSW_REG_HTGT_TRAP_GROUP_SP_TUNNEL_DISCARDS, }; /* reg_htgt_trap_group @@ -10109,6 +10141,92 @@ static inline void mlxsw_reg_tigcr_pack(char *payload, bool ttlc, u8 ttl_uc) mlxsw_reg_tigcr_ttl_uc_set(payload, ttl_uc); } +/* TIEEM - Tunneling IPinIP Encapsulation ECN Mapping Register + * ----------------------------------------------------------- + * The TIEEM register maps ECN of the IP header at the ingress to the + * encapsulation to the ECN of the underlay network. + */ +#define MLXSW_REG_TIEEM_ID 0xA812 +#define MLXSW_REG_TIEEM_LEN 0x0C + +MLXSW_REG_DEFINE(tieem, MLXSW_REG_TIEEM_ID, MLXSW_REG_TIEEM_LEN); + +/* reg_tieem_overlay_ecn + * ECN of the IP header in the overlay network. + * Access: Index + */ +MLXSW_ITEM32(reg, tieem, overlay_ecn, 0x04, 24, 2); + +/* reg_tineem_underlay_ecn + * ECN of the IP header in the underlay network. 
+ * Access: RW + */ +MLXSW_ITEM32(reg, tieem, underlay_ecn, 0x04, 16, 2); + +static inline void mlxsw_reg_tieem_pack(char *payload, u8 overlay_ecn, + u8 underlay_ecn) +{ + MLXSW_REG_ZERO(tieem, payload); + mlxsw_reg_tieem_overlay_ecn_set(payload, overlay_ecn); + mlxsw_reg_tieem_underlay_ecn_set(payload, underlay_ecn); +} + +/* TIDEM - Tunneling IPinIP Decapsulation ECN Mapping Register + * ----------------------------------------------------------- + * The TIDEM register configures the actions that are done in the + * decapsulation. + */ +#define MLXSW_REG_TIDEM_ID 0xA813 +#define MLXSW_REG_TIDEM_LEN 0x0C + +MLXSW_REG_DEFINE(tidem, MLXSW_REG_TIDEM_ID, MLXSW_REG_TIDEM_LEN); + +/* reg_tidem_underlay_ecn + * ECN field of the IP header in the underlay network. + * Access: Index + */ +MLXSW_ITEM32(reg, tidem, underlay_ecn, 0x04, 24, 2); + +/* reg_tidem_overlay_ecn + * ECN field of the IP header in the overlay network. + * Access: Index + */ +MLXSW_ITEM32(reg, tidem, overlay_ecn, 0x04, 16, 2); + +/* reg_tidem_eip_ecn + * Egress IP ECN. ECN field of the IP header of the packet which goes out + * from the decapsulation. + * Access: RW + */ +MLXSW_ITEM32(reg, tidem, eip_ecn, 0x04, 8, 2); + +/* reg_tidem_trap_en + * Trap enable: + * 0 - No trap due to decap ECN + * 1 - Trap enable with trap_id + * Access: RW + */ +MLXSW_ITEM32(reg, tidem, trap_en, 0x08, 28, 4); + +/* reg_tidem_trap_id + * Trap ID. Either DECAP_ECN0 or DECAP_ECN1. + * Reserved when trap_en is '0'. 
+ * Access: RW + */ +MLXSW_ITEM32(reg, tidem, trap_id, 0x08, 0, 9); + +static inline void mlxsw_reg_tidem_pack(char *payload, u8 underlay_ecn, + u8 overlay_ecn, u8 eip_ecn, + bool trap_en, u16 trap_id) +{ + MLXSW_REG_ZERO(tidem, payload); + mlxsw_reg_tidem_underlay_ecn_set(payload, underlay_ecn); + mlxsw_reg_tidem_overlay_ecn_set(payload, overlay_ecn); + mlxsw_reg_tidem_eip_ecn_set(payload, eip_ecn); + mlxsw_reg_tidem_trap_en_set(payload, trap_en); + mlxsw_reg_tidem_trap_id_set(payload, trap_id); +} + /* SBPR - Shared Buffer Pools Register * ----------------------------------- * The SBPR configures and retrieves the shared buffer pools and configuration. @@ -10581,6 +10699,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(qeec), MLXSW_REG(qrwe), MLXSW_REG(qpdsm), + MLXSW_REG(qpdp), MLXSW_REG(qpdpm), MLXSW_REG(qtctm), MLXSW_REG(qpsc), @@ -10652,6 +10771,8 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(tndem), MLXSW_REG(tnpc), MLXSW_REG(tigcr), + MLXSW_REG(tieem), + MLXSW_REG(tidem), MLXSW_REG(sbpr), MLXSW_REG(sbcm), MLXSW_REG(sbpm), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 8ed15199eb4f..8639f32ec4d5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -45,11 +45,9 @@ #include "spectrum_ptp.h" #include "../mlxfw/mlxfw.h" -#define MLXSW_SP_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100) - #define MLXSW_SP1_FWREV_MAJOR 13 #define MLXSW_SP1_FWREV_MINOR 2000 -#define MLXSW_SP1_FWREV_SUBMINOR 2308 +#define MLXSW_SP1_FWREV_SUBMINOR 2714 #define MLXSW_SP1_FWREV_CAN_RESET_MINOR 1702 static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = { @@ -66,7 +64,7 @@ static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = { #define MLXSW_SP2_FWREV_MAJOR 29 #define MLXSW_SP2_FWREV_MINOR 2000 -#define MLXSW_SP2_FWREV_SUBMINOR 2308 +#define MLXSW_SP2_FWREV_SUBMINOR 2714 static const struct mlxsw_fw_rev 
mlxsw_sp2_fw_rev = { .major = MLXSW_SP2_FWREV_MAJOR, @@ -423,13 +421,12 @@ static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp) rev->major, req_rev->major); return -EINVAL; } - if (MLXSW_SP_FWREV_MINOR_TO_BRANCH(rev->minor) == - MLXSW_SP_FWREV_MINOR_TO_BRANCH(req_rev->minor) && - mlxsw_core_fw_rev_minor_subminor_validate(rev, req_rev)) + if (mlxsw_core_fw_rev_minor_subminor_validate(rev, req_rev)) return 0; - dev_info(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver\n", - rev->major, rev->minor, rev->subminor); + dev_err(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver (required >= %d.%d.%d)\n", + rev->major, rev->minor, rev->subminor, req_rev->major, + req_rev->minor, req_rev->subminor); dev_info(mlxsw_sp->bus_info->dev, "Flashing firmware using file %s\n", fw_filename); @@ -1793,6 +1790,8 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type, return mlxsw_sp_setup_tc_red(mlxsw_sp_port, type_data); case TC_SETUP_QDISC_PRIO: return mlxsw_sp_setup_tc_prio(mlxsw_sp_port, type_data); + case TC_SETUP_QDISC_ETS: + return mlxsw_sp_setup_tc_ets(mlxsw_sp_port, type_data); default: return -EOPNOTSUPP; } @@ -3599,26 +3598,25 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) * one subgroup, which are all member in the same group. 
*/ err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_GROUP, 0, 0, false, - 0); + MLXSW_REG_QEEC_HR_GROUP, 0, 0, false, 0); if (err) return err; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, false, 0); if (err) return err; } for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, i, i, + MLXSW_REG_QEEC_HR_TC, i, i, false, 0); if (err) return err; err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_TC, i + 8, i, true, 100); if (err) @@ -3630,13 +3628,13 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) * for the initial configuration. */ err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_PORT, 0, 0, + MLXSW_REG_QEEC_HR_PORT, 0, 0, MLXSW_REG_QEEC_MAS_DIS); if (err) return err; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, MLXSW_REG_QEEC_MAS_DIS); if (err) @@ -3644,14 +3642,14 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) } for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_TC, i, i, MLXSW_REG_QEEC_MAS_DIS); if (err) return err; err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_TC, i + 8, i, MLXSW_REG_QEEC_MAS_DIS); if (err) @@ -3661,7 +3659,7 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) /* Configure the min shaper for multicast TCs. 
*/ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_min_bw_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_TC, i + 8, i, MLXSW_REG_QEEC_MIS_MIN); if (err) @@ -4547,10 +4545,16 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { false), MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false), MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_MARK(IPIP_DECAP_ERROR, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_MARK(DECAP_ECN0, TRAP_TO_CPU, ROUTER_EXP, false), MLXSW_SP_RXL_MARK(IPV4_VRRP, TRAP_TO_CPU, VRRP, false), MLXSW_SP_RXL_MARK(IPV6_VRRP, TRAP_TO_CPU, VRRP, false), + MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_SIP_CLASS_E, FORWARD, + ROUTER_EXP, false), + MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_MC_DMAC, FORWARD, + ROUTER_EXP, false), + MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_SIP_DIP, FORWARD, + ROUTER_EXP, false), + MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_DIP_LINK_LOCAL, FORWARD, + ROUTER_EXP, false), /* PKT Sample trap */ MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU, false, SP_IP2ME, DISCARD), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 347bec9d1ecf..948ef4720d40 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -852,6 +852,8 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_red_qopt_offload *p); int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_prio_qopt_offload *p); +int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_ets_qopt_offload *p); /* spectrum_fid.c */ bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c index 21296fa7f7fb..db66f2b56a6d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c 
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c @@ -160,7 +160,7 @@ static int __mlxsw_sp_dcbnl_ieee_setets(struct mlxsw_sp_port *mlxsw_sp_port, u8 weight = ets->tc_tx_bw[i]; err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, dwrr, weight); if (err) { netdev_err(dev, "Failed to link subgroup ETS element %d to group\n", @@ -198,7 +198,7 @@ err_port_ets_set: u8 weight = my_ets->tc_tx_bw[i]; err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, dwrr, weight); } return err; @@ -369,6 +369,17 @@ err_update_qrwe: } static int +mlxsw_sp_port_dcb_app_update_qpdp(struct mlxsw_sp_port *mlxsw_sp_port, + u8 default_prio) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qpdp_pl[MLXSW_REG_QPDP_LEN]; + + mlxsw_reg_qpdp_pack(qpdp_pl, mlxsw_sp_port->local_port, default_prio); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpdp), qpdp_pl); +} + +static int mlxsw_sp_port_dcb_app_update_qpdpm(struct mlxsw_sp_port *mlxsw_sp_port, struct dcb_ieee_app_dscp_map *map) { @@ -405,6 +416,12 @@ static int mlxsw_sp_port_dcb_app_update(struct mlxsw_sp_port *mlxsw_sp_port) int err; default_prio = mlxsw_sp_port_dcb_app_default_prio(mlxsw_sp_port); + err = mlxsw_sp_port_dcb_app_update_qpdp(mlxsw_sp_port, default_prio); + if (err) { + netdev_err(mlxsw_sp_port->dev, "Couldn't configure port default priority\n"); + return err; + } + have_dscp = mlxsw_sp_port_dcb_app_prio_dscp_map(mlxsw_sp_port, &prio_map); @@ -507,7 +524,7 @@ static int mlxsw_sp_dcbnl_ieee_setmaxrate(struct net_device *dev, for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, maxrate->tc_maxrate[i]); if (err) { @@ -523,7 +540,7 @@ static int mlxsw_sp_dcbnl_ieee_setmaxrate(struct net_device *dev, err_port_ets_maxrate_set: for (i--; i >= 0; i--) 
mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, my_maxrate->tc_maxrate[i]); return err; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index 6400cd644b7a..a8525992528f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -3,8 +3,10 @@ #include <net/ip_tunnels.h> #include <net/ip6_tunnel.h> +#include <net/inet_ecn.h> #include "spectrum_ipip.h" +#include "reg.h" struct ip_tunnel_parm mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev) @@ -338,3 +340,61 @@ static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = { const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[] = { [MLXSW_SP_IPIP_TYPE_GRE4] = &mlxsw_sp_ipip_gre4_ops, }; + +static int mlxsw_sp_ipip_ecn_encap_init_one(struct mlxsw_sp *mlxsw_sp, + u8 inner_ecn, u8 outer_ecn) +{ + char tieem_pl[MLXSW_REG_TIEEM_LEN]; + + mlxsw_reg_tieem_pack(tieem_pl, inner_ecn, outer_ecn); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tieem), tieem_pl); +} + +int mlxsw_sp_ipip_ecn_encap_init(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + /* Iterate over inner ECN values */ + for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) { + u8 outer_ecn = INET_ECN_encapsulate(0, i); + int err; + + err = mlxsw_sp_ipip_ecn_encap_init_one(mlxsw_sp, i, outer_ecn); + if (err) + return err; + } + + return 0; +} + +static int mlxsw_sp_ipip_ecn_decap_init_one(struct mlxsw_sp *mlxsw_sp, + u8 inner_ecn, u8 outer_ecn) +{ + char tidem_pl[MLXSW_REG_TIDEM_LEN]; + bool trap_en, set_ce = false; + u8 new_inner_ecn; + + trap_en = __INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce); + new_inner_ecn = set_ce ? INET_ECN_CE : inner_ecn; + + mlxsw_reg_tidem_pack(tidem_pl, outer_ecn, inner_ecn, new_inner_ecn, + trap_en, trap_en ? 
MLXSW_TRAP_ID_DECAP_ECN0 : 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tidem), tidem_pl); +} + +int mlxsw_sp_ipip_ecn_decap_init(struct mlxsw_sp *mlxsw_sp) +{ + int i, j, err; + + /* Iterate over inner ECN values */ + for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) { + /* Iterate over outer ECN values */ + for (j = INET_ECN_NOT_ECT; j <= INET_ECN_CE; j++) { + err = mlxsw_sp_ipip_ecn_decap_init_one(mlxsw_sp, i, j); + if (err) + return err; + } + } + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c index ec2ff3d7f41c..4aaaa4937b1a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -920,6 +920,7 @@ static int mlxsw_sp_ptp_get_message_types(const struct hwtstamp_config *config, egr_types = 0xff; break; case HWTSTAMP_TX_ONESTEP_SYNC: + case HWTSTAMP_TX_ONESTEP_P2P: return -ERANGE; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 0124bfe1963b..d57c9b15f45e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -18,6 +18,7 @@ enum mlxsw_sp_qdisc_type { MLXSW_SP_QDISC_NO_QDISC, MLXSW_SP_QDISC_RED, MLXSW_SP_QDISC_PRIO, + MLXSW_SP_QDISC_ETS, }; struct mlxsw_sp_qdisc_ops { @@ -487,14 +488,16 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, } static int -mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +__mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port) { int i; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, MLXSW_SP_PORT_DEFAULT_TCLASS); + mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + i, 0, false, 0); mlxsw_sp_qdisc_destroy(mlxsw_sp_port, &mlxsw_sp_port->tclass_qdiscs[i]); 
mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0; @@ -504,36 +507,58 @@ mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, } static int -mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - void *params) +mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - struct tc_prio_qopt_offload_params *p = params; + return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port); +} - if (p->bands > IEEE_8021QAZ_MAX_TCS) +static int +__mlxsw_sp_qdisc_ets_check_params(unsigned int nbands) +{ + if (nbands > IEEE_8021QAZ_MAX_TCS) return -EOPNOTSUPP; return 0; } static int -mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - void *params) +mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) { struct tc_prio_qopt_offload_params *p = params; + + return __mlxsw_sp_qdisc_ets_check_params(p->bands); +} + +static int +__mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, + unsigned int nbands, + const unsigned int *quanta, + const unsigned int *weights, + const u8 *priomap) +{ struct mlxsw_sp_qdisc *child_qdisc; int tclass, i, band, backlog; u8 old_priomap; int err; - for (band = 0; band < p->bands; band++) { + for (band = 0; band < nbands; band++) { tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass]; old_priomap = child_qdisc->prio_bitmap; child_qdisc->prio_bitmap = 0; + + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + tclass, 0, !!quanta[band], + weights[band]); + if (err) + return err; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - if (p->priomap[i] == band) { + if (priomap[i] == band) { child_qdisc->prio_bitmap |= BIT(i); if (BIT(i) & old_priomap) continue; @@ -556,21 +581,46 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, 
child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass]; child_qdisc->prio_bitmap = 0; mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc); + mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + tclass, 0, false, 0); } return 0; } +static int +mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_prio_qopt_offload_params *p = params; + unsigned int zeroes[TCQ_ETS_MAX_BANDS] = {0}; + + return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, p->bands, + zeroes, zeroes, p->priomap); +} + +static void +__mlxsw_sp_qdisc_ets_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct gnet_stats_queue *qstats) +{ + u64 backlog; + + backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_qdisc->stats_base.backlog); + qstats->backlog -= backlog; +} + static void mlxsw_sp_qdisc_prio_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { struct tc_prio_qopt_offload_params *p = params; - u64 backlog; - backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, - mlxsw_sp_qdisc->stats_base.backlog); - p->qstats->backlog -= backlog; + __mlxsw_sp_qdisc_ets_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, + p->qstats); } static int @@ -647,27 +697,93 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = { .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats, }; -/* Grafting is not supported in mlxsw. It will result in un-offloading of the - * grafted qdisc as well as the qdisc in the qdisc new location. - * (However, if the graft is to the location where the qdisc is already at, it - * will be ignored completely and won't cause un-offloading). 
+static int +mlxsw_sp_qdisc_ets_check_params(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_ets_qopt_offload_replace_params *p = params; + + return __mlxsw_sp_qdisc_ets_check_params(p->bands); +} + +static int +mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_ets_qopt_offload_replace_params *p = params; + + return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, p->bands, + p->quanta, p->weights, p->priomap); +} + +static void +mlxsw_sp_qdisc_ets_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_ets_qopt_offload_replace_params *p = params; + + __mlxsw_sp_qdisc_ets_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, + p->qstats); +} + +static int +mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port); +} + +static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_ets = { + .type = MLXSW_SP_QDISC_ETS, + .check_params = mlxsw_sp_qdisc_ets_check_params, + .replace = mlxsw_sp_qdisc_ets_replace, + .unoffload = mlxsw_sp_qdisc_ets_unoffload, + .destroy = mlxsw_sp_qdisc_ets_destroy, + .get_stats = mlxsw_sp_qdisc_get_prio_stats, + .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats, +}; + +/* Linux allows linking of Qdiscs to arbitrary classes (so long as the resulting + * graph is free of cycles). These operations do not change the parent handle + * though, which means it can be incomplete (if there is more than one class + * where the Qdisc in question is grafted) or outright wrong (if the Qdisc was + * linked to a different class and then removed from the original class). + * + * E.g. 
consider this sequence of operations: + * + * # tc qdisc add dev swp1 root handle 1: prio + * # tc qdisc add dev swp1 parent 1:3 handle 13: red limit 1000000 avpkt 10000 + * RED: set bandwidth to 10Mbit + * # tc qdisc link dev swp1 handle 13: parent 1:2 + * + * At this point, both 1:2 and 1:3 have the same RED Qdisc instance as their + * child. But RED will still only claim that 1:3 is its parent. If it's removed + * from that band, its only parent will be 1:2, but it will continue to claim + * that it is in fact 1:3. + * + * The notification for child Qdisc replace (e.g. TC_RED_REPLACE) comes before + * the notification for parent graft (e.g. TC_PRIO_GRAFT). We take the replace + * notification to offload the child Qdisc, based on its parent handle, and use + * the graft operation to validate that the class where the child is actually + * grafted corresponds to the parent handle. If the two don't match, we + * unoffload the child. */ static int -mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - struct tc_prio_qopt_offload_graft_params *p) +__mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u8 band, u32 child_handle) { - int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->band); + int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); struct mlxsw_sp_qdisc *old_qdisc; - /* Check if the grafted qdisc is already in its "new" location. If so - - * nothing needs to be done. - */ - if (p->band < IEEE_8021QAZ_MAX_TCS && - mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == p->child_handle) + if (band < IEEE_8021QAZ_MAX_TCS && + mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == child_handle) return 0; - if (!p->child_handle) { + if (!child_handle) { /* This is an invisible FIFO replacing the original Qdisc. * Ignore it--the original Qdisc's destroy will follow. */ @@ -678,7 +794,7 @@ mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port, * unoffload it. 
*/ old_qdisc = mlxsw_sp_qdisc_find_by_handle(mlxsw_sp_port, - p->child_handle); + child_handle); if (old_qdisc) mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc); @@ -687,6 +803,15 @@ mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port, return -EOPNOTSUPP; } +static int +mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_prio_qopt_offload_graft_params *p) +{ + return __mlxsw_sp_qdisc_ets_graft(mlxsw_sp_port, mlxsw_sp_qdisc, + p->band, p->child_handle); +} + int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_prio_qopt_offload *p) { @@ -720,6 +845,40 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, } } +int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_ets_qopt_offload *p) +{ + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true); + if (!mlxsw_sp_qdisc) + return -EOPNOTSUPP; + + if (p->command == TC_ETS_REPLACE) + return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, + &mlxsw_sp_qdisc_ops_ets, + &p->replace_params); + + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle, + MLXSW_SP_QDISC_ETS)) + return -EOPNOTSUPP; + + switch (p->command) { + case TC_ETS_DESTROY: + return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + case TC_ETS_STATS: + return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &p->stats); + case TC_ETS_GRAFT: + return __mlxsw_sp_qdisc_ets_graft(mlxsw_sp_port, mlxsw_sp_qdisc, + p->graft_params.band, + p->graft_params.child_handle); + default: + return -EOPNOTSUPP; + } +} + int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port) { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 8290e82240fc..ce707723f8cf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -382,9 +382,10 @@ enum mlxsw_sp_fib_entry_type { }; struct mlxsw_sp_nexthop_group; +struct mlxsw_sp_fib_entry; struct mlxsw_sp_fib_node { - struct list_head entry_list; + struct mlxsw_sp_fib_entry *fib_entry; struct list_head list; struct rhash_head ht_node; struct mlxsw_sp_fib *fib; @@ -397,7 +398,6 @@ struct mlxsw_sp_fib_entry_decap { }; struct mlxsw_sp_fib_entry { - struct list_head list; struct mlxsw_sp_fib_node *fib_node; enum mlxsw_sp_fib_entry_type type; struct list_head nexthop_group_node; @@ -1162,7 +1162,6 @@ mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const union mlxsw_sp_l3addr *addr, enum mlxsw_sp_fib_entry_type type) { - struct mlxsw_sp_fib_entry *fib_entry; struct mlxsw_sp_fib_node *fib_node; unsigned char addr_prefix_len; struct mlxsw_sp_fib *fib; @@ -1191,15 +1190,10 @@ mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id, fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len, addr_prefix_len); - if (!fib_node || list_empty(&fib_node->entry_list)) + if (!fib_node || fib_node->fib_entry->type != type) return NULL; - fib_entry = list_first_entry(&fib_node->entry_list, - struct mlxsw_sp_fib_entry, list); - if (fib_entry->type != type) - return NULL; - - return fib_entry; + return fib_node->fib_entry; } /* Given an IPIP entry, find the corresponding decap route. 
*/ @@ -1209,7 +1203,6 @@ mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp, { static struct mlxsw_sp_fib_node *fib_node; const struct mlxsw_sp_ipip_ops *ipip_ops; - struct mlxsw_sp_fib_entry *fib_entry; unsigned char saddr_prefix_len; union mlxsw_sp_l3addr saddr; struct mlxsw_sp_fib *ul_fib; @@ -1244,15 +1237,11 @@ mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp, fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len, saddr_prefix_len); - if (!fib_node || list_empty(&fib_node->entry_list)) + if (!fib_node || + fib_node->fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP) return NULL; - fib_entry = list_first_entry(&fib_node->entry_list, - struct mlxsw_sp_fib_entry, list); - if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP) - return NULL; - - return fib_entry; + return fib_node->fib_entry; } static struct mlxsw_sp_ipip_entry * @@ -3231,10 +3220,6 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, return 0; } -static bool -mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node, - const struct mlxsw_sp_fib_entry *fib_entry); - static int mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) @@ -3243,9 +3228,6 @@ mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp, int err; list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { - if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node, - fib_entry)) - continue; err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); if (err) return err; @@ -3253,24 +3235,6 @@ mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp, return 0; } -static void -mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op, int err); - -static void -mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp) -{ - enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE; - struct mlxsw_sp_fib_entry *fib_entry; - - list_for_each_entry(fib_entry, 
&nh_grp->fib_list, nexthop_group_node) { - if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node, - fib_entry)) - continue; - mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0); - } -} - static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size) { /* Valid sizes for an adjacency group are: @@ -3374,6 +3338,73 @@ mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp) } } +static struct mlxsw_sp_nexthop * +mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, + const struct mlxsw_sp_rt6 *mlxsw_sp_rt6); + +static void +mlxsw_sp_nexthop4_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + int i; + + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + + if (nh->offloaded) + nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD; + else + nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; + } +} + +static void +__mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; + struct mlxsw_sp_nexthop *nh; + + nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6); + if (nh && nh->offloaded) + fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD; + else + fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; + } +} + +static void +mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + + /* Unfortunately, in IPv6 the route and the nexthop are described by + * the same struct, so we need to iterate over all the routes using the + * nexthop group and set / clear the offload indication for them. 
+ */ + list_for_each_entry(fib6_entry, &nh_grp->fib_list, + common.nexthop_group_node) + __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry); +} + +static void +mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + switch (mlxsw_sp_nexthop_group_type(nh_grp)) { + case AF_INET: + mlxsw_sp_nexthop4_group_offload_refresh(mlxsw_sp, nh_grp); + break; + case AF_INET6: + mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp); + break; + } +} + static void mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) @@ -3447,6 +3478,8 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, goto set_trap; } + mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp); + if (!old_adj_index_valid) { /* The trap was set for fib entries, so we have to call * fib entry update to unset it and use adjacency index. @@ -3468,9 +3501,6 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, goto set_trap; } - /* Offload state within the group changed, so update the flags. 
*/ - mlxsw_sp_nexthop_fib_entries_refresh(nh_grp); - return; set_trap: @@ -3483,6 +3513,7 @@ set_trap: err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp); if (err) dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n"); + mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp); if (old_adj_index_valid) mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, nh_grp->ecmp_size, nh_grp->adj_index); @@ -3845,7 +3876,7 @@ static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp, key.fib_nh = fib_nh; nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key); - if (WARN_ON_ONCE(!nh)) + if (!nh) return; switch (event) { @@ -4065,131 +4096,128 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, } static void -mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { - struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; - int i; - - if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || - fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE || - fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP || - fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) { - nh_grp->nexthops->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD; - return; - } - - for (i = 0; i < nh_grp->count; i++) { - struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + struct fib_info *fi = mlxsw_sp_nexthop4_group_fi(fib_entry->nh_group); + u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr; + int dst_len = fib_entry->fib_node->key.prefix_len; + struct mlxsw_sp_fib4_entry *fib4_entry; + struct fib_rt_info fri; + bool should_offload; - if (nh->offloaded) - nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD; - else - nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; - } + should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry); + fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry, + common); + fri.fi = fi; + fri.tb_id = fib4_entry->tb_id; + 
fri.dst = cpu_to_be32(*p_dst); + fri.dst_len = dst_len; + fri.tos = fib4_entry->tos; + fri.type = fib4_entry->type; + fri.offload = should_offload; + fri.trap = !should_offload; + fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri); } static void -mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { - struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; - int i; - - if (!list_is_singular(&nh_grp->fib_list)) - return; - - for (i = 0; i < nh_grp->count; i++) { - struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + struct fib_info *fi = mlxsw_sp_nexthop4_group_fi(fib_entry->nh_group); + u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr; + int dst_len = fib_entry->fib_node->key.prefix_len; + struct mlxsw_sp_fib4_entry *fib4_entry; + struct fib_rt_info fri; - nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; - } + fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry, + common); + fri.fi = fi; + fri.tb_id = fib4_entry->tb_id; + fri.dst = cpu_to_be32(*p_dst); + fri.dst_len = dst_len; + fri.tos = fib4_entry->tos; + fri.type = fib4_entry->type; + fri.offload = false; + fri.trap = false; + fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri); } static void -mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + bool should_offload; + + should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry); + /* In IPv6 a multipath route is represented using multiple routes, so + * we need to set the flags on all of them. 
+ */ fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, common); - - if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || - fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) { - list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, - list)->rt->fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD; - return; - } - - list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { - struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; - struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; - struct mlxsw_sp_nexthop *nh; - - nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6); - if (nh && nh->offloaded) - fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD; - else - fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; - } + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) + fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, should_offload, + !should_offload); } static void -mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_rt6 *mlxsw_sp_rt6; fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, common); - list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { - struct fib6_info *rt = mlxsw_sp_rt6->rt; - - rt->fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; - } + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) + fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, false, false); } -static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +static void +mlxsw_sp_fib_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { switch (fib_entry->fib_node->fib->proto) { case MLXSW_SP_L3_PROTO_IPV4: - mlxsw_sp_fib4_entry_offload_set(fib_entry); + mlxsw_sp_fib4_entry_hw_flags_set(mlxsw_sp, fib_entry); break; case MLXSW_SP_L3_PROTO_IPV6: - mlxsw_sp_fib6_entry_offload_set(fib_entry); + mlxsw_sp_fib6_entry_hw_flags_set(mlxsw_sp, 
fib_entry); break; } } static void -mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { switch (fib_entry->fib_node->fib->proto) { case MLXSW_SP_L3_PROTO_IPV4: - mlxsw_sp_fib4_entry_offload_unset(fib_entry); + mlxsw_sp_fib4_entry_hw_flags_clear(mlxsw_sp, fib_entry); break; case MLXSW_SP_L3_PROTO_IPV6: - mlxsw_sp_fib6_entry_offload_unset(fib_entry); + mlxsw_sp_fib6_entry_hw_flags_clear(mlxsw_sp, fib_entry); break; } } static void -mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op, int err) +mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { switch (op) { - case MLXSW_REG_RALUE_OP_WRITE_DELETE: - return mlxsw_sp_fib_entry_offload_unset(fib_entry); case MLXSW_REG_RALUE_OP_WRITE_WRITE: - if (err) - return; - if (mlxsw_sp_fib_entry_should_offload(fib_entry)) - mlxsw_sp_fib_entry_offload_set(fib_entry); - else - mlxsw_sp_fib_entry_offload_unset(fib_entry); - return; + mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry); + break; + case MLXSW_REG_RALUE_OP_WRITE_DELETE: + mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry); + break; default: - return; + break; } } @@ -4416,7 +4444,10 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, { int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op); - mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err); + if (err) + return err; + + mlxsw_sp_fib_entry_hw_flags_refresh(mlxsw_sp, fib_entry, op); return err; } @@ -4491,6 +4522,19 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, } } +static void +mlxsw_sp_fib4_entry_type_unset(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + switch (fib_entry->type) { + case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: + mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry); + break; + default: + break; + 
} +} + static struct mlxsw_sp_fib4_entry * mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node, @@ -4523,6 +4567,7 @@ mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp, return fib4_entry; err_nexthop4_group_get: + mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, fib_entry); err_fib4_entry_type_set: kfree(fib4_entry); return ERR_PTR(err); @@ -4532,6 +4577,7 @@ static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib4_entry *fib4_entry) { mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common); + mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, &fib4_entry->common); kfree(fib4_entry); } @@ -4555,15 +4601,14 @@ mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp, if (!fib_node) return NULL; - list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) { - if (fib4_entry->tb_id == fen_info->tb_id && - fib4_entry->tos == fen_info->tos && - fib4_entry->type == fen_info->type && - mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) == - fen_info->fi) { - return fib4_entry; - } - } + fib4_entry = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib4_entry, common); + if (fib4_entry->tb_id == fen_info->tb_id && + fib4_entry->tos == fen_info->tos && + fib4_entry->type == fen_info->type && + mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) == + fen_info->fi) + return fib4_entry; return NULL; } @@ -4611,7 +4656,6 @@ mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr, if (!fib_node) return NULL; - INIT_LIST_HEAD(&fib_node->entry_list); list_add(&fib_node->list, &fib->node_list); memcpy(fib_node->key.addr, addr, addr_len); fib_node->key.prefix_len = prefix_len; @@ -4622,18 +4666,9 @@ mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr, static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node) { list_del(&fib_node->list); - WARN_ON(!list_empty(&fib_node->entry_list)); kfree(fib_node); } -static bool -mlxsw_sp_fib_node_entry_is_first(const struct 
mlxsw_sp_fib_node *fib_node, - const struct mlxsw_sp_fib_entry *fib_entry) -{ - return list_first_entry(&fib_node->entry_list, - struct mlxsw_sp_fib_entry, list) == fib_entry; -} - static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { @@ -4773,200 +4808,48 @@ static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_vr *vr = fib_node->fib->vr; - if (!list_empty(&fib_node->entry_list)) + if (fib_node->fib_entry) return; mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node); mlxsw_sp_fib_node_destroy(fib_node); mlxsw_sp_vr_put(mlxsw_sp, vr); } -static struct mlxsw_sp_fib4_entry * -mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct mlxsw_sp_fib4_entry *new4_entry) -{ - struct mlxsw_sp_fib4_entry *fib4_entry; - - list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) { - if (fib4_entry->tb_id > new4_entry->tb_id) - continue; - if (fib4_entry->tb_id != new4_entry->tb_id) - break; - if (fib4_entry->tos > new4_entry->tos) - continue; - if (fib4_entry->prio >= new4_entry->prio || - fib4_entry->tos < new4_entry->tos) - return fib4_entry; - } - - return NULL; -} - -static int -mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry, - struct mlxsw_sp_fib4_entry *new4_entry) -{ - struct mlxsw_sp_fib_node *fib_node; - - if (WARN_ON(!fib4_entry)) - return -EINVAL; - - fib_node = fib4_entry->common.fib_node; - list_for_each_entry_from(fib4_entry, &fib_node->entry_list, - common.list) { - if (fib4_entry->tb_id != new4_entry->tb_id || - fib4_entry->tos != new4_entry->tos || - fib4_entry->prio != new4_entry->prio) - break; - } - - list_add_tail(&new4_entry->common.list, &fib4_entry->common.list); - return 0; -} - -static int -mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry, - bool replace, bool append) -{ - struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node; - struct mlxsw_sp_fib4_entry *fib4_entry; - - fib4_entry = 
mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry); - - if (append) - return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry); - if (replace && WARN_ON(!fib4_entry)) - return -EINVAL; - - /* Insert new entry before replaced one, so that we can later - * remove the second. - */ - if (fib4_entry) { - list_add_tail(&new4_entry->common.list, - &fib4_entry->common.list); - } else { - struct mlxsw_sp_fib4_entry *last; - - list_for_each_entry(last, &fib_node->entry_list, common.list) { - if (new4_entry->tb_id > last->tb_id) - break; - fib4_entry = last; - } - - if (fib4_entry) - list_add(&new4_entry->common.list, - &fib4_entry->common.list); - else - list_add(&new4_entry->common.list, - &fib_node->entry_list); - } - - return 0; -} - -static void -mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry) -{ - list_del(&fib4_entry->common.list); -} - -static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) -{ - struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; - - if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry)) - return 0; - - /* To prevent packet loss, overwrite the previously offloaded - * entry. 
- */ - if (!list_is_singular(&fib_node->entry_list)) { - enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE; - struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list); - - mlxsw_sp_fib_entry_offload_refresh(n, op, 0); - } - - return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); -} - -static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp, +static int mlxsw_sp_fib_node_entry_link(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; - - if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry)) - return; - - /* Promote the next entry by overwriting the deleted entry */ - if (!list_is_singular(&fib_node->entry_list)) { - struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list); - enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE; - - mlxsw_sp_fib_entry_update(mlxsw_sp, n); - mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0); - return; - } - - mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry); -} - -static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib4_entry *fib4_entry, - bool replace, bool append) -{ int err; - err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append); - if (err) - return err; + fib_node->fib_entry = fib_entry; - err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common); + err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); if (err) - goto err_fib_node_entry_add; + goto err_fib_entry_update; return 0; -err_fib_node_entry_add: - mlxsw_sp_fib4_node_list_remove(fib4_entry); +err_fib_entry_update: + fib_node->fib_entry = NULL; return err; } static void -mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib4_entry *fib4_entry) -{ - mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common); - mlxsw_sp_fib4_node_list_remove(fib4_entry); - - if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) - mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, 
&fib4_entry->common); -} - -static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib4_entry *fib4_entry, - bool replace) +mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { - struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node; - struct mlxsw_sp_fib4_entry *replaced; - - if (!replace) - return; - - /* We inserted the new entry before replaced one */ - replaced = list_next_entry(fib4_entry, common.list); + struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; - mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced); - mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced); - mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry); + fib_node->fib_entry = NULL; } static int -mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp, - const struct fib_entry_notifier_info *fen_info, - bool replace, bool append) +mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp, + const struct fib_entry_notifier_info *fen_info) { - struct mlxsw_sp_fib4_entry *fib4_entry; + struct mlxsw_sp_fib4_entry *fib4_entry, *fib4_replaced; + struct mlxsw_sp_fib_entry *replaced; struct mlxsw_sp_fib_node *fib_node; int err; @@ -4989,18 +4872,26 @@ mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp, goto err_fib4_entry_create; } - err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace, - append); + replaced = fib_node->fib_entry; + err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib4_entry->common); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n"); - goto err_fib4_node_entry_link; + goto err_fib_node_entry_link; } - mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace); + /* Nothing to replace */ + if (!replaced) + return 0; + + mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced); + fib4_replaced = container_of(replaced, struct mlxsw_sp_fib4_entry, + common); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_replaced); return 0; 
-err_fib4_node_entry_link: +err_fib_node_entry_link: + fib_node->fib_entry = replaced; mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); err_fib4_entry_create: mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); @@ -5021,7 +4912,7 @@ static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, return; fib_node = fib4_entry->common.fib_node; - mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry); + mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib4_entry->common); mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } @@ -5083,13 +4974,6 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6) kfree(mlxsw_sp_rt6); } -static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt) -{ - /* RTF_CACHE routes are ignored */ - return !(rt->fib6_flags & RTF_ADDRCONF) && - rt->fib6_nh->fib_nh_gw_family; -} - static struct fib6_info * mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry) { @@ -5097,37 +4981,6 @@ mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry) list)->rt; } -static struct mlxsw_sp_fib6_entry * -mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct fib6_info *nrt, bool replace) -{ - struct mlxsw_sp_fib6_entry *fib6_entry; - - if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace) - return NULL; - - list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { - struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); - - /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same - * virtual router. 
- */ - if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id) - continue; - if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id) - break; - if (rt->fib6_metric < nrt->fib6_metric) - continue; - if (rt->fib6_metric == nrt->fib6_metric && - mlxsw_sp_fib6_rt_can_mp(rt)) - return fib6_entry; - if (rt->fib6_metric > nrt->fib6_metric) - break; - } - - return NULL; -} - static struct mlxsw_sp_rt6 * mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry, const struct fib6_info *rt) @@ -5313,6 +5166,11 @@ static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp, &nh_grp->fib_list); fib6_entry->common.nh_group = nh_grp; + /* The route and the nexthop are described by the same struct, so we + * need to the update the nexthop offload indication for the new route. + */ + __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry); + return 0; } @@ -5345,16 +5203,16 @@ mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp, * currently associated with it in the device's table is that * of the old group. Start using the new one instead. 
*/ - err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common); + err = mlxsw_sp_fib_entry_update(mlxsw_sp, &fib6_entry->common); if (err) - goto err_fib_node_entry_add; + goto err_fib_entry_update; if (list_empty(&old_nh_grp->fib_list)) mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp); return 0; -err_fib_node_entry_add: +err_fib_entry_update: mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common); err_nexthop6_group_get: list_add_tail(&fib6_entry->common.nexthop_group_node, @@ -5519,112 +5377,13 @@ static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp, } static struct mlxsw_sp_fib6_entry * -mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct fib6_info *nrt, bool replace) -{ - struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL; - - list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { - struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); - - if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id) - continue; - if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id) - break; - if (replace && rt->fib6_metric == nrt->fib6_metric) { - if (mlxsw_sp_fib6_rt_can_mp(rt) == - mlxsw_sp_fib6_rt_can_mp(nrt)) - return fib6_entry; - if (mlxsw_sp_fib6_rt_can_mp(nrt)) - fallback = fallback ?: fib6_entry; - } - if (rt->fib6_metric > nrt->fib6_metric) - return fallback ?: fib6_entry; - } - - return fallback; -} - -static int -mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry, - bool *p_replace) -{ - struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node; - struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry); - struct mlxsw_sp_fib6_entry *fib6_entry; - - fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, *p_replace); - - if (*p_replace && !fib6_entry) - *p_replace = false; - - if (fib6_entry) { - list_add_tail(&new6_entry->common.list, - &fib6_entry->common.list); - } else { - struct mlxsw_sp_fib6_entry *last; - - list_for_each_entry(last, 
&fib_node->entry_list, common.list) { - struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last); - - if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id) - break; - fib6_entry = last; - } - - if (fib6_entry) - list_add(&new6_entry->common.list, - &fib6_entry->common.list); - else - list_add(&new6_entry->common.list, - &fib_node->entry_list); - } - - return 0; -} - -static void -mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry) -{ - list_del(&fib6_entry->common.list); -} - -static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib6_entry *fib6_entry, - bool *p_replace) -{ - int err; - - err = mlxsw_sp_fib6_node_list_insert(fib6_entry, p_replace); - if (err) - return err; - - err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common); - if (err) - goto err_fib_node_entry_add; - - return 0; - -err_fib_node_entry_add: - mlxsw_sp_fib6_node_list_remove(fib6_entry); - return err; -} - -static void -mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib6_entry *fib6_entry) -{ - mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common); - mlxsw_sp_fib6_node_list_remove(fib6_entry); -} - -static struct mlxsw_sp_fib6_entry * mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp, const struct fib6_info *rt) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; struct mlxsw_sp_fib *fib; + struct fib6_info *cmp_rt; struct mlxsw_sp_vr *vr; vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id); @@ -5638,40 +5397,23 @@ mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp, if (!fib_node) return NULL; - list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { - struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry); - - if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id && - rt->fib6_metric == iter_rt->fib6_metric && - mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt)) - return fib6_entry; - } + fib6_entry = container_of(fib_node->fib_entry, + struct 
mlxsw_sp_fib6_entry, common); + cmp_rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + if (rt->fib6_table->tb6_id == cmp_rt->fib6_table->tb6_id && + rt->fib6_metric == cmp_rt->fib6_metric && + mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt)) + return fib6_entry; return NULL; } -static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib6_entry *fib6_entry, - bool replace) -{ - struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node; - struct mlxsw_sp_fib6_entry *replaced; - - if (!replace) - return; - - replaced = list_next_entry(fib6_entry, common.list); - - mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced); - mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced); - mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); -} - -static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, - struct fib6_info **rt_arr, - unsigned int nrt6, bool replace) +static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp, + struct fib6_info **rt_arr, + unsigned int nrt6) { - struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_fib6_entry *fib6_entry, *fib6_replaced; + struct mlxsw_sp_fib_entry *replaced; struct mlxsw_sp_fib_node *fib_node; struct fib6_info *rt = rt_arr[0]; int err; @@ -5693,18 +5435,6 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, if (IS_ERR(fib_node)) return PTR_ERR(fib_node); - /* Before creating a new entry, try to append route to an existing - * multipath entry. 
- */ - fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace); - if (fib6_entry) { - err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, - rt_arr, nrt6); - if (err) - goto err_fib6_entry_nexthop_add; - return 0; - } - fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr, nrt6); if (IS_ERR(fib6_entry)) { @@ -5712,17 +5442,70 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, goto err_fib6_entry_create; } - err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, &replace); + replaced = fib_node->fib_entry; + err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib6_entry->common); if (err) - goto err_fib6_node_entry_link; + goto err_fib_node_entry_link; + + /* Nothing to replace */ + if (!replaced) + return 0; - mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace); + mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced); + fib6_replaced = container_of(replaced, struct mlxsw_sp_fib6_entry, + common); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_replaced); return 0; -err_fib6_node_entry_link: +err_fib_node_entry_link: + fib_node->fib_entry = replaced; mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); err_fib6_entry_create: + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return err; +} + +static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp, + struct fib6_info **rt_arr, + unsigned int nrt6) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_fib_node *fib_node; + struct fib6_info *rt = rt_arr[0]; + int err; + + if (mlxsw_sp->router->aborted) + return 0; + + if (rt->fib6_src.plen) + return -EINVAL; + + if (mlxsw_sp_fib6_rt_should_ignore(rt)) + return 0; + + fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id, + &rt->fib6_dst.addr, + sizeof(rt->fib6_dst.addr), + rt->fib6_dst.plen, + MLXSW_SP_L3_PROTO_IPV6); + if (IS_ERR(fib_node)) + return PTR_ERR(fib_node); + + if (WARN_ON_ONCE(!fib_node->fib_entry)) { + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return -EINVAL; + } + + fib6_entry 
= container_of(fib_node->fib_entry, + struct mlxsw_sp_fib6_entry, common); + err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt_arr, + nrt6); + if (err) + goto err_fib6_entry_nexthop_add; + + return 0; + err_fib6_entry_nexthop_add: mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); return err; @@ -5762,7 +5545,7 @@ static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, fib_node = fib6_entry->common.fib_node; - mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry); + mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib6_entry->common); mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } @@ -5916,39 +5699,25 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { - struct mlxsw_sp_fib4_entry *fib4_entry, *tmp; - - list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list, - common.list) { - bool do_break = &tmp->common.list == &fib_node->entry_list; + struct mlxsw_sp_fib4_entry *fib4_entry; - mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry); - mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); - mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); - /* Break when entry list is empty and node was freed. - * Otherwise, we'll access freed memory in the next - * iteration. 
- */ - if (do_break) - break; - } + fib4_entry = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib4_entry, common); + mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { - struct mlxsw_sp_fib6_entry *fib6_entry, *tmp; - - list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list, - common.list) { - bool do_break = &tmp->common.list == &fib_node->entry_list; + struct mlxsw_sp_fib6_entry *fib6_entry; - mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry); - mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); - mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); - if (do_break) - break; - } + fib6_entry = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib6_entry, common); + mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp, @@ -6099,7 +5868,6 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) struct mlxsw_sp_fib_event_work *fib_work = container_of(work, struct mlxsw_sp_fib_event_work, work); struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; - bool replace, append; int err; /* Protect internal structures from changes */ @@ -6107,13 +5875,9 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) mlxsw_sp_span_respin(mlxsw_sp); switch (fib_work->event) { - case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: - replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; - append = fib_work->event == FIB_EVENT_ENTRY_APPEND; - err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info, - replace, append); + case FIB_EVENT_ENTRY_REPLACE: + err = 
mlxsw_sp_router_fib4_replace(mlxsw_sp, + &fib_work->fen_info); if (err) mlxsw_sp_router_fib_abort(mlxsw_sp); fib_info_put(fib_work->fen_info.fi); @@ -6138,20 +5902,24 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) struct mlxsw_sp_fib_event_work *fib_work = container_of(work, struct mlxsw_sp_fib_event_work, work); struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; - bool replace; int err; rtnl_lock(); mlxsw_sp_span_respin(mlxsw_sp); switch (fib_work->event) { - case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_ADD: - replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; - err = mlxsw_sp_router_fib6_add(mlxsw_sp, - fib_work->fib6_work.rt_arr, - fib_work->fib6_work.nrt6, - replace); + case FIB_EVENT_ENTRY_REPLACE: + err = mlxsw_sp_router_fib6_replace(mlxsw_sp, + fib_work->fib6_work.rt_arr, + fib_work->fib6_work.nrt6); + if (err) + mlxsw_sp_router_fib_abort(mlxsw_sp); + mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); + break; + case FIB_EVENT_ENTRY_APPEND: + err = mlxsw_sp_router_fib6_append(mlxsw_sp, + fib_work->fib6_work.rt_arr, + fib_work->fib6_work.nrt6); if (err) mlxsw_sp_router_fib_abort(mlxsw_sp); mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); @@ -6216,8 +5984,6 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: fen_info = container_of(info, struct fib_entry_notifier_info, info); @@ -6245,7 +6011,7 @@ static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_APPEND: /* fall through */ case FIB_EVENT_ENTRY_DEL: fen6_info = container_of(info, struct fib6_entry_notifier_info, info); @@ -6348,9 +6114,9 @@ static int 
mlxsw_sp_router_fib_event(struct notifier_block *nb, err = mlxsw_sp_router_fib_rule_event(event, info, router->mlxsw_sp); return notifier_from_errno(err); - case FIB_EVENT_ENTRY_ADD: + case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ + case FIB_EVENT_ENTRY_APPEND: if (router->aborted) { NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route"); return notifier_from_errno(-EINVAL); @@ -8025,8 +7791,18 @@ mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp) { + int err; + mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr; INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list); + + err = mlxsw_sp_ipip_ecn_encap_init(mlxsw_sp); + if (err) + return err; + err = mlxsw_sp_ipip_ecn_decap_init(mlxsw_sp); + if (err) + return err; + return mlxsw_sp_ipip_config_tigcr(mlxsw_sp); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index cc1de91e8217..c9b94f435cdd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -104,4 +104,7 @@ static inline bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1, return !memcmp(addr1, addr2, sizeof(*addr1)); } +int mlxsw_sp_ipip_ecn_encap_init(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_ipip_ecn_decap_init(struct mlxsw_sp *mlxsw_sp); + #endif /* _MLXSW_ROUTER_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c index e0d7c49ffae0..60205aa3f6a5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -9,6 +9,20 @@ #include "reg.h" #include "spectrum.h" +/* All driver-specific traps must be documented in + * Documentation/networking/devlink/mlxsw.rst + */ +enum { + 
DEVLINK_MLXSW_TRAP_ID_BASE = DEVLINK_TRAP_GENERIC_ID_MAX, + DEVLINK_MLXSW_TRAP_ID_IRIF_DISABLED, + DEVLINK_MLXSW_TRAP_ID_ERIF_DISABLED, +}; + +#define DEVLINK_MLXSW_TRAP_NAME_IRIF_DISABLED \ + "irif_disabled" +#define DEVLINK_MLXSW_TRAP_NAME_ERIF_DISABLED \ + "erif_disabled" + #define MLXSW_SP_TRAP_METADATA DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port, @@ -21,6 +35,12 @@ static void mlxsw_sp_rx_exception_listener(struct sk_buff *skb, u8 local_port, DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ MLXSW_SP_TRAP_METADATA) +#define MLXSW_SP_TRAP_DRIVER_DROP(_id, _group_id) \ + DEVLINK_TRAP_DRIVER(DROP, DROP, DEVLINK_MLXSW_TRAP_ID_##_id, \ + DEVLINK_MLXSW_TRAP_NAME_##_id, \ + DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + MLXSW_SP_TRAP_METADATA) + #define MLXSW_SP_TRAP_EXCEPTION(_id, _group_id) \ DEVLINK_TRAP_GENERIC(EXCEPTION, TRAP, _id, \ DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ @@ -58,6 +78,11 @@ static struct devlink_trap mlxsw_sp_traps_arr[] = { MLXSW_SP_TRAP_EXCEPTION(UNRESOLVED_NEIGH, L3_DROPS), MLXSW_SP_TRAP_EXCEPTION(IPV4_LPM_UNICAST_MISS, L3_DROPS), MLXSW_SP_TRAP_EXCEPTION(IPV6_LPM_UNICAST_MISS, L3_DROPS), + MLXSW_SP_TRAP_DRIVER_DROP(IRIF_DISABLED, L3_DROPS), + MLXSW_SP_TRAP_DRIVER_DROP(ERIF_DISABLED, L3_DROPS), + MLXSW_SP_TRAP_DROP(NON_ROUTABLE, L3_DROPS), + MLXSW_SP_TRAP_EXCEPTION(DECAP_ERROR, TUNNEL_DROPS), + MLXSW_SP_TRAP_DROP(OVERLAY_SMAC_MC, TUNNEL_DROPS), }; static struct mlxsw_listener mlxsw_sp_listeners_arr[] = { @@ -90,6 +115,15 @@ static struct mlxsw_listener mlxsw_sp_listeners_arr[] = { TRAP_EXCEPTION_TO_CPU), MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM6, ROUTER_EXP, TRAP_EXCEPTION_TO_CPU), + MLXSW_SP_RXL_DISCARD(ROUTER_IRIF_EN, L3_DISCARDS), + MLXSW_SP_RXL_DISCARD(ROUTER_ERIF_EN, L3_DISCARDS), + MLXSW_SP_RXL_DISCARD(NON_ROUTABLE, L3_DISCARDS), + MLXSW_SP_RXL_EXCEPTION(DECAP_ECN0, ROUTER_EXP, TRAP_EXCEPTION_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(IPIP_DECAP_ERROR, ROUTER_EXP, + TRAP_EXCEPTION_TO_CPU), + 
MLXSW_SP_RXL_EXCEPTION(DISCARD_DEC_PKT, TUNNEL_DISCARDS, + TRAP_EXCEPTION_TO_CPU), + MLXSW_SP_RXL_DISCARD(OVERLAY_SMAC_MC, TUNNEL_DISCARDS), }; /* Mapping between hardware trap and devlink trap. Multiple hardware traps can @@ -123,6 +157,13 @@ static u16 mlxsw_sp_listener_devlink_map[] = { DEVLINK_TRAP_GENERIC_ID_UNRESOLVED_NEIGH, DEVLINK_TRAP_GENERIC_ID_IPV4_LPM_UNICAST_MISS, DEVLINK_TRAP_GENERIC_ID_IPV6_LPM_UNICAST_MISS, + DEVLINK_MLXSW_TRAP_ID_IRIF_DISABLED, + DEVLINK_MLXSW_TRAP_ID_ERIF_DISABLED, + DEVLINK_TRAP_GENERIC_ID_NON_ROUTABLE, + DEVLINK_TRAP_GENERIC_ID_DECAP_ERROR, + DEVLINK_TRAP_GENERIC_ID_DECAP_ERROR, + DEVLINK_TRAP_GENERIC_ID_DECAP_ERROR, + DEVLINK_TRAP_GENERIC_ID_OVERLAY_SMAC_MC, }; static int mlxsw_sp_rx_listener(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, @@ -304,8 +345,9 @@ mlxsw_sp_trap_group_policer_init(struct mlxsw_sp *mlxsw_sp, u32 rate; switch (group->id) { - case DEVLINK_TRAP_GROUP_GENERIC_ID_L3_DROPS:/* fall through */ - case DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS: + case DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS: /* fall through */ + case DEVLINK_TRAP_GROUP_GENERIC_ID_L3_DROPS: /* fall through */ + case DEVLINK_TRAP_GROUP_GENERIC_ID_TUNNEL_DROPS: policer_id = MLXSW_SP_DISCARD_POLICER_ID; ir_units = MLXSW_REG_QPCR_IR_UNITS_M; is_bytes = false; @@ -342,6 +384,12 @@ __mlxsw_sp_trap_group_init(struct mlxsw_sp *mlxsw_sp, priority = 0; tc = 1; break; + case DEVLINK_TRAP_GROUP_GENERIC_ID_TUNNEL_DROPS: + group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_TUNNEL_DISCARDS; + policer_id = MLXSW_SP_DISCARD_POLICER_ID; + priority = 0; + tc = 1; + break; default: return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 0c1c142bb6b0..12e1fa998d42 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -67,6 +67,7 @@ enum { MLXSW_TRAP_ID_NVE_ENCAP_ARP = 0xBD, MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6, MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7, + 
MLXSW_TRAP_ID_DISCARD_NON_ROUTABLE = 0x11A, MLXSW_TRAP_ID_DISCARD_ROUTER2 = 0x130, MLXSW_TRAP_ID_DISCARD_ROUTER3 = 0x131, MLXSW_TRAP_ID_DISCARD_ING_PACKET_SMAC_MC = 0x140, @@ -80,12 +81,20 @@ enum { MLXSW_TRAP_ID_DISCARD_ING_ROUTER_UC_DIP_MC_DMAC = 0x161, MLXSW_TRAP_ID_DISCARD_ING_ROUTER_DIP_LB = 0x162, MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_MC = 0x163, + MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_CLASS_E = 0x164, MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_LB = 0x165, MLXSW_TRAP_ID_DISCARD_ING_ROUTER_CORRUPTED_IP_HDR = 0x167, + MLXSW_TRAP_ID_DISCARD_ING_ROUTER_MC_DMAC = 0x168, + MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_DIP = 0x169, MLXSW_TRAP_ID_DISCARD_ING_ROUTER_IPV4_SIP_BC = 0x16A, MLXSW_TRAP_ID_DISCARD_ING_ROUTER_IPV4_DIP_LOCAL_NET = 0x16B, + MLXSW_TRAP_ID_DISCARD_ING_ROUTER_DIP_LINK_LOCAL = 0x16C, + MLXSW_TRAP_ID_DISCARD_ROUTER_IRIF_EN = 0x178, + MLXSW_TRAP_ID_DISCARD_ROUTER_ERIF_EN = 0x179, MLXSW_TRAP_ID_DISCARD_ROUTER_LPM4 = 0x17B, MLXSW_TRAP_ID_DISCARD_ROUTER_LPM6 = 0x17C, + MLXSW_TRAP_ID_DISCARD_DEC_PKT = 0x188, + MLXSW_TRAP_ID_DISCARD_OVERLAY_SMAC_MC = 0x190, MLXSW_TRAP_ID_DISCARD_IPV6_MC_DIP_RESERVED_SCOPE = 0x1B0, MLXSW_TRAP_ID_DISCARD_IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE = 0x1B1, MLXSW_TRAP_ID_ACL0 = 0x1C0, diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c index da329ca115cc..f3f6dfe3eddc 100644 --- a/drivers/net/ethernet/micrel/ks8842.c +++ b/drivers/net/ethernet/micrel/ks8842.c @@ -1103,7 +1103,7 @@ static void ks8842_tx_timeout_work(struct work_struct *work) __ks8842_start_new_rx_dma(netdev); } -static void ks8842_tx_timeout(struct net_device *netdev) +static void ks8842_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ks8842_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index e102e1560ac7..d1444ba36e10 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -4896,7 +4896,7 @@ unlock: * 
triggered to free up resources so that the transmit routine can continue * sending out packets. The hardware is reset to correct the problem. */ -static void netdev_tx_timeout(struct net_device *dev) +static void netdev_tx_timeout(struct net_device *dev, unsigned int txqueue) { static unsigned long last_reset; diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c index 0567e4f387a5..09cdc2f2e7ff 100644 --- a/drivers/net/ethernet/microchip/enc28j60.c +++ b/drivers/net/ethernet/microchip/enc28j60.c @@ -1325,7 +1325,7 @@ static irqreturn_t enc28j60_irq(int irq, void *dev_id) return IRQ_HANDLED; } -static void enc28j60_tx_timeout(struct net_device *ndev) +static void enc28j60_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct enc28j60_net *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c index 52c41d11f565..39925e4bf2ec 100644 --- a/drivers/net/ethernet/microchip/encx24j600.c +++ b/drivers/net/ethernet/microchip/encx24j600.c @@ -892,7 +892,7 @@ static netdev_tx_t encx24j600_tx(struct sk_buff *skb, struct net_device *dev) } /* Deal with a transmit timeout */ -static void encx24j600_tx_timeout(struct net_device *dev) +static void encx24j600_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct encx24j600_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/microchip/lan743x_ptp.c b/drivers/net/ethernet/microchip/lan743x_ptp.c index afe52463dc57..9399f6a98748 100644 --- a/drivers/net/ethernet/microchip/lan743x_ptp.c +++ b/drivers/net/ethernet/microchip/lan743x_ptp.c @@ -1265,6 +1265,9 @@ int lan743x_ptp_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) lan743x_ptp_set_sync_ts_insert(adapter, true); break; + case HWTSTAMP_TX_ONESTEP_P2P: + ret = -ERANGE; + break; default: netif_warn(adapter, drv, adapter->netdev, " tx_type = %d, UNKNOWN\n", config.tx_type); diff --git 
a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 985b46d7e3d1..86d543ab1ab9 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -500,13 +500,14 @@ EXPORT_SYMBOL(ocelot_port_enable); static int ocelot_port_open(struct net_device *dev) { struct ocelot_port_private *priv = netdev_priv(dev); - struct ocelot *ocelot = priv->port.ocelot; + struct ocelot_port *ocelot_port = &priv->port; + struct ocelot *ocelot = ocelot_port->ocelot; int port = priv->chip_port; int err; if (priv->serdes) { err = phy_set_mode_ext(priv->serdes, PHY_MODE_ETHERNET, - priv->phy_mode); + ocelot_port->phy_mode); if (err) { netdev_err(dev, "Could not set mode of SerDes\n"); return err; @@ -514,7 +515,7 @@ static int ocelot_port_open(struct net_device *dev) } err = phy_connect_direct(dev, priv->phy, &ocelot_port_adjust_link, - priv->phy_mode); + ocelot_port->phy_mode); if (err) { netdev_err(dev, "Could not attach to PHY\n"); return err; diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h index c259114c48fd..04372ba72fec 100644 --- a/drivers/net/ethernet/mscc/ocelot.h +++ b/drivers/net/ethernet/mscc/ocelot.h @@ -18,11 +18,11 @@ #include <linux/ptp_clock_kernel.h> #include <linux/regmap.h> +#include <soc/mscc/ocelot_qsys.h> #include <soc/mscc/ocelot_sys.h> +#include <soc/mscc/ocelot_dev.h> +#include <soc/mscc/ocelot_ana.h> #include <soc/mscc/ocelot.h> -#include "ocelot_ana.h" -#include "ocelot_dev.h" -#include "ocelot_qsys.h" #include "ocelot_rew.h" #include "ocelot_qs.h" #include "ocelot_tc.h" @@ -68,7 +68,6 @@ struct ocelot_port_private { u8 vlan_aware; - phy_interface_t phy_mode; struct phy *serdes; struct ocelot_port_tc tc; diff --git a/drivers/net/ethernet/mscc/ocelot_ana.h b/drivers/net/ethernet/mscc/ocelot_ana.h deleted file mode 100644 index 841c6ec22b64..000000000000 --- a/drivers/net/ethernet/mscc/ocelot_ana.h +++ /dev/null @@ -1,625 +0,0 @@ -/* SPDX-License-Identifier: (GPL-2.0 OR MIT) 
*/ -/* - * Microsemi Ocelot Switch driver - * - * Copyright (c) 2017 Microsemi Corporation - */ - -#ifndef _MSCC_OCELOT_ANA_H_ -#define _MSCC_OCELOT_ANA_H_ - -#define ANA_ANAGEFIL_B_DOM_EN BIT(22) -#define ANA_ANAGEFIL_B_DOM_VAL BIT(21) -#define ANA_ANAGEFIL_AGE_LOCKED BIT(20) -#define ANA_ANAGEFIL_PID_EN BIT(19) -#define ANA_ANAGEFIL_PID_VAL(x) (((x) << 14) & GENMASK(18, 14)) -#define ANA_ANAGEFIL_PID_VAL_M GENMASK(18, 14) -#define ANA_ANAGEFIL_PID_VAL_X(x) (((x) & GENMASK(18, 14)) >> 14) -#define ANA_ANAGEFIL_VID_EN BIT(13) -#define ANA_ANAGEFIL_VID_VAL(x) ((x) & GENMASK(12, 0)) -#define ANA_ANAGEFIL_VID_VAL_M GENMASK(12, 0) - -#define ANA_STORMLIMIT_CFG_RSZ 0x4 - -#define ANA_STORMLIMIT_CFG_STORM_RATE(x) (((x) << 3) & GENMASK(6, 3)) -#define ANA_STORMLIMIT_CFG_STORM_RATE_M GENMASK(6, 3) -#define ANA_STORMLIMIT_CFG_STORM_RATE_X(x) (((x) & GENMASK(6, 3)) >> 3) -#define ANA_STORMLIMIT_CFG_STORM_UNIT BIT(2) -#define ANA_STORMLIMIT_CFG_STORM_MODE(x) ((x) & GENMASK(1, 0)) -#define ANA_STORMLIMIT_CFG_STORM_MODE_M GENMASK(1, 0) - -#define ANA_AUTOAGE_AGE_FAST BIT(21) -#define ANA_AUTOAGE_AGE_PERIOD(x) (((x) << 1) & GENMASK(20, 1)) -#define ANA_AUTOAGE_AGE_PERIOD_M GENMASK(20, 1) -#define ANA_AUTOAGE_AGE_PERIOD_X(x) (((x) & GENMASK(20, 1)) >> 1) -#define ANA_AUTOAGE_AUTOAGE_LOCKED BIT(0) - -#define ANA_MACTOPTIONS_REDUCED_TABLE BIT(1) -#define ANA_MACTOPTIONS_SHADOW BIT(0) - -#define ANA_AGENCTRL_FID_MASK(x) (((x) << 12) & GENMASK(23, 12)) -#define ANA_AGENCTRL_FID_MASK_M GENMASK(23, 12) -#define ANA_AGENCTRL_FID_MASK_X(x) (((x) & GENMASK(23, 12)) >> 12) -#define ANA_AGENCTRL_IGNORE_DMAC_FLAGS BIT(11) -#define ANA_AGENCTRL_IGNORE_SMAC_FLAGS BIT(10) -#define ANA_AGENCTRL_FLOOD_SPECIAL BIT(9) -#define ANA_AGENCTRL_FLOOD_IGNORE_VLAN BIT(8) -#define ANA_AGENCTRL_MIRROR_CPU BIT(7) -#define ANA_AGENCTRL_LEARN_CPU_COPY BIT(6) -#define ANA_AGENCTRL_LEARN_FWD_KILL BIT(5) -#define ANA_AGENCTRL_LEARN_IGNORE_VLAN BIT(4) -#define ANA_AGENCTRL_CPU_CPU_KILL_ENA BIT(3) -#define 
ANA_AGENCTRL_GREEN_COUNT_MODE BIT(2) -#define ANA_AGENCTRL_YELLOW_COUNT_MODE BIT(1) -#define ANA_AGENCTRL_RED_COUNT_MODE BIT(0) - -#define ANA_FLOODING_RSZ 0x4 - -#define ANA_FLOODING_FLD_UNICAST(x) (((x) << 12) & GENMASK(17, 12)) -#define ANA_FLOODING_FLD_UNICAST_M GENMASK(17, 12) -#define ANA_FLOODING_FLD_UNICAST_X(x) (((x) & GENMASK(17, 12)) >> 12) -#define ANA_FLOODING_FLD_BROADCAST(x) (((x) << 6) & GENMASK(11, 6)) -#define ANA_FLOODING_FLD_BROADCAST_M GENMASK(11, 6) -#define ANA_FLOODING_FLD_BROADCAST_X(x) (((x) & GENMASK(11, 6)) >> 6) -#define ANA_FLOODING_FLD_MULTICAST(x) ((x) & GENMASK(5, 0)) -#define ANA_FLOODING_FLD_MULTICAST_M GENMASK(5, 0) - -#define ANA_FLOODING_IPMC_FLD_MC4_CTRL(x) (((x) << 18) & GENMASK(23, 18)) -#define ANA_FLOODING_IPMC_FLD_MC4_CTRL_M GENMASK(23, 18) -#define ANA_FLOODING_IPMC_FLD_MC4_CTRL_X(x) (((x) & GENMASK(23, 18)) >> 18) -#define ANA_FLOODING_IPMC_FLD_MC4_DATA(x) (((x) << 12) & GENMASK(17, 12)) -#define ANA_FLOODING_IPMC_FLD_MC4_DATA_M GENMASK(17, 12) -#define ANA_FLOODING_IPMC_FLD_MC4_DATA_X(x) (((x) & GENMASK(17, 12)) >> 12) -#define ANA_FLOODING_IPMC_FLD_MC6_CTRL(x) (((x) << 6) & GENMASK(11, 6)) -#define ANA_FLOODING_IPMC_FLD_MC6_CTRL_M GENMASK(11, 6) -#define ANA_FLOODING_IPMC_FLD_MC6_CTRL_X(x) (((x) & GENMASK(11, 6)) >> 6) -#define ANA_FLOODING_IPMC_FLD_MC6_DATA(x) ((x) & GENMASK(5, 0)) -#define ANA_FLOODING_IPMC_FLD_MC6_DATA_M GENMASK(5, 0) - -#define ANA_SFLOW_CFG_RSZ 0x4 - -#define ANA_SFLOW_CFG_SF_RATE(x) (((x) << 2) & GENMASK(13, 2)) -#define ANA_SFLOW_CFG_SF_RATE_M GENMASK(13, 2) -#define ANA_SFLOW_CFG_SF_RATE_X(x) (((x) & GENMASK(13, 2)) >> 2) -#define ANA_SFLOW_CFG_SF_SAMPLE_RX BIT(1) -#define ANA_SFLOW_CFG_SF_SAMPLE_TX BIT(0) - -#define ANA_PORT_MODE_RSZ 0x4 - -#define ANA_PORT_MODE_REDTAG_PARSE_CFG BIT(3) -#define ANA_PORT_MODE_VLAN_PARSE_CFG(x) (((x) << 1) & GENMASK(2, 1)) -#define ANA_PORT_MODE_VLAN_PARSE_CFG_M GENMASK(2, 1) -#define ANA_PORT_MODE_VLAN_PARSE_CFG_X(x) (((x) & GENMASK(2, 1)) >> 1) -#define 
ANA_PORT_MODE_L3_PARSE_CFG BIT(0) - -#define ANA_CUT_THRU_CFG_RSZ 0x4 - -#define ANA_PGID_PGID_RSZ 0x4 - -#define ANA_PGID_PGID_PGID(x) ((x) & GENMASK(11, 0)) -#define ANA_PGID_PGID_PGID_M GENMASK(11, 0) -#define ANA_PGID_PGID_CPUQ_DST_PGID(x) (((x) << 27) & GENMASK(29, 27)) -#define ANA_PGID_PGID_CPUQ_DST_PGID_M GENMASK(29, 27) -#define ANA_PGID_PGID_CPUQ_DST_PGID_X(x) (((x) & GENMASK(29, 27)) >> 27) - -#define ANA_TABLES_MACHDATA_VID(x) (((x) << 16) & GENMASK(28, 16)) -#define ANA_TABLES_MACHDATA_VID_M GENMASK(28, 16) -#define ANA_TABLES_MACHDATA_VID_X(x) (((x) & GENMASK(28, 16)) >> 16) -#define ANA_TABLES_MACHDATA_MACHDATA(x) ((x) & GENMASK(15, 0)) -#define ANA_TABLES_MACHDATA_MACHDATA_M GENMASK(15, 0) - -#define ANA_TABLES_STREAMDATA_SSID_VALID BIT(16) -#define ANA_TABLES_STREAMDATA_SSID(x) (((x) << 9) & GENMASK(15, 9)) -#define ANA_TABLES_STREAMDATA_SSID_M GENMASK(15, 9) -#define ANA_TABLES_STREAMDATA_SSID_X(x) (((x) & GENMASK(15, 9)) >> 9) -#define ANA_TABLES_STREAMDATA_SFID_VALID BIT(8) -#define ANA_TABLES_STREAMDATA_SFID(x) ((x) & GENMASK(7, 0)) -#define ANA_TABLES_STREAMDATA_SFID_M GENMASK(7, 0) - -#define ANA_TABLES_MACACCESS_MAC_CPU_COPY BIT(15) -#define ANA_TABLES_MACACCESS_SRC_KILL BIT(14) -#define ANA_TABLES_MACACCESS_IGNORE_VLAN BIT(13) -#define ANA_TABLES_MACACCESS_AGED_FLAG BIT(12) -#define ANA_TABLES_MACACCESS_VALID BIT(11) -#define ANA_TABLES_MACACCESS_ENTRYTYPE(x) (((x) << 9) & GENMASK(10, 9)) -#define ANA_TABLES_MACACCESS_ENTRYTYPE_M GENMASK(10, 9) -#define ANA_TABLES_MACACCESS_ENTRYTYPE_X(x) (((x) & GENMASK(10, 9)) >> 9) -#define ANA_TABLES_MACACCESS_DEST_IDX(x) (((x) << 3) & GENMASK(8, 3)) -#define ANA_TABLES_MACACCESS_DEST_IDX_M GENMASK(8, 3) -#define ANA_TABLES_MACACCESS_DEST_IDX_X(x) (((x) & GENMASK(8, 3)) >> 3) -#define ANA_TABLES_MACACCESS_MAC_TABLE_CMD(x) ((x) & GENMASK(2, 0)) -#define ANA_TABLES_MACACCESS_MAC_TABLE_CMD_M GENMASK(2, 0) -#define MACACCESS_CMD_IDLE 0 -#define MACACCESS_CMD_LEARN 1 -#define MACACCESS_CMD_FORGET 2 -#define 
MACACCESS_CMD_AGE 3 -#define MACACCESS_CMD_GET_NEXT 4 -#define MACACCESS_CMD_INIT 5 -#define MACACCESS_CMD_READ 6 -#define MACACCESS_CMD_WRITE 7 - -#define ANA_TABLES_VLANACCESS_VLAN_PORT_MASK(x) (((x) << 2) & GENMASK(13, 2)) -#define ANA_TABLES_VLANACCESS_VLAN_PORT_MASK_M GENMASK(13, 2) -#define ANA_TABLES_VLANACCESS_VLAN_PORT_MASK_X(x) (((x) & GENMASK(13, 2)) >> 2) -#define ANA_TABLES_VLANACCESS_VLAN_TBL_CMD(x) ((x) & GENMASK(1, 0)) -#define ANA_TABLES_VLANACCESS_VLAN_TBL_CMD_M GENMASK(1, 0) -#define ANA_TABLES_VLANACCESS_CMD_IDLE 0x0 -#define ANA_TABLES_VLANACCESS_CMD_WRITE 0x2 -#define ANA_TABLES_VLANACCESS_CMD_INIT 0x3 - -#define ANA_TABLES_VLANTIDX_VLAN_SEC_FWD_ENA BIT(17) -#define ANA_TABLES_VLANTIDX_VLAN_FLOOD_DIS BIT(16) -#define ANA_TABLES_VLANTIDX_VLAN_PRIV_VLAN BIT(15) -#define ANA_TABLES_VLANTIDX_VLAN_LEARN_DISABLED BIT(14) -#define ANA_TABLES_VLANTIDX_VLAN_MIRROR BIT(13) -#define ANA_TABLES_VLANTIDX_VLAN_SRC_CHK BIT(12) -#define ANA_TABLES_VLANTIDX_V_INDEX(x) ((x) & GENMASK(11, 0)) -#define ANA_TABLES_VLANTIDX_V_INDEX_M GENMASK(11, 0) - -#define ANA_TABLES_ISDXACCESS_ISDX_PORT_MASK(x) (((x) << 2) & GENMASK(8, 2)) -#define ANA_TABLES_ISDXACCESS_ISDX_PORT_MASK_M GENMASK(8, 2) -#define ANA_TABLES_ISDXACCESS_ISDX_PORT_MASK_X(x) (((x) & GENMASK(8, 2)) >> 2) -#define ANA_TABLES_ISDXACCESS_ISDX_TBL_CMD(x) ((x) & GENMASK(1, 0)) -#define ANA_TABLES_ISDXACCESS_ISDX_TBL_CMD_M GENMASK(1, 0) - -#define ANA_TABLES_ISDXTIDX_ISDX_SDLBI(x) (((x) << 21) & GENMASK(28, 21)) -#define ANA_TABLES_ISDXTIDX_ISDX_SDLBI_M GENMASK(28, 21) -#define ANA_TABLES_ISDXTIDX_ISDX_SDLBI_X(x) (((x) & GENMASK(28, 21)) >> 21) -#define ANA_TABLES_ISDXTIDX_ISDX_MSTI(x) (((x) << 15) & GENMASK(20, 15)) -#define ANA_TABLES_ISDXTIDX_ISDX_MSTI_M GENMASK(20, 15) -#define ANA_TABLES_ISDXTIDX_ISDX_MSTI_X(x) (((x) & GENMASK(20, 15)) >> 15) -#define ANA_TABLES_ISDXTIDX_ISDX_ES0_KEY_ENA BIT(14) -#define ANA_TABLES_ISDXTIDX_ISDX_FORCE_ENA BIT(10) -#define ANA_TABLES_ISDXTIDX_ISDX_INDEX(x) ((x) & 
GENMASK(7, 0)) -#define ANA_TABLES_ISDXTIDX_ISDX_INDEX_M GENMASK(7, 0) - -#define ANA_TABLES_ENTRYLIM_RSZ 0x4 - -#define ANA_TABLES_ENTRYLIM_ENTRYLIM(x) (((x) << 14) & GENMASK(17, 14)) -#define ANA_TABLES_ENTRYLIM_ENTRYLIM_M GENMASK(17, 14) -#define ANA_TABLES_ENTRYLIM_ENTRYLIM_X(x) (((x) & GENMASK(17, 14)) >> 14) -#define ANA_TABLES_ENTRYLIM_ENTRYSTAT(x) ((x) & GENMASK(13, 0)) -#define ANA_TABLES_ENTRYLIM_ENTRYSTAT_M GENMASK(13, 0) - -#define ANA_TABLES_STREAMACCESS_GEN_REC_SEQ_NUM(x) (((x) << 4) & GENMASK(31, 4)) -#define ANA_TABLES_STREAMACCESS_GEN_REC_SEQ_NUM_M GENMASK(31, 4) -#define ANA_TABLES_STREAMACCESS_GEN_REC_SEQ_NUM_X(x) (((x) & GENMASK(31, 4)) >> 4) -#define ANA_TABLES_STREAMACCESS_SEQ_GEN_REC_ENA BIT(3) -#define ANA_TABLES_STREAMACCESS_GEN_REC_TYPE BIT(2) -#define ANA_TABLES_STREAMACCESS_STREAM_TBL_CMD(x) ((x) & GENMASK(1, 0)) -#define ANA_TABLES_STREAMACCESS_STREAM_TBL_CMD_M GENMASK(1, 0) - -#define ANA_TABLES_STREAMTIDX_SEQ_GEN_ERR_STATUS(x) (((x) << 30) & GENMASK(31, 30)) -#define ANA_TABLES_STREAMTIDX_SEQ_GEN_ERR_STATUS_M GENMASK(31, 30) -#define ANA_TABLES_STREAMTIDX_SEQ_GEN_ERR_STATUS_X(x) (((x) & GENMASK(31, 30)) >> 30) -#define ANA_TABLES_STREAMTIDX_S_INDEX(x) (((x) << 16) & GENMASK(22, 16)) -#define ANA_TABLES_STREAMTIDX_S_INDEX_M GENMASK(22, 16) -#define ANA_TABLES_STREAMTIDX_S_INDEX_X(x) (((x) & GENMASK(22, 16)) >> 16) -#define ANA_TABLES_STREAMTIDX_FORCE_SF_BEHAVIOUR BIT(14) -#define ANA_TABLES_STREAMTIDX_SEQ_HISTORY_LEN(x) (((x) << 8) & GENMASK(13, 8)) -#define ANA_TABLES_STREAMTIDX_SEQ_HISTORY_LEN_M GENMASK(13, 8) -#define ANA_TABLES_STREAMTIDX_SEQ_HISTORY_LEN_X(x) (((x) & GENMASK(13, 8)) >> 8) -#define ANA_TABLES_STREAMTIDX_RESET_ON_ROGUE BIT(7) -#define ANA_TABLES_STREAMTIDX_REDTAG_POP BIT(6) -#define ANA_TABLES_STREAMTIDX_STREAM_SPLIT BIT(5) -#define ANA_TABLES_STREAMTIDX_SEQ_SPACE_LOG2(x) ((x) & GENMASK(4, 0)) -#define ANA_TABLES_STREAMTIDX_SEQ_SPACE_LOG2_M GENMASK(4, 0) - -#define ANA_TABLES_SEQ_MASK_SPLIT_MASK(x) (((x) << 16) & 
GENMASK(22, 16)) -#define ANA_TABLES_SEQ_MASK_SPLIT_MASK_M GENMASK(22, 16) -#define ANA_TABLES_SEQ_MASK_SPLIT_MASK_X(x) (((x) & GENMASK(22, 16)) >> 16) -#define ANA_TABLES_SEQ_MASK_INPUT_PORT_MASK(x) ((x) & GENMASK(6, 0)) -#define ANA_TABLES_SEQ_MASK_INPUT_PORT_MASK_M GENMASK(6, 0) - -#define ANA_TABLES_SFID_MASK_IGR_PORT_MASK(x) (((x) << 1) & GENMASK(7, 1)) -#define ANA_TABLES_SFID_MASK_IGR_PORT_MASK_M GENMASK(7, 1) -#define ANA_TABLES_SFID_MASK_IGR_PORT_MASK_X(x) (((x) & GENMASK(7, 1)) >> 1) -#define ANA_TABLES_SFID_MASK_IGR_SRCPORT_MATCH_ENA BIT(0) - -#define ANA_TABLES_SFIDACCESS_IGR_PRIO_MATCH_ENA BIT(22) -#define ANA_TABLES_SFIDACCESS_IGR_PRIO(x) (((x) << 19) & GENMASK(21, 19)) -#define ANA_TABLES_SFIDACCESS_IGR_PRIO_M GENMASK(21, 19) -#define ANA_TABLES_SFIDACCESS_IGR_PRIO_X(x) (((x) & GENMASK(21, 19)) >> 19) -#define ANA_TABLES_SFIDACCESS_FORCE_BLOCK BIT(18) -#define ANA_TABLES_SFIDACCESS_MAX_SDU_LEN(x) (((x) << 2) & GENMASK(17, 2)) -#define ANA_TABLES_SFIDACCESS_MAX_SDU_LEN_M GENMASK(17, 2) -#define ANA_TABLES_SFIDACCESS_MAX_SDU_LEN_X(x) (((x) & GENMASK(17, 2)) >> 2) -#define ANA_TABLES_SFIDACCESS_SFID_TBL_CMD(x) ((x) & GENMASK(1, 0)) -#define ANA_TABLES_SFIDACCESS_SFID_TBL_CMD_M GENMASK(1, 0) - -#define ANA_TABLES_SFIDTIDX_SGID_VALID BIT(26) -#define ANA_TABLES_SFIDTIDX_SGID(x) (((x) << 18) & GENMASK(25, 18)) -#define ANA_TABLES_SFIDTIDX_SGID_M GENMASK(25, 18) -#define ANA_TABLES_SFIDTIDX_SGID_X(x) (((x) & GENMASK(25, 18)) >> 18) -#define ANA_TABLES_SFIDTIDX_POL_ENA BIT(17) -#define ANA_TABLES_SFIDTIDX_POL_IDX(x) (((x) << 8) & GENMASK(16, 8)) -#define ANA_TABLES_SFIDTIDX_POL_IDX_M GENMASK(16, 8) -#define ANA_TABLES_SFIDTIDX_POL_IDX_X(x) (((x) & GENMASK(16, 8)) >> 8) -#define ANA_TABLES_SFIDTIDX_SFID_INDEX(x) ((x) & GENMASK(7, 0)) -#define ANA_TABLES_SFIDTIDX_SFID_INDEX_M GENMASK(7, 0) - -#define ANA_MSTI_STATE_RSZ 0x4 - -#define ANA_OAM_UPM_LM_CNT_RSZ 0x4 - -#define ANA_SG_ACCESS_CTRL_SGID(x) ((x) & GENMASK(7, 0)) -#define ANA_SG_ACCESS_CTRL_SGID_M 
GENMASK(7, 0) -#define ANA_SG_ACCESS_CTRL_CONFIG_CHANGE BIT(28) - -#define ANA_SG_CONFIG_REG_3_BASE_TIME_SEC_MSB(x) ((x) & GENMASK(15, 0)) -#define ANA_SG_CONFIG_REG_3_BASE_TIME_SEC_MSB_M GENMASK(15, 0) -#define ANA_SG_CONFIG_REG_3_LIST_LENGTH(x) (((x) << 16) & GENMASK(18, 16)) -#define ANA_SG_CONFIG_REG_3_LIST_LENGTH_M GENMASK(18, 16) -#define ANA_SG_CONFIG_REG_3_LIST_LENGTH_X(x) (((x) & GENMASK(18, 16)) >> 16) -#define ANA_SG_CONFIG_REG_3_GATE_ENABLE BIT(20) -#define ANA_SG_CONFIG_REG_3_INIT_IPS(x) (((x) << 24) & GENMASK(27, 24)) -#define ANA_SG_CONFIG_REG_3_INIT_IPS_M GENMASK(27, 24) -#define ANA_SG_CONFIG_REG_3_INIT_IPS_X(x) (((x) & GENMASK(27, 24)) >> 24) -#define ANA_SG_CONFIG_REG_3_INIT_GATE_STATE BIT(28) - -#define ANA_SG_GCL_GS_CONFIG_RSZ 0x4 - -#define ANA_SG_GCL_GS_CONFIG_IPS(x) ((x) & GENMASK(3, 0)) -#define ANA_SG_GCL_GS_CONFIG_IPS_M GENMASK(3, 0) -#define ANA_SG_GCL_GS_CONFIG_GATE_STATE BIT(4) - -#define ANA_SG_GCL_TI_CONFIG_RSZ 0x4 - -#define ANA_SG_STATUS_REG_3_CFG_CHG_TIME_SEC_MSB(x) ((x) & GENMASK(15, 0)) -#define ANA_SG_STATUS_REG_3_CFG_CHG_TIME_SEC_MSB_M GENMASK(15, 0) -#define ANA_SG_STATUS_REG_3_GATE_STATE BIT(16) -#define ANA_SG_STATUS_REG_3_IPS(x) (((x) << 20) & GENMASK(23, 20)) -#define ANA_SG_STATUS_REG_3_IPS_M GENMASK(23, 20) -#define ANA_SG_STATUS_REG_3_IPS_X(x) (((x) & GENMASK(23, 20)) >> 20) -#define ANA_SG_STATUS_REG_3_CONFIG_PENDING BIT(24) - -#define ANA_PORT_VLAN_CFG_GSZ 0x100 - -#define ANA_PORT_VLAN_CFG_VLAN_VID_AS_ISDX BIT(21) -#define ANA_PORT_VLAN_CFG_VLAN_AWARE_ENA BIT(20) -#define ANA_PORT_VLAN_CFG_VLAN_POP_CNT(x) (((x) << 18) & GENMASK(19, 18)) -#define ANA_PORT_VLAN_CFG_VLAN_POP_CNT_M GENMASK(19, 18) -#define ANA_PORT_VLAN_CFG_VLAN_POP_CNT_X(x) (((x) & GENMASK(19, 18)) >> 18) -#define ANA_PORT_VLAN_CFG_VLAN_INNER_TAG_ENA BIT(17) -#define ANA_PORT_VLAN_CFG_VLAN_TAG_TYPE BIT(16) -#define ANA_PORT_VLAN_CFG_VLAN_DEI BIT(15) -#define ANA_PORT_VLAN_CFG_VLAN_PCP(x) (((x) << 12) & GENMASK(14, 12)) -#define 
ANA_PORT_VLAN_CFG_VLAN_PCP_M GENMASK(14, 12) -#define ANA_PORT_VLAN_CFG_VLAN_PCP_X(x) (((x) & GENMASK(14, 12)) >> 12) -#define ANA_PORT_VLAN_CFG_VLAN_VID(x) ((x) & GENMASK(11, 0)) -#define ANA_PORT_VLAN_CFG_VLAN_VID_M GENMASK(11, 0) - -#define ANA_PORT_DROP_CFG_GSZ 0x100 - -#define ANA_PORT_DROP_CFG_DROP_UNTAGGED_ENA BIT(6) -#define ANA_PORT_DROP_CFG_DROP_S_TAGGED_ENA BIT(5) -#define ANA_PORT_DROP_CFG_DROP_C_TAGGED_ENA BIT(4) -#define ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA BIT(3) -#define ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA BIT(2) -#define ANA_PORT_DROP_CFG_DROP_NULL_MAC_ENA BIT(1) -#define ANA_PORT_DROP_CFG_DROP_MC_SMAC_ENA BIT(0) - -#define ANA_PORT_QOS_CFG_GSZ 0x100 - -#define ANA_PORT_QOS_CFG_DP_DEFAULT_VAL BIT(8) -#define ANA_PORT_QOS_CFG_QOS_DEFAULT_VAL(x) (((x) << 5) & GENMASK(7, 5)) -#define ANA_PORT_QOS_CFG_QOS_DEFAULT_VAL_M GENMASK(7, 5) -#define ANA_PORT_QOS_CFG_QOS_DEFAULT_VAL_X(x) (((x) & GENMASK(7, 5)) >> 5) -#define ANA_PORT_QOS_CFG_QOS_DSCP_ENA BIT(4) -#define ANA_PORT_QOS_CFG_QOS_PCP_ENA BIT(3) -#define ANA_PORT_QOS_CFG_DSCP_TRANSLATE_ENA BIT(2) -#define ANA_PORT_QOS_CFG_DSCP_REWR_CFG(x) ((x) & GENMASK(1, 0)) -#define ANA_PORT_QOS_CFG_DSCP_REWR_CFG_M GENMASK(1, 0) - -#define ANA_PORT_VCAP_CFG_GSZ 0x100 - -#define ANA_PORT_VCAP_CFG_S1_ENA BIT(14) -#define ANA_PORT_VCAP_CFG_S1_DMAC_DIP_ENA(x) (((x) << 11) & GENMASK(13, 11)) -#define ANA_PORT_VCAP_CFG_S1_DMAC_DIP_ENA_M GENMASK(13, 11) -#define ANA_PORT_VCAP_CFG_S1_DMAC_DIP_ENA_X(x) (((x) & GENMASK(13, 11)) >> 11) -#define ANA_PORT_VCAP_CFG_S1_VLAN_INNER_TAG_ENA(x) (((x) << 8) & GENMASK(10, 8)) -#define ANA_PORT_VCAP_CFG_S1_VLAN_INNER_TAG_ENA_M GENMASK(10, 8) -#define ANA_PORT_VCAP_CFG_S1_VLAN_INNER_TAG_ENA_X(x) (((x) & GENMASK(10, 8)) >> 8) -#define ANA_PORT_VCAP_CFG_PAG_VAL(x) ((x) & GENMASK(7, 0)) -#define ANA_PORT_VCAP_CFG_PAG_VAL_M GENMASK(7, 0) - -#define ANA_PORT_VCAP_S1_KEY_CFG_GSZ 0x100 -#define ANA_PORT_VCAP_S1_KEY_CFG_RSZ 0x4 - -#define ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_IP6_CFG(x) (((x) 
<< 4) & GENMASK(6, 4)) -#define ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_IP6_CFG_M GENMASK(6, 4) -#define ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_IP6_CFG_X(x) (((x) & GENMASK(6, 4)) >> 4) -#define ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_IP4_CFG(x) (((x) << 2) & GENMASK(3, 2)) -#define ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_IP4_CFG_M GENMASK(3, 2) -#define ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_IP4_CFG_X(x) (((x) & GENMASK(3, 2)) >> 2) -#define ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_OTHER_CFG(x) ((x) & GENMASK(1, 0)) -#define ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_OTHER_CFG_M GENMASK(1, 0) - -#define ANA_PORT_VCAP_S2_CFG_GSZ 0x100 - -#define ANA_PORT_VCAP_S2_CFG_S2_UDP_PAYLOAD_ENA(x) (((x) << 17) & GENMASK(18, 17)) -#define ANA_PORT_VCAP_S2_CFG_S2_UDP_PAYLOAD_ENA_M GENMASK(18, 17) -#define ANA_PORT_VCAP_S2_CFG_S2_UDP_PAYLOAD_ENA_X(x) (((x) & GENMASK(18, 17)) >> 17) -#define ANA_PORT_VCAP_S2_CFG_S2_ETYPE_PAYLOAD_ENA(x) (((x) << 15) & GENMASK(16, 15)) -#define ANA_PORT_VCAP_S2_CFG_S2_ETYPE_PAYLOAD_ENA_M GENMASK(16, 15) -#define ANA_PORT_VCAP_S2_CFG_S2_ETYPE_PAYLOAD_ENA_X(x) (((x) & GENMASK(16, 15)) >> 15) -#define ANA_PORT_VCAP_S2_CFG_S2_ENA BIT(14) -#define ANA_PORT_VCAP_S2_CFG_S2_SNAP_DIS(x) (((x) << 12) & GENMASK(13, 12)) -#define ANA_PORT_VCAP_S2_CFG_S2_SNAP_DIS_M GENMASK(13, 12) -#define ANA_PORT_VCAP_S2_CFG_S2_SNAP_DIS_X(x) (((x) & GENMASK(13, 12)) >> 12) -#define ANA_PORT_VCAP_S2_CFG_S2_ARP_DIS(x) (((x) << 10) & GENMASK(11, 10)) -#define ANA_PORT_VCAP_S2_CFG_S2_ARP_DIS_M GENMASK(11, 10) -#define ANA_PORT_VCAP_S2_CFG_S2_ARP_DIS_X(x) (((x) & GENMASK(11, 10)) >> 10) -#define ANA_PORT_VCAP_S2_CFG_S2_IP_TCPUDP_DIS(x) (((x) << 8) & GENMASK(9, 8)) -#define ANA_PORT_VCAP_S2_CFG_S2_IP_TCPUDP_DIS_M GENMASK(9, 8) -#define ANA_PORT_VCAP_S2_CFG_S2_IP_TCPUDP_DIS_X(x) (((x) & GENMASK(9, 8)) >> 8) -#define ANA_PORT_VCAP_S2_CFG_S2_IP_OTHER_DIS(x) (((x) << 6) & GENMASK(7, 6)) -#define ANA_PORT_VCAP_S2_CFG_S2_IP_OTHER_DIS_M GENMASK(7, 6) -#define ANA_PORT_VCAP_S2_CFG_S2_IP_OTHER_DIS_X(x) (((x) & GENMASK(7, 6)) >> 6) -#define 
ANA_PORT_VCAP_S2_CFG_S2_IP6_CFG(x) (((x) << 2) & GENMASK(5, 2)) -#define ANA_PORT_VCAP_S2_CFG_S2_IP6_CFG_M GENMASK(5, 2) -#define ANA_PORT_VCAP_S2_CFG_S2_IP6_CFG_X(x) (((x) & GENMASK(5, 2)) >> 2) -#define ANA_PORT_VCAP_S2_CFG_S2_OAM_DIS(x) ((x) & GENMASK(1, 0)) -#define ANA_PORT_VCAP_S2_CFG_S2_OAM_DIS_M GENMASK(1, 0) - -#define ANA_PORT_PCP_DEI_MAP_GSZ 0x100 -#define ANA_PORT_PCP_DEI_MAP_RSZ 0x4 - -#define ANA_PORT_PCP_DEI_MAP_DP_PCP_DEI_VAL BIT(3) -#define ANA_PORT_PCP_DEI_MAP_QOS_PCP_DEI_VAL(x) ((x) & GENMASK(2, 0)) -#define ANA_PORT_PCP_DEI_MAP_QOS_PCP_DEI_VAL_M GENMASK(2, 0) - -#define ANA_PORT_CPU_FWD_CFG_GSZ 0x100 - -#define ANA_PORT_CPU_FWD_CFG_CPU_VRAP_REDIR_ENA BIT(7) -#define ANA_PORT_CPU_FWD_CFG_CPU_MLD_REDIR_ENA BIT(6) -#define ANA_PORT_CPU_FWD_CFG_CPU_IGMP_REDIR_ENA BIT(5) -#define ANA_PORT_CPU_FWD_CFG_CPU_IPMC_CTRL_COPY_ENA BIT(4) -#define ANA_PORT_CPU_FWD_CFG_CPU_SRC_COPY_ENA BIT(3) -#define ANA_PORT_CPU_FWD_CFG_CPU_ALLBRIDGE_DROP_ENA BIT(2) -#define ANA_PORT_CPU_FWD_CFG_CPU_ALLBRIDGE_REDIR_ENA BIT(1) -#define ANA_PORT_CPU_FWD_CFG_CPU_OAM_ENA BIT(0) - -#define ANA_PORT_CPU_FWD_BPDU_CFG_GSZ 0x100 - -#define ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_DROP_ENA(x) (((x) << 16) & GENMASK(31, 16)) -#define ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_DROP_ENA_M GENMASK(31, 16) -#define ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_DROP_ENA_X(x) (((x) & GENMASK(31, 16)) >> 16) -#define ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_REDIR_ENA(x) ((x) & GENMASK(15, 0)) -#define ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_REDIR_ENA_M GENMASK(15, 0) - -#define ANA_PORT_CPU_FWD_GARP_CFG_GSZ 0x100 - -#define ANA_PORT_CPU_FWD_GARP_CFG_GARP_DROP_ENA(x) (((x) << 16) & GENMASK(31, 16)) -#define ANA_PORT_CPU_FWD_GARP_CFG_GARP_DROP_ENA_M GENMASK(31, 16) -#define ANA_PORT_CPU_FWD_GARP_CFG_GARP_DROP_ENA_X(x) (((x) & GENMASK(31, 16)) >> 16) -#define ANA_PORT_CPU_FWD_GARP_CFG_GARP_REDIR_ENA(x) ((x) & GENMASK(15, 0)) -#define ANA_PORT_CPU_FWD_GARP_CFG_GARP_REDIR_ENA_M GENMASK(15, 0) - -#define ANA_PORT_CPU_FWD_CCM_CFG_GSZ 0x100 - -#define 
ANA_PORT_CPU_FWD_CCM_CFG_CCM_DROP_ENA(x) (((x) << 16) & GENMASK(31, 16)) -#define ANA_PORT_CPU_FWD_CCM_CFG_CCM_DROP_ENA_M GENMASK(31, 16) -#define ANA_PORT_CPU_FWD_CCM_CFG_CCM_DROP_ENA_X(x) (((x) & GENMASK(31, 16)) >> 16) -#define ANA_PORT_CPU_FWD_CCM_CFG_CCM_REDIR_ENA(x) ((x) & GENMASK(15, 0)) -#define ANA_PORT_CPU_FWD_CCM_CFG_CCM_REDIR_ENA_M GENMASK(15, 0) - -#define ANA_PORT_PORT_CFG_GSZ 0x100 - -#define ANA_PORT_PORT_CFG_SRC_MIRROR_ENA BIT(15) -#define ANA_PORT_PORT_CFG_LIMIT_DROP BIT(14) -#define ANA_PORT_PORT_CFG_LIMIT_CPU BIT(13) -#define ANA_PORT_PORT_CFG_LOCKED_PORTMOVE_DROP BIT(12) -#define ANA_PORT_PORT_CFG_LOCKED_PORTMOVE_CPU BIT(11) -#define ANA_PORT_PORT_CFG_LEARNDROP BIT(10) -#define ANA_PORT_PORT_CFG_LEARNCPU BIT(9) -#define ANA_PORT_PORT_CFG_LEARNAUTO BIT(8) -#define ANA_PORT_PORT_CFG_LEARN_ENA BIT(7) -#define ANA_PORT_PORT_CFG_RECV_ENA BIT(6) -#define ANA_PORT_PORT_CFG_PORTID_VAL(x) (((x) << 2) & GENMASK(5, 2)) -#define ANA_PORT_PORT_CFG_PORTID_VAL_M GENMASK(5, 2) -#define ANA_PORT_PORT_CFG_PORTID_VAL_X(x) (((x) & GENMASK(5, 2)) >> 2) -#define ANA_PORT_PORT_CFG_USE_B_DOM_TBL BIT(1) -#define ANA_PORT_PORT_CFG_LSR_MODE BIT(0) - -#define ANA_PORT_POL_CFG_GSZ 0x100 - -#define ANA_PORT_POL_CFG_POL_CPU_REDIR_8021 BIT(19) -#define ANA_PORT_POL_CFG_POL_CPU_REDIR_IP BIT(18) -#define ANA_PORT_POL_CFG_PORT_POL_ENA BIT(17) -#define ANA_PORT_POL_CFG_QUEUE_POL_ENA(x) (((x) << 9) & GENMASK(16, 9)) -#define ANA_PORT_POL_CFG_QUEUE_POL_ENA_M GENMASK(16, 9) -#define ANA_PORT_POL_CFG_QUEUE_POL_ENA_X(x) (((x) & GENMASK(16, 9)) >> 9) -#define ANA_PORT_POL_CFG_POL_ORDER(x) ((x) & GENMASK(8, 0)) -#define ANA_PORT_POL_CFG_POL_ORDER_M GENMASK(8, 0) - -#define ANA_PORT_PTP_CFG_GSZ 0x100 - -#define ANA_PORT_PTP_CFG_PTP_BACKPLANE_MODE BIT(0) - -#define ANA_PORT_PTP_DLY1_CFG_GSZ 0x100 - -#define ANA_PORT_PTP_DLY2_CFG_GSZ 0x100 - -#define ANA_PORT_SFID_CFG_GSZ 0x100 -#define ANA_PORT_SFID_CFG_RSZ 0x4 - -#define ANA_PORT_SFID_CFG_SFID_VALID BIT(8) -#define 
ANA_PORT_SFID_CFG_SFID(x) ((x) & GENMASK(7, 0)) -#define ANA_PORT_SFID_CFG_SFID_M GENMASK(7, 0) - -#define ANA_PFC_PFC_CFG_GSZ 0x40 - -#define ANA_PFC_PFC_CFG_RX_PFC_ENA(x) (((x) << 2) & GENMASK(9, 2)) -#define ANA_PFC_PFC_CFG_RX_PFC_ENA_M GENMASK(9, 2) -#define ANA_PFC_PFC_CFG_RX_PFC_ENA_X(x) (((x) & GENMASK(9, 2)) >> 2) -#define ANA_PFC_PFC_CFG_FC_LINK_SPEED(x) ((x) & GENMASK(1, 0)) -#define ANA_PFC_PFC_CFG_FC_LINK_SPEED_M GENMASK(1, 0) - -#define ANA_PFC_PFC_TIMER_GSZ 0x40 -#define ANA_PFC_PFC_TIMER_RSZ 0x4 - -#define ANA_IPT_OAM_MEP_CFG_GSZ 0x8 - -#define ANA_IPT_OAM_MEP_CFG_MEP_IDX_P(x) (((x) << 6) & GENMASK(10, 6)) -#define ANA_IPT_OAM_MEP_CFG_MEP_IDX_P_M GENMASK(10, 6) -#define ANA_IPT_OAM_MEP_CFG_MEP_IDX_P_X(x) (((x) & GENMASK(10, 6)) >> 6) -#define ANA_IPT_OAM_MEP_CFG_MEP_IDX(x) (((x) << 1) & GENMASK(5, 1)) -#define ANA_IPT_OAM_MEP_CFG_MEP_IDX_M GENMASK(5, 1) -#define ANA_IPT_OAM_MEP_CFG_MEP_IDX_X(x) (((x) & GENMASK(5, 1)) >> 1) -#define ANA_IPT_OAM_MEP_CFG_MEP_IDX_ENA BIT(0) - -#define ANA_IPT_IPT_GSZ 0x8 - -#define ANA_IPT_IPT_IPT_CFG(x) (((x) << 15) & GENMASK(16, 15)) -#define ANA_IPT_IPT_IPT_CFG_M GENMASK(16, 15) -#define ANA_IPT_IPT_IPT_CFG_X(x) (((x) & GENMASK(16, 15)) >> 15) -#define ANA_IPT_IPT_ISDX_P(x) (((x) << 7) & GENMASK(14, 7)) -#define ANA_IPT_IPT_ISDX_P_M GENMASK(14, 7) -#define ANA_IPT_IPT_ISDX_P_X(x) (((x) & GENMASK(14, 7)) >> 7) -#define ANA_IPT_IPT_PPT_IDX(x) ((x) & GENMASK(6, 0)) -#define ANA_IPT_IPT_PPT_IDX_M GENMASK(6, 0) - -#define ANA_PPT_PPT_RSZ 0x4 - -#define ANA_FID_MAP_FID_MAP_RSZ 0x4 - -#define ANA_FID_MAP_FID_MAP_FID_C_VAL(x) (((x) << 6) & GENMASK(11, 6)) -#define ANA_FID_MAP_FID_MAP_FID_C_VAL_M GENMASK(11, 6) -#define ANA_FID_MAP_FID_MAP_FID_C_VAL_X(x) (((x) & GENMASK(11, 6)) >> 6) -#define ANA_FID_MAP_FID_MAP_FID_B_VAL(x) ((x) & GENMASK(5, 0)) -#define ANA_FID_MAP_FID_MAP_FID_B_VAL_M GENMASK(5, 0) - -#define ANA_AGGR_CFG_AC_RND_ENA BIT(7) -#define ANA_AGGR_CFG_AC_DMAC_ENA BIT(6) -#define ANA_AGGR_CFG_AC_SMAC_ENA BIT(5) 
-#define ANA_AGGR_CFG_AC_IP6_FLOW_LBL_ENA BIT(4) -#define ANA_AGGR_CFG_AC_IP6_TCPUDP_ENA BIT(3) -#define ANA_AGGR_CFG_AC_IP4_SIPDIP_ENA BIT(2) -#define ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA BIT(1) -#define ANA_AGGR_CFG_AC_ISDX_ENA BIT(0) - -#define ANA_CPUQ_CFG_CPUQ_MLD(x) (((x) << 27) & GENMASK(29, 27)) -#define ANA_CPUQ_CFG_CPUQ_MLD_M GENMASK(29, 27) -#define ANA_CPUQ_CFG_CPUQ_MLD_X(x) (((x) & GENMASK(29, 27)) >> 27) -#define ANA_CPUQ_CFG_CPUQ_IGMP(x) (((x) << 24) & GENMASK(26, 24)) -#define ANA_CPUQ_CFG_CPUQ_IGMP_M GENMASK(26, 24) -#define ANA_CPUQ_CFG_CPUQ_IGMP_X(x) (((x) & GENMASK(26, 24)) >> 24) -#define ANA_CPUQ_CFG_CPUQ_IPMC_CTRL(x) (((x) << 21) & GENMASK(23, 21)) -#define ANA_CPUQ_CFG_CPUQ_IPMC_CTRL_M GENMASK(23, 21) -#define ANA_CPUQ_CFG_CPUQ_IPMC_CTRL_X(x) (((x) & GENMASK(23, 21)) >> 21) -#define ANA_CPUQ_CFG_CPUQ_ALLBRIDGE(x) (((x) << 18) & GENMASK(20, 18)) -#define ANA_CPUQ_CFG_CPUQ_ALLBRIDGE_M GENMASK(20, 18) -#define ANA_CPUQ_CFG_CPUQ_ALLBRIDGE_X(x) (((x) & GENMASK(20, 18)) >> 18) -#define ANA_CPUQ_CFG_CPUQ_LOCKED_PORTMOVE(x) (((x) << 15) & GENMASK(17, 15)) -#define ANA_CPUQ_CFG_CPUQ_LOCKED_PORTMOVE_M GENMASK(17, 15) -#define ANA_CPUQ_CFG_CPUQ_LOCKED_PORTMOVE_X(x) (((x) & GENMASK(17, 15)) >> 15) -#define ANA_CPUQ_CFG_CPUQ_SRC_COPY(x) (((x) << 12) & GENMASK(14, 12)) -#define ANA_CPUQ_CFG_CPUQ_SRC_COPY_M GENMASK(14, 12) -#define ANA_CPUQ_CFG_CPUQ_SRC_COPY_X(x) (((x) & GENMASK(14, 12)) >> 12) -#define ANA_CPUQ_CFG_CPUQ_MAC_COPY(x) (((x) << 9) & GENMASK(11, 9)) -#define ANA_CPUQ_CFG_CPUQ_MAC_COPY_M GENMASK(11, 9) -#define ANA_CPUQ_CFG_CPUQ_MAC_COPY_X(x) (((x) & GENMASK(11, 9)) >> 9) -#define ANA_CPUQ_CFG_CPUQ_LRN(x) (((x) << 6) & GENMASK(8, 6)) -#define ANA_CPUQ_CFG_CPUQ_LRN_M GENMASK(8, 6) -#define ANA_CPUQ_CFG_CPUQ_LRN_X(x) (((x) & GENMASK(8, 6)) >> 6) -#define ANA_CPUQ_CFG_CPUQ_MIRROR(x) (((x) << 3) & GENMASK(5, 3)) -#define ANA_CPUQ_CFG_CPUQ_MIRROR_M GENMASK(5, 3) -#define ANA_CPUQ_CFG_CPUQ_MIRROR_X(x) (((x) & GENMASK(5, 3)) >> 3) -#define 
ANA_CPUQ_CFG_CPUQ_SFLOW(x) ((x) & GENMASK(2, 0)) -#define ANA_CPUQ_CFG_CPUQ_SFLOW_M GENMASK(2, 0) - -#define ANA_CPUQ_8021_CFG_RSZ 0x4 - -#define ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(x) (((x) << 6) & GENMASK(8, 6)) -#define ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL_M GENMASK(8, 6) -#define ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL_X(x) (((x) & GENMASK(8, 6)) >> 6) -#define ANA_CPUQ_8021_CFG_CPUQ_GARP_VAL(x) (((x) << 3) & GENMASK(5, 3)) -#define ANA_CPUQ_8021_CFG_CPUQ_GARP_VAL_M GENMASK(5, 3) -#define ANA_CPUQ_8021_CFG_CPUQ_GARP_VAL_X(x) (((x) & GENMASK(5, 3)) >> 3) -#define ANA_CPUQ_8021_CFG_CPUQ_CCM_VAL(x) ((x) & GENMASK(2, 0)) -#define ANA_CPUQ_8021_CFG_CPUQ_CCM_VAL_M GENMASK(2, 0) - -#define ANA_DSCP_CFG_RSZ 0x4 - -#define ANA_DSCP_CFG_DP_DSCP_VAL BIT(11) -#define ANA_DSCP_CFG_QOS_DSCP_VAL(x) (((x) << 8) & GENMASK(10, 8)) -#define ANA_DSCP_CFG_QOS_DSCP_VAL_M GENMASK(10, 8) -#define ANA_DSCP_CFG_QOS_DSCP_VAL_X(x) (((x) & GENMASK(10, 8)) >> 8) -#define ANA_DSCP_CFG_DSCP_TRANSLATE_VAL(x) (((x) << 2) & GENMASK(7, 2)) -#define ANA_DSCP_CFG_DSCP_TRANSLATE_VAL_M GENMASK(7, 2) -#define ANA_DSCP_CFG_DSCP_TRANSLATE_VAL_X(x) (((x) & GENMASK(7, 2)) >> 2) -#define ANA_DSCP_CFG_DSCP_TRUST_ENA BIT(1) -#define ANA_DSCP_CFG_DSCP_REWR_ENA BIT(0) - -#define ANA_DSCP_REWR_CFG_RSZ 0x4 - -#define ANA_VCAP_RNG_TYPE_CFG_RSZ 0x4 - -#define ANA_VCAP_RNG_VAL_CFG_RSZ 0x4 - -#define ANA_VCAP_RNG_VAL_CFG_VCAP_RNG_MIN_VAL(x) (((x) << 16) & GENMASK(31, 16)) -#define ANA_VCAP_RNG_VAL_CFG_VCAP_RNG_MIN_VAL_M GENMASK(31, 16) -#define ANA_VCAP_RNG_VAL_CFG_VCAP_RNG_MIN_VAL_X(x) (((x) & GENMASK(31, 16)) >> 16) -#define ANA_VCAP_RNG_VAL_CFG_VCAP_RNG_MAX_VAL(x) ((x) & GENMASK(15, 0)) -#define ANA_VCAP_RNG_VAL_CFG_VCAP_RNG_MAX_VAL_M GENMASK(15, 0) - -#define ANA_VRAP_CFG_VRAP_VLAN_AWARE_ENA BIT(12) -#define ANA_VRAP_CFG_VRAP_VID(x) ((x) & GENMASK(11, 0)) -#define ANA_VRAP_CFG_VRAP_VID_M GENMASK(11, 0) - -#define ANA_DISCARD_CFG_DROP_TAGGING_ISDX0 BIT(3) -#define ANA_DISCARD_CFG_DROP_CTRLPROT_ISDX0 BIT(2) -#define 
ANA_DISCARD_CFG_DROP_TAGGING_S2_ENA BIT(1) -#define ANA_DISCARD_CFG_DROP_CTRLPROT_S2_ENA BIT(0) - -#define ANA_FID_CFG_VID_MC_ENA BIT(0) - -#define ANA_POL_PIR_CFG_GSZ 0x20 - -#define ANA_POL_PIR_CFG_PIR_RATE(x) (((x) << 6) & GENMASK(20, 6)) -#define ANA_POL_PIR_CFG_PIR_RATE_M GENMASK(20, 6) -#define ANA_POL_PIR_CFG_PIR_RATE_X(x) (((x) & GENMASK(20, 6)) >> 6) -#define ANA_POL_PIR_CFG_PIR_BURST(x) ((x) & GENMASK(5, 0)) -#define ANA_POL_PIR_CFG_PIR_BURST_M GENMASK(5, 0) - -#define ANA_POL_CIR_CFG_GSZ 0x20 - -#define ANA_POL_CIR_CFG_CIR_RATE(x) (((x) << 6) & GENMASK(20, 6)) -#define ANA_POL_CIR_CFG_CIR_RATE_M GENMASK(20, 6) -#define ANA_POL_CIR_CFG_CIR_RATE_X(x) (((x) & GENMASK(20, 6)) >> 6) -#define ANA_POL_CIR_CFG_CIR_BURST(x) ((x) & GENMASK(5, 0)) -#define ANA_POL_CIR_CFG_CIR_BURST_M GENMASK(5, 0) - -#define ANA_POL_MODE_CFG_GSZ 0x20 - -#define ANA_POL_MODE_CFG_IPG_SIZE(x) (((x) << 5) & GENMASK(9, 5)) -#define ANA_POL_MODE_CFG_IPG_SIZE_M GENMASK(9, 5) -#define ANA_POL_MODE_CFG_IPG_SIZE_X(x) (((x) & GENMASK(9, 5)) >> 5) -#define ANA_POL_MODE_CFG_FRM_MODE(x) (((x) << 3) & GENMASK(4, 3)) -#define ANA_POL_MODE_CFG_FRM_MODE_M GENMASK(4, 3) -#define ANA_POL_MODE_CFG_FRM_MODE_X(x) (((x) & GENMASK(4, 3)) >> 3) -#define ANA_POL_MODE_CFG_DLB_COUPLED BIT(2) -#define ANA_POL_MODE_CFG_CIR_ENA BIT(1) -#define ANA_POL_MODE_CFG_OVERSHOOT_ENA BIT(0) - -#define ANA_POL_PIR_STATE_GSZ 0x20 - -#define ANA_POL_CIR_STATE_GSZ 0x20 - -#define ANA_POL_STATE_GSZ 0x20 - -#define ANA_POL_FLOWC_RSZ 0x4 - -#define ANA_POL_FLOWC_POL_FLOWC BIT(0) - -#define ANA_POL_HYST_POL_FC_HYST(x) (((x) << 4) & GENMASK(9, 4)) -#define ANA_POL_HYST_POL_FC_HYST_M GENMASK(9, 4) -#define ANA_POL_HYST_POL_FC_HYST_X(x) (((x) & GENMASK(9, 4)) >> 4) -#define ANA_POL_HYST_POL_STOP_HYST(x) ((x) & GENMASK(3, 0)) -#define ANA_POL_HYST_POL_STOP_HYST_M GENMASK(3, 0) - -#define ANA_POL_MISC_CFG_POL_CLOSE_ALL BIT(1) -#define ANA_POL_MISC_CFG_POL_LEAK_DIS BIT(0) - -#endif diff --git a/drivers/net/ethernet/mscc/ocelot_board.c 
b/drivers/net/ethernet/mscc/ocelot_board.c index 2da8eee27e98..b38820849faa 100644 --- a/drivers/net/ethernet/mscc/ocelot_board.c +++ b/drivers/net/ethernet/mscc/ocelot_board.c @@ -402,9 +402,9 @@ static int mscc_ocelot_probe(struct platform_device *pdev) of_get_phy_mode(portnp, &phy_mode); - priv->phy_mode = phy_mode; + ocelot_port->phy_mode = phy_mode; - switch (priv->phy_mode) { + switch (ocelot_port->phy_mode) { case PHY_INTERFACE_MODE_NA: continue; case PHY_INTERFACE_MODE_SGMII: diff --git a/drivers/net/ethernet/mscc/ocelot_dev.h b/drivers/net/ethernet/mscc/ocelot_dev.h deleted file mode 100644 index 0a50d53bbd3f..000000000000 --- a/drivers/net/ethernet/mscc/ocelot_dev.h +++ /dev/null @@ -1,275 +0,0 @@ -/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ -/* - * Microsemi Ocelot Switch driver - * - * Copyright (c) 2017 Microsemi Corporation - */ - -#ifndef _MSCC_OCELOT_DEV_H_ -#define _MSCC_OCELOT_DEV_H_ - -#define DEV_CLOCK_CFG 0x0 - -#define DEV_CLOCK_CFG_MAC_TX_RST BIT(7) -#define DEV_CLOCK_CFG_MAC_RX_RST BIT(6) -#define DEV_CLOCK_CFG_PCS_TX_RST BIT(5) -#define DEV_CLOCK_CFG_PCS_RX_RST BIT(4) -#define DEV_CLOCK_CFG_PORT_RST BIT(3) -#define DEV_CLOCK_CFG_PHY_RST BIT(2) -#define DEV_CLOCK_CFG_LINK_SPEED(x) ((x) & GENMASK(1, 0)) -#define DEV_CLOCK_CFG_LINK_SPEED_M GENMASK(1, 0) - -#define DEV_PORT_MISC 0x4 - -#define DEV_PORT_MISC_FWD_ERROR_ENA BIT(4) -#define DEV_PORT_MISC_FWD_PAUSE_ENA BIT(3) -#define DEV_PORT_MISC_FWD_CTRL_ENA BIT(2) -#define DEV_PORT_MISC_DEV_LOOP_ENA BIT(1) -#define DEV_PORT_MISC_HDX_FAST_DIS BIT(0) - -#define DEV_EVENTS 0x8 - -#define DEV_EEE_CFG 0xc - -#define DEV_EEE_CFG_EEE_ENA BIT(22) -#define DEV_EEE_CFG_EEE_TIMER_AGE(x) (((x) << 15) & GENMASK(21, 15)) -#define DEV_EEE_CFG_EEE_TIMER_AGE_M GENMASK(21, 15) -#define DEV_EEE_CFG_EEE_TIMER_AGE_X(x) (((x) & GENMASK(21, 15)) >> 15) -#define DEV_EEE_CFG_EEE_TIMER_WAKEUP(x) (((x) << 8) & GENMASK(14, 8)) -#define DEV_EEE_CFG_EEE_TIMER_WAKEUP_M GENMASK(14, 8) -#define 
DEV_EEE_CFG_EEE_TIMER_WAKEUP_X(x) (((x) & GENMASK(14, 8)) >> 8) -#define DEV_EEE_CFG_EEE_TIMER_HOLDOFF(x) (((x) << 1) & GENMASK(7, 1)) -#define DEV_EEE_CFG_EEE_TIMER_HOLDOFF_M GENMASK(7, 1) -#define DEV_EEE_CFG_EEE_TIMER_HOLDOFF_X(x) (((x) & GENMASK(7, 1)) >> 1) -#define DEV_EEE_CFG_PORT_LPI BIT(0) - -#define DEV_RX_PATH_DELAY 0x10 - -#define DEV_TX_PATH_DELAY 0x14 - -#define DEV_PTP_PREDICT_CFG 0x18 - -#define DEV_PTP_PREDICT_CFG_PTP_PHY_PREDICT_CFG(x) (((x) << 4) & GENMASK(11, 4)) -#define DEV_PTP_PREDICT_CFG_PTP_PHY_PREDICT_CFG_M GENMASK(11, 4) -#define DEV_PTP_PREDICT_CFG_PTP_PHY_PREDICT_CFG_X(x) (((x) & GENMASK(11, 4)) >> 4) -#define DEV_PTP_PREDICT_CFG_PTP_PHASE_PREDICT_CFG(x) ((x) & GENMASK(3, 0)) -#define DEV_PTP_PREDICT_CFG_PTP_PHASE_PREDICT_CFG_M GENMASK(3, 0) - -#define DEV_MAC_ENA_CFG 0x1c - -#define DEV_MAC_ENA_CFG_RX_ENA BIT(4) -#define DEV_MAC_ENA_CFG_TX_ENA BIT(0) - -#define DEV_MAC_MODE_CFG 0x20 - -#define DEV_MAC_MODE_CFG_FC_WORD_SYNC_ENA BIT(8) -#define DEV_MAC_MODE_CFG_GIGA_MODE_ENA BIT(4) -#define DEV_MAC_MODE_CFG_FDX_ENA BIT(0) - -#define DEV_MAC_MAXLEN_CFG 0x24 - -#define DEV_MAC_TAGS_CFG 0x28 - -#define DEV_MAC_TAGS_CFG_TAG_ID(x) (((x) << 16) & GENMASK(31, 16)) -#define DEV_MAC_TAGS_CFG_TAG_ID_M GENMASK(31, 16) -#define DEV_MAC_TAGS_CFG_TAG_ID_X(x) (((x) & GENMASK(31, 16)) >> 16) -#define DEV_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA BIT(2) -#define DEV_MAC_TAGS_CFG_PB_ENA BIT(1) -#define DEV_MAC_TAGS_CFG_VLAN_AWR_ENA BIT(0) - -#define DEV_MAC_ADV_CHK_CFG 0x2c - -#define DEV_MAC_ADV_CHK_CFG_LEN_DROP_ENA BIT(0) - -#define DEV_MAC_IFG_CFG 0x30 - -#define DEV_MAC_IFG_CFG_RESTORE_OLD_IPG_CHECK BIT(17) -#define DEV_MAC_IFG_CFG_REDUCED_TX_IFG BIT(16) -#define DEV_MAC_IFG_CFG_TX_IFG(x) (((x) << 8) & GENMASK(12, 8)) -#define DEV_MAC_IFG_CFG_TX_IFG_M GENMASK(12, 8) -#define DEV_MAC_IFG_CFG_TX_IFG_X(x) (((x) & GENMASK(12, 8)) >> 8) -#define DEV_MAC_IFG_CFG_RX_IFG2(x) (((x) << 4) & GENMASK(7, 4)) -#define DEV_MAC_IFG_CFG_RX_IFG2_M GENMASK(7, 4) -#define 
DEV_MAC_IFG_CFG_RX_IFG2_X(x) (((x) & GENMASK(7, 4)) >> 4) -#define DEV_MAC_IFG_CFG_RX_IFG1(x) ((x) & GENMASK(3, 0)) -#define DEV_MAC_IFG_CFG_RX_IFG1_M GENMASK(3, 0) - -#define DEV_MAC_HDX_CFG 0x34 - -#define DEV_MAC_HDX_CFG_BYPASS_COL_SYNC BIT(26) -#define DEV_MAC_HDX_CFG_OB_ENA BIT(25) -#define DEV_MAC_HDX_CFG_WEXC_DIS BIT(24) -#define DEV_MAC_HDX_CFG_SEED(x) (((x) << 16) & GENMASK(23, 16)) -#define DEV_MAC_HDX_CFG_SEED_M GENMASK(23, 16) -#define DEV_MAC_HDX_CFG_SEED_X(x) (((x) & GENMASK(23, 16)) >> 16) -#define DEV_MAC_HDX_CFG_SEED_LOAD BIT(12) -#define DEV_MAC_HDX_CFG_RETRY_AFTER_EXC_COL_ENA BIT(8) -#define DEV_MAC_HDX_CFG_LATE_COL_POS(x) ((x) & GENMASK(6, 0)) -#define DEV_MAC_HDX_CFG_LATE_COL_POS_M GENMASK(6, 0) - -#define DEV_MAC_DBG_CFG 0x38 - -#define DEV_MAC_DBG_CFG_TBI_MODE BIT(4) -#define DEV_MAC_DBG_CFG_IFG_CRS_EXT_CHK_ENA BIT(0) - -#define DEV_MAC_FC_MAC_LOW_CFG 0x3c - -#define DEV_MAC_FC_MAC_HIGH_CFG 0x40 - -#define DEV_MAC_STICKY 0x44 - -#define DEV_MAC_STICKY_RX_IPG_SHRINK_STICKY BIT(9) -#define DEV_MAC_STICKY_RX_PREAM_SHRINK_STICKY BIT(8) -#define DEV_MAC_STICKY_RX_CARRIER_EXT_STICKY BIT(7) -#define DEV_MAC_STICKY_RX_CARRIER_EXT_ERR_STICKY BIT(6) -#define DEV_MAC_STICKY_RX_JUNK_STICKY BIT(5) -#define DEV_MAC_STICKY_TX_RETRANSMIT_STICKY BIT(4) -#define DEV_MAC_STICKY_TX_JAM_STICKY BIT(3) -#define DEV_MAC_STICKY_TX_FIFO_OFLW_STICKY BIT(2) -#define DEV_MAC_STICKY_TX_FRM_LEN_OVR_STICKY BIT(1) -#define DEV_MAC_STICKY_TX_ABORT_STICKY BIT(0) - -#define PCS1G_CFG 0x48 - -#define PCS1G_CFG_LINK_STATUS_TYPE BIT(4) -#define PCS1G_CFG_AN_LINK_CTRL_ENA BIT(1) -#define PCS1G_CFG_PCS_ENA BIT(0) - -#define PCS1G_MODE_CFG 0x4c - -#define PCS1G_MODE_CFG_UNIDIR_MODE_ENA BIT(4) -#define PCS1G_MODE_CFG_SGMII_MODE_ENA BIT(0) - -#define PCS1G_SD_CFG 0x50 - -#define PCS1G_SD_CFG_SD_SEL BIT(8) -#define PCS1G_SD_CFG_SD_POL BIT(4) -#define PCS1G_SD_CFG_SD_ENA BIT(0) - -#define PCS1G_ANEG_CFG 0x54 - -#define PCS1G_ANEG_CFG_ADV_ABILITY(x) (((x) << 16) & GENMASK(31, 16)) 
-#define PCS1G_ANEG_CFG_ADV_ABILITY_M GENMASK(31, 16) -#define PCS1G_ANEG_CFG_ADV_ABILITY_X(x) (((x) & GENMASK(31, 16)) >> 16) -#define PCS1G_ANEG_CFG_SW_RESOLVE_ENA BIT(8) -#define PCS1G_ANEG_CFG_ANEG_RESTART_ONE_SHOT BIT(1) -#define PCS1G_ANEG_CFG_ANEG_ENA BIT(0) - -#define PCS1G_ANEG_NP_CFG 0x58 - -#define PCS1G_ANEG_NP_CFG_NP_TX(x) (((x) << 16) & GENMASK(31, 16)) -#define PCS1G_ANEG_NP_CFG_NP_TX_M GENMASK(31, 16) -#define PCS1G_ANEG_NP_CFG_NP_TX_X(x) (((x) & GENMASK(31, 16)) >> 16) -#define PCS1G_ANEG_NP_CFG_NP_LOADED_ONE_SHOT BIT(0) - -#define PCS1G_LB_CFG 0x5c - -#define PCS1G_LB_CFG_RA_ENA BIT(4) -#define PCS1G_LB_CFG_GMII_PHY_LB_ENA BIT(1) -#define PCS1G_LB_CFG_TBI_HOST_LB_ENA BIT(0) - -#define PCS1G_DBG_CFG 0x60 - -#define PCS1G_DBG_CFG_UDLT BIT(0) - -#define PCS1G_CDET_CFG 0x64 - -#define PCS1G_CDET_CFG_CDET_ENA BIT(0) - -#define PCS1G_ANEG_STATUS 0x68 - -#define PCS1G_ANEG_STATUS_LP_ADV_ABILITY(x) (((x) << 16) & GENMASK(31, 16)) -#define PCS1G_ANEG_STATUS_LP_ADV_ABILITY_M GENMASK(31, 16) -#define PCS1G_ANEG_STATUS_LP_ADV_ABILITY_X(x) (((x) & GENMASK(31, 16)) >> 16) -#define PCS1G_ANEG_STATUS_PR BIT(4) -#define PCS1G_ANEG_STATUS_PAGE_RX_STICKY BIT(3) -#define PCS1G_ANEG_STATUS_ANEG_COMPLETE BIT(0) - -#define PCS1G_ANEG_NP_STATUS 0x6c - -#define PCS1G_LINK_STATUS 0x70 - -#define PCS1G_LINK_STATUS_DELAY_VAR(x) (((x) << 12) & GENMASK(15, 12)) -#define PCS1G_LINK_STATUS_DELAY_VAR_M GENMASK(15, 12) -#define PCS1G_LINK_STATUS_DELAY_VAR_X(x) (((x) & GENMASK(15, 12)) >> 12) -#define PCS1G_LINK_STATUS_SIGNAL_DETECT BIT(8) -#define PCS1G_LINK_STATUS_LINK_STATUS BIT(4) -#define PCS1G_LINK_STATUS_SYNC_STATUS BIT(0) - -#define PCS1G_LINK_DOWN_CNT 0x74 - -#define PCS1G_STICKY 0x78 - -#define PCS1G_STICKY_LINK_DOWN_STICKY BIT(4) -#define PCS1G_STICKY_OUT_OF_SYNC_STICKY BIT(0) - -#define PCS1G_DEBUG_STATUS 0x7c - -#define PCS1G_LPI_CFG 0x80 - -#define PCS1G_LPI_CFG_QSGMII_MS_SEL BIT(20) -#define PCS1G_LPI_CFG_RX_LPI_OUT_DIS BIT(17) -#define PCS1G_LPI_CFG_LPI_TESTMODE 
BIT(16) -#define PCS1G_LPI_CFG_LPI_RX_WTIM(x) (((x) << 4) & GENMASK(5, 4)) -#define PCS1G_LPI_CFG_LPI_RX_WTIM_M GENMASK(5, 4) -#define PCS1G_LPI_CFG_LPI_RX_WTIM_X(x) (((x) & GENMASK(5, 4)) >> 4) -#define PCS1G_LPI_CFG_TX_ASSERT_LPIDLE BIT(0) - -#define PCS1G_LPI_WAKE_ERROR_CNT 0x84 - -#define PCS1G_LPI_STATUS 0x88 - -#define PCS1G_LPI_STATUS_RX_LPI_FAIL BIT(16) -#define PCS1G_LPI_STATUS_RX_LPI_EVENT_STICKY BIT(12) -#define PCS1G_LPI_STATUS_RX_QUIET BIT(9) -#define PCS1G_LPI_STATUS_RX_LPI_MODE BIT(8) -#define PCS1G_LPI_STATUS_TX_LPI_EVENT_STICKY BIT(4) -#define PCS1G_LPI_STATUS_TX_QUIET BIT(1) -#define PCS1G_LPI_STATUS_TX_LPI_MODE BIT(0) - -#define PCS1G_TSTPAT_MODE_CFG 0x8c - -#define PCS1G_TSTPAT_STATUS 0x90 - -#define PCS1G_TSTPAT_STATUS_JTP_ERR_CNT(x) (((x) << 8) & GENMASK(15, 8)) -#define PCS1G_TSTPAT_STATUS_JTP_ERR_CNT_M GENMASK(15, 8) -#define PCS1G_TSTPAT_STATUS_JTP_ERR_CNT_X(x) (((x) & GENMASK(15, 8)) >> 8) -#define PCS1G_TSTPAT_STATUS_JTP_ERR BIT(4) -#define PCS1G_TSTPAT_STATUS_JTP_LOCK BIT(0) - -#define DEV_PCS_FX100_CFG 0x94 - -#define DEV_PCS_FX100_CFG_SD_SEL BIT(26) -#define DEV_PCS_FX100_CFG_SD_POL BIT(25) -#define DEV_PCS_FX100_CFG_SD_ENA BIT(24) -#define DEV_PCS_FX100_CFG_LOOPBACK_ENA BIT(20) -#define DEV_PCS_FX100_CFG_SWAP_MII_ENA BIT(16) -#define DEV_PCS_FX100_CFG_RXBITSEL(x) (((x) << 12) & GENMASK(15, 12)) -#define DEV_PCS_FX100_CFG_RXBITSEL_M GENMASK(15, 12) -#define DEV_PCS_FX100_CFG_RXBITSEL_X(x) (((x) & GENMASK(15, 12)) >> 12) -#define DEV_PCS_FX100_CFG_SIGDET_CFG(x) (((x) << 9) & GENMASK(10, 9)) -#define DEV_PCS_FX100_CFG_SIGDET_CFG_M GENMASK(10, 9) -#define DEV_PCS_FX100_CFG_SIGDET_CFG_X(x) (((x) & GENMASK(10, 9)) >> 9) -#define DEV_PCS_FX100_CFG_LINKHYST_TM_ENA BIT(8) -#define DEV_PCS_FX100_CFG_LINKHYSTTIMER(x) (((x) << 4) & GENMASK(7, 4)) -#define DEV_PCS_FX100_CFG_LINKHYSTTIMER_M GENMASK(7, 4) -#define DEV_PCS_FX100_CFG_LINKHYSTTIMER_X(x) (((x) & GENMASK(7, 4)) >> 4) -#define DEV_PCS_FX100_CFG_UNIDIR_MODE_ENA BIT(3) -#define 
DEV_PCS_FX100_CFG_FEFCHK_ENA BIT(2) -#define DEV_PCS_FX100_CFG_FEFGEN_ENA BIT(1) -#define DEV_PCS_FX100_CFG_PCS_ENA BIT(0) - -#define DEV_PCS_FX100_STATUS 0x98 - -#define DEV_PCS_FX100_STATUS_EDGE_POS_PTP(x) (((x) << 8) & GENMASK(11, 8)) -#define DEV_PCS_FX100_STATUS_EDGE_POS_PTP_M GENMASK(11, 8) -#define DEV_PCS_FX100_STATUS_EDGE_POS_PTP_X(x) (((x) & GENMASK(11, 8)) >> 8) -#define DEV_PCS_FX100_STATUS_PCS_ERROR_STICKY BIT(7) -#define DEV_PCS_FX100_STATUS_FEF_FOUND_STICKY BIT(6) -#define DEV_PCS_FX100_STATUS_SSD_ERROR_STICKY BIT(5) -#define DEV_PCS_FX100_STATUS_SYNC_LOST_STICKY BIT(4) -#define DEV_PCS_FX100_STATUS_FEF_STATUS BIT(2) -#define DEV_PCS_FX100_STATUS_SIGNAL_DETECT BIT(1) -#define DEV_PCS_FX100_STATUS_SYNC_STATUS BIT(0) - -#endif diff --git a/drivers/net/ethernet/mscc/ocelot_qsys.h b/drivers/net/ethernet/mscc/ocelot_qsys.h deleted file mode 100644 index d8c63aa761be..000000000000 --- a/drivers/net/ethernet/mscc/ocelot_qsys.h +++ /dev/null @@ -1,270 +0,0 @@ -/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ -/* - * Microsemi Ocelot Switch driver - * - * Copyright (c) 2017 Microsemi Corporation - */ - -#ifndef _MSCC_OCELOT_QSYS_H_ -#define _MSCC_OCELOT_QSYS_H_ - -#define QSYS_PORT_MODE_RSZ 0x4 - -#define QSYS_PORT_MODE_DEQUEUE_DIS BIT(1) -#define QSYS_PORT_MODE_DEQUEUE_LATE BIT(0) - -#define QSYS_SWITCH_PORT_MODE_RSZ 0x4 - -#define QSYS_SWITCH_PORT_MODE_PORT_ENA BIT(14) -#define QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG(x) (((x) << 11) & GENMASK(13, 11)) -#define QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG_M GENMASK(13, 11) -#define QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG_X(x) (((x) & GENMASK(13, 11)) >> 11) -#define QSYS_SWITCH_PORT_MODE_YEL_RSRVD BIT(10) -#define QSYS_SWITCH_PORT_MODE_INGRESS_DROP_MODE BIT(9) -#define QSYS_SWITCH_PORT_MODE_TX_PFC_ENA(x) (((x) << 1) & GENMASK(8, 1)) -#define QSYS_SWITCH_PORT_MODE_TX_PFC_ENA_M GENMASK(8, 1) -#define QSYS_SWITCH_PORT_MODE_TX_PFC_ENA_X(x) (((x) & GENMASK(8, 1)) >> 1) -#define QSYS_SWITCH_PORT_MODE_TX_PFC_MODE BIT(0) - -#define 
QSYS_STAT_CNT_CFG_TX_GREEN_CNT_MODE BIT(5) -#define QSYS_STAT_CNT_CFG_TX_YELLOW_CNT_MODE BIT(4) -#define QSYS_STAT_CNT_CFG_DROP_GREEN_CNT_MODE BIT(3) -#define QSYS_STAT_CNT_CFG_DROP_YELLOW_CNT_MODE BIT(2) -#define QSYS_STAT_CNT_CFG_DROP_COUNT_ONCE BIT(1) -#define QSYS_STAT_CNT_CFG_DROP_COUNT_EGRESS BIT(0) - -#define QSYS_EEE_CFG_RSZ 0x4 - -#define QSYS_EEE_THRES_EEE_HIGH_BYTES(x) (((x) << 8) & GENMASK(15, 8)) -#define QSYS_EEE_THRES_EEE_HIGH_BYTES_M GENMASK(15, 8) -#define QSYS_EEE_THRES_EEE_HIGH_BYTES_X(x) (((x) & GENMASK(15, 8)) >> 8) -#define QSYS_EEE_THRES_EEE_HIGH_FRAMES(x) ((x) & GENMASK(7, 0)) -#define QSYS_EEE_THRES_EEE_HIGH_FRAMES_M GENMASK(7, 0) - -#define QSYS_SW_STATUS_RSZ 0x4 - -#define QSYS_EXT_CPU_CFG_EXT_CPU_PORT(x) (((x) << 8) & GENMASK(12, 8)) -#define QSYS_EXT_CPU_CFG_EXT_CPU_PORT_M GENMASK(12, 8) -#define QSYS_EXT_CPU_CFG_EXT_CPU_PORT_X(x) (((x) & GENMASK(12, 8)) >> 8) -#define QSYS_EXT_CPU_CFG_EXT_CPUQ_MSK(x) ((x) & GENMASK(7, 0)) -#define QSYS_EXT_CPU_CFG_EXT_CPUQ_MSK_M GENMASK(7, 0) - -#define QSYS_QMAP_GSZ 0x4 - -#define QSYS_QMAP_SE_BASE(x) (((x) << 5) & GENMASK(12, 5)) -#define QSYS_QMAP_SE_BASE_M GENMASK(12, 5) -#define QSYS_QMAP_SE_BASE_X(x) (((x) & GENMASK(12, 5)) >> 5) -#define QSYS_QMAP_SE_IDX_SEL(x) (((x) << 2) & GENMASK(4, 2)) -#define QSYS_QMAP_SE_IDX_SEL_M GENMASK(4, 2) -#define QSYS_QMAP_SE_IDX_SEL_X(x) (((x) & GENMASK(4, 2)) >> 2) -#define QSYS_QMAP_SE_INP_SEL(x) ((x) & GENMASK(1, 0)) -#define QSYS_QMAP_SE_INP_SEL_M GENMASK(1, 0) - -#define QSYS_ISDX_SGRP_GSZ 0x4 - -#define QSYS_TIMED_FRAME_ENTRY_GSZ 0x4 - -#define QSYS_TFRM_MISC_TIMED_CANCEL_SLOT(x) (((x) << 9) & GENMASK(18, 9)) -#define QSYS_TFRM_MISC_TIMED_CANCEL_SLOT_M GENMASK(18, 9) -#define QSYS_TFRM_MISC_TIMED_CANCEL_SLOT_X(x) (((x) & GENMASK(18, 9)) >> 9) -#define QSYS_TFRM_MISC_TIMED_CANCEL_1SHOT BIT(8) -#define QSYS_TFRM_MISC_TIMED_SLOT_MODE_MC BIT(7) -#define QSYS_TFRM_MISC_TIMED_ENTRY_FAST_CNT(x) ((x) & GENMASK(6, 0)) -#define QSYS_TFRM_MISC_TIMED_ENTRY_FAST_CNT_M 
GENMASK(6, 0) - -#define QSYS_RED_PROFILE_RSZ 0x4 - -#define QSYS_RED_PROFILE_WM_RED_LOW(x) (((x) << 8) & GENMASK(15, 8)) -#define QSYS_RED_PROFILE_WM_RED_LOW_M GENMASK(15, 8) -#define QSYS_RED_PROFILE_WM_RED_LOW_X(x) (((x) & GENMASK(15, 8)) >> 8) -#define QSYS_RED_PROFILE_WM_RED_HIGH(x) ((x) & GENMASK(7, 0)) -#define QSYS_RED_PROFILE_WM_RED_HIGH_M GENMASK(7, 0) - -#define QSYS_RES_CFG_GSZ 0x8 - -#define QSYS_RES_STAT_GSZ 0x8 - -#define QSYS_RES_STAT_INUSE(x) (((x) << 12) & GENMASK(23, 12)) -#define QSYS_RES_STAT_INUSE_M GENMASK(23, 12) -#define QSYS_RES_STAT_INUSE_X(x) (((x) & GENMASK(23, 12)) >> 12) -#define QSYS_RES_STAT_MAXUSE(x) ((x) & GENMASK(11, 0)) -#define QSYS_RES_STAT_MAXUSE_M GENMASK(11, 0) - -#define QSYS_EVENTS_CORE_EV_FDC(x) (((x) << 2) & GENMASK(4, 2)) -#define QSYS_EVENTS_CORE_EV_FDC_M GENMASK(4, 2) -#define QSYS_EVENTS_CORE_EV_FDC_X(x) (((x) & GENMASK(4, 2)) >> 2) -#define QSYS_EVENTS_CORE_EV_FRD(x) ((x) & GENMASK(1, 0)) -#define QSYS_EVENTS_CORE_EV_FRD_M GENMASK(1, 0) - -#define QSYS_QMAXSDU_CFG_0_RSZ 0x4 - -#define QSYS_QMAXSDU_CFG_1_RSZ 0x4 - -#define QSYS_QMAXSDU_CFG_2_RSZ 0x4 - -#define QSYS_QMAXSDU_CFG_3_RSZ 0x4 - -#define QSYS_QMAXSDU_CFG_4_RSZ 0x4 - -#define QSYS_QMAXSDU_CFG_5_RSZ 0x4 - -#define QSYS_QMAXSDU_CFG_6_RSZ 0x4 - -#define QSYS_QMAXSDU_CFG_7_RSZ 0x4 - -#define QSYS_PREEMPTION_CFG_RSZ 0x4 - -#define QSYS_PREEMPTION_CFG_P_QUEUES(x) ((x) & GENMASK(7, 0)) -#define QSYS_PREEMPTION_CFG_P_QUEUES_M GENMASK(7, 0) -#define QSYS_PREEMPTION_CFG_MM_ADD_FRAG_SIZE(x) (((x) << 8) & GENMASK(9, 8)) -#define QSYS_PREEMPTION_CFG_MM_ADD_FRAG_SIZE_M GENMASK(9, 8) -#define QSYS_PREEMPTION_CFG_MM_ADD_FRAG_SIZE_X(x) (((x) & GENMASK(9, 8)) >> 8) -#define QSYS_PREEMPTION_CFG_STRICT_IPG(x) (((x) << 12) & GENMASK(13, 12)) -#define QSYS_PREEMPTION_CFG_STRICT_IPG_M GENMASK(13, 12) -#define QSYS_PREEMPTION_CFG_STRICT_IPG_X(x) (((x) & GENMASK(13, 12)) >> 12) -#define QSYS_PREEMPTION_CFG_HOLD_ADVANCE(x) (((x) << 16) & GENMASK(31, 16)) -#define 
QSYS_PREEMPTION_CFG_HOLD_ADVANCE_M GENMASK(31, 16) -#define QSYS_PREEMPTION_CFG_HOLD_ADVANCE_X(x) (((x) & GENMASK(31, 16)) >> 16) - -#define QSYS_CIR_CFG_GSZ 0x80 - -#define QSYS_CIR_CFG_CIR_RATE(x) (((x) << 6) & GENMASK(20, 6)) -#define QSYS_CIR_CFG_CIR_RATE_M GENMASK(20, 6) -#define QSYS_CIR_CFG_CIR_RATE_X(x) (((x) & GENMASK(20, 6)) >> 6) -#define QSYS_CIR_CFG_CIR_BURST(x) ((x) & GENMASK(5, 0)) -#define QSYS_CIR_CFG_CIR_BURST_M GENMASK(5, 0) - -#define QSYS_EIR_CFG_GSZ 0x80 - -#define QSYS_EIR_CFG_EIR_RATE(x) (((x) << 7) & GENMASK(21, 7)) -#define QSYS_EIR_CFG_EIR_RATE_M GENMASK(21, 7) -#define QSYS_EIR_CFG_EIR_RATE_X(x) (((x) & GENMASK(21, 7)) >> 7) -#define QSYS_EIR_CFG_EIR_BURST(x) (((x) << 1) & GENMASK(6, 1)) -#define QSYS_EIR_CFG_EIR_BURST_M GENMASK(6, 1) -#define QSYS_EIR_CFG_EIR_BURST_X(x) (((x) & GENMASK(6, 1)) >> 1) -#define QSYS_EIR_CFG_EIR_MARK_ENA BIT(0) - -#define QSYS_SE_CFG_GSZ 0x80 - -#define QSYS_SE_CFG_SE_DWRR_CNT(x) (((x) << 6) & GENMASK(9, 6)) -#define QSYS_SE_CFG_SE_DWRR_CNT_M GENMASK(9, 6) -#define QSYS_SE_CFG_SE_DWRR_CNT_X(x) (((x) & GENMASK(9, 6)) >> 6) -#define QSYS_SE_CFG_SE_RR_ENA BIT(5) -#define QSYS_SE_CFG_SE_AVB_ENA BIT(4) -#define QSYS_SE_CFG_SE_FRM_MODE(x) (((x) << 2) & GENMASK(3, 2)) -#define QSYS_SE_CFG_SE_FRM_MODE_M GENMASK(3, 2) -#define QSYS_SE_CFG_SE_FRM_MODE_X(x) (((x) & GENMASK(3, 2)) >> 2) -#define QSYS_SE_CFG_SE_EXC_ENA BIT(1) -#define QSYS_SE_CFG_SE_EXC_FWD BIT(0) - -#define QSYS_SE_DWRR_CFG_GSZ 0x80 -#define QSYS_SE_DWRR_CFG_RSZ 0x4 - -#define QSYS_SE_CONNECT_GSZ 0x80 - -#define QSYS_SE_CONNECT_SE_OUTP_IDX(x) (((x) << 17) & GENMASK(24, 17)) -#define QSYS_SE_CONNECT_SE_OUTP_IDX_M GENMASK(24, 17) -#define QSYS_SE_CONNECT_SE_OUTP_IDX_X(x) (((x) & GENMASK(24, 17)) >> 17) -#define QSYS_SE_CONNECT_SE_INP_IDX(x) (((x) << 9) & GENMASK(16, 9)) -#define QSYS_SE_CONNECT_SE_INP_IDX_M GENMASK(16, 9) -#define QSYS_SE_CONNECT_SE_INP_IDX_X(x) (((x) & GENMASK(16, 9)) >> 9) -#define QSYS_SE_CONNECT_SE_OUTP_CON(x) (((x) << 5) & GENMASK(8, 
5)) -#define QSYS_SE_CONNECT_SE_OUTP_CON_M GENMASK(8, 5) -#define QSYS_SE_CONNECT_SE_OUTP_CON_X(x) (((x) & GENMASK(8, 5)) >> 5) -#define QSYS_SE_CONNECT_SE_INP_CNT(x) (((x) << 1) & GENMASK(4, 1)) -#define QSYS_SE_CONNECT_SE_INP_CNT_M GENMASK(4, 1) -#define QSYS_SE_CONNECT_SE_INP_CNT_X(x) (((x) & GENMASK(4, 1)) >> 1) -#define QSYS_SE_CONNECT_SE_TERMINAL BIT(0) - -#define QSYS_SE_DLB_SENSE_GSZ 0x80 - -#define QSYS_SE_DLB_SENSE_SE_DLB_PRIO(x) (((x) << 11) & GENMASK(13, 11)) -#define QSYS_SE_DLB_SENSE_SE_DLB_PRIO_M GENMASK(13, 11) -#define QSYS_SE_DLB_SENSE_SE_DLB_PRIO_X(x) (((x) & GENMASK(13, 11)) >> 11) -#define QSYS_SE_DLB_SENSE_SE_DLB_SPORT(x) (((x) << 7) & GENMASK(10, 7)) -#define QSYS_SE_DLB_SENSE_SE_DLB_SPORT_M GENMASK(10, 7) -#define QSYS_SE_DLB_SENSE_SE_DLB_SPORT_X(x) (((x) & GENMASK(10, 7)) >> 7) -#define QSYS_SE_DLB_SENSE_SE_DLB_DPORT(x) (((x) << 3) & GENMASK(6, 3)) -#define QSYS_SE_DLB_SENSE_SE_DLB_DPORT_M GENMASK(6, 3) -#define QSYS_SE_DLB_SENSE_SE_DLB_DPORT_X(x) (((x) & GENMASK(6, 3)) >> 3) -#define QSYS_SE_DLB_SENSE_SE_DLB_PRIO_ENA BIT(2) -#define QSYS_SE_DLB_SENSE_SE_DLB_SPORT_ENA BIT(1) -#define QSYS_SE_DLB_SENSE_SE_DLB_DPORT_ENA BIT(0) - -#define QSYS_CIR_STATE_GSZ 0x80 - -#define QSYS_CIR_STATE_CIR_LVL(x) (((x) << 4) & GENMASK(25, 4)) -#define QSYS_CIR_STATE_CIR_LVL_M GENMASK(25, 4) -#define QSYS_CIR_STATE_CIR_LVL_X(x) (((x) & GENMASK(25, 4)) >> 4) -#define QSYS_CIR_STATE_SHP_TIME(x) ((x) & GENMASK(3, 0)) -#define QSYS_CIR_STATE_SHP_TIME_M GENMASK(3, 0) - -#define QSYS_EIR_STATE_GSZ 0x80 - -#define QSYS_SE_STATE_GSZ 0x80 - -#define QSYS_SE_STATE_SE_OUTP_LVL(x) (((x) << 1) & GENMASK(2, 1)) -#define QSYS_SE_STATE_SE_OUTP_LVL_M GENMASK(2, 1) -#define QSYS_SE_STATE_SE_OUTP_LVL_X(x) (((x) & GENMASK(2, 1)) >> 1) -#define QSYS_SE_STATE_SE_WAS_YEL BIT(0) - -#define QSYS_HSCH_MISC_CFG_SE_CONNECT_VLD BIT(8) -#define QSYS_HSCH_MISC_CFG_FRM_ADJ(x) (((x) << 3) & GENMASK(7, 3)) -#define QSYS_HSCH_MISC_CFG_FRM_ADJ_M GENMASK(7, 3) -#define 
QSYS_HSCH_MISC_CFG_FRM_ADJ_X(x) (((x) & GENMASK(7, 3)) >> 3) -#define QSYS_HSCH_MISC_CFG_LEAK_DIS BIT(2) -#define QSYS_HSCH_MISC_CFG_QSHP_EXC_ENA BIT(1) -#define QSYS_HSCH_MISC_CFG_PFC_BYP_UPD BIT(0) - -#define QSYS_TAG_CONFIG_RSZ 0x4 - -#define QSYS_TAG_CONFIG_ENABLE BIT(0) -#define QSYS_TAG_CONFIG_LINK_SPEED(x) (((x) << 4) & GENMASK(5, 4)) -#define QSYS_TAG_CONFIG_LINK_SPEED_M GENMASK(5, 4) -#define QSYS_TAG_CONFIG_LINK_SPEED_X(x) (((x) & GENMASK(5, 4)) >> 4) -#define QSYS_TAG_CONFIG_INIT_GATE_STATE(x) (((x) << 8) & GENMASK(15, 8)) -#define QSYS_TAG_CONFIG_INIT_GATE_STATE_M GENMASK(15, 8) -#define QSYS_TAG_CONFIG_INIT_GATE_STATE_X(x) (((x) & GENMASK(15, 8)) >> 8) -#define QSYS_TAG_CONFIG_SCH_TRAFFIC_QUEUES(x) (((x) << 16) & GENMASK(23, 16)) -#define QSYS_TAG_CONFIG_SCH_TRAFFIC_QUEUES_M GENMASK(23, 16) -#define QSYS_TAG_CONFIG_SCH_TRAFFIC_QUEUES_X(x) (((x) & GENMASK(23, 16)) >> 16) - -#define QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM(x) ((x) & GENMASK(7, 0)) -#define QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM_M GENMASK(7, 0) -#define QSYS_TAS_PARAM_CFG_CTRL_ALWAYS_GUARD_BAND_SCH_Q BIT(8) -#define QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE BIT(16) - -#define QSYS_PORT_MAX_SDU_RSZ 0x4 - -#define QSYS_PARAM_CFG_REG_3_BASE_TIME_SEC_MSB(x) ((x) & GENMASK(15, 0)) -#define QSYS_PARAM_CFG_REG_3_BASE_TIME_SEC_MSB_M GENMASK(15, 0) -#define QSYS_PARAM_CFG_REG_3_LIST_LENGTH(x) (((x) << 16) & GENMASK(31, 16)) -#define QSYS_PARAM_CFG_REG_3_LIST_LENGTH_M GENMASK(31, 16) -#define QSYS_PARAM_CFG_REG_3_LIST_LENGTH_X(x) (((x) & GENMASK(31, 16)) >> 16) - -#define QSYS_GCL_CFG_REG_1_GCL_ENTRY_NUM(x) ((x) & GENMASK(5, 0)) -#define QSYS_GCL_CFG_REG_1_GCL_ENTRY_NUM_M GENMASK(5, 0) -#define QSYS_GCL_CFG_REG_1_GATE_STATE(x) (((x) << 8) & GENMASK(15, 8)) -#define QSYS_GCL_CFG_REG_1_GATE_STATE_M GENMASK(15, 8) -#define QSYS_GCL_CFG_REG_1_GATE_STATE_X(x) (((x) & GENMASK(15, 8)) >> 8) - -#define QSYS_PARAM_STATUS_REG_3_BASE_TIME_SEC_MSB(x) ((x) & GENMASK(15, 0)) -#define QSYS_PARAM_STATUS_REG_3_BASE_TIME_SEC_MSB_M 
GENMASK(15, 0) -#define QSYS_PARAM_STATUS_REG_3_LIST_LENGTH(x) (((x) << 16) & GENMASK(31, 16)) -#define QSYS_PARAM_STATUS_REG_3_LIST_LENGTH_M GENMASK(31, 16) -#define QSYS_PARAM_STATUS_REG_3_LIST_LENGTH_X(x) (((x) & GENMASK(31, 16)) >> 16) - -#define QSYS_PARAM_STATUS_REG_8_CFG_CHG_TIME_SEC_MSB(x) ((x) & GENMASK(15, 0)) -#define QSYS_PARAM_STATUS_REG_8_CFG_CHG_TIME_SEC_MSB_M GENMASK(15, 0) -#define QSYS_PARAM_STATUS_REG_8_OPER_GATE_STATE(x) (((x) << 16) & GENMASK(23, 16)) -#define QSYS_PARAM_STATUS_REG_8_OPER_GATE_STATE_M GENMASK(23, 16) -#define QSYS_PARAM_STATUS_REG_8_OPER_GATE_STATE_X(x) (((x) & GENMASK(23, 16)) >> 16) -#define QSYS_PARAM_STATUS_REG_8_CONFIG_PENDING BIT(24) - -#define QSYS_GCL_STATUS_REG_1_GCL_ENTRY_NUM(x) ((x) & GENMASK(5, 0)) -#define QSYS_GCL_STATUS_REG_1_GCL_ENTRY_NUM_M GENMASK(5, 0) -#define QSYS_GCL_STATUS_REG_1_GATE_STATE(x) (((x) << 8) & GENMASK(15, 8)) -#define QSYS_GCL_STATUS_REG_1_GATE_STATE_M GENMASK(15, 8) -#define QSYS_GCL_STATUS_REG_1_GATE_STATE_X(x) (((x) & GENMASK(15, 8)) >> 8) - -#endif diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index c979f38a2e0c..2ee0d0be113a 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -2892,7 +2892,7 @@ drop: static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb, struct net_device *dev) { - struct sk_buff *segs, *curr; + struct sk_buff *segs, *curr, *next; struct myri10ge_priv *mgp = netdev_priv(dev); struct myri10ge_slice_state *ss; netdev_tx_t status; @@ -2901,10 +2901,8 @@ static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb, if (IS_ERR(segs)) goto drop; - while (segs) { - curr = segs; - segs = segs->next; - curr->next = NULL; + skb_list_walk_safe(segs, curr, next) { + skb_mark_not_on_list(curr); status = myri10ge_xmit(curr, dev); if (status != 0) { dev_kfree_skb_any(curr); diff --git a/drivers/net/ethernet/natsemi/natsemi.c 
b/drivers/net/ethernet/natsemi/natsemi.c index 1a2634cbbb69..d21d706b83a7 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -612,7 +612,7 @@ static void undo_cable_magic(struct net_device *dev); static void check_link(struct net_device *dev); static void netdev_timer(struct timer_list *t); static void dump_ring(struct net_device *dev); -static void ns_tx_timeout(struct net_device *dev); +static void ns_tx_timeout(struct net_device *dev, unsigned int txqueue); static int alloc_ring(struct net_device *dev); static void refill_rx(struct net_device *dev); static void init_ring(struct net_device *dev); @@ -1881,7 +1881,7 @@ static void dump_ring(struct net_device *dev) } } -static void ns_tx_timeout(struct net_device *dev) +static void ns_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem * ioaddr = ns_ioaddr(dev); diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 6af9a7eee114..be5f62f06785 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -1549,7 +1549,7 @@ static int ns83820_stop(struct net_device *ndev) return 0; } -static void ns83820_tx_timeout(struct net_device *ndev) +static void ns83820_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct ns83820 *dev = PRIV(ndev); u32 tx_done_idx; @@ -1603,7 +1603,7 @@ static void ns83820_tx_watch(struct timer_list *t) ndev->name, dev->tx_done_idx, dev->tx_free_idx, atomic_read(&dev->nr_tx_skbs)); - ns83820_tx_timeout(ndev); + ns83820_tx_timeout(ndev, UINT_MAX); } mod_timer(&dev->tx_watchdog, jiffies + 2*HZ); diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index b339125b2f09..fdebc8598b22 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -161,7 +161,7 @@ static int sonic_close(struct net_device *dev) return 0; } 
-static void sonic_tx_timeout(struct net_device *dev) +static void sonic_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sonic_local *lp = netdev_priv(dev); int i; diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h index 2b27f7049acb..f1544481aac1 100644 --- a/drivers/net/ethernet/natsemi/sonic.h +++ b/drivers/net/ethernet/natsemi/sonic.h @@ -336,7 +336,7 @@ static int sonic_close(struct net_device *dev); static struct net_device_stats *sonic_get_stats(struct net_device *dev); static void sonic_multicast_list(struct net_device *dev); static int sonic_init(struct net_device *dev); -static void sonic_tx_timeout(struct net_device *dev); +static void sonic_tx_timeout(struct net_device *dev, unsigned int txqueue); static void sonic_msg_init(struct net_device *dev); /* Internal inlines for reading/writing DMA buffers. Note that bus diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index e0b2bf327905..0ec6b8e8b549 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -7238,7 +7238,7 @@ out_unlock: * void */ -static void s2io_tx_watchdog(struct net_device *dev) +static void s2io_tx_watchdog(struct net_device *dev, unsigned int txqueue) { struct s2io_nic *sp = netdev_priv(dev); struct swStat *swstats = &sp->mac_control.stats_info->sw_stat; diff --git a/drivers/net/ethernet/neterion/s2io.h b/drivers/net/ethernet/neterion/s2io.h index 0a921f30f98f..6fa3159a977f 100644 --- a/drivers/net/ethernet/neterion/s2io.h +++ b/drivers/net/ethernet/neterion/s2io.h @@ -1065,7 +1065,7 @@ static void s2io_txpic_intr_handle(struct s2io_nic *sp); static void tx_intr_handler(struct fifo_info *fifo_data); static void s2io_handle_errors(void * dev_id); -static void s2io_tx_watchdog(struct net_device *dev); +static void s2io_tx_watchdog(struct net_device *dev, unsigned int txqueue); static void s2io_set_multicast(struct net_device *dev); static int 
rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp); static void s2io_link(struct s2io_nic * sp, int link); diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 1d334f2e0a56..9b63574b6202 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -3273,7 +3273,7 @@ static int vxge_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) * This function is triggered if the Tx Queue is stopped * for a pre-defined amount of time when the Interface is still up. */ -static void vxge_tx_watchdog(struct net_device *dev) +static void vxge_tx_watchdog(struct net_device *dev, unsigned int txqueue) { struct vxgedev *vdev; diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig index bac5be4d4f43..a3f68a718813 100644 --- a/drivers/net/ethernet/netronome/Kconfig +++ b/drivers/net/ethernet/netronome/Kconfig @@ -31,6 +31,7 @@ config NFP_APP_FLOWER bool "NFP4000/NFP6000 TC Flower offload support" depends on NFP depends on NET_SWITCHDEV + depends on IPV6!=m || NFP=m default y ---help--- Enable driver support for TC Flower offload on NFP4000 and NFP6000. diff --git a/drivers/net/ethernet/netronome/nfp/abm/cls.c b/drivers/net/ethernet/netronome/nfp/abm/cls.c index 9f8a1f69c0c4..23ebddfb9532 100644 --- a/drivers/net/ethernet/netronome/nfp/abm/cls.c +++ b/drivers/net/ethernet/netronome/nfp/abm/cls.c @@ -176,10 +176,8 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink, u8 mask, val; int err; - if (!nfp_abm_u32_check_knode(alink->abm, knode, proto, extack)) { - err = -EOPNOTSUPP; + if (!nfp_abm_u32_check_knode(alink->abm, knode, proto, extack)) goto err_delete; - } tos_off = proto == htons(ETH_P_IP) ? 
16 : 20; @@ -200,18 +198,14 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink, if ((iter->val & cmask) == (val & cmask) && iter->band != knode->res->classid) { NL_SET_ERR_MSG_MOD(extack, "conflict with already offloaded filter"); - err = -EOPNOTSUPP; goto err_delete; } } if (!match) { match = kzalloc(sizeof(*match), GFP_KERNEL); - if (!match) { - err = -ENOMEM; - goto err_delete; - } - + if (!match) + return -ENOMEM; list_add(&match->list, &alink->dscp_map); } match->handle = knode->handle; @@ -227,7 +221,7 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink, err_delete: nfp_abm_u32_knode_delete(alink, knode); - return err; + return -EOPNOTSUPP; } static int nfp_abm_setup_tc_block_cb(enum tc_setup_type type, diff --git a/drivers/net/ethernet/netronome/nfp/ccm.h b/drivers/net/ethernet/netronome/nfp/ccm.h index a460c75522be..d81d450be50e 100644 --- a/drivers/net/ethernet/netronome/nfp/ccm.h +++ b/drivers/net/ethernet/netronome/nfp/ccm.h @@ -26,6 +26,7 @@ enum nfp_ccm_type { NFP_CCM_TYPE_CRYPTO_ADD = 10, NFP_CCM_TYPE_CRYPTO_DEL = 11, NFP_CCM_TYPE_CRYPTO_UPDATE = 12, + NFP_CCM_TYPE_CRYPTO_RESYNC = 13, __NFP_CCM_TYPE_MAX, }; diff --git a/drivers/net/ethernet/netronome/nfp/crypto/crypto.h b/drivers/net/ethernet/netronome/nfp/crypto/crypto.h index 60372ddf69f0..bffe58bb2f27 100644 --- a/drivers/net/ethernet/netronome/nfp/crypto/crypto.h +++ b/drivers/net/ethernet/netronome/nfp/crypto/crypto.h @@ -4,6 +4,10 @@ #ifndef NFP_CRYPTO_H #define NFP_CRYPTO_H 1 +struct net_device; +struct nfp_net; +struct nfp_net_tls_resync_req; + struct nfp_net_tls_offload_ctx { __be32 fw_handle[2]; @@ -17,11 +21,22 @@ struct nfp_net_tls_offload_ctx { #ifdef CONFIG_TLS_DEVICE int nfp_net_tls_init(struct nfp_net *nn); +int nfp_net_tls_rx_resync_req(struct net_device *netdev, + struct nfp_net_tls_resync_req *req, + void *pkt, unsigned int pkt_len); #else static inline int nfp_net_tls_init(struct nfp_net *nn) { return 0; } + +static inline int +nfp_net_tls_rx_resync_req(struct net_device 
*netdev, + struct nfp_net_tls_resync_req *req, + void *pkt, unsigned int pkt_len) +{ + return -EOPNOTSUPP; +} #endif #endif diff --git a/drivers/net/ethernet/netronome/nfp/crypto/fw.h b/drivers/net/ethernet/netronome/nfp/crypto/fw.h index 67413d946c4a..8d1458896bcb 100644 --- a/drivers/net/ethernet/netronome/nfp/crypto/fw.h +++ b/drivers/net/ethernet/netronome/nfp/crypto/fw.h @@ -9,6 +9,14 @@ #define NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC 0 #define NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC 1 +struct nfp_net_tls_resync_req { + __be32 fw_handle[2]; + __be32 tcp_seq; + u8 l3_offset; + u8 l4_offset; + u8 resv[2]; +}; + struct nfp_crypto_reply_simple { struct nfp_ccm_hdr hdr; __be32 error; diff --git a/drivers/net/ethernet/netronome/nfp/crypto/tls.c b/drivers/net/ethernet/netronome/nfp/crypto/tls.c index 96a96b35c0ca..7c50e3dfb9d5 100644 --- a/drivers/net/ethernet/netronome/nfp/crypto/tls.c +++ b/drivers/net/ethernet/netronome/nfp/crypto/tls.c @@ -5,6 +5,7 @@ #include <linux/ipv6.h> #include <linux/skbuff.h> #include <linux/string.h> +#include <net/inet6_hashtables.h> #include <net/tls.h> #include "../ccm.h" @@ -391,8 +392,9 @@ nfp_net_tls_add(struct net_device *netdev, struct sock *sk, if (direction == TLS_OFFLOAD_CTX_DIR_TX) return 0; - tls_offload_rx_resync_set_type(sk, - TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT); + if (!nn->tlv_caps.tls_resync_ss) + tls_offload_rx_resync_set_type(sk, TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT); + return 0; err_fw_remove: @@ -424,6 +426,7 @@ nfp_net_tls_resync(struct net_device *netdev, struct sock *sk, u32 seq, struct nfp_net *nn = netdev_priv(netdev); struct nfp_net_tls_offload_ctx *ntls; struct nfp_crypto_req_update *req; + enum nfp_ccm_type type; struct sk_buff *skb; gfp_t flags; int err; @@ -442,15 +445,18 @@ nfp_net_tls_resync(struct net_device *netdev, struct sock *sk, u32 seq, req->tcp_seq = cpu_to_be32(seq); memcpy(req->rec_no, rcd_sn, sizeof(req->rec_no)); + type = NFP_CCM_TYPE_CRYPTO_UPDATE; if (direction == 
TLS_OFFLOAD_CTX_DIR_TX) { - err = nfp_net_tls_communicate_simple(nn, skb, "sync", - NFP_CCM_TYPE_CRYPTO_UPDATE); + err = nfp_net_tls_communicate_simple(nn, skb, "sync", type); if (err) return err; ntls->next_seq = seq; } else { - nfp_ccm_mbox_post(nn, skb, NFP_CCM_TYPE_CRYPTO_UPDATE, + if (nn->tlv_caps.tls_resync_ss) + type = NFP_CCM_TYPE_CRYPTO_RESYNC; + nfp_ccm_mbox_post(nn, skb, type, sizeof(struct nfp_crypto_reply_simple)); + atomic_inc(&nn->ktls_rx_resync_sent); } return 0; @@ -462,6 +468,79 @@ static const struct tlsdev_ops nfp_net_tls_ops = { .tls_dev_resync = nfp_net_tls_resync, }; +int nfp_net_tls_rx_resync_req(struct net_device *netdev, + struct nfp_net_tls_resync_req *req, + void *pkt, unsigned int pkt_len) +{ + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_net_tls_offload_ctx *ntls; + struct ipv6hdr *ipv6h; + struct tcphdr *th; + struct iphdr *iph; + struct sock *sk; + __be32 tcp_seq; + int err; + + iph = pkt + req->l3_offset; + ipv6h = pkt + req->l3_offset; + th = pkt + req->l4_offset; + + if ((u8 *)&th[1] > (u8 *)pkt + pkt_len) { + netdev_warn_once(netdev, "invalid TLS RX resync request (l3_off: %hhu l4_off: %hhu pkt_len: %u)\n", + req->l3_offset, req->l4_offset, pkt_len); + err = -EINVAL; + goto err_cnt_ign; + } + + switch (iph->version) { + case 4: + sk = inet_lookup_established(dev_net(netdev), &tcp_hashinfo, + iph->saddr, th->source, iph->daddr, + th->dest, netdev->ifindex); + break; +#if IS_ENABLED(CONFIG_IPV6) + case 6: + sk = __inet6_lookup_established(dev_net(netdev), &tcp_hashinfo, + &ipv6h->saddr, th->source, + &ipv6h->daddr, ntohs(th->dest), + netdev->ifindex, 0); + break; +#endif + default: + netdev_warn_once(netdev, "invalid TLS RX resync request (l3_off: %hhu l4_off: %hhu ipver: %u)\n", + req->l3_offset, req->l4_offset, iph->version); + err = -EINVAL; + goto err_cnt_ign; + } + + err = 0; + if (!sk) + goto err_cnt_ign; + if (!tls_is_sk_rx_device_offloaded(sk) || + sk->sk_shutdown & RCV_SHUTDOWN) + goto err_put_sock; + + ntls = 
tls_driver_ctx(sk, TLS_OFFLOAD_CTX_DIR_RX); + /* some FW versions can't report the handle and report 0s */ + if (memchr_inv(&req->fw_handle, 0, sizeof(req->fw_handle)) && + memcmp(&req->fw_handle, &ntls->fw_handle, sizeof(ntls->fw_handle))) + goto err_put_sock; + + /* copy to ensure alignment */ + memcpy(&tcp_seq, &req->tcp_seq, sizeof(tcp_seq)); + tls_offload_rx_resync_request(sk, tcp_seq); + atomic_inc(&nn->ktls_rx_resync_req); + + sock_gen_put(sk); + return 0; + +err_put_sock: + sock_gen_put(sk); +err_cnt_ign: + atomic_inc(&nn->ktls_rx_resync_ign); + return err; +} + static int nfp_net_tls_reset(struct nfp_net *nn) { struct nfp_crypto_req_reset *req; diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 1b019fdfcd97..c06600fb47ff 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -22,8 +22,9 @@ #define NFP_FL_TUNNEL_CSUM cpu_to_be16(0x01) #define NFP_FL_TUNNEL_KEY cpu_to_be16(0x04) #define NFP_FL_TUNNEL_GENEVE_OPT cpu_to_be16(0x0800) -#define NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS IP_TUNNEL_INFO_TX -#define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS (NFP_FL_TUNNEL_CSUM | \ +#define NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS (IP_TUNNEL_INFO_TX | \ + IP_TUNNEL_INFO_IPV6) +#define NFP_FL_SUPPORTED_UDP_TUN_FLAGS (NFP_FL_TUNNEL_CSUM | \ NFP_FL_TUNNEL_KEY | \ NFP_FL_TUNNEL_GENEVE_OPT) @@ -394,19 +395,26 @@ nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len, } static int -nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun, - const struct flow_action_entry *act, - struct nfp_fl_pre_tunnel *pre_tun, - enum nfp_flower_tun_type tun_type, - struct net_device *netdev, struct netlink_ext_ack *extack) +nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, + const struct flow_action_entry *act, + struct nfp_fl_pre_tunnel *pre_tun, + enum nfp_flower_tun_type tun_type, + struct net_device *netdev, 
struct netlink_ext_ack *extack) { - size_t act_size = sizeof(struct nfp_fl_set_ipv4_tun); const struct ip_tunnel_info *ip_tun = act->tunnel; + bool ipv6 = ip_tunnel_info_af(ip_tun) == AF_INET6; + size_t act_size = sizeof(struct nfp_fl_set_tun); struct nfp_flower_priv *priv = app->priv; u32 tmp_set_ip_tun_type_index = 0; /* Currently support one pre-tunnel so index is always 0. */ int pretun_idx = 0; + if (!IS_ENABLED(CONFIG_IPV6) && ipv6) + return -EOPNOTSUPP; + + if (ipv6 && !(priv->flower_ext_feats & NFP_FL_FEATS_IPV6_TUN)) + return -EOPNOTSUPP; + BUILD_BUG_ON(NFP_FL_TUNNEL_CSUM != TUNNEL_CSUM || NFP_FL_TUNNEL_KEY != TUNNEL_KEY || NFP_FL_TUNNEL_GENEVE_OPT != TUNNEL_GENEVE_OPT); @@ -417,19 +425,35 @@ nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun, return -EOPNOTSUPP; } - set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL; + set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_TUNNEL; set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ; /* Set tunnel type and pre-tunnel index. 
*/ tmp_set_ip_tun_type_index |= - FIELD_PREP(NFP_FL_IPV4_TUNNEL_TYPE, tun_type) | - FIELD_PREP(NFP_FL_IPV4_PRE_TUN_INDEX, pretun_idx); + FIELD_PREP(NFP_FL_TUNNEL_TYPE, tun_type) | + FIELD_PREP(NFP_FL_PRE_TUN_INDEX, pretun_idx); set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index); set_tun->tun_id = ip_tun->key.tun_id; if (ip_tun->key.ttl) { set_tun->ttl = ip_tun->key.ttl; +#ifdef CONFIG_IPV6 + } else if (ipv6) { + struct net *net = dev_net(netdev); + struct flowi6 flow = {}; + struct dst_entry *dst; + + flow.daddr = ip_tun->key.u.ipv6.dst; + flow.flowi4_proto = IPPROTO_UDP; + dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow, NULL); + if (!IS_ERR(dst)) { + set_tun->ttl = ip6_dst_hoplimit(dst); + dst_release(dst); + } else { + set_tun->ttl = net->ipv6.devconf_all->hop_limit; + } +#endif } else { struct net *net = dev_net(netdev); struct flowi4 flow = {}; @@ -455,7 +479,7 @@ nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun, set_tun->tos = ip_tun->key.tos; if (!(ip_tun->key.tun_flags & NFP_FL_TUNNEL_KEY) || - ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS) { + ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_UDP_TUN_FLAGS) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support tunnel flag offload"); return -EOPNOTSUPP; } @@ -467,7 +491,12 @@ nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun, } /* Complete pre_tunnel action. 
*/ - pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst; + if (ipv6) { + pre_tun->flags |= cpu_to_be16(NFP_FL_PRE_TUN_IPV6); + pre_tun->ipv6_dst = ip_tun->key.u.ipv6.dst; + } else { + pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst; + } return 0; } @@ -956,8 +985,8 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, struct nfp_flower_pedit_acts *set_act, bool *pkt_host, struct netlink_ext_ack *extack, int act_idx) { - struct nfp_fl_set_ipv4_tun *set_tun; struct nfp_fl_pre_tunnel *pre_tun; + struct nfp_fl_set_tun *set_tun; struct nfp_fl_push_vlan *psh_v; struct nfp_fl_push_mpls *psh_m; struct nfp_fl_pop_vlan *pop_v; @@ -1032,7 +1061,7 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, * If none, the packet falls back before applying other actions. */ if (*a_len + sizeof(struct nfp_fl_pre_tunnel) + - sizeof(struct nfp_fl_set_ipv4_tun) > NFP_FL_MAX_A_SIZ) { + sizeof(struct nfp_fl_set_tun) > NFP_FL_MAX_A_SIZ) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at tunnel encap"); return -EOPNOTSUPP; } @@ -1046,11 +1075,11 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, return err; set_tun = (void *)&nfp_fl->action_data[*a_len]; - err = nfp_fl_set_ipv4_tun(app, set_tun, act, pre_tun, - *tun_type, netdev, extack); + err = nfp_fl_set_tun(app, set_tun, act, pre_tun, *tun_type, + netdev, extack); if (err) return err; - *a_len += sizeof(struct nfp_fl_set_ipv4_tun); + *a_len += sizeof(struct nfp_fl_set_tun); } break; case FLOW_ACTION_TUNNEL_DECAP: diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index 05981b54eaab..a595ddb92bff 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -270,11 +270,17 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb) } goto err_default; case 
NFP_FLOWER_CMSG_TYPE_NO_NEIGH: - nfp_tunnel_request_route(app, skb); + nfp_tunnel_request_route_v4(app, skb); + break; + case NFP_FLOWER_CMSG_TYPE_NO_NEIGH_V6: + nfp_tunnel_request_route_v6(app, skb); break; case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS: nfp_tunnel_keep_alive(app, skb); break; + case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS_V6: + nfp_tunnel_keep_alive_v6(app, skb); + break; case NFP_FLOWER_CMSG_TYPE_QOS_STATS: nfp_flower_stats_rlim_reply(app, skb); break; @@ -361,7 +367,8 @@ void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb) nfp_flower_process_mtu_ack(app, skb)) { /* Handle MTU acks outside wq to prevent RTNL conflict. */ dev_consume_skb_any(skb); - } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH) { + } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH || + cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6) { /* Acks from the NFP that the route is added - ignore. */ dev_consume_skb_any(skb); } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_PORT_REIFY) { diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 7eb2ec8969c3..9b50d76bbc09 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -26,6 +26,7 @@ #define NFP_FLOWER_LAYER2_GRE BIT(0) #define NFP_FLOWER_LAYER2_GENEVE BIT(5) #define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6) +#define NFP_FLOWER_LAYER2_TUN_IPV6 BIT(7) #define NFP_FLOWER_MASK_VLAN_PRIO GENMASK(15, 13) #define NFP_FLOWER_MASK_VLAN_PRESENT BIT(12) @@ -63,6 +64,7 @@ #define NFP_FL_MAX_GENEVE_OPT_ACT 32 #define NFP_FL_MAX_GENEVE_OPT_CNT 64 #define NFP_FL_MAX_GENEVE_OPT_KEY 32 +#define NFP_FL_MAX_GENEVE_OPT_KEY_V6 8 /* Action opcodes */ #define NFP_FL_ACTION_OPCODE_OUTPUT 0 @@ -70,7 +72,7 @@ #define NFP_FL_ACTION_OPCODE_POP_VLAN 2 #define NFP_FL_ACTION_OPCODE_PUSH_MPLS 3 #define NFP_FL_ACTION_OPCODE_POP_MPLS 4 -#define NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL 6 +#define 
NFP_FL_ACTION_OPCODE_SET_TUNNEL 6 #define NFP_FL_ACTION_OPCODE_SET_ETHERNET 7 #define NFP_FL_ACTION_OPCODE_SET_MPLS 8 #define NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS 9 @@ -99,8 +101,8 @@ /* Tunnel ports */ #define NFP_FL_PORT_TYPE_TUN 0x50000000 -#define NFP_FL_IPV4_TUNNEL_TYPE GENMASK(7, 4) -#define NFP_FL_IPV4_PRE_TUN_INDEX GENMASK(2, 0) +#define NFP_FL_TUNNEL_TYPE GENMASK(7, 4) +#define NFP_FL_PRE_TUN_INDEX GENMASK(2, 0) #define NFP_FLOWER_WORKQ_MAX_SKBS 30000 @@ -206,13 +208,16 @@ struct nfp_fl_pre_lag { struct nfp_fl_pre_tunnel { struct nfp_fl_act_head head; - __be16 reserved; - __be32 ipv4_dst; - /* reserved for use with IPv6 addresses */ - __be32 extra[3]; + __be16 flags; + union { + __be32 ipv4_dst; + struct in6_addr ipv6_dst; + }; }; -struct nfp_fl_set_ipv4_tun { +#define NFP_FL_PRE_TUN_IPV6 BIT(0) + +struct nfp_fl_set_tun { struct nfp_fl_act_head head; __be16 reserved; __be64 tun_id __packed; @@ -387,6 +392,11 @@ struct nfp_flower_tun_ipv4 { __be32 dst; }; +struct nfp_flower_tun_ipv6 { + struct in6_addr src; + struct in6_addr dst; +}; + struct nfp_flower_tun_ip_ext { u8 tos; u8 ttl; @@ -416,6 +426,42 @@ struct nfp_flower_ipv4_udp_tun { __be32 tun_id; }; +/* Flow Frame IPv6 UDP TUNNEL --> Tunnel details (11W/44B) + * ----------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 63 - 32 | + * 
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | tos | ttl | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VNI | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_ipv6_udp_tun { + struct nfp_flower_tun_ipv6 ipv6; + __be16 reserved1; + struct nfp_flower_tun_ip_ext ip_ext; + __be32 reserved2; + __be32 tun_id; +}; + /* Flow Frame GRE TUNNEL --> Tunnel details (6W/24B) * ----------------------------------------------------------------- * 3 2 1 @@ -445,6 +491,46 @@ struct nfp_flower_ipv4_gre_tun { __be32 reserved2; }; +/* Flow Frame GRE TUNNEL V6 --> Tunnel details (12W/48B) + * ----------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 127 - 96 | + * 
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | tun_flags | tos | ttl | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | Ethertype | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Key | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_ipv6_gre_tun { + struct nfp_flower_tun_ipv6 ipv6; + __be16 tun_flags; + struct nfp_flower_tun_ip_ext ip_ext; + __be16 reserved1; + __be16 ethertype; + __be32 tun_key; + __be32 reserved2; +}; + struct nfp_flower_geneve_options { u8 data[NFP_FL_MAX_GENEVE_OPT_KEY]; }; @@ -485,6 +571,10 @@ enum nfp_flower_cmsg_type_port { NFP_FLOWER_CMSG_TYPE_QOS_DEL = 19, NFP_FLOWER_CMSG_TYPE_QOS_STATS = 20, NFP_FLOWER_CMSG_TYPE_PRE_TUN_RULE = 21, + NFP_FLOWER_CMSG_TYPE_TUN_IPS_V6 = 22, + NFP_FLOWER_CMSG_TYPE_NO_NEIGH_V6 = 23, + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 = 24, + NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS_V6 = 25, NFP_FLOWER_CMSG_TYPE_MAX = 32, }; diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index e0c985fcaec1..d55d0d33bc45 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -43,6 +43,7 @@ struct nfp_app; #define NFP_FL_FEATS_VF_RLIM BIT(4) #define NFP_FL_FEATS_FLOW_MOD BIT(5) #define NFP_FL_FEATS_PRE_TUN_RULES BIT(6) +#define NFP_FL_FEATS_IPV6_TUN BIT(7) #define NFP_FL_FEATS_FLOW_MERGE BIT(30) #define NFP_FL_FEATS_LAG BIT(31) @@ -62,18 +63,26 @@ struct nfp_fl_stats_id { * struct nfp_fl_tunnel_offloads - priv data for tunnel offloads * @offloaded_macs: Hashtable of the offloaded MAC addresses * @ipv4_off_list: List of IPv4 addresses to offload - * @neigh_off_list: List of neighbour offloads + * @ipv6_off_list: List of IPv6 addresses to offload + * @neigh_off_list_v4: List of IPv4 neighbour offloads + * 
@neigh_off_list_v6: List of IPv6 neighbour offloads * @ipv4_off_lock: Lock for the IPv4 address list - * @neigh_off_lock: Lock for the neighbour address list + * @ipv6_off_lock: Lock for the IPv6 address list + * @neigh_off_lock_v4: Lock for the IPv4 neighbour address list + * @neigh_off_lock_v6: Lock for the IPv6 neighbour address list * @mac_off_ids: IDA to manage id assignment for offloaded MACs * @neigh_nb: Notifier to monitor neighbour state */ struct nfp_fl_tunnel_offloads { struct rhashtable offloaded_macs; struct list_head ipv4_off_list; - struct list_head neigh_off_list; + struct list_head ipv6_off_list; + struct list_head neigh_off_list_v4; + struct list_head neigh_off_list_v6; struct mutex ipv4_off_lock; - spinlock_t neigh_off_lock; + struct mutex ipv6_off_lock; + spinlock_t neigh_off_lock_v4; + spinlock_t neigh_off_lock_v6; struct ida mac_off_ids; struct notifier_block neigh_nb; }; @@ -273,12 +282,25 @@ struct nfp_fl_stats { u64 used; }; +/** + * struct nfp_ipv6_addr_entry - cached IPv6 addresses + * @ipv6_addr: IP address + * @ref_count: number of rules currently using this IP + * @list: list pointer + */ +struct nfp_ipv6_addr_entry { + struct in6_addr ipv6_addr; + int ref_count; + struct list_head list; +}; + struct nfp_fl_payload { struct nfp_fl_rule_metadata meta; unsigned long tc_flower_cookie; struct rhash_head fl_node; struct rcu_head rcu; __be32 nfp_tun_ipv4_addr; + struct nfp_ipv6_addr_entry *nfp_tun_ipv6; struct net_device *ingress_dev; char *unmasked_data; char *mask_data; @@ -396,8 +418,14 @@ int nfp_tunnel_mac_event_handler(struct nfp_app *app, unsigned long event, void *ptr); void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4); void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4); -void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb); +void +nfp_tunnel_put_ipv6_off(struct nfp_app *app, struct nfp_ipv6_addr_entry *entry); +struct nfp_ipv6_addr_entry * +nfp_tunnel_add_ipv6_off(struct nfp_app *app, 
struct in6_addr *ipv6); +void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb); +void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb); void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb); +void nfp_tunnel_keep_alive_v6(struct nfp_app *app, struct sk_buff *skb); void nfp_flower_lag_init(struct nfp_fl_lag *lag); void nfp_flower_lag_cleanup(struct nfp_fl_lag *lag); int nfp_flower_lag_reset(struct nfp_fl_lag *lag); diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index 9cc3ba17ff69..546bc01d507d 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -10,9 +10,8 @@ static void nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext, struct nfp_flower_meta_tci *msk, - struct flow_cls_offload *flow, u8 key_type) + struct flow_rule *rule, u8 key_type) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); u16 tmp_tci; memset(ext, 0, sizeof(struct nfp_flower_meta_tci)); @@ -77,11 +76,8 @@ nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, static void nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, - struct nfp_flower_mac_mpls *msk, - struct flow_cls_offload *flow) + struct nfp_flower_mac_mpls *msk, struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - memset(ext, 0, sizeof(struct nfp_flower_mac_mpls)); memset(msk, 0, sizeof(struct nfp_flower_mac_mpls)); @@ -130,10 +126,8 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, static void nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext, struct nfp_flower_tp_ports *msk, - struct flow_cls_offload *flow) + struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - memset(ext, 0, sizeof(struct nfp_flower_tp_ports)); memset(msk, 0, sizeof(struct nfp_flower_tp_ports)); @@ -150,11 +144,8 @@ nfp_flower_compile_tport(struct 
nfp_flower_tp_ports *ext, static void nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext, - struct nfp_flower_ip_ext *msk, - struct flow_cls_offload *flow) + struct nfp_flower_ip_ext *msk, struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { struct flow_match_basic match; @@ -224,10 +215,8 @@ nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext, static void nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext, - struct nfp_flower_ipv4 *msk, - struct flow_cls_offload *flow) + struct nfp_flower_ipv4 *msk, struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); struct flow_match_ipv4_addrs match; memset(ext, 0, sizeof(struct nfp_flower_ipv4)); @@ -241,16 +230,13 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext, msk->ipv4_dst = match.mask->dst; } - nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, flow); + nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); } static void nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext, - struct nfp_flower_ipv6 *msk, - struct flow_cls_offload *flow) + struct nfp_flower_ipv6 *msk, struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - memset(ext, 0, sizeof(struct nfp_flower_ipv6)); memset(msk, 0, sizeof(struct nfp_flower_ipv6)); @@ -264,16 +250,15 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext, msk->ipv6_dst = match.mask->dst; } - nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, flow); + nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); } static int -nfp_flower_compile_geneve_opt(void *ext, void *msk, - struct flow_cls_offload *flow) +nfp_flower_compile_geneve_opt(void *ext, void *msk, struct flow_rule *rule) { struct flow_match_enc_opts match; - flow_rule_match_enc_opts(flow->rule, &match); + flow_rule_match_enc_opts(rule, &match); memcpy(ext, match.key->data, match.key->len); memcpy(msk, match.mask->data, match.mask->len); @@ 
-283,10 +268,8 @@ nfp_flower_compile_geneve_opt(void *ext, void *msk, static void nfp_flower_compile_tun_ipv4_addrs(struct nfp_flower_tun_ipv4 *ext, struct nfp_flower_tun_ipv4 *msk, - struct flow_cls_offload *flow) + struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { struct flow_match_ipv4_addrs match; @@ -299,12 +282,26 @@ nfp_flower_compile_tun_ipv4_addrs(struct nfp_flower_tun_ipv4 *ext, } static void +nfp_flower_compile_tun_ipv6_addrs(struct nfp_flower_tun_ipv6 *ext, + struct nfp_flower_tun_ipv6 *msk, + struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_enc_ipv6_addrs(rule, &match); + ext->src = match.key->src; + ext->dst = match.key->dst; + msk->src = match.mask->src; + msk->dst = match.mask->dst; + } +} + +static void nfp_flower_compile_tun_ip_ext(struct nfp_flower_tun_ip_ext *ext, struct nfp_flower_tun_ip_ext *msk, - struct flow_cls_offload *flow) + struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { struct flow_match_ip match; @@ -317,57 +314,97 @@ nfp_flower_compile_tun_ip_ext(struct nfp_flower_tun_ip_ext *ext, } static void -nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext, - struct nfp_flower_ipv4_gre_tun *msk, - struct flow_cls_offload *flow) +nfp_flower_compile_tun_udp_key(__be32 *key, __be32 *key_msk, + struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - - memset(ext, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); - memset(msk, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid match; + u32 vni; - /* NVGRE is the only supported GRE tunnel type */ - ext->ethertype = cpu_to_be16(ETH_P_TEB); - msk->ethertype = 
cpu_to_be16(~0); + flow_rule_match_enc_keyid(rule, &match); + vni = be32_to_cpu(match.key->keyid) << NFP_FL_TUN_VNI_OFFSET; + *key = cpu_to_be32(vni); + vni = be32_to_cpu(match.mask->keyid) << NFP_FL_TUN_VNI_OFFSET; + *key_msk = cpu_to_be32(vni); + } +} +static void +nfp_flower_compile_tun_gre_key(__be32 *key, __be32 *key_msk, __be16 *flags, + __be16 *flags_msk, struct flow_rule *rule) +{ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { struct flow_match_enc_keyid match; flow_rule_match_enc_keyid(rule, &match); - ext->tun_key = match.key->keyid; - msk->tun_key = match.mask->keyid; + *key = match.key->keyid; + *key_msk = match.mask->keyid; - ext->tun_flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); - msk->tun_flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); + *flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); + *flags_msk = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); } +} + +static void +nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext, + struct nfp_flower_ipv4_gre_tun *msk, + struct flow_rule *rule) +{ + memset(ext, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); + memset(msk, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); + + /* NVGRE is the only supported GRE tunnel type */ + ext->ethertype = cpu_to_be16(ETH_P_TEB); + msk->ethertype = cpu_to_be16(~0); - nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, flow); - nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, flow); + nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_gre_key(&ext->tun_key, &msk->tun_key, + &ext->tun_flags, &msk->tun_flags, rule); } static void nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext, struct nfp_flower_ipv4_udp_tun *msk, - struct flow_cls_offload *flow) + struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - memset(ext, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); memset(msk, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); - 
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_match_enc_keyid match; - u32 temp_vni; + nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_udp_key(&ext->tun_id, &msk->tun_id, rule); +} - flow_rule_match_enc_keyid(rule, &match); - temp_vni = be32_to_cpu(match.key->keyid) << NFP_FL_TUN_VNI_OFFSET; - ext->tun_id = cpu_to_be32(temp_vni); - temp_vni = be32_to_cpu(match.mask->keyid) << NFP_FL_TUN_VNI_OFFSET; - msk->tun_id = cpu_to_be32(temp_vni); - } +static void +nfp_flower_compile_ipv6_udp_tun(struct nfp_flower_ipv6_udp_tun *ext, + struct nfp_flower_ipv6_udp_tun *msk, + struct flow_rule *rule) +{ + memset(ext, 0, sizeof(struct nfp_flower_ipv6_udp_tun)); + memset(msk, 0, sizeof(struct nfp_flower_ipv6_udp_tun)); + + nfp_flower_compile_tun_ipv6_addrs(&ext->ipv6, &msk->ipv6, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_udp_key(&ext->tun_id, &msk->tun_id, rule); +} + +static void +nfp_flower_compile_ipv6_gre_tun(struct nfp_flower_ipv6_gre_tun *ext, + struct nfp_flower_ipv6_gre_tun *msk, + struct flow_rule *rule) +{ + memset(ext, 0, sizeof(struct nfp_flower_ipv6_gre_tun)); + memset(msk, 0, sizeof(struct nfp_flower_ipv6_gre_tun)); + + /* NVGRE is the only supported GRE tunnel type */ + ext->ethertype = cpu_to_be16(ETH_P_TEB); + msk->ethertype = cpu_to_be16(~0); - nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, flow); - nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, flow); + nfp_flower_compile_tun_ipv6_addrs(&ext->ipv6, &msk->ipv6, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_gre_key(&ext->tun_key, &msk->tun_key, + &ext->tun_flags, &msk->tun_flags, rule); } int nfp_flower_compile_flow_match(struct nfp_app *app, @@ -378,6 +415,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, enum nfp_flower_tun_type 
tun_type, struct netlink_ext_ack *extack) { + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); u32 port_id; int err; u8 *ext; @@ -393,7 +431,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, nfp_flower_compile_meta_tci((struct nfp_flower_meta_tci *)ext, (struct nfp_flower_meta_tci *)msk, - flow, key_ls->key_layer); + rule, key_ls->key_layer); ext += sizeof(struct nfp_flower_meta_tci); msk += sizeof(struct nfp_flower_meta_tci); @@ -425,7 +463,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, if (NFP_FLOWER_LAYER_MAC & key_ls->key_layer) { nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)ext, (struct nfp_flower_mac_mpls *)msk, - flow); + rule); ext += sizeof(struct nfp_flower_mac_mpls); msk += sizeof(struct nfp_flower_mac_mpls); } @@ -433,7 +471,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, if (NFP_FLOWER_LAYER_TP & key_ls->key_layer) { nfp_flower_compile_tport((struct nfp_flower_tp_ports *)ext, (struct nfp_flower_tp_ports *)msk, - flow); + rule); ext += sizeof(struct nfp_flower_tp_ports); msk += sizeof(struct nfp_flower_tp_ports); } @@ -441,7 +479,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, if (NFP_FLOWER_LAYER_IPV4 & key_ls->key_layer) { nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)ext, (struct nfp_flower_ipv4 *)msk, - flow); + rule); ext += sizeof(struct nfp_flower_ipv4); msk += sizeof(struct nfp_flower_ipv4); } @@ -449,43 +487,83 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, if (NFP_FLOWER_LAYER_IPV6 & key_ls->key_layer) { nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)ext, (struct nfp_flower_ipv6 *)msk, - flow); + rule); ext += sizeof(struct nfp_flower_ipv6); msk += sizeof(struct nfp_flower_ipv6); } if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GRE) { - __be32 tun_dst; - - nfp_flower_compile_ipv4_gre_tun((void *)ext, (void *)msk, flow); - tun_dst = ((struct nfp_flower_ipv4_gre_tun *)ext)->ipv4.dst; - ext += sizeof(struct nfp_flower_ipv4_gre_tun); - msk += sizeof(struct 
nfp_flower_ipv4_gre_tun); - - /* Store the tunnel destination in the rule data. - * This must be present and be an exact match. - */ - nfp_flow->nfp_tun_ipv4_addr = tun_dst; - nfp_tunnel_add_ipv4_off(app, tun_dst); + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) { + struct nfp_flower_ipv6_gre_tun *gre_match; + struct nfp_ipv6_addr_entry *entry; + struct in6_addr *dst; + + nfp_flower_compile_ipv6_gre_tun((void *)ext, + (void *)msk, rule); + gre_match = (struct nfp_flower_ipv6_gre_tun *)ext; + dst = &gre_match->ipv6.dst; + ext += sizeof(struct nfp_flower_ipv6_gre_tun); + msk += sizeof(struct nfp_flower_ipv6_gre_tun); + + entry = nfp_tunnel_add_ipv6_off(app, dst); + if (!entry) + return -EOPNOTSUPP; + + nfp_flow->nfp_tun_ipv6 = entry; + } else { + __be32 dst; + + nfp_flower_compile_ipv4_gre_tun((void *)ext, + (void *)msk, rule); + dst = ((struct nfp_flower_ipv4_gre_tun *)ext)->ipv4.dst; + ext += sizeof(struct nfp_flower_ipv4_gre_tun); + msk += sizeof(struct nfp_flower_ipv4_gre_tun); + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. + */ + nfp_flow->nfp_tun_ipv4_addr = dst; + nfp_tunnel_add_ipv4_off(app, dst); + } } if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN || key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE) { - __be32 tun_dst; - - nfp_flower_compile_ipv4_udp_tun((void *)ext, (void *)msk, flow); - tun_dst = ((struct nfp_flower_ipv4_udp_tun *)ext)->ipv4.dst; - ext += sizeof(struct nfp_flower_ipv4_udp_tun); - msk += sizeof(struct nfp_flower_ipv4_udp_tun); - - /* Store the tunnel destination in the rule data. - * This must be present and be an exact match. 
- */ - nfp_flow->nfp_tun_ipv4_addr = tun_dst; - nfp_tunnel_add_ipv4_off(app, tun_dst); + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) { + struct nfp_flower_ipv6_udp_tun *udp_match; + struct nfp_ipv6_addr_entry *entry; + struct in6_addr *dst; + + nfp_flower_compile_ipv6_udp_tun((void *)ext, + (void *)msk, rule); + udp_match = (struct nfp_flower_ipv6_udp_tun *)ext; + dst = &udp_match->ipv6.dst; + ext += sizeof(struct nfp_flower_ipv6_udp_tun); + msk += sizeof(struct nfp_flower_ipv6_udp_tun); + + entry = nfp_tunnel_add_ipv6_off(app, dst); + if (!entry) + return -EOPNOTSUPP; + + nfp_flow->nfp_tun_ipv6 = entry; + } else { + __be32 dst; + + nfp_flower_compile_ipv4_udp_tun((void *)ext, + (void *)msk, rule); + dst = ((struct nfp_flower_ipv4_udp_tun *)ext)->ipv4.dst; + ext += sizeof(struct nfp_flower_ipv4_udp_tun); + msk += sizeof(struct nfp_flower_ipv4_udp_tun); + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. + */ + nfp_flow->nfp_tun_ipv4_addr = dst; + nfp_tunnel_add_ipv4_off(app, dst); + } if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) { - err = nfp_flower_compile_geneve_opt(ext, msk, flow); + err = nfp_flower_compile_geneve_opt(ext, msk, rule); if (err) return err; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 987ae221f6be..7ca5c1becfcf 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -54,6 +54,10 @@ (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) +#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_V6_R \ + (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) + #define NFP_FLOWER_MERGE_FIELDS \ (NFP_FLOWER_LAYER_PORT | \ NFP_FLOWER_LAYER_MAC | \ @@ -64,7 +68,8 @@ #define NFP_FLOWER_PRE_TUN_RULE_FIELDS \ (NFP_FLOWER_LAYER_PORT | \ NFP_FLOWER_LAYER_MAC | \ - NFP_FLOWER_LAYER_IPV4) + 
NFP_FLOWER_LAYER_IPV4 | \ + NFP_FLOWER_LAYER_IPV6) struct nfp_flower_merge_check { union { @@ -146,10 +151,11 @@ static bool nfp_flower_check_higher_than_l3(struct flow_cls_offload *f) static int nfp_flower_calc_opt_layer(struct flow_dissector_key_enc_opts *enc_opts, - u32 *key_layer_two, int *key_size, + u32 *key_layer_two, int *key_size, bool ipv6, struct netlink_ext_ack *extack) { - if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY) { + if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY || + (ipv6 && enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY_V6)) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: geneve options exceed maximum length"); return -EOPNOTSUPP; } @@ -167,7 +173,7 @@ nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports, struct flow_dissector_key_enc_opts *enc_op, u32 *key_layer_two, u8 *key_layer, int *key_size, struct nfp_flower_priv *priv, - enum nfp_flower_tun_type *tun_type, + enum nfp_flower_tun_type *tun_type, bool ipv6, struct netlink_ext_ack *extack) { int err; @@ -176,7 +182,15 @@ nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports, case htons(IANA_VXLAN_UDP_PORT): *tun_type = NFP_FL_TUNNEL_VXLAN; *key_layer |= NFP_FLOWER_LAYER_VXLAN; - *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + + if (ipv6) { + *key_layer |= NFP_FLOWER_LAYER_EXT_META; + *key_size += sizeof(struct nfp_flower_ext_meta); + *key_layer_two |= NFP_FLOWER_LAYER2_TUN_IPV6; + *key_size += sizeof(struct nfp_flower_ipv6_udp_tun); + } else { + *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + } if (enc_op) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on vxlan tunnels"); @@ -192,7 +206,13 @@ nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports, *key_layer |= NFP_FLOWER_LAYER_EXT_META; *key_size += sizeof(struct nfp_flower_ext_meta); *key_layer_two |= NFP_FLOWER_LAYER2_GENEVE; - *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + + if (ipv6) { + *key_layer_two |= 
NFP_FLOWER_LAYER2_TUN_IPV6; + *key_size += sizeof(struct nfp_flower_ipv6_udp_tun); + } else { + *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + } if (!enc_op) break; @@ -200,8 +220,8 @@ nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports, NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve option offload"); return -EOPNOTSUPP; } - err = nfp_flower_calc_opt_layer(enc_op, key_layer_two, - key_size, extack); + err = nfp_flower_calc_opt_layer(enc_op, key_layer_two, key_size, + ipv6, extack); if (err) return err; break; @@ -237,6 +257,8 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, /* If any tun dissector is used then the required set must be used. */ if (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR && + (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_V6_R) + != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_V6_R && (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel match not supported"); @@ -268,8 +290,10 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { struct flow_match_enc_opts enc_op = { NULL, NULL }; struct flow_match_ipv4_addrs ipv4_addrs; + struct flow_match_ipv6_addrs ipv6_addrs; struct flow_match_control enc_ctl; struct flow_match_ports enc_ports; + bool ipv6_tun = false; flow_rule_match_enc_control(rule, &enc_ctl); @@ -277,38 +301,62 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, NL_SET_ERR_MSG_MOD(extack, "unsupported offload: wildcarded protocols on tunnels are not supported"); return -EOPNOTSUPP; } - if (enc_ctl.key->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only IPv4 tunnels are supported"); + + ipv6_tun = enc_ctl.key->addr_type == + FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (ipv6_tun && + !(priv->flower_ext_feats & 
NFP_FL_FEATS_IPV6_TUN)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: firmware does not support IPv6 tunnels"); return -EOPNOTSUPP; } - /* These fields are already verified as used. */ - flow_rule_match_enc_ipv4_addrs(rule, &ipv4_addrs); - if (ipv4_addrs.mask->dst != cpu_to_be32(~0)) { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match IPv4 destination address is supported"); + if (!ipv6_tun && + enc_ctl.key->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel address type not IPv4 or IPv6"); return -EOPNOTSUPP; } + if (ipv6_tun) { + flow_rule_match_enc_ipv6_addrs(rule, &ipv6_addrs); + if (memchr_inv(&ipv6_addrs.mask->dst, 0xff, + sizeof(ipv6_addrs.mask->dst))) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match IPv6 destination address is supported"); + return -EOPNOTSUPP; + } + } else { + flow_rule_match_enc_ipv4_addrs(rule, &ipv4_addrs); + if (ipv4_addrs.mask->dst != cpu_to_be32(~0)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match IPv4 destination address is supported"); + return -EOPNOTSUPP; + } + } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) flow_rule_match_enc_opts(rule, &enc_op); - if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) { /* check if GRE, which has no enc_ports */ - if (netif_is_gretap(netdev)) { - *tun_type = NFP_FL_TUNNEL_GRE; - key_layer |= NFP_FLOWER_LAYER_EXT_META; - key_size += sizeof(struct nfp_flower_ext_meta); - key_layer_two |= NFP_FLOWER_LAYER2_GRE; - key_size += - sizeof(struct nfp_flower_ipv4_gre_tun); + if (!netif_is_gretap(netdev)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: an exact match on L4 destination port is required for non-GRE tunnels"); + return -EOPNOTSUPP; + } - if (enc_op.key) { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on GRE tunnels"); - return -EOPNOTSUPP; - } + *tun_type = NFP_FL_TUNNEL_GRE; + key_layer |= 
NFP_FLOWER_LAYER_EXT_META; + key_size += sizeof(struct nfp_flower_ext_meta); + key_layer_two |= NFP_FLOWER_LAYER2_GRE; + + if (ipv6_tun) { + key_layer_two |= NFP_FLOWER_LAYER2_TUN_IPV6; + key_size += + sizeof(struct nfp_flower_ipv6_gre_tun); } else { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: an exact match on L4 destination port is required for non-GRE tunnels"); + key_size += + sizeof(struct nfp_flower_ipv4_gre_tun); + } + + if (enc_op.key) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on GRE tunnels"); return -EOPNOTSUPP; } } else { @@ -323,7 +371,8 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, &key_layer_two, &key_layer, &key_size, priv, - tun_type, extack); + tun_type, ipv6_tun, + extack); if (err) return err; @@ -491,6 +540,7 @@ nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer) goto err_free_mask; flow_pay->nfp_tun_ipv4_addr = 0; + flow_pay->nfp_tun_ipv6 = NULL; flow_pay->meta.flags = 0; INIT_LIST_HEAD(&flow_pay->linked_flows); flow_pay->in_hw = false; @@ -517,10 +567,12 @@ nfp_flower_update_merge_with_actions(struct nfp_fl_payload *flow, struct nfp_fl_set_ip4_addrs *ipv4_add; struct nfp_fl_set_ipv6_addr *ipv6_add; struct nfp_fl_push_vlan *push_vlan; + struct nfp_fl_pre_tunnel *pre_tun; struct nfp_fl_set_tport *tport; struct nfp_fl_set_eth *eth; struct nfp_fl_act_head *a; unsigned int act_off = 0; + bool ipv6_tun = false; u8 act_id = 0; u8 *ports; int i; @@ -542,14 +594,18 @@ nfp_flower_update_merge_with_actions(struct nfp_fl_payload *flow, case NFP_FL_ACTION_OPCODE_POP_VLAN: merge->tci = cpu_to_be16(0); break; - case NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL: + case NFP_FL_ACTION_OPCODE_SET_TUNNEL: /* New tunnel header means l2 to l4 can be matched. 
*/ eth_broadcast_addr(&merge->l2.mac_dst[0]); eth_broadcast_addr(&merge->l2.mac_src[0]); memset(&merge->l4, 0xff, sizeof(struct nfp_flower_tp_ports)); - memset(&merge->ipv4, 0xff, - sizeof(struct nfp_flower_ipv4)); + if (ipv6_tun) + memset(&merge->ipv6, 0xff, + sizeof(struct nfp_flower_ipv6)); + else + memset(&merge->ipv4, 0xff, + sizeof(struct nfp_flower_ipv4)); break; case NFP_FL_ACTION_OPCODE_SET_ETHERNET: eth = (struct nfp_fl_set_eth *)a; @@ -597,6 +653,10 @@ nfp_flower_update_merge_with_actions(struct nfp_fl_payload *flow, ports[i] |= tport->tp_port_mask[i]; break; case NFP_FL_ACTION_OPCODE_PRE_TUNNEL: + pre_tun = (struct nfp_fl_pre_tunnel *)a; + ipv6_tun = be16_to_cpu(pre_tun->flags) & + NFP_FL_PRE_TUN_IPV6; + break; case NFP_FL_ACTION_OPCODE_PRE_LAG: case NFP_FL_ACTION_OPCODE_PUSH_GENEVE: break; @@ -765,15 +825,15 @@ nfp_fl_verify_post_tun_acts(char *acts, int len, struct nfp_fl_push_vlan **vlan) static int nfp_fl_push_vlan_after_tun(char *acts, int len, struct nfp_fl_push_vlan *vlan) { - struct nfp_fl_set_ipv4_tun *tun; + struct nfp_fl_set_tun *tun; struct nfp_fl_act_head *a; unsigned int act_off = 0; while (act_off < len) { a = (struct nfp_fl_act_head *)&acts[act_off]; - if (a->jump_id == NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL) { - tun = (struct nfp_fl_set_ipv4_tun *)a; + if (a->jump_id == NFP_FL_ACTION_OPCODE_SET_TUNNEL) { + tun = (struct nfp_fl_set_tun *)a; tun->outer_vlan_tpid = vlan->vlan_tpid; tun->outer_vlan_tci = vlan->vlan_tci; @@ -1058,15 +1118,22 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app, return -EOPNOTSUPP; } - if (key_layer & NFP_FLOWER_LAYER_IPV4) { + if (key_layer & NFP_FLOWER_LAYER_IPV4 || + key_layer & NFP_FLOWER_LAYER_IPV6) { + /* Flags and proto fields have same offset in IPv4 and IPv6. */ int ip_flags = offsetof(struct nfp_flower_ipv4, ip_ext.flags); int ip_proto = offsetof(struct nfp_flower_ipv4, ip_ext.proto); + int size; int i; + size = key_layer & NFP_FLOWER_LAYER_IPV4 ? 
+ sizeof(struct nfp_flower_ipv4) : + sizeof(struct nfp_flower_ipv6); + mask += sizeof(struct nfp_flower_mac_mpls); /* Ensure proto and flags are the only IP layer fields. */ - for (i = 0; i < sizeof(struct nfp_flower_ipv4); i++) + for (i = 0; i < size; i++) if (mask[i] && i != ip_flags && i != ip_proto) { NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: only flags and proto can be matched in ip header"); return -EOPNOTSUPP; @@ -1195,6 +1262,8 @@ err_remove_rhash: err_release_metadata: nfp_modify_flow_metadata(app, flow_pay); err_destroy_flow: + if (flow_pay->nfp_tun_ipv6) + nfp_tunnel_put_ipv6_off(app, flow_pay->nfp_tun_ipv6); kfree(flow_pay->action_data); kfree(flow_pay->mask_data); kfree(flow_pay->unmasked_data); @@ -1311,6 +1380,9 @@ nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev, if (nfp_flow->nfp_tun_ipv4_addr) nfp_tunnel_del_ipv4_off(app, nfp_flow->nfp_tun_ipv4_addr); + if (nfp_flow->nfp_tun_ipv6) + nfp_tunnel_put_ipv6_off(app, nfp_flow->nfp_tun_ipv6); + if (!nfp_flow->in_hw) { err = 0; goto err_free_merge_flow; diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 2600ce476d6b..2df3deedf9fd 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -55,6 +55,25 @@ struct nfp_tun_active_tuns { }; /** + * struct nfp_tun_active_tuns_v6 - periodic message of active IPv6 tunnels + * @seq: sequence number of the message + * @count: number of tunnels report in message + * @flags: options part of the request + * @tun_info.ipv6: dest IPv6 address of active route + * @tun_info.egress_port: port the encapsulated packet egressed + * @tun_info: tunnels that have sent traffic in reported period + */ +struct nfp_tun_active_tuns_v6 { + __be32 seq; + __be32 count; + __be32 flags; + struct route_ip_info_v6 { + struct in6_addr ipv6; + __be32 egress_port; + } tun_info[]; +}; + +/** * struct 
nfp_tun_neigh - neighbour/route entry on the NFP * @dst_ipv4: destination IPv4 address * @src_ipv4: source IPv4 address @@ -71,6 +90,22 @@ struct nfp_tun_neigh { }; /** + * struct nfp_tun_neigh_v6 - neighbour/route entry on the NFP + * @dst_ipv6: destination IPv6 address + * @src_ipv6: source IPv6 address + * @dst_addr: destination MAC address + * @src_addr: source MAC address + * @port_id: NFP port to output packet on - associated with source IPv6 + */ +struct nfp_tun_neigh_v6 { + struct in6_addr dst_ipv6; + struct in6_addr src_ipv6; + u8 dst_addr[ETH_ALEN]; + u8 src_addr[ETH_ALEN]; + __be32 port_id; +}; + +/** * struct nfp_tun_req_route_ipv4 - NFP requests a route/neighbour lookup * @ingress_port: ingress port of packet that signalled request * @ipv4_addr: destination ipv4 address for route @@ -83,13 +118,23 @@ struct nfp_tun_req_route_ipv4 { }; /** - * struct nfp_ipv4_route_entry - routes that are offloaded to the NFP - * @ipv4_addr: destination of route + * struct nfp_tun_req_route_ipv6 - NFP requests an IPv6 route/neighbour lookup + * @ingress_port: ingress port of packet that signalled request + * @ipv6_addr: destination ipv6 address for route + */ +struct nfp_tun_req_route_ipv6 { + __be32 ingress_port; + struct in6_addr ipv6_addr; +}; + +/** + * struct nfp_offloaded_route - routes that are offloaded to the NFP * @list: list pointer + * @ip_add: destination of route - can be IPv4 or IPv6 */ -struct nfp_ipv4_route_entry { - __be32 ipv4_addr; +struct nfp_offloaded_route { struct list_head list; + u8 ip_add[]; }; #define NFP_FL_IPV4_ADDRS_MAX 32 @@ -116,6 +161,18 @@ struct nfp_ipv4_addr_entry { struct list_head list; }; +#define NFP_FL_IPV6_ADDRS_MAX 4 + +/** + * struct nfp_tun_ipv6_addr - set the IP address list on the NFP + * @count: number of IPs populated in the array + * @ipv6_addr: array of IPV6_ADDRS_MAX 128 bit IPv6 addresses + */ +struct nfp_tun_ipv6_addr { + __be32 count; + struct in6_addr ipv6_addr[NFP_FL_IPV6_ADDRS_MAX]; +}; + #define 
NFP_TUN_MAC_OFFLOAD_DEL_FLAG 0x2 /** @@ -206,6 +263,49 @@ void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb) rcu_read_unlock(); } +void nfp_tunnel_keep_alive_v6(struct nfp_app *app, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct nfp_tun_active_tuns_v6 *payload; + struct net_device *netdev; + int count, i, pay_len; + struct neighbour *n; + void *ipv6_add; + u32 port; + + payload = nfp_flower_cmsg_get_data(skb); + count = be32_to_cpu(payload->count); + if (count > NFP_FL_IPV6_ADDRS_MAX) { + nfp_flower_cmsg_warn(app, "IPv6 tunnel keep-alive request exceeds max routes.\n"); + return; + } + + pay_len = nfp_flower_cmsg_get_data_len(skb); + if (pay_len != struct_size(payload, tun_info, count)) { + nfp_flower_cmsg_warn(app, "Corruption in tunnel keep-alive message.\n"); + return; + } + + rcu_read_lock(); + for (i = 0; i < count; i++) { + ipv6_add = &payload->tun_info[i].ipv6; + port = be32_to_cpu(payload->tun_info[i].egress_port); + netdev = nfp_app_dev_get(app, port, NULL); + if (!netdev) + continue; + + n = neigh_lookup(&nd_tbl, ipv6_add, netdev); + if (!n) + continue; + + /* Update the used timestamp of neighbour */ + neigh_event_send(n, NULL); + neigh_release(n); + } + rcu_read_unlock(); +#endif +} + static int nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata, gfp_t flag) @@ -224,71 +324,126 @@ nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata, return 0; } -static bool nfp_tun_has_route(struct nfp_app *app, __be32 ipv4_addr) +static bool +__nfp_tun_has_route(struct list_head *route_list, spinlock_t *list_lock, + void *add, int add_len) { - struct nfp_flower_priv *priv = app->priv; - struct nfp_ipv4_route_entry *entry; - struct list_head *ptr, *storage; + struct nfp_offloaded_route *entry; - spin_lock_bh(&priv->tun.neigh_off_lock); - list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) { - entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); - if (entry->ipv4_addr == 
ipv4_addr) { - spin_unlock_bh(&priv->tun.neigh_off_lock); + spin_lock_bh(list_lock); + list_for_each_entry(entry, route_list, list) + if (!memcmp(entry->ip_add, add, add_len)) { + spin_unlock_bh(list_lock); return true; } - } - spin_unlock_bh(&priv->tun.neigh_off_lock); + spin_unlock_bh(list_lock); return false; } -static void nfp_tun_add_route_to_cache(struct nfp_app *app, __be32 ipv4_addr) +static int +__nfp_tun_add_route_to_cache(struct list_head *route_list, + spinlock_t *list_lock, void *add, int add_len) { - struct nfp_flower_priv *priv = app->priv; - struct nfp_ipv4_route_entry *entry; - struct list_head *ptr, *storage; + struct nfp_offloaded_route *entry; - spin_lock_bh(&priv->tun.neigh_off_lock); - list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) { - entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); - if (entry->ipv4_addr == ipv4_addr) { - spin_unlock_bh(&priv->tun.neigh_off_lock); - return; + spin_lock_bh(list_lock); + list_for_each_entry(entry, route_list, list) + if (!memcmp(entry->ip_add, add, add_len)) { + spin_unlock_bh(list_lock); + return 0; } - } - entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + + entry = kmalloc(sizeof(*entry) + add_len, GFP_ATOMIC); if (!entry) { - spin_unlock_bh(&priv->tun.neigh_off_lock); - nfp_flower_cmsg_warn(app, "Mem error when storing new route.\n"); - return; + spin_unlock_bh(list_lock); + return -ENOMEM; } - entry->ipv4_addr = ipv4_addr; - list_add_tail(&entry->list, &priv->tun.neigh_off_list); - spin_unlock_bh(&priv->tun.neigh_off_lock); + memcpy(entry->ip_add, add, add_len); + list_add_tail(&entry->list, route_list); + spin_unlock_bh(list_lock); + + return 0; } -static void nfp_tun_del_route_from_cache(struct nfp_app *app, __be32 ipv4_addr) +static void +__nfp_tun_del_route_from_cache(struct list_head *route_list, + spinlock_t *list_lock, void *add, int add_len) { - struct nfp_flower_priv *priv = app->priv; - struct nfp_ipv4_route_entry *entry; - struct list_head *ptr, *storage; + struct 
nfp_offloaded_route *entry; - spin_lock_bh(&priv->tun.neigh_off_lock); - list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) { - entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); - if (entry->ipv4_addr == ipv4_addr) { + spin_lock_bh(list_lock); + list_for_each_entry(entry, route_list, list) + if (!memcmp(entry->ip_add, add, add_len)) { list_del(&entry->list); kfree(entry); break; } - } - spin_unlock_bh(&priv->tun.neigh_off_lock); + spin_unlock_bh(list_lock); +} + +static bool nfp_tun_has_route_v4(struct nfp_app *app, __be32 *ipv4_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + return __nfp_tun_has_route(&priv->tun.neigh_off_list_v4, + &priv->tun.neigh_off_lock_v4, ipv4_addr, + sizeof(*ipv4_addr)); +} + +static bool +nfp_tun_has_route_v6(struct nfp_app *app, struct in6_addr *ipv6_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + return __nfp_tun_has_route(&priv->tun.neigh_off_list_v6, + &priv->tun.neigh_off_lock_v6, ipv6_addr, + sizeof(*ipv6_addr)); +} + +static void +nfp_tun_add_route_to_cache_v4(struct nfp_app *app, __be32 *ipv4_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + __nfp_tun_add_route_to_cache(&priv->tun.neigh_off_list_v4, + &priv->tun.neigh_off_lock_v4, ipv4_addr, + sizeof(*ipv4_addr)); +} + +static void +nfp_tun_add_route_to_cache_v6(struct nfp_app *app, struct in6_addr *ipv6_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + __nfp_tun_add_route_to_cache(&priv->tun.neigh_off_list_v6, + &priv->tun.neigh_off_lock_v6, ipv6_addr, + sizeof(*ipv6_addr)); } static void -nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, - struct flowi4 *flow, struct neighbour *neigh, gfp_t flag) +nfp_tun_del_route_from_cache_v4(struct nfp_app *app, __be32 *ipv4_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + __nfp_tun_del_route_from_cache(&priv->tun.neigh_off_list_v4, + &priv->tun.neigh_off_lock_v4, ipv4_addr, + sizeof(*ipv4_addr)); +} + +static void +nfp_tun_del_route_from_cache_v6(struct nfp_app 
*app, struct in6_addr *ipv6_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + __nfp_tun_del_route_from_cache(&priv->tun.neigh_off_list_v6, + &priv->tun.neigh_off_lock_v6, ipv6_addr, + sizeof(*ipv6_addr)); +} + +static void +nfp_tun_write_neigh_v4(struct net_device *netdev, struct nfp_app *app, + struct flowi4 *flow, struct neighbour *neigh, gfp_t flag) { struct nfp_tun_neigh payload; u32 port_id; @@ -302,7 +457,7 @@ nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, /* If entry has expired send dst IP with all other fields 0. */ if (!(neigh->nud_state & NUD_VALID) || neigh->dead) { - nfp_tun_del_route_from_cache(app, payload.dst_ipv4); + nfp_tun_del_route_from_cache_v4(app, &payload.dst_ipv4); /* Trigger ARP to verify invalid neighbour state. */ neigh_event_send(neigh, NULL); goto send_msg; @@ -314,7 +469,7 @@ nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, neigh_ha_snapshot(payload.dst_addr, neigh, netdev); payload.port_id = cpu_to_be32(port_id); /* Add destination of new route to NFP cache. */ - nfp_tun_add_route_to_cache(app, payload.dst_ipv4); + nfp_tun_add_route_to_cache_v4(app, &payload.dst_ipv4); send_msg: nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH, @@ -322,16 +477,54 @@ send_msg: (unsigned char *)&payload, flag); } +static void +nfp_tun_write_neigh_v6(struct net_device *netdev, struct nfp_app *app, + struct flowi6 *flow, struct neighbour *neigh, gfp_t flag) +{ + struct nfp_tun_neigh_v6 payload; + u32 port_id; + + port_id = nfp_flower_get_port_id_from_netdev(app, netdev); + if (!port_id) + return; + + memset(&payload, 0, sizeof(struct nfp_tun_neigh_v6)); + payload.dst_ipv6 = flow->daddr; + + /* If entry has expired send dst IP with all other fields 0. */ + if (!(neigh->nud_state & NUD_VALID) || neigh->dead) { + nfp_tun_del_route_from_cache_v6(app, &payload.dst_ipv6); + /* Trigger probe to verify invalid neighbour state. 
*/ + neigh_event_send(neigh, NULL); + goto send_msg; + } + + /* Have a valid neighbour so populate rest of entry. */ + payload.src_ipv6 = flow->saddr; + ether_addr_copy(payload.src_addr, netdev->dev_addr); + neigh_ha_snapshot(payload.dst_addr, neigh, netdev); + payload.port_id = cpu_to_be32(port_id); + /* Add destination of new route to NFP cache. */ + nfp_tun_add_route_to_cache_v6(app, &payload.dst_ipv6); + +send_msg: + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6, + sizeof(struct nfp_tun_neigh_v6), + (unsigned char *)&payload, flag); +} + static int nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, void *ptr) { struct nfp_flower_priv *app_priv; struct netevent_redirect *redir; - struct flowi4 flow = {}; + struct flowi4 flow4 = {}; + struct flowi6 flow6 = {}; struct neighbour *n; struct nfp_app *app; struct rtable *rt; + bool ipv6 = false; int err; switch (event) { @@ -346,7 +539,13 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, return NOTIFY_DONE; } - flow.daddr = *(__be32 *)n->primary_key; + if (n->tbl->family == AF_INET6) + ipv6 = true; + + if (ipv6) + flow6.daddr = *(struct in6_addr *)n->primary_key; + else + flow4.daddr = *(__be32 *)n->primary_key; app_priv = container_of(nb, struct nfp_flower_priv, tun.neigh_nb); app = app_priv->app; @@ -356,28 +555,46 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, return NOTIFY_DONE; /* Only concerned with changes to routes already added to NFP. */ - if (!nfp_tun_has_route(app, flow.daddr)) + if ((ipv6 && !nfp_tun_has_route_v6(app, &flow6.daddr)) || + (!ipv6 && !nfp_tun_has_route_v4(app, &flow4.daddr))) return NOTIFY_DONE; #if IS_ENABLED(CONFIG_INET) - /* Do a route lookup to populate flow data. 
*/ - rt = ip_route_output_key(dev_net(n->dev), &flow); - err = PTR_ERR_OR_ZERO(rt); - if (err) + if (ipv6) { +#if IS_ENABLED(CONFIG_IPV6) + struct dst_entry *dst; + + dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(n->dev), NULL, + &flow6, NULL); + if (IS_ERR(dst)) + return NOTIFY_DONE; + + dst_release(dst); + flow6.flowi6_proto = IPPROTO_UDP; + nfp_tun_write_neigh_v6(n->dev, app, &flow6, n, GFP_ATOMIC); +#else return NOTIFY_DONE; +#endif /* CONFIG_IPV6 */ + } else { + /* Do a route lookup to populate flow data. */ + rt = ip_route_output_key(dev_net(n->dev), &flow4); + err = PTR_ERR_OR_ZERO(rt); + if (err) + return NOTIFY_DONE; - ip_rt_put(rt); + ip_rt_put(rt); + + flow4.flowi4_proto = IPPROTO_UDP; + nfp_tun_write_neigh_v4(n->dev, app, &flow4, n, GFP_ATOMIC); + } #else return NOTIFY_DONE; -#endif - - flow.flowi4_proto = IPPROTO_UDP; - nfp_tun_write_neigh(n->dev, app, &flow, n, GFP_ATOMIC); +#endif /* CONFIG_INET */ return NOTIFY_OK; } -void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb) +void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb) { struct nfp_tun_req_route_ipv4 *payload; struct net_device *netdev; @@ -411,7 +628,7 @@ void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb) ip_rt_put(rt); if (!n) goto fail_rcu_unlock; - nfp_tun_write_neigh(n->dev, app, &flow, n, GFP_ATOMIC); + nfp_tun_write_neigh_v4(n->dev, app, &flow, n, GFP_ATOMIC); neigh_release(n); rcu_read_unlock(); return; @@ -421,6 +638,48 @@ fail_rcu_unlock: nfp_flower_cmsg_warn(app, "Requested route not found.\n"); } +void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_tun_req_route_ipv6 *payload; + struct net_device *netdev; + struct flowi6 flow = {}; + struct dst_entry *dst; + struct neighbour *n; + + payload = nfp_flower_cmsg_get_data(skb); + + rcu_read_lock(); + netdev = nfp_app_dev_get(app, be32_to_cpu(payload->ingress_port), NULL); + if (!netdev) + goto fail_rcu_unlock; + + flow.daddr = 
payload->ipv6_addr; + flow.flowi6_proto = IPPROTO_UDP; + +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(netdev), NULL, &flow, + NULL); + if (IS_ERR(dst)) + goto fail_rcu_unlock; +#else + goto fail_rcu_unlock; +#endif + + n = dst_neigh_lookup(dst, &flow.daddr); + dst_release(dst); + if (!n) + goto fail_rcu_unlock; + + nfp_tun_write_neigh_v6(n->dev, app, &flow, n, GFP_ATOMIC); + neigh_release(n); + rcu_read_unlock(); + return; + +fail_rcu_unlock: + rcu_read_unlock(); + nfp_flower_cmsg_warn(app, "Requested IPv6 route not found.\n"); +} + static void nfp_tun_write_ipv4_list(struct nfp_app *app) { struct nfp_flower_priv *priv = app->priv; @@ -502,6 +761,78 @@ void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4) nfp_tun_write_ipv4_list(app); } +static void nfp_tun_write_ipv6_list(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv6_addr_entry *entry; + struct nfp_tun_ipv6_addr payload; + int count = 0; + + memset(&payload, 0, sizeof(struct nfp_tun_ipv6_addr)); + mutex_lock(&priv->tun.ipv6_off_lock); + list_for_each_entry(entry, &priv->tun.ipv6_off_list, list) { + if (count >= NFP_FL_IPV6_ADDRS_MAX) { + nfp_flower_cmsg_warn(app, "Too many IPv6 tunnel endpoint addresses, some cannot be offloaded.\n"); + break; + } + payload.ipv6_addr[count++] = entry->ipv6_addr; + } + mutex_unlock(&priv->tun.ipv6_off_lock); + payload.count = cpu_to_be32(count); + + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_IPS_V6, + sizeof(struct nfp_tun_ipv6_addr), + &payload, GFP_KERNEL); +} + +struct nfp_ipv6_addr_entry * +nfp_tunnel_add_ipv6_off(struct nfp_app *app, struct in6_addr *ipv6) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv6_addr_entry *entry; + + mutex_lock(&priv->tun.ipv6_off_lock); + list_for_each_entry(entry, &priv->tun.ipv6_off_list, list) + if (!memcmp(&entry->ipv6_addr, ipv6, sizeof(*ipv6))) { + entry->ref_count++; + mutex_unlock(&priv->tun.ipv6_off_lock); 
+ return entry; + } + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + mutex_unlock(&priv->tun.ipv6_off_lock); + nfp_flower_cmsg_warn(app, "Mem error when offloading IP address.\n"); + return NULL; + } + entry->ipv6_addr = *ipv6; + entry->ref_count = 1; + list_add_tail(&entry->list, &priv->tun.ipv6_off_list); + mutex_unlock(&priv->tun.ipv6_off_lock); + + nfp_tun_write_ipv6_list(app); + + return entry; +} + +void +nfp_tunnel_put_ipv6_off(struct nfp_app *app, struct nfp_ipv6_addr_entry *entry) +{ + struct nfp_flower_priv *priv = app->priv; + bool freed = false; + + mutex_lock(&priv->tun.ipv6_off_lock); + if (!--entry->ref_count) { + list_del(&entry->list); + kfree(entry); + freed = true; + } + mutex_unlock(&priv->tun.ipv6_off_lock); + + if (freed) + nfp_tun_write_ipv6_list(app); +} + static int __nfp_tunnel_offload_mac(struct nfp_app *app, u8 *mac, u16 idx, bool del) { @@ -1013,13 +1344,17 @@ int nfp_tunnel_config_start(struct nfp_app *app) ida_init(&priv->tun.mac_off_ids); - /* Initialise priv data for IPv4 offloading. */ + /* Initialise priv data for IPv4/v6 offloading. */ mutex_init(&priv->tun.ipv4_off_lock); INIT_LIST_HEAD(&priv->tun.ipv4_off_list); + mutex_init(&priv->tun.ipv6_off_lock); + INIT_LIST_HEAD(&priv->tun.ipv6_off_list); /* Initialise priv data for neighbour offloading. 
*/ - spin_lock_init(&priv->tun.neigh_off_lock); - INIT_LIST_HEAD(&priv->tun.neigh_off_list); + spin_lock_init(&priv->tun.neigh_off_lock_v4); + INIT_LIST_HEAD(&priv->tun.neigh_off_list_v4); + spin_lock_init(&priv->tun.neigh_off_lock_v6); + INIT_LIST_HEAD(&priv->tun.neigh_off_list_v6); priv->tun.neigh_nb.notifier_call = nfp_tun_neigh_event_handler; err = register_netevent_notifier(&priv->tun.neigh_nb); @@ -1034,9 +1369,11 @@ int nfp_tunnel_config_start(struct nfp_app *app) void nfp_tunnel_config_stop(struct nfp_app *app) { + struct nfp_offloaded_route *route_entry, *temp; struct nfp_flower_priv *priv = app->priv; - struct nfp_ipv4_route_entry *route_entry; struct nfp_ipv4_addr_entry *ip_entry; + struct nfp_tun_neigh_v6 ipv6_route; + struct nfp_tun_neigh ipv4_route; struct list_head *ptr, *storage; unregister_netevent_notifier(&priv->tun.neigh_nb); @@ -1050,12 +1387,35 @@ void nfp_tunnel_config_stop(struct nfp_app *app) kfree(ip_entry); } - /* Free any memory that may be occupied by the route list. */ - list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) { - route_entry = list_entry(ptr, struct nfp_ipv4_route_entry, - list); + mutex_destroy(&priv->tun.ipv6_off_lock); + + /* Free memory in the route list and remove entries from fw cache. 
*/ + list_for_each_entry_safe(route_entry, temp, + &priv->tun.neigh_off_list_v4, list) { + memset(&ipv4_route, 0, sizeof(ipv4_route)); + memcpy(&ipv4_route.dst_ipv4, &route_entry->ip_add, + sizeof(ipv4_route.dst_ipv4)); list_del(&route_entry->list); kfree(route_entry); + + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH, + sizeof(struct nfp_tun_neigh), + (unsigned char *)&ipv4_route, + GFP_KERNEL); + } + + list_for_each_entry_safe(route_entry, temp, + &priv->tun.neigh_off_list_v6, list) { + memset(&ipv6_route, 0, sizeof(ipv6_route)); + memcpy(&ipv6_route.dst_ipv6, &route_entry->ip_add, + sizeof(ipv6_route.dst_ipv6)); + list_del(&route_entry->list); + kfree(route_entry); + + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6, + sizeof(struct nfp_tun_neigh_v6), + (unsigned char *)&ipv6_route, + GFP_KERNEL); } /* Destroy rhash. Entries should be cleaned on netdev notifier unreg. */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 250f510b1d21..ff4438478ea9 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -586,6 +586,9 @@ struct nfp_net_dp { * @ktls_conn_id_gen: Trivial generator for kTLS connection ids (for TX) * @ktls_no_space: Counter of firmware rejecting kTLS connection due to * lack of space + * @ktls_rx_resync_req: Counter of TLS RX resync requested + * @ktls_rx_resync_ign: Counter of TLS RX resync requests ignored + * @ktls_rx_resync_sent: Counter of TLS RX resync completed * @mbox_cmsg: Common Control Message via vNIC mailbox state * @mbox_cmsg.queue: CCM mbox queue of pending messages * @mbox_cmsg.wq: CCM mbox wait queue of waiting processes @@ -674,6 +677,9 @@ struct nfp_net { atomic64_t ktls_conn_id_gen; atomic_t ktls_no_space; + atomic_t ktls_rx_resync_req; + atomic_t ktls_rx_resync_ign; + atomic_t ktls_rx_resync_sent; struct { struct sk_buff_head queue; diff --git 
a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index bcdcd6de7dea..9bfb3b077bc1 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -47,6 +47,7 @@ #include "nfp_net_sriov.h" #include "nfp_port.h" #include "crypto/crypto.h" +#include "crypto/fw.h" /** * nfp_net_get_fw_version() - Read and parse the FW version @@ -1321,17 +1322,11 @@ nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) netdev_tx_reset_queue(nd_q); } -static void nfp_net_tx_timeout(struct net_device *netdev) +static void nfp_net_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct nfp_net *nn = netdev_priv(netdev); - int i; - for (i = 0; i < nn->dp.netdev->real_num_tx_queues; i++) { - if (!netif_tx_queue_stopped(netdev_get_tx_queue(netdev, i))) - continue; - nn_warn(nn, "TX timeout on ring: %d\n", i); - } - nn_warn(nn, "TX watchdog timeout\n"); + nn_warn(nn, "TX watchdog timeout on ring: %u\n", txqueue); } /* Receive processing @@ -1667,9 +1662,9 @@ nfp_net_set_hash_desc(struct net_device *netdev, struct nfp_meta_parsed *meta, &rx_hash->hash); } -static void * +static bool nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, - void *data, int meta_len) + void *data, void *pkt, unsigned int pkt_len, int meta_len) { u32 meta_info; @@ -1699,14 +1694,20 @@ nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, (__force __wsum)__get_unaligned_cpu32(data); data += 4; break; + case NFP_NET_META_RESYNC_INFO: + if (nfp_net_tls_rx_resync_req(netdev, data, pkt, + pkt_len)) + return false; + data += sizeof(struct nfp_net_tls_resync_req); + break; default: - return NULL; + return true; } meta_info >>= NFP_NET_META_FIELD_SIZE; } - return data; + return data != pkt; } static void @@ -1891,12 +1892,10 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) 
nfp_net_set_hash_desc(dp->netdev, &meta, rxbuf->frag + meta_off, rxd); } else if (meta_len) { - void *end; - - end = nfp_net_parse_meta(dp->netdev, &meta, - rxbuf->frag + meta_off, - meta_len); - if (unlikely(end != rxbuf->frag + pkt_off)) { + if (unlikely(nfp_net_parse_meta(dp->netdev, &meta, + rxbuf->frag + meta_off, + rxbuf->frag + pkt_off, + pkt_len, meta_len))) { nn_dp_warn(dp, "invalid RX packet metadata\n"); nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c index d835c14b7257..c3a763134e79 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c @@ -17,6 +17,30 @@ static void nfp_net_tlv_caps_reset(struct nfp_net_tlv_caps *caps) caps->mbox_len = NFP_NET_CFG_MBOX_VAL_MAX_SZ; } +static bool +nfp_net_tls_parse_crypto_ops(struct device *dev, struct nfp_net_tlv_caps *caps, + u8 __iomem *ctrl_mem, u8 __iomem *data, + unsigned int length, unsigned int offset, + bool rx_stream_scan) +{ + /* Ignore the legacy TLV if new one was already parsed */ + if (caps->tls_resync_ss && !rx_stream_scan) + return true; + + if (length < 32) { + dev_err(dev, + "CRYPTO OPS TLV should be at least 32B, is %dB offset:%u\n", + length, offset); + return false; + } + + caps->crypto_ops = readl(data); + caps->crypto_enable_off = data - ctrl_mem + 16; + caps->tls_resync_ss = rx_stream_scan; + + return true; +} + int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem, struct nfp_net_tlv_caps *caps) { @@ -104,15 +128,25 @@ int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem, caps->mbox_cmsg_types = readl(data); break; case NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS: - if (length < 32) { - dev_err(dev, - "CRYPTO OPS TLV should be at least 32B, is %dB offset:%u\n", - length, offset); + if (!nfp_net_tls_parse_crypto_ops(dev, caps, ctrl_mem, + data, length, offset, + false)) return -EINVAL; + break; + case 
NFP_NET_CFG_TLV_TYPE_VNIC_STATS: + if ((data - ctrl_mem) % 8) { + dev_warn(dev, "VNIC STATS TLV misaligned, ignoring offset:%u len:%u\n", + offset, length); + break; } - - caps->crypto_ops = readl(data); - caps->crypto_enable_off = data - ctrl_mem + 16; + caps->vnic_stats_off = data - ctrl_mem; + caps->vnic_stats_cnt = length / 10; + break; + case NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS_RX_SCAN: + if (!nfp_net_tls_parse_crypto_ops(dev, caps, ctrl_mem, + data, length, offset, + true)) + return -EINVAL; break; default: if (!FIELD_GET(NFP_NET_CFG_TLV_HEADER_REQUIRED, hdr)) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index ee6b24e4eacd..3d61a8cb60b0 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -45,6 +45,7 @@ #define NFP_NET_META_PORTID 5 #define NFP_NET_META_CSUM 6 /* checksum complete type */ #define NFP_NET_META_CONN_HANDLE 7 +#define NFP_NET_META_RESYNC_INFO 8 /* RX resync info request */ #define NFP_META_PORT_ID_CTRL ~0U @@ -479,6 +480,22 @@ * 8 words, bitmaps of supported and enabled crypto operations. * First 16B (4 words) contains a bitmap of supported crypto operations, * and next 16B contain the enabled operations. + * This capability is made obsolete by ones with better sync methods. + * + * %NFP_NET_CFG_TLV_TYPE_VNIC_STATS: + * Variable, per-vNIC statistics, data should be 8B aligned (FW should insert + * zero-length RESERVED TLV to pad). + * TLV data has two sections. First is an array of statistics' IDs (2B each). + * Second 8B statistics themselves. Statistics are 8B aligned, meaning there + * may be a padding between sections. + * Number of statistics can be determined as floor(tlv.length / (2 + 8)). + * This TLV overwrites %NFP_NET_CFG_STATS_* values (statistics in this TLV + * duplicate the old ones, so driver should be careful not to unnecessarily + * render both). 
+ * + * %NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS_RX_SCAN: + * Same as %NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS, but crypto TLS does stream scan + * RX sync, rather than kernel-assisted sync. */ #define NFP_NET_CFG_TLV_TYPE_UNKNOWN 0 #define NFP_NET_CFG_TLV_TYPE_RESERVED 1 @@ -490,6 +507,8 @@ #define NFP_NET_CFG_TLV_TYPE_REPR_CAP 7 #define NFP_NET_CFG_TLV_TYPE_MBOX_CMSG_TYPES 10 #define NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS 11 /* see crypto/fw.h */ +#define NFP_NET_CFG_TLV_TYPE_VNIC_STATS 12 +#define NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS_RX_SCAN 13 struct device; @@ -502,6 +521,9 @@ struct device; * @mbox_cmsg_types: cmsgs which can be passed through the mailbox * @crypto_ops: supported crypto operations * @crypto_enable_off: offset of crypto ops enable region + * @vnic_stats_off: offset of vNIC stats area + * @vnic_stats_cnt: number of vNIC stats + * @tls_resync_ss: TLS resync will be performed via stream scan */ struct nfp_net_tlv_caps { u32 me_freq_mhz; @@ -511,6 +533,9 @@ struct nfp_net_tlv_caps { u32 mbox_cmsg_types; u32 crypto_ops; unsigned int crypto_enable_off; + unsigned int vnic_stats_off; + unsigned int vnic_stats_cnt; + unsigned int tls_resync_ss:1; }; int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 1b840ee47339..d648e32c0520 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -148,11 +148,33 @@ static const struct nfp_et_stat nfp_mac_et_stats[] = { { "tx_pause_frames_class7", NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS7, }, }; +static const char nfp_tlv_stat_names[][ETH_GSTRING_LEN] = { + [1] = "dev_rx_discards", + [2] = "dev_rx_errors", + [3] = "dev_rx_bytes", + [4] = "dev_rx_uc_bytes", + [5] = "dev_rx_mc_bytes", + [6] = "dev_rx_bc_bytes", + [7] = "dev_rx_pkts", + [8] = "dev_rx_mc_pkts", + [9] = "dev_rx_bc_pkts", + + [10] = "dev_tx_discards", + [11] = "dev_tx_errors", 
+ [12] = "dev_tx_bytes", + [13] = "dev_tx_uc_bytes", + [14] = "dev_tx_mc_bytes", + [15] = "dev_tx_bc_bytes", + [16] = "dev_tx_pkts", + [17] = "dev_tx_mc_pkts", + [18] = "dev_tx_bc_pkts", +}; + #define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats) #define NN_ET_SWITCH_STATS_LEN 9 #define NN_RVEC_GATHER_STATS 13 #define NN_RVEC_PER_Q_STATS 3 -#define NN_CTRL_PATH_STATS 1 +#define NN_CTRL_PATH_STATS 4 #define SFP_SFF_REV_COMPLIANCE 1 @@ -454,6 +476,9 @@ static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data) data = nfp_pr_et(data, "tx_tls_drop_no_sync_data"); data = nfp_pr_et(data, "hw_tls_no_space"); + data = nfp_pr_et(data, "rx_tls_resync_req_ok"); + data = nfp_pr_et(data, "rx_tls_resync_req_ign"); + data = nfp_pr_et(data, "rx_tls_resync_sent"); return data; } @@ -502,6 +527,9 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) *data++ = gathered_stats[j]; *data++ = atomic_read(&nn->ktls_no_space); + *data++ = atomic_read(&nn->ktls_rx_resync_req); + *data++ = atomic_read(&nn->ktls_rx_resync_ign); + *data++ = atomic_read(&nn->ktls_rx_resync_sent); return data; } @@ -560,6 +588,65 @@ nfp_vnic_get_hw_stats(u64 *data, u8 __iomem *mem, unsigned int num_vecs) return data; } +static unsigned int nfp_vnic_get_tlv_stats_count(struct nfp_net *nn) +{ + return nn->tlv_caps.vnic_stats_cnt + nn->max_r_vecs * 4; +} + +static u8 *nfp_vnic_get_tlv_stats_strings(struct nfp_net *nn, u8 *data) +{ + unsigned int i, id; + u8 __iomem *mem; + u64 id_word = 0; + + mem = nn->dp.ctrl_bar + nn->tlv_caps.vnic_stats_off; + for (i = 0; i < nn->tlv_caps.vnic_stats_cnt; i++) { + if (!(i % 4)) + id_word = readq(mem + i * 2); + + id = (u16)id_word; + id_word >>= 16; + + if (id < ARRAY_SIZE(nfp_tlv_stat_names) && + nfp_tlv_stat_names[id][0]) { + memcpy(data, nfp_tlv_stat_names[id], ETH_GSTRING_LEN); + data += ETH_GSTRING_LEN; + } else { + data = nfp_pr_et(data, "dev_unknown_stat%u", id); + } + } + + for (i = 0; i < nn->max_r_vecs; i++) { + data = 
nfp_pr_et(data, "rxq_%u_pkts", i); + data = nfp_pr_et(data, "rxq_%u_bytes", i); + data = nfp_pr_et(data, "txq_%u_pkts", i); + data = nfp_pr_et(data, "txq_%u_bytes", i); + } + + return data; +} + +static u64 *nfp_vnic_get_tlv_stats(struct nfp_net *nn, u64 *data) +{ + u8 __iomem *mem; + unsigned int i; + + mem = nn->dp.ctrl_bar + nn->tlv_caps.vnic_stats_off; + mem += roundup(2 * nn->tlv_caps.vnic_stats_cnt, 8); + for (i = 0; i < nn->tlv_caps.vnic_stats_cnt; i++) + *data++ = readq(mem + i * 8); + + mem = nn->dp.ctrl_bar; + for (i = 0; i < nn->max_r_vecs; i++) { + *data++ = readq(mem + NFP_NET_CFG_RXR_STATS(i)); + *data++ = readq(mem + NFP_NET_CFG_RXR_STATS(i) + 8); + *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i)); + *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i) + 8); + } + + return data; +} + static unsigned int nfp_mac_get_stats_count(struct net_device *netdev) { struct nfp_port *port; @@ -609,8 +696,12 @@ static void nfp_net_get_strings(struct net_device *netdev, switch (stringset) { case ETH_SS_STATS: data = nfp_vnic_get_sw_stats_strings(netdev, data); - data = nfp_vnic_get_hw_stats_strings(data, nn->max_r_vecs, - false); + if (!nn->tlv_caps.vnic_stats_off) + data = nfp_vnic_get_hw_stats_strings(data, + nn->max_r_vecs, + false); + else + data = nfp_vnic_get_tlv_stats_strings(nn, data); data = nfp_mac_get_stats_strings(netdev, data); data = nfp_app_port_get_stats_strings(nn->port, data); break; @@ -624,7 +715,11 @@ nfp_net_get_stats(struct net_device *netdev, struct ethtool_stats *stats, struct nfp_net *nn = netdev_priv(netdev); data = nfp_vnic_get_sw_stats(netdev, data); - data = nfp_vnic_get_hw_stats(data, nn->dp.ctrl_bar, nn->max_r_vecs); + if (!nn->tlv_caps.vnic_stats_off) + data = nfp_vnic_get_hw_stats(data, nn->dp.ctrl_bar, + nn->max_r_vecs); + else + data = nfp_vnic_get_tlv_stats(nn, data); data = nfp_mac_get_stats(netdev, data); data = nfp_app_port_get_stats(nn->port, data); } @@ -632,13 +727,18 @@ nfp_net_get_stats(struct net_device *netdev, struct 
ethtool_stats *stats, static int nfp_net_get_sset_count(struct net_device *netdev, int sset) { struct nfp_net *nn = netdev_priv(netdev); + unsigned int cnt; switch (sset) { case ETH_SS_STATS: - return nfp_vnic_get_sw_stats_count(netdev) + - nfp_vnic_get_hw_stats_count(nn->max_r_vecs) + - nfp_mac_get_stats_count(netdev) + - nfp_app_port_get_stats_count(nn->port); + cnt = nfp_vnic_get_sw_stats_count(netdev); + if (!nn->tlv_caps.vnic_stats_off) + cnt += nfp_vnic_get_hw_stats_count(nn->max_r_vecs); + else + cnt += nfp_vnic_get_tlv_stats_count(nn); + cnt += nfp_mac_get_stats_count(netdev); + cnt += nfp_app_port_get_stats_count(nn->port); + return cnt; default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 6b54cb3b681d..2fc10a36afa4 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -2739,7 +2739,7 @@ static int nv_tx_done_optimized(struct net_device *dev, int limit) * nv_tx_timeout: dev->tx_timeout function * Called with netif_tx_lock held. 
*/ -static void nv_tx_timeout(struct net_device *dev) +static void nv_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct fe_priv *np = netdev_priv(dev); u8 __iomem *base = get_hwbase(dev); diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 18e6d87c607b..73ec195fbc30 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -2271,7 +2271,7 @@ static int pch_gbe_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) * pch_gbe_tx_timeout - Respond to a Tx Hang * @netdev: Network interface device structure */ -static void pch_gbe_tx_timeout(struct net_device *netdev) +static void pch_gbe_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct pch_gbe_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c index eee883a2aa8d..70816d2e2990 100644 --- a/drivers/net/ethernet/packetengines/hamachi.c +++ b/drivers/net/ethernet/packetengines/hamachi.c @@ -548,7 +548,7 @@ static void mdio_write(struct net_device *dev, int phy_id, int location, int val static int hamachi_open(struct net_device *dev); static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static void hamachi_timer(struct timer_list *t); -static void hamachi_tx_timeout(struct net_device *dev); +static void hamachi_tx_timeout(struct net_device *dev, unsigned int txqueue); static void hamachi_init_ring(struct net_device *dev); static netdev_tx_t hamachi_start_xmit(struct sk_buff *skb, struct net_device *dev); @@ -1042,7 +1042,7 @@ static void hamachi_timer(struct timer_list *t) add_timer(&hmp->timer); } -static void hamachi_tx_timeout(struct net_device *dev) +static void hamachi_tx_timeout(struct net_device *dev, unsigned int txqueue) { int i; struct hamachi_private *hmp = netdev_priv(dev); diff --git 
a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c index 5113ee647090..520779f05e1a 100644 --- a/drivers/net/ethernet/packetengines/yellowfin.c +++ b/drivers/net/ethernet/packetengines/yellowfin.c @@ -344,7 +344,7 @@ static void mdio_write(void __iomem *ioaddr, int phy_id, int location, int value static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static int yellowfin_open(struct net_device *dev); static void yellowfin_timer(struct timer_list *t); -static void yellowfin_tx_timeout(struct net_device *dev); +static void yellowfin_tx_timeout(struct net_device *dev, unsigned int txqueue); static int yellowfin_init_ring(struct net_device *dev); static netdev_tx_t yellowfin_start_xmit(struct sk_buff *skb, struct net_device *dev); @@ -677,7 +677,7 @@ static void yellowfin_timer(struct timer_list *t) add_timer(&yp->timer); } -static void yellowfin_tx_timeout(struct net_device *dev) +static void yellowfin_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct yellowfin_private *yp = netdev_priv(dev); void __iomem *ioaddr = yp->base; diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h index 98e102af7756..bb106a32f416 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic.h +++ b/drivers/net/ethernet/pensando/ionic/ionic.h @@ -12,19 +12,27 @@ struct ionic_lif; #define IONIC_DRV_NAME "ionic" #define IONIC_DRV_DESCRIPTION "Pensando Ethernet NIC Driver" -#define IONIC_DRV_VERSION "0.18.0-k" +#define IONIC_DRV_VERSION "0.20.0-k" #define PCI_VENDOR_ID_PENSANDO 0x1dd8 #define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_PF 0x1002 #define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_VF 0x1003 -#define IONIC_SUBDEV_ID_NAPLES_25 0x4000 -#define IONIC_SUBDEV_ID_NAPLES_100_4 0x4001 -#define IONIC_SUBDEV_ID_NAPLES_100_8 0x4002 - #define DEVCMD_TIMEOUT 10 +struct ionic_vf { + u16 index; + u8 macaddr[6]; + __le32 maxrate; + __le16 vlanid; + u8 spoofchk; + u8 trusted; + u8 linkstate; 
+ dma_addr_t stats_pa; + struct ionic_lif_stats stats; +}; + struct ionic { struct pci_dev *pdev; struct device *dev; @@ -46,6 +54,9 @@ struct ionic { DECLARE_BITMAP(intrs, IONIC_INTR_CTRL_REGS_MAX); struct work_struct nb_work; struct notifier_block nb; + struct rw_semaphore vf_op_lock; /* lock for VF operations */ + struct ionic_vf *vfs; + int num_vfs; struct timer_list watchdog_timer; int watchdog_period; }; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index 9a9ab8cb2cb3..448d7b23b2f7 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -104,10 +104,112 @@ void ionic_bus_unmap_dbpage(struct ionic *ionic, void __iomem *page) iounmap(page); } +static void ionic_vf_dealloc_locked(struct ionic *ionic) +{ + struct ionic_vf *v; + dma_addr_t dma = 0; + int i; + + if (!ionic->vfs) + return; + + for (i = ionic->num_vfs - 1; i >= 0; i--) { + v = &ionic->vfs[i]; + + if (v->stats_pa) { + (void)ionic_set_vf_config(ionic, i, + IONIC_VF_ATTR_STATSADDR, + (u8 *)&dma); + dma_unmap_single(ionic->dev, v->stats_pa, + sizeof(v->stats), DMA_FROM_DEVICE); + v->stats_pa = 0; + } + } + + kfree(ionic->vfs); + ionic->vfs = NULL; + ionic->num_vfs = 0; +} + +static void ionic_vf_dealloc(struct ionic *ionic) +{ + down_write(&ionic->vf_op_lock); + ionic_vf_dealloc_locked(ionic); + up_write(&ionic->vf_op_lock); +} + +static int ionic_vf_alloc(struct ionic *ionic, int num_vfs) +{ + struct ionic_vf *v; + int err = 0; + int i; + + down_write(&ionic->vf_op_lock); + + ionic->vfs = kcalloc(num_vfs, sizeof(struct ionic_vf), GFP_KERNEL); + if (!ionic->vfs) { + err = -ENOMEM; + goto out; + } + + for (i = 0; i < num_vfs; i++) { + v = &ionic->vfs[i]; + v->stats_pa = dma_map_single(ionic->dev, &v->stats, + sizeof(v->stats), DMA_FROM_DEVICE); + if (dma_mapping_error(ionic->dev, v->stats_pa)) { + v->stats_pa = 0; + err = -ENODEV; + goto out; + } + + /* ignore 
failures from older FW, we just won't get stats */ + (void)ionic_set_vf_config(ionic, i, IONIC_VF_ATTR_STATSADDR, + (u8 *)&v->stats_pa); + ionic->num_vfs++; + } + +out: + if (err) + ionic_vf_dealloc_locked(ionic); + up_write(&ionic->vf_op_lock); + return err; +} + +static int ionic_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + struct ionic *ionic = pci_get_drvdata(pdev); + struct device *dev = ionic->dev; + int ret = 0; + + if (num_vfs > 0) { + ret = pci_enable_sriov(pdev, num_vfs); + if (ret) { + dev_err(dev, "Cannot enable SRIOV: %d\n", ret); + goto out; + } + + ret = ionic_vf_alloc(ionic, num_vfs); + if (ret) { + dev_err(dev, "Cannot alloc VFs: %d\n", ret); + pci_disable_sriov(pdev); + goto out; + } + + ret = num_vfs; + } else { + pci_disable_sriov(pdev); + ionic_vf_dealloc(ionic); + } + +out: + return ret; +} + static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct device *dev = &pdev->dev; struct ionic *ionic; + int num_vfs; int err; ionic = ionic_devlink_alloc(dev); @@ -206,6 +308,15 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_free_lifs; } + init_rwsem(&ionic->vf_op_lock); + num_vfs = pci_num_vf(pdev); + if (num_vfs) { + dev_info(dev, "%d VFs found already enabled\n", num_vfs); + err = ionic_vf_alloc(ionic, num_vfs); + if (err) + dev_err(dev, "Cannot enable existing VFs: %d\n", err); + } + err = ionic_lifs_register(ionic); if (err) { dev_err(dev, "Cannot register LIFs: %d, aborting\n", err); @@ -223,6 +334,7 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err_out_deregister_lifs: ionic_lifs_unregister(ionic); err_out_deinit_lifs: + ionic_vf_dealloc(ionic); ionic_lifs_deinit(ionic); err_out_free_lifs: ionic_lifs_free(ionic); @@ -279,6 +391,7 @@ static struct pci_driver ionic_driver = { .id_table = ionic_id_table, .probe = ionic_probe, .remove = ionic_remove, + .sriov_configure = ionic_sriov_configure, }; int 
ionic_bus_register_driver(void) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c index 5f9d2ec70446..87f82f36812f 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -286,6 +286,64 @@ void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type) ionic_dev_cmd_go(idev, &cmd); } +/* VF commands */ +int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data) +{ + union ionic_dev_cmd cmd = { + .vf_setattr.opcode = IONIC_CMD_VF_SETATTR, + .vf_setattr.attr = attr, + .vf_setattr.vf_index = vf, + }; + int err; + + switch (attr) { + case IONIC_VF_ATTR_SPOOFCHK: + cmd.vf_setattr.spoofchk = *data; + dev_dbg(ionic->dev, "%s: vf %d spoof %d\n", + __func__, vf, *data); + break; + case IONIC_VF_ATTR_TRUST: + cmd.vf_setattr.trust = *data; + dev_dbg(ionic->dev, "%s: vf %d trust %d\n", + __func__, vf, *data); + break; + case IONIC_VF_ATTR_LINKSTATE: + cmd.vf_setattr.linkstate = *data; + dev_dbg(ionic->dev, "%s: vf %d linkstate %d\n", + __func__, vf, *data); + break; + case IONIC_VF_ATTR_MAC: + ether_addr_copy(cmd.vf_setattr.macaddr, data); + dev_dbg(ionic->dev, "%s: vf %d macaddr %pM\n", + __func__, vf, data); + break; + case IONIC_VF_ATTR_VLAN: + cmd.vf_setattr.vlanid = cpu_to_le16(*(u16 *)data); + dev_dbg(ionic->dev, "%s: vf %d vlan %d\n", + __func__, vf, *(u16 *)data); + break; + case IONIC_VF_ATTR_RATE: + cmd.vf_setattr.maxrate = cpu_to_le32(*(u32 *)data); + dev_dbg(ionic->dev, "%s: vf %d maxrate %d\n", + __func__, vf, *(u32 *)data); + break; + case IONIC_VF_ATTR_STATSADDR: + cmd.vf_setattr.stats_pa = cpu_to_le64(*(u64 *)data); + dev_dbg(ionic->dev, "%s: vf %d stats_pa 0x%08llx\n", + __func__, vf, *(u64 *)data); + break; + default: + return -EINVAL; + } + + mutex_lock(&ionic->dev_cmd_lock); + ionic_dev_cmd_go(&ionic->idev, &cmd); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + mutex_unlock(&ionic->dev_cmd_lock); + + return err; 
+} + /* LIF commands */ void ionic_dev_cmd_lif_identify(struct ionic_dev *idev, u8 type, u8 ver) { diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index 4665c5dc5324..7838e342c4fd 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -113,6 +113,12 @@ static_assert(sizeof(struct ionic_rxq_desc) == 16); static_assert(sizeof(struct ionic_rxq_sg_desc) == 128); static_assert(sizeof(struct ionic_rxq_comp) == 16); +/* SR/IOV */ +static_assert(sizeof(struct ionic_vf_setattr_cmd) == 64); +static_assert(sizeof(struct ionic_vf_setattr_comp) == 16); +static_assert(sizeof(struct ionic_vf_getattr_cmd) == 64); +static_assert(sizeof(struct ionic_vf_getattr_comp) == 16); + struct ionic_devinfo { u8 asic_type; u8 asic_rev; @@ -275,6 +281,7 @@ void ionic_dev_cmd_port_autoneg(struct ionic_dev *idev, u8 an_enable); void ionic_dev_cmd_port_fec(struct ionic_dev *idev, u8 fec_type); void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type); +int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data); void ionic_dev_cmd_lif_identify(struct ionic_dev *idev, u8 type, u8 ver); void ionic_dev_cmd_lif_init(struct ionic_dev *idev, u16 lif_index, dma_addr_t addr); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h index 39317cdfa6cf..f131adad96e3 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_if.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h @@ -51,6 +51,10 @@ enum ionic_cmd_opcode { IONIC_CMD_RDMA_CREATE_CQ = 52, IONIC_CMD_RDMA_CREATE_ADMINQ = 53, + /* SR/IOV commands */ + IONIC_CMD_VF_GETATTR = 60, + IONIC_CMD_VF_SETATTR = 61, + /* QoS commands */ IONIC_CMD_QOS_CLASS_IDENTIFY = 240, IONIC_CMD_QOS_CLASS_INIT = 241, @@ -1639,6 +1643,93 @@ enum ionic_qos_sched_type { IONIC_QOS_SCHED_TYPE_DWRR = 1, /* Deficit weighted round-robin */ }; +enum ionic_vf_attr { + 
IONIC_VF_ATTR_SPOOFCHK = 1, + IONIC_VF_ATTR_TRUST = 2, + IONIC_VF_ATTR_MAC = 3, + IONIC_VF_ATTR_LINKSTATE = 4, + IONIC_VF_ATTR_VLAN = 5, + IONIC_VF_ATTR_RATE = 6, + IONIC_VF_ATTR_STATSADDR = 7, +}; + +/** + * VF link status + */ +enum ionic_vf_link_status { + IONIC_VF_LINK_STATUS_AUTO = 0, /* link state of the uplink */ + IONIC_VF_LINK_STATUS_UP = 1, /* link is always up */ + IONIC_VF_LINK_STATUS_DOWN = 2, /* link is always down */ +}; + +/** + * struct ionic_vf_setattr_cmd - Set VF attributes on the NIC + * @opcode: Opcode + * @index: VF index + * @attr: Attribute type (enum ionic_vf_attr) + * macaddr mac address + * vlanid vlan ID + * maxrate max Tx rate in Mbps + * spoofchk enable address spoof checking + * trust enable VF trust + * linkstate set link up or down + * stats_pa set DMA address for VF stats + */ +struct ionic_vf_setattr_cmd { + u8 opcode; + u8 attr; + __le16 vf_index; + union { + u8 macaddr[6]; + __le16 vlanid; + __le32 maxrate; + u8 spoofchk; + u8 trust; + u8 linkstate; + __le64 stats_pa; + u8 pad[60]; + }; +}; + +struct ionic_vf_setattr_comp { + u8 status; + u8 attr; + __le16 vf_index; + __le16 comp_index; + u8 rsvd[9]; + u8 color; +}; + +/** + * struct ionic_vf_getattr_cmd - Get VF attributes from the NIC + * @opcode: Opcode + * @index: VF index + * @attr: Attribute type (enum ionic_vf_attr) + */ +struct ionic_vf_getattr_cmd { + u8 opcode; + u8 attr; + __le16 vf_index; + u8 rsvd[60]; +}; + +struct ionic_vf_getattr_comp { + u8 status; + u8 attr; + __le16 vf_index; + union { + u8 macaddr[6]; + __le16 vlanid; + __le32 maxrate; + u8 spoofchk; + u8 trust; + u8 linkstate; + __le64 stats_pa; + u8 pad[11]; + }; + u8 color; +}; + /** * union ionic_qos_config - Qos configuration structure * @flags: Configuration flags @@ -2289,6 +2380,9 @@ union ionic_dev_cmd { struct ionic_port_getattr_cmd port_getattr; struct ionic_port_setattr_cmd port_setattr; + struct ionic_vf_setattr_cmd vf_setattr; + struct ionic_vf_getattr_cmd vf_getattr; + struct 
ionic_lif_identify_cmd lif_identify; struct ionic_lif_init_cmd lif_init; struct ionic_lif_reset_cmd lif_reset; @@ -2318,6 +2412,9 @@ union ionic_dev_cmd_comp { struct ionic_port_getattr_comp port_getattr; struct ionic_port_setattr_comp port_setattr; + struct ionic_vf_setattr_comp vf_setattr; + struct ionic_vf_getattr_comp vf_getattr; + struct ionic_lif_identify_comp lif_identify; struct ionic_lif_init_comp lif_init; ionic_lif_reset_comp lif_reset; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index ef8258713369..191271f6260d 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1285,7 +1285,7 @@ static void ionic_tx_timeout_work(struct work_struct *ws) rtnl_unlock(); } -static void ionic_tx_timeout(struct net_device *netdev) +static void ionic_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ionic_lif *lif = netdev_priv(netdev); @@ -1619,6 +1619,227 @@ int ionic_stop(struct net_device *netdev) return err; } +static int ionic_get_vf_config(struct net_device *netdev, + int vf, struct ifla_vf_info *ivf) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic *ionic = lif->ionic; + int ret = 0; + + down_read(&ionic->vf_op_lock); + + if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { + ret = -EINVAL; + } else { + ivf->vf = vf; + ivf->vlan = ionic->vfs[vf].vlanid; + ivf->qos = 0; + ivf->spoofchk = ionic->vfs[vf].spoofchk; + ivf->linkstate = ionic->vfs[vf].linkstate; + ivf->max_tx_rate = ionic->vfs[vf].maxrate; + ivf->trusted = ionic->vfs[vf].trusted; + ether_addr_copy(ivf->mac, ionic->vfs[vf].macaddr); + } + + up_read(&ionic->vf_op_lock); + return ret; +} + +static int ionic_get_vf_stats(struct net_device *netdev, int vf, + struct ifla_vf_stats *vf_stats) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic *ionic = lif->ionic; + struct ionic_lif_stats *vs; + int ret = 0; + + 
down_read(&ionic->vf_op_lock); + + if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { + ret = -EINVAL; + } else { + memset(vf_stats, 0, sizeof(*vf_stats)); + vs = &ionic->vfs[vf].stats; + + vf_stats->rx_packets = le64_to_cpu(vs->rx_ucast_packets); + vf_stats->tx_packets = le64_to_cpu(vs->tx_ucast_packets); + vf_stats->rx_bytes = le64_to_cpu(vs->rx_ucast_bytes); + vf_stats->tx_bytes = le64_to_cpu(vs->tx_ucast_bytes); + vf_stats->broadcast = le64_to_cpu(vs->rx_bcast_packets); + vf_stats->multicast = le64_to_cpu(vs->rx_mcast_packets); + vf_stats->rx_dropped = le64_to_cpu(vs->rx_ucast_drop_packets) + + le64_to_cpu(vs->rx_mcast_drop_packets) + + le64_to_cpu(vs->rx_bcast_drop_packets); + vf_stats->tx_dropped = le64_to_cpu(vs->tx_ucast_drop_packets) + + le64_to_cpu(vs->tx_mcast_drop_packets) + + le64_to_cpu(vs->tx_bcast_drop_packets); + } + + up_read(&ionic->vf_op_lock); + return ret; +} + +static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic *ionic = lif->ionic; + int ret; + + if (!(is_zero_ether_addr(mac) || is_valid_ether_addr(mac))) + return -EINVAL; + + down_read(&ionic->vf_op_lock); + + if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { + ret = -EINVAL; + } else { + ret = ionic_set_vf_config(ionic, vf, IONIC_VF_ATTR_MAC, mac); + if (!ret) + ether_addr_copy(ionic->vfs[vf].macaddr, mac); + } + + up_read(&ionic->vf_op_lock); + return ret; +} + +static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, + u8 qos, __be16 proto) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic *ionic = lif->ionic; + int ret; + + /* until someday when we support qos */ + if (qos) + return -EINVAL; + + if (vlan > 4095) + return -EINVAL; + + if (proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + + down_read(&ionic->vf_op_lock); + + if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { + ret = -EINVAL; + } else { + ret = ionic_set_vf_config(ionic, vf, + IONIC_VF_ATTR_VLAN, 
(u8 *)&vlan); + if (!ret) + ionic->vfs[vf].vlanid = vlan; + } + + up_read(&ionic->vf_op_lock); + return ret; +} + +static int ionic_set_vf_rate(struct net_device *netdev, int vf, + int tx_min, int tx_max) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic *ionic = lif->ionic; + int ret; + + /* setting the min just seems silly */ + if (tx_min) + return -EINVAL; + + down_write(&ionic->vf_op_lock); + + if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { + ret = -EINVAL; + } else { + ret = ionic_set_vf_config(ionic, vf, + IONIC_VF_ATTR_RATE, (u8 *)&tx_max); + if (!ret) + lif->ionic->vfs[vf].maxrate = tx_max; + } + + up_write(&ionic->vf_op_lock); + return ret; +} + +static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic *ionic = lif->ionic; + u8 data = set; /* convert to u8 for config */ + int ret; + + down_write(&ionic->vf_op_lock); + + if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { + ret = -EINVAL; + } else { + ret = ionic_set_vf_config(ionic, vf, + IONIC_VF_ATTR_SPOOFCHK, &data); + if (!ret) + ionic->vfs[vf].spoofchk = data; + } + + up_write(&ionic->vf_op_lock); + return ret; +} + +static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic *ionic = lif->ionic; + u8 data = set; /* convert to u8 for config */ + int ret; + + down_write(&ionic->vf_op_lock); + + if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { + ret = -EINVAL; + } else { + ret = ionic_set_vf_config(ionic, vf, + IONIC_VF_ATTR_TRUST, &data); + if (!ret) + ionic->vfs[vf].trusted = data; + } + + up_write(&ionic->vf_op_lock); + return ret; +} + +static int ionic_set_vf_link_state(struct net_device *netdev, int vf, int set) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic *ionic = lif->ionic; + u8 data; + int ret; + + switch (set) { + case IFLA_VF_LINK_STATE_ENABLE: + data = IONIC_VF_LINK_STATUS_UP; + break; 
+ case IFLA_VF_LINK_STATE_DISABLE: + data = IONIC_VF_LINK_STATUS_DOWN; + break; + case IFLA_VF_LINK_STATE_AUTO: + data = IONIC_VF_LINK_STATUS_AUTO; + break; + default: + return -EINVAL; + } + + down_write(&ionic->vf_op_lock); + + if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { + ret = -EINVAL; + } else { + ret = ionic_set_vf_config(ionic, vf, + IONIC_VF_ATTR_LINKSTATE, &data); + if (!ret) + ionic->vfs[vf].linkstate = set; + } + + up_write(&ionic->vf_op_lock); + return ret; +} + static const struct net_device_ops ionic_netdev_ops = { .ndo_open = ionic_open, .ndo_stop = ionic_stop, @@ -1632,6 +1853,14 @@ static const struct net_device_ops ionic_netdev_ops = { .ndo_change_mtu = ionic_change_mtu, .ndo_vlan_rx_add_vid = ionic_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ionic_vlan_rx_kill_vid, + .ndo_set_vf_vlan = ionic_set_vf_vlan, + .ndo_set_vf_trust = ionic_set_vf_trust, + .ndo_set_vf_mac = ionic_set_vf_mac, + .ndo_set_vf_rate = ionic_set_vf_rate, + .ndo_set_vf_spoofchk = ionic_set_vf_spoofchk, + .ndo_get_vf_config = ionic_get_vf_config, + .ndo_set_vf_link_state = ionic_set_vf_link_state, + .ndo_get_vf_stats = ionic_get_vf_stats, }; int ionic_reset_queues(struct ionic_lif *lif) @@ -1965,18 +2194,22 @@ static int ionic_station_set(struct ionic_lif *lif) if (err) return err; + if (is_zero_ether_addr(ctx.comp.lif_getattr.mac)) + return 0; + memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len); addr.sa_family = AF_INET; err = eth_prepare_mac_addr_change(netdev, &addr); - if (err) - return err; - - if (!is_zero_ether_addr(netdev->dev_addr)) { - netdev_dbg(lif->netdev, "deleting station MAC addr %pM\n", - netdev->dev_addr); - ionic_lif_addr(lif, netdev->dev_addr, false); + if (err) { + netdev_warn(lif->netdev, "ignoring bad MAC addr from NIC %pM\n", + addr.sa_data); + return 0; } + netdev_dbg(lif->netdev, "deleting station MAC addr %pM\n", + netdev->dev_addr); + ionic_lif_addr(lif, netdev->dev_addr, false); + eth_commit_mac_addr_change(netdev, &addr); 
netdev_dbg(lif->netdev, "adding station MAC addr %pM\n", netdev->dev_addr); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index a55fd1f8c31b..9c5a7dd45f9d 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -37,6 +37,7 @@ struct ionic_rx_stats { u64 csum_complete; u64 csum_error; u64 buffers_posted; + u64 dropped; }; #define IONIC_QCQ_F_INITED BIT(0) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index 3590ea7fd88a..a8e3fb73b465 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -165,6 +165,10 @@ static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode) return "IONIC_CMD_FW_DOWNLOAD"; case IONIC_CMD_FW_CONTROL: return "IONIC_CMD_FW_CONTROL"; + case IONIC_CMD_VF_GETATTR: + return "IONIC_CMD_VF_GETATTR"; + case IONIC_CMD_VF_SETATTR: + return "IONIC_CMD_VF_SETATTR"; default: return "DEVCMD_UNKNOWN"; } @@ -326,9 +330,9 @@ int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds) unsigned long max_wait; unsigned long duration; int opcode; + int hb = 0; int done; int err; - int hb; WARN_ON(in_interrupt()); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.c b/drivers/net/ethernet/pensando/ionic/ionic_stats.c index 03916b6d47f2..a1e9796a660a 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_stats.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.c @@ -39,6 +39,7 @@ static const struct ionic_stat_desc ionic_rx_stats_desc[] = { IONIC_RX_STAT_DESC(csum_none), IONIC_RX_STAT_DESC(csum_complete), IONIC_RX_STAT_DESC(csum_error), + IONIC_RX_STAT_DESC(dropped), }; static const struct ionic_stat_desc ionic_txq_stats_desc[] = { diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 97e79949b359..e452f4242ba0 
100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -152,12 +152,16 @@ static void ionic_rx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_i stats = q_to_rx_stats(q); netdev = q->lif->netdev; - if (comp->status) + if (comp->status) { + stats->dropped++; return; + } /* no packet processing while resetting */ - if (unlikely(test_bit(IONIC_LIF_QUEUE_RESET, q->lif->state))) + if (unlikely(test_bit(IONIC_LIF_QUEUE_RESET, q->lif->state))) { + stats->dropped++; return; + } stats->pkts++; stats->bytes += le16_to_cpu(comp->len); @@ -167,8 +171,10 @@ static void ionic_rx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_i else skb = ionic_rx_frags(q, desc_info, cq_info); - if (unlikely(!skb)) + if (unlikely(!skb)) { + stats->dropped++; return; + } skb_record_rx_queue(skb, q->index); @@ -337,6 +343,8 @@ void ionic_rx_fill(struct ionic_queue *q) struct ionic_rxq_sg_desc *sg_desc; struct ionic_rxq_sg_elem *sg_elem; struct ionic_rxq_desc *desc; + unsigned int remain_len; + unsigned int seg_len; unsigned int nfrags; bool ring_doorbell; unsigned int i, j; @@ -346,6 +354,7 @@ void ionic_rx_fill(struct ionic_queue *q) nfrags = round_up(len, PAGE_SIZE) / PAGE_SIZE; for (i = ionic_q_space_avail(q); i; i--) { + remain_len = len; desc_info = q->head; desc = desc_info->desc; sg_desc = desc_info->sg_desc; @@ -369,7 +378,9 @@ void ionic_rx_fill(struct ionic_queue *q) return; } desc->addr = cpu_to_le64(page_info->dma_addr); - desc->len = cpu_to_le16(PAGE_SIZE); + seg_len = min_t(unsigned int, PAGE_SIZE, len); + desc->len = cpu_to_le16(seg_len); + remain_len -= seg_len; page_info++; /* fill sg descriptors - pages[1..n] */ @@ -385,7 +396,9 @@ void ionic_rx_fill(struct ionic_queue *q) return; } sg_elem->addr = cpu_to_le64(page_info->dma_addr); - sg_elem->len = cpu_to_le16(PAGE_SIZE); + seg_len = min_t(unsigned int, PAGE_SIZE, remain_len); + sg_elem->len = cpu_to_le16(seg_len); + remain_len -= seg_len; 
page_info++; } diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index c692a41e4548..8067ea04d455 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -49,7 +49,7 @@ static int netxen_nic_open(struct net_device *netdev); static int netxen_nic_close(struct net_device *netdev); static netdev_tx_t netxen_nic_xmit_frame(struct sk_buff *, struct net_device *); -static void netxen_tx_timeout(struct net_device *netdev); +static void netxen_tx_timeout(struct net_device *netdev, unsigned int txqueue); static void netxen_tx_timeout_task(struct work_struct *work); static void netxen_fw_poll_work(struct work_struct *work); static void netxen_schedule_work(struct netxen_adapter *adapter, @@ -2222,7 +2222,7 @@ static void netxen_nic_handle_phy_intr(struct netxen_adapter *adapter) netxen_advert_link_change(adapter, linkup); } -static void netxen_tx_timeout(struct net_device *netdev) +static void netxen_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct netxen_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index 7e0b795230b2..900bc603e30a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -331,8 +331,8 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, u8 sb_index = p_hwfn->p_eq->eq_sb_index; struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; - int rc = -EINVAL; u8 page_cnt, i; + int rc; /* update initial eq producer */ qed_eq_prod_update(p_hwfn, @@ -447,7 +447,7 @@ int qed_sp_pf_update(struct qed_hwfn *p_hwfn) { struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; - int rc = -EINVAL; + int rc; /* Get SPQ entry */ memset(&init_data, 0, sizeof(init_data)); @@ -471,7 +471,7 @@ int qed_sp_pf_update_ufp(struct 
qed_hwfn *p_hwfn) { struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; - int rc = -EOPNOTSUPP; + int rc; if (p_hwfn->ufp_info.pri_type == QED_UFP_PRI_UNKNOWN) { DP_INFO(p_hwfn, "Invalid priority type %d\n", @@ -509,7 +509,7 @@ int qed_sp_pf_update_tunn_cfg(struct qed_hwfn *p_hwfn, { struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; - int rc = -EINVAL; + int rc; if (IS_VF(p_hwfn->cdev)) return qed_vf_pf_tunnel_param_update(p_hwfn, p_tunn); @@ -546,7 +546,7 @@ int qed_sp_pf_stop(struct qed_hwfn *p_hwfn) { struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; - int rc = -EINVAL; + int rc; /* Get SPQ entry */ memset(&init_data, 0, sizeof(init_data)); diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c index f815435cf106..4c7f7a7fc151 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c @@ -247,6 +247,7 @@ static int qede_ptp_cfg_filters(struct qede_dev *edev) break; case HWTSTAMP_TX_ONESTEP_SYNC: + case HWTSTAMP_TX_ONESTEP_P2P: DP_ERR(edev, "One-step timestamping is not supported\n"); return -ERANGE; } diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 986f26578d34..0fade19e00d4 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -3602,7 +3602,7 @@ static int ql3xxx_set_mac_address(struct net_device *ndev, void *p) return 0; } -static void ql3xxx_tx_timeout(struct net_device *ndev) +static void ql3xxx_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct ql3_adapter *qdev = netdev_priv(ndev); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index c07438db30ba..9dd6cb36f366 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -56,7 +56,7 @@ static int qlcnic_probe(struct pci_dev 
*pdev, const struct pci_device_id *ent); static void qlcnic_remove(struct pci_dev *pdev); static int qlcnic_open(struct net_device *netdev); static int qlcnic_close(struct net_device *netdev); -static void qlcnic_tx_timeout(struct net_device *netdev); +static void qlcnic_tx_timeout(struct net_device *netdev, unsigned int txqueue); static void qlcnic_attach_work(struct work_struct *work); static void qlcnic_fwinit_work(struct work_struct *work); @@ -3068,7 +3068,7 @@ static void qlcnic_dump_rings(struct qlcnic_adapter *adapter) } -static void qlcnic_tx_timeout(struct net_device *netdev) +static void qlcnic_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct qlcnic_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 98f92268cbaa..522fad4cb2cd 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -282,7 +282,7 @@ static int emac_close(struct net_device *netdev) } /* Respond to a TX hang */ -static void emac_tx_timeout(struct net_device *netdev) +static void emac_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct emac_adapter *adpt = netdev_priv(netdev); diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index baac016f3ec0..5a3b65a6eb4f 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -785,7 +785,7 @@ qcaspi_netdev_xmit(struct sk_buff *skb, struct net_device *dev) } static void -qcaspi_netdev_tx_timeout(struct net_device *dev) +qcaspi_netdev_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct qcaspi *qca = netdev_priv(dev); diff --git a/drivers/net/ethernet/qualcomm/qca_uart.c b/drivers/net/ethernet/qualcomm/qca_uart.c index 0981068504fa..375a844cd27c 100644 --- a/drivers/net/ethernet/qualcomm/qca_uart.c +++ b/drivers/net/ethernet/qualcomm/qca_uart.c @@ -248,7 +248,7 @@ out: return 
NETDEV_TX_OK; } -static void qcauart_netdev_tx_timeout(struct net_device *dev) +static void qcauart_netdev_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct qcauart *qca = netdev_priv(dev); diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index 274e5b4bc4ac..c23cb61bbd30 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -410,7 +410,7 @@ static void r6040_init_mac_regs(struct net_device *dev) iowrite16(TM2TX, ioaddr + MTPR); } -static void r6040_tx_timeout(struct net_device *dev) +static void r6040_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct r6040_private *priv = netdev_priv(dev); void __iomem *ioaddr = priv->base; diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 4f910c4f67b0..60d342f82fb3 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -1235,7 +1235,7 @@ static int cp_close (struct net_device *dev) return 0; } -static void cp_tx_timeout(struct net_device *dev) +static void cp_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct cp_private *cp = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index 55d01266e615..5caeb8368eab 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -642,7 +642,7 @@ static int mdio_read (struct net_device *dev, int phy_id, int location); static void mdio_write (struct net_device *dev, int phy_id, int location, int val); static void rtl8139_start_thread(struct rtl8139_private *tp); -static void rtl8139_tx_timeout (struct net_device *dev); +static void rtl8139_tx_timeout (struct net_device *dev, unsigned int txqueue); static void rtl8139_init_ring (struct net_device *dev); static netdev_tx_t rtl8139_start_xmit (struct sk_buff *skb, struct net_device *dev); @@ -1700,7 +1700,7 @@ static void 
rtl8139_tx_timeout_task (struct work_struct *work) spin_unlock_bh(&tp->rx_lock); } -static void rtl8139_tx_timeout (struct net_device *dev) +static void rtl8139_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct rtl8139_private *tp = netdev_priv(dev); diff --git a/drivers/net/ethernet/realtek/Makefile b/drivers/net/ethernet/realtek/Makefile index d5304bad2372..2e1d78b106b0 100644 --- a/drivers/net/ethernet/realtek/Makefile +++ b/drivers/net/ethernet/realtek/Makefile @@ -6,5 +6,5 @@ obj-$(CONFIG_8139CP) += 8139cp.o obj-$(CONFIG_8139TOO) += 8139too.o obj-$(CONFIG_ATP) += atp.o -r8169-objs += r8169_main.o r8169_firmware.o +r8169-objs += r8169_main.o r8169_firmware.o r8169_phy_config.o obj-$(CONFIG_R8169) += r8169.o diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c index 58e0ca9093d3..9e3b35c97e63 100644 --- a/drivers/net/ethernet/realtek/atp.c +++ b/drivers/net/ethernet/realtek/atp.c @@ -204,7 +204,7 @@ static void net_rx(struct net_device *dev); static void read_block(long ioaddr, int length, unsigned char *buffer, int data_mode); static int net_close(struct net_device *dev); static void set_rx_mode(struct net_device *dev); -static void tx_timeout(struct net_device *dev); +static void tx_timeout(struct net_device *dev, unsigned int txqueue); /* A list of all installed ATP devices, for removing the driver module. 
*/ @@ -533,7 +533,7 @@ static void write_packet(long ioaddr, int length, unsigned char *packet, int pad outb(Ctrl_HNibWrite | Ctrl_SelData | Ctrl_IRQEN, ioaddr + PAR_CONTROL); } -static void tx_timeout(struct net_device *dev) +static void tx_timeout(struct net_device *dev, unsigned int txqueue) { long ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h new file mode 100644 index 000000000000..22a6a057b11e --- /dev/null +++ b/drivers/net/ethernet/realtek/r8169.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* r8169.h: RealTek 8169/8168/8101 ethernet driver. + * + * Copyright (c) 2002 ShuChen <shuchen@realtek.com.tw> + * Copyright (c) 2003 - 2007 Francois Romieu <romieu@fr.zoreil.com> + * Copyright (c) a lot of people too. Please respect their work. + * + * See MAINTAINERS file for support contact information. + */ + +#include <linux/types.h> +#include <linux/phy.h> + +enum mac_version { + /* support for ancient RTL_GIGA_MAC_VER_01 has been removed */ + RTL_GIGA_MAC_VER_02, + RTL_GIGA_MAC_VER_03, + RTL_GIGA_MAC_VER_04, + RTL_GIGA_MAC_VER_05, + RTL_GIGA_MAC_VER_06, + RTL_GIGA_MAC_VER_07, + RTL_GIGA_MAC_VER_08, + RTL_GIGA_MAC_VER_09, + RTL_GIGA_MAC_VER_10, + RTL_GIGA_MAC_VER_11, + RTL_GIGA_MAC_VER_12, + RTL_GIGA_MAC_VER_13, + RTL_GIGA_MAC_VER_14, + RTL_GIGA_MAC_VER_15, + RTL_GIGA_MAC_VER_16, + RTL_GIGA_MAC_VER_17, + RTL_GIGA_MAC_VER_18, + RTL_GIGA_MAC_VER_19, + RTL_GIGA_MAC_VER_20, + RTL_GIGA_MAC_VER_21, + RTL_GIGA_MAC_VER_22, + RTL_GIGA_MAC_VER_23, + RTL_GIGA_MAC_VER_24, + RTL_GIGA_MAC_VER_25, + RTL_GIGA_MAC_VER_26, + RTL_GIGA_MAC_VER_27, + RTL_GIGA_MAC_VER_28, + RTL_GIGA_MAC_VER_29, + RTL_GIGA_MAC_VER_30, + RTL_GIGA_MAC_VER_31, + RTL_GIGA_MAC_VER_32, + RTL_GIGA_MAC_VER_33, + RTL_GIGA_MAC_VER_34, + RTL_GIGA_MAC_VER_35, + RTL_GIGA_MAC_VER_36, + RTL_GIGA_MAC_VER_37, + RTL_GIGA_MAC_VER_38, + RTL_GIGA_MAC_VER_39, + RTL_GIGA_MAC_VER_40, + RTL_GIGA_MAC_VER_41, + RTL_GIGA_MAC_VER_42, + 
RTL_GIGA_MAC_VER_43, + RTL_GIGA_MAC_VER_44, + RTL_GIGA_MAC_VER_45, + RTL_GIGA_MAC_VER_46, + RTL_GIGA_MAC_VER_47, + RTL_GIGA_MAC_VER_48, + RTL_GIGA_MAC_VER_49, + RTL_GIGA_MAC_VER_50, + RTL_GIGA_MAC_VER_51, + RTL_GIGA_MAC_VER_52, + RTL_GIGA_MAC_VER_60, + RTL_GIGA_MAC_VER_61, + RTL_GIGA_MAC_NONE +}; + +struct rtl8169_private; + +void r8169_apply_firmware(struct rtl8169_private *tp); +u16 rtl8168h_2_get_adc_bias_ioffset(struct rtl8169_private *tp); +u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr); +void r8169_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev, + enum mac_version ver); diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 67a4d5d45e3a..6d699df7d27f 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -31,6 +31,7 @@ #include <linux/ipv6.h> #include <net/ip6_checksum.h> +#include "r8169.h" #include "r8169_firmware.h" #define MODULENAME "r8169" @@ -84,64 +85,6 @@ #define RTL_R16(tp, reg) readw(tp->mmio_addr + (reg)) #define RTL_R32(tp, reg) readl(tp->mmio_addr + (reg)) -enum mac_version { - /* support for ancient RTL_GIGA_MAC_VER_01 has been removed */ - RTL_GIGA_MAC_VER_02, - RTL_GIGA_MAC_VER_03, - RTL_GIGA_MAC_VER_04, - RTL_GIGA_MAC_VER_05, - RTL_GIGA_MAC_VER_06, - RTL_GIGA_MAC_VER_07, - RTL_GIGA_MAC_VER_08, - RTL_GIGA_MAC_VER_09, - RTL_GIGA_MAC_VER_10, - RTL_GIGA_MAC_VER_11, - RTL_GIGA_MAC_VER_12, - RTL_GIGA_MAC_VER_13, - RTL_GIGA_MAC_VER_14, - RTL_GIGA_MAC_VER_15, - RTL_GIGA_MAC_VER_16, - RTL_GIGA_MAC_VER_17, - RTL_GIGA_MAC_VER_18, - RTL_GIGA_MAC_VER_19, - RTL_GIGA_MAC_VER_20, - RTL_GIGA_MAC_VER_21, - RTL_GIGA_MAC_VER_22, - RTL_GIGA_MAC_VER_23, - RTL_GIGA_MAC_VER_24, - RTL_GIGA_MAC_VER_25, - RTL_GIGA_MAC_VER_26, - RTL_GIGA_MAC_VER_27, - RTL_GIGA_MAC_VER_28, - RTL_GIGA_MAC_VER_29, - RTL_GIGA_MAC_VER_30, - RTL_GIGA_MAC_VER_31, - RTL_GIGA_MAC_VER_32, - RTL_GIGA_MAC_VER_33, - RTL_GIGA_MAC_VER_34, - RTL_GIGA_MAC_VER_35, - 
RTL_GIGA_MAC_VER_36, - RTL_GIGA_MAC_VER_37, - RTL_GIGA_MAC_VER_38, - RTL_GIGA_MAC_VER_39, - RTL_GIGA_MAC_VER_40, - RTL_GIGA_MAC_VER_41, - RTL_GIGA_MAC_VER_42, - RTL_GIGA_MAC_VER_43, - RTL_GIGA_MAC_VER_44, - RTL_GIGA_MAC_VER_45, - RTL_GIGA_MAC_VER_46, - RTL_GIGA_MAC_VER_47, - RTL_GIGA_MAC_VER_48, - RTL_GIGA_MAC_VER_49, - RTL_GIGA_MAC_VER_50, - RTL_GIGA_MAC_VER_51, - RTL_GIGA_MAC_VER_52, - RTL_GIGA_MAC_VER_60, - RTL_GIGA_MAC_VER_61, - RTL_GIGA_MAC_NONE -}; - #define JUMBO_1K ETH_DATA_LEN #define JUMBO_4K (4*1024 - ETH_HLEN - 2) #define JUMBO_6K (6*1024 - ETH_HLEN - 2) @@ -492,6 +435,7 @@ enum rtl_register_content { /* CPlusCmd p.31 */ EnableBist = (1 << 15), // 8168 8101 Mac_dbgo_oe = (1 << 14), // 8168 8101 + EnAnaPLL = (1 << 14), // 8169 Normal_mode = (1 << 13), // unused Force_half_dup = (1 << 12), // 8168 8101 Force_rxflow_en = (1 << 11), // 8168 8101 @@ -1078,52 +1022,6 @@ static int rtl_readphy(struct rtl8169_private *tp, int location) } } -static void rtl_patchphy(struct rtl8169_private *tp, int reg_addr, int value) -{ - rtl_writephy(tp, reg_addr, rtl_readphy(tp, reg_addr) | value); -} - -static void rtl_w0w1_phy(struct rtl8169_private *tp, int reg_addr, int p, int m) -{ - int val; - - val = rtl_readphy(tp, reg_addr); - rtl_writephy(tp, reg_addr, (val & ~m) | p); -} - -static void r8168d_modify_extpage(struct phy_device *phydev, int extpage, - int reg, u16 mask, u16 val) -{ - int oldpage = phy_select_page(phydev, 0x0007); - - __phy_write(phydev, 0x1e, extpage); - __phy_modify(phydev, reg, mask, val); - - phy_restore_page(phydev, oldpage, 0); -} - -static void r8168d_phy_param(struct phy_device *phydev, u16 parm, - u16 mask, u16 val) -{ - int oldpage = phy_select_page(phydev, 0x0005); - - __phy_write(phydev, 0x05, parm); - __phy_modify(phydev, 0x06, mask, val); - - phy_restore_page(phydev, oldpage, 0); -} - -static void r8168g_phy_param(struct phy_device *phydev, u16 parm, - u16 mask, u16 val) -{ - int oldpage = phy_select_page(phydev, 0x0a43); - - 
__phy_write(phydev, 0x13, parm); - __phy_modify(phydev, 0x14, mask, val); - - phy_restore_page(phydev, oldpage, 0); -} - DECLARE_RTL_COND(rtl_ephyar_cond) { return RTL_R32(tp, EPHYAR) & EPHYAR_FLAG; @@ -1372,7 +1270,7 @@ DECLARE_RTL_COND(rtl_efusear_cond) return RTL_R32(tp, EFUSEAR) & EFUSEAR_FLAG; } -static u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr) +u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr) { RTL_W32(tp, EFUSEAR, (reg_addr & EFUSEAR_REG_MASK) << EFUSEAR_REG_SHIFT); @@ -2268,22 +2166,6 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp) } } -struct phy_reg { - u16 reg; - u16 val; -}; - -static void __rtl_writephy_batch(struct rtl8169_private *tp, - const struct phy_reg *regs, int len) -{ - while (len-- > 0) { - rtl_writephy(tp, regs->reg, regs->val); - regs++; - } -} - -#define rtl_writephy_batch(tp, a) __rtl_writephy_batch(tp, a, ARRAY_SIZE(a)) - static void rtl_release_firmware(struct rtl8169_private *tp) { if (tp->rtl_fw) { @@ -2293,7 +2175,7 @@ static void rtl_release_firmware(struct rtl8169_private *tp) } } -static void rtl_apply_firmware(struct rtl8169_private *tp) +void r8169_apply_firmware(struct rtl8169_private *tp) { /* TODO: release firmware if rtl_fw_write_firmware signals failure. 
*/ if (tp->rtl_fw) @@ -2315,594 +2197,6 @@ static void rtl8125_config_eee_mac(struct rtl8169_private *tp) r8168_mac_ocp_modify(tp, 0xeb62, 0, BIT(2) | BIT(1)); } -static void rtl8168f_config_eee_phy(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - - r8168d_modify_extpage(phydev, 0x0020, 0x15, 0, BIT(8)); - r8168d_phy_param(phydev, 0x8b85, 0, BIT(13)); -} - -static void rtl8168g_config_eee_phy(struct rtl8169_private *tp) -{ - phy_modify_paged(tp->phydev, 0x0a43, 0x11, 0, BIT(4)); -} - -static void rtl8168h_config_eee_phy(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - - rtl8168g_config_eee_phy(tp); - - phy_modify_paged(phydev, 0xa4a, 0x11, 0x0000, 0x0200); - phy_modify_paged(phydev, 0xa42, 0x14, 0x0000, 0x0080); -} - -static void rtl8125_config_eee_phy(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - - rtl8168h_config_eee_phy(tp); - - phy_modify_paged(phydev, 0xa6d, 0x12, 0x0001, 0x0000); - phy_modify_paged(phydev, 0xa6d, 0x14, 0x0010, 0x0000); -} - -static void rtl8169s_hw_phy_config(struct rtl8169_private *tp) -{ - static const struct phy_reg phy_reg_init[] = { - { 0x1f, 0x0001 }, - { 0x06, 0x006e }, - { 0x08, 0x0708 }, - { 0x15, 0x4000 }, - { 0x18, 0x65c7 }, - - { 0x1f, 0x0001 }, - { 0x03, 0x00a1 }, - { 0x02, 0x0008 }, - { 0x01, 0x0120 }, - { 0x00, 0x1000 }, - { 0x04, 0x0800 }, - { 0x04, 0x0000 }, - - { 0x03, 0xff41 }, - { 0x02, 0xdf60 }, - { 0x01, 0x0140 }, - { 0x00, 0x0077 }, - { 0x04, 0x7800 }, - { 0x04, 0x7000 }, - - { 0x03, 0x802f }, - { 0x02, 0x4f02 }, - { 0x01, 0x0409 }, - { 0x00, 0xf0f9 }, - { 0x04, 0x9800 }, - { 0x04, 0x9000 }, - - { 0x03, 0xdf01 }, - { 0x02, 0xdf20 }, - { 0x01, 0xff95 }, - { 0x00, 0xba00 }, - { 0x04, 0xa800 }, - { 0x04, 0xa000 }, - - { 0x03, 0xff41 }, - { 0x02, 0xdf20 }, - { 0x01, 0x0140 }, - { 0x00, 0x00bb }, - { 0x04, 0xb800 }, - { 0x04, 0xb000 }, - - { 0x03, 0xdf41 }, - { 0x02, 0xdc60 }, - { 0x01, 0x6340 }, - { 0x00, 0x007d }, - { 0x04, 0xd800 }, - { 0x04, 
0xd000 }, - - { 0x03, 0xdf01 }, - { 0x02, 0xdf20 }, - { 0x01, 0x100a }, - { 0x00, 0xa0ff }, - { 0x04, 0xf800 }, - { 0x04, 0xf000 }, - - { 0x1f, 0x0000 }, - { 0x0b, 0x0000 }, - { 0x00, 0x9200 } - }; - - rtl_writephy_batch(tp, phy_reg_init); -} - -static void rtl8169sb_hw_phy_config(struct rtl8169_private *tp) -{ - phy_write_paged(tp->phydev, 0x0002, 0x01, 0x90d0); -} - -static void rtl8169scd_hw_phy_config_quirk(struct rtl8169_private *tp) -{ - struct pci_dev *pdev = tp->pci_dev; - - if ((pdev->subsystem_vendor != PCI_VENDOR_ID_GIGABYTE) || - (pdev->subsystem_device != 0xe000)) - return; - - phy_write_paged(tp->phydev, 0x0001, 0x10, 0xf01b); -} - -static void rtl8169scd_hw_phy_config(struct rtl8169_private *tp) -{ - static const struct phy_reg phy_reg_init[] = { - { 0x1f, 0x0001 }, - { 0x04, 0x0000 }, - { 0x03, 0x00a1 }, - { 0x02, 0x0008 }, - { 0x01, 0x0120 }, - { 0x00, 0x1000 }, - { 0x04, 0x0800 }, - { 0x04, 0x9000 }, - { 0x03, 0x802f }, - { 0x02, 0x4f02 }, - { 0x01, 0x0409 }, - { 0x00, 0xf099 }, - { 0x04, 0x9800 }, - { 0x04, 0xa000 }, - { 0x03, 0xdf01 }, - { 0x02, 0xdf20 }, - { 0x01, 0xff95 }, - { 0x00, 0xba00 }, - { 0x04, 0xa800 }, - { 0x04, 0xf000 }, - { 0x03, 0xdf01 }, - { 0x02, 0xdf20 }, - { 0x01, 0x101a }, - { 0x00, 0xa0ff }, - { 0x04, 0xf800 }, - { 0x04, 0x0000 }, - { 0x1f, 0x0000 }, - - { 0x1f, 0x0001 }, - { 0x10, 0xf41b }, - { 0x14, 0xfb54 }, - { 0x18, 0xf5c7 }, - { 0x1f, 0x0000 }, - - { 0x1f, 0x0001 }, - { 0x17, 0x0cc0 }, - { 0x1f, 0x0000 } - }; - - rtl_writephy_batch(tp, phy_reg_init); - - rtl8169scd_hw_phy_config_quirk(tp); -} - -static void rtl8169sce_hw_phy_config(struct rtl8169_private *tp) -{ - static const struct phy_reg phy_reg_init[] = { - { 0x1f, 0x0001 }, - { 0x04, 0x0000 }, - { 0x03, 0x00a1 }, - { 0x02, 0x0008 }, - { 0x01, 0x0120 }, - { 0x00, 0x1000 }, - { 0x04, 0x0800 }, - { 0x04, 0x9000 }, - { 0x03, 0x802f }, - { 0x02, 0x4f02 }, - { 0x01, 0x0409 }, - { 0x00, 0xf099 }, - { 0x04, 0x9800 }, - { 0x04, 0xa000 }, - { 0x03, 0xdf01 }, - { 0x02, 
0xdf20 }, - { 0x01, 0xff95 }, - { 0x00, 0xba00 }, - { 0x04, 0xa800 }, - { 0x04, 0xf000 }, - { 0x03, 0xdf01 }, - { 0x02, 0xdf20 }, - { 0x01, 0x101a }, - { 0x00, 0xa0ff }, - { 0x04, 0xf800 }, - { 0x04, 0x0000 }, - { 0x1f, 0x0000 }, - - { 0x1f, 0x0001 }, - { 0x0b, 0x8480 }, - { 0x1f, 0x0000 }, - - { 0x1f, 0x0001 }, - { 0x18, 0x67c7 }, - { 0x04, 0x2000 }, - { 0x03, 0x002f }, - { 0x02, 0x4360 }, - { 0x01, 0x0109 }, - { 0x00, 0x3022 }, - { 0x04, 0x2800 }, - { 0x1f, 0x0000 }, - - { 0x1f, 0x0001 }, - { 0x17, 0x0cc0 }, - { 0x1f, 0x0000 } - }; - - rtl_writephy_batch(tp, phy_reg_init); -} - -static void rtl8168bb_hw_phy_config(struct rtl8169_private *tp) -{ - rtl_writephy(tp, 0x1f, 0x0001); - rtl_patchphy(tp, 0x16, 1 << 0); - rtl_writephy(tp, 0x10, 0xf41b); - rtl_writephy(tp, 0x1f, 0x0000); -} - -static void rtl8168bef_hw_phy_config(struct rtl8169_private *tp) -{ - phy_write_paged(tp->phydev, 0x0001, 0x10, 0xf41b); -} - -static void rtl8168cp_1_hw_phy_config(struct rtl8169_private *tp) -{ - phy_write(tp->phydev, 0x1d, 0x0f00); - phy_write_paged(tp->phydev, 0x0002, 0x0c, 0x1ec8); -} - -static void rtl8168cp_2_hw_phy_config(struct rtl8169_private *tp) -{ - phy_set_bits(tp->phydev, 0x14, BIT(5)); - phy_set_bits(tp->phydev, 0x0d, BIT(5)); - phy_write_paged(tp->phydev, 0x0001, 0x1d, 0x3d98); -} - -static void rtl8168c_1_hw_phy_config(struct rtl8169_private *tp) -{ - static const struct phy_reg phy_reg_init[] = { - { 0x1f, 0x0001 }, - { 0x12, 0x2300 }, - { 0x1f, 0x0002 }, - { 0x00, 0x88d4 }, - { 0x01, 0x82b1 }, - { 0x03, 0x7002 }, - { 0x08, 0x9e30 }, - { 0x09, 0x01f0 }, - { 0x0a, 0x5500 }, - { 0x0c, 0x00c8 }, - { 0x1f, 0x0003 }, - { 0x12, 0xc096 }, - { 0x16, 0x000a }, - { 0x1f, 0x0000 }, - { 0x1f, 0x0000 }, - { 0x09, 0x2000 }, - { 0x09, 0x0000 } - }; - - rtl_writephy_batch(tp, phy_reg_init); - - rtl_patchphy(tp, 0x14, 1 << 5); - rtl_patchphy(tp, 0x0d, 1 << 5); - rtl_writephy(tp, 0x1f, 0x0000); -} - -static void rtl8168c_2_hw_phy_config(struct rtl8169_private *tp) -{ - static const 
struct phy_reg phy_reg_init[] = { - { 0x1f, 0x0001 }, - { 0x12, 0x2300 }, - { 0x03, 0x802f }, - { 0x02, 0x4f02 }, - { 0x01, 0x0409 }, - { 0x00, 0xf099 }, - { 0x04, 0x9800 }, - { 0x04, 0x9000 }, - { 0x1d, 0x3d98 }, - { 0x1f, 0x0002 }, - { 0x0c, 0x7eb8 }, - { 0x06, 0x0761 }, - { 0x1f, 0x0003 }, - { 0x16, 0x0f0a }, - { 0x1f, 0x0000 } - }; - - rtl_writephy_batch(tp, phy_reg_init); - - rtl_patchphy(tp, 0x16, 1 << 0); - rtl_patchphy(tp, 0x14, 1 << 5); - rtl_patchphy(tp, 0x0d, 1 << 5); - rtl_writephy(tp, 0x1f, 0x0000); -} - -static void rtl8168c_3_hw_phy_config(struct rtl8169_private *tp) -{ - static const struct phy_reg phy_reg_init[] = { - { 0x1f, 0x0001 }, - { 0x12, 0x2300 }, - { 0x1d, 0x3d98 }, - { 0x1f, 0x0002 }, - { 0x0c, 0x7eb8 }, - { 0x06, 0x5461 }, - { 0x1f, 0x0003 }, - { 0x16, 0x0f0a }, - { 0x1f, 0x0000 } - }; - - rtl_writephy_batch(tp, phy_reg_init); - - rtl_patchphy(tp, 0x16, 1 << 0); - rtl_patchphy(tp, 0x14, 1 << 5); - rtl_patchphy(tp, 0x0d, 1 << 5); - rtl_writephy(tp, 0x1f, 0x0000); -} - -static const struct phy_reg rtl8168d_1_phy_reg_init_0[] = { - /* Channel Estimation */ - { 0x1f, 0x0001 }, - { 0x06, 0x4064 }, - { 0x07, 0x2863 }, - { 0x08, 0x059c }, - { 0x09, 0x26b4 }, - { 0x0a, 0x6a19 }, - { 0x0b, 0xdcc8 }, - { 0x10, 0xf06d }, - { 0x14, 0x7f68 }, - { 0x18, 0x7fd9 }, - { 0x1c, 0xf0ff }, - { 0x1d, 0x3d9c }, - { 0x1f, 0x0003 }, - { 0x12, 0xf49f }, - { 0x13, 0x070b }, - { 0x1a, 0x05ad }, - { 0x14, 0x94c0 }, - - /* - * Tx Error Issue - * Enhance line driver power - */ - { 0x1f, 0x0002 }, - { 0x06, 0x5561 }, - { 0x1f, 0x0005 }, - { 0x05, 0x8332 }, - { 0x06, 0x5561 }, - - /* - * Can not link to 1Gbps with bad cable - * Decrease SNR threshold form 21.07dB to 19.04dB - */ - { 0x1f, 0x0001 }, - { 0x17, 0x0cc0 }, - - { 0x1f, 0x0000 }, - { 0x0d, 0xf880 } -}; - -static const struct phy_reg rtl8168d_1_phy_reg_init_1[] = { - { 0x1f, 0x0002 }, - { 0x05, 0x669a }, - { 0x1f, 0x0005 }, - { 0x05, 0x8330 }, - { 0x06, 0x669a }, - { 0x1f, 0x0002 } -}; - -static void 
rtl8168d_apply_firmware_cond(struct rtl8169_private *tp, u16 val) -{ - u16 reg_val; - - rtl_writephy(tp, 0x1f, 0x0005); - rtl_writephy(tp, 0x05, 0x001b); - reg_val = rtl_readphy(tp, 0x06); - rtl_writephy(tp, 0x1f, 0x0000); - - if (reg_val != val) - netif_warn(tp, hw, tp->dev, "chipset not ready for firmware\n"); - else - rtl_apply_firmware(tp); -} - -static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp) -{ - rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_0); - - /* - * Rx Error Issue - * Fine Tune Switching regulator parameter - */ - rtl_writephy(tp, 0x1f, 0x0002); - rtl_w0w1_phy(tp, 0x0b, 0x0010, 0x00ef); - rtl_w0w1_phy(tp, 0x0c, 0xa200, 0x5d00); - - if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) { - int val; - - rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_1); - - val = rtl_readphy(tp, 0x0d); - - if ((val & 0x00ff) != 0x006c) { - static const u32 set[] = { - 0x0065, 0x0066, 0x0067, 0x0068, - 0x0069, 0x006a, 0x006b, 0x006c - }; - int i; - - rtl_writephy(tp, 0x1f, 0x0002); - - val &= 0xff00; - for (i = 0; i < ARRAY_SIZE(set); i++) - rtl_writephy(tp, 0x0d, val | set[i]); - } - } else { - phy_write_paged(tp->phydev, 0x0002, 0x05, 0x6662); - r8168d_phy_param(tp->phydev, 0x8330, 0xffff, 0x6662); - } - - /* RSET couple improve */ - rtl_writephy(tp, 0x1f, 0x0002); - rtl_patchphy(tp, 0x0d, 0x0300); - rtl_patchphy(tp, 0x0f, 0x0010); - - /* Fine tune PLL performance */ - rtl_writephy(tp, 0x1f, 0x0002); - rtl_w0w1_phy(tp, 0x02, 0x0100, 0x0600); - rtl_w0w1_phy(tp, 0x03, 0x0000, 0xe000); - rtl_writephy(tp, 0x1f, 0x0000); - - rtl8168d_apply_firmware_cond(tp, 0xbf00); -} - -static void rtl8168d_2_hw_phy_config(struct rtl8169_private *tp) -{ - rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_0); - - if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) { - int val; - - rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_1); - - val = rtl_readphy(tp, 0x0d); - if ((val & 0x00ff) != 0x006c) { - static const u32 set[] = { - 0x0065, 0x0066, 0x0067, 0x0068, - 0x0069, 0x006a, 0x006b, 0x006c - }; 
- int i; - - rtl_writephy(tp, 0x1f, 0x0002); - - val &= 0xff00; - for (i = 0; i < ARRAY_SIZE(set); i++) - rtl_writephy(tp, 0x0d, val | set[i]); - } - } else { - phy_write_paged(tp->phydev, 0x0002, 0x05, 0x2642); - r8168d_phy_param(tp->phydev, 0x8330, 0xffff, 0x2642); - } - - /* Fine tune PLL performance */ - rtl_writephy(tp, 0x1f, 0x0002); - rtl_w0w1_phy(tp, 0x02, 0x0100, 0x0600); - rtl_w0w1_phy(tp, 0x03, 0x0000, 0xe000); - - /* Switching regulator Slew rate */ - rtl_writephy(tp, 0x1f, 0x0002); - rtl_patchphy(tp, 0x0f, 0x0017); - rtl_writephy(tp, 0x1f, 0x0000); - - rtl8168d_apply_firmware_cond(tp, 0xb300); -} - -static void rtl8168d_3_hw_phy_config(struct rtl8169_private *tp) -{ - static const struct phy_reg phy_reg_init[] = { - { 0x1f, 0x0002 }, - { 0x10, 0x0008 }, - { 0x0d, 0x006c }, - - { 0x1f, 0x0000 }, - { 0x0d, 0xf880 }, - - { 0x1f, 0x0001 }, - { 0x17, 0x0cc0 }, - - { 0x1f, 0x0001 }, - { 0x0b, 0xa4d8 }, - { 0x09, 0x281c }, - { 0x07, 0x2883 }, - { 0x0a, 0x6b35 }, - { 0x1d, 0x3da4 }, - { 0x1c, 0xeffd }, - { 0x14, 0x7f52 }, - { 0x18, 0x7fc6 }, - { 0x08, 0x0601 }, - { 0x06, 0x4063 }, - { 0x10, 0xf074 }, - { 0x1f, 0x0003 }, - { 0x13, 0x0789 }, - { 0x12, 0xf4bd }, - { 0x1a, 0x04fd }, - { 0x14, 0x84b0 }, - { 0x1f, 0x0000 }, - { 0x00, 0x9200 }, - - { 0x1f, 0x0005 }, - { 0x01, 0x0340 }, - { 0x1f, 0x0001 }, - { 0x04, 0x4000 }, - { 0x03, 0x1d21 }, - { 0x02, 0x0c32 }, - { 0x01, 0x0200 }, - { 0x00, 0x5554 }, - { 0x04, 0x4800 }, - { 0x04, 0x4000 }, - { 0x04, 0xf000 }, - { 0x03, 0xdf01 }, - { 0x02, 0xdf20 }, - { 0x01, 0x101a }, - { 0x00, 0xa0ff }, - { 0x04, 0xf800 }, - { 0x04, 0xf000 }, - { 0x1f, 0x0000 }, - }; - - rtl_writephy_batch(tp, phy_reg_init); - - r8168d_modify_extpage(tp->phydev, 0x0023, 0x16, 0xffff, 0x0000); -} - -static void rtl8168d_4_hw_phy_config(struct rtl8169_private *tp) -{ - phy_write_paged(tp->phydev, 0x0001, 0x17, 0x0cc0); - r8168d_modify_extpage(tp->phydev, 0x002d, 0x18, 0xffff, 0x0040); - phy_set_bits(tp->phydev, 0x0d, BIT(5)); -} - -static void 
rtl8168e_1_hw_phy_config(struct rtl8169_private *tp) -{ - static const struct phy_reg phy_reg_init[] = { - /* Channel estimation fine tune */ - { 0x1f, 0x0001 }, - { 0x0b, 0x6c20 }, - { 0x07, 0x2872 }, - { 0x1c, 0xefff }, - { 0x1f, 0x0003 }, - { 0x14, 0x6420 }, - { 0x1f, 0x0000 }, - }; - struct phy_device *phydev = tp->phydev; - - rtl_apply_firmware(tp); - - /* Enable Delay cap */ - r8168d_phy_param(phydev, 0x8b80, 0xffff, 0xc896); - - rtl_writephy_batch(tp, phy_reg_init); - - /* Update PFM & 10M TX idle timer */ - r8168d_modify_extpage(phydev, 0x002f, 0x15, 0xffff, 0x1919); - - r8168d_modify_extpage(phydev, 0x00ac, 0x18, 0xffff, 0x0006); - - /* DCO enable for 10M IDLE Power */ - r8168d_modify_extpage(phydev, 0x0023, 0x17, 0x0000, 0x0006); - - /* For impedance matching */ - phy_modify_paged(phydev, 0x0002, 0x08, 0x7f00, 0x8000); - - /* PHY auto speed down */ - r8168d_modify_extpage(phydev, 0x002d, 0x18, 0x0000, 0x0050); - phy_set_bits(phydev, 0x14, BIT(15)); - - r8168d_phy_param(phydev, 0x8b86, 0x0000, 0x0001); - r8168d_phy_param(phydev, 0x8b85, 0x2000, 0x0000); - - r8168d_modify_extpage(phydev, 0x0020, 0x15, 0x1100, 0x0000); - phy_write_paged(phydev, 0x0006, 0x00, 0x5a00); - - phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, 0x0000); -} - static void rtl_rar_exgmac_set(struct rtl8169_private *tp, u8 *addr) { const u16 w[] = { @@ -2917,698 +2211,20 @@ static void rtl_rar_exgmac_set(struct rtl8169_private *tp, u8 *addr) rtl_eri_write(tp, 0xf4, ERIAR_MASK_1111, w[1] | (w[2] << 16)); } -static void rtl8168e_2_hw_phy_config(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - - rtl_apply_firmware(tp); - - /* Enable Delay cap */ - r8168d_modify_extpage(phydev, 0x00ac, 0x18, 0xffff, 0x0006); - - /* Channel estimation fine tune */ - phy_write_paged(phydev, 0x0003, 0x09, 0xa20f); - - /* Green Setting */ - r8168d_phy_param(phydev, 0x8b5b, 0xffff, 0x9222); - r8168d_phy_param(phydev, 0x8b6d, 0xffff, 0x8000); - r8168d_phy_param(phydev, 0x8b76, 0xffff, 
0x8000); - - /* For 4-corner performance improve */ - rtl_writephy(tp, 0x1f, 0x0005); - rtl_writephy(tp, 0x05, 0x8b80); - rtl_w0w1_phy(tp, 0x17, 0x0006, 0x0000); - rtl_writephy(tp, 0x1f, 0x0000); - - /* PHY auto speed down */ - r8168d_modify_extpage(phydev, 0x002d, 0x18, 0x0000, 0x0010); - phy_set_bits(phydev, 0x14, BIT(15)); - - /* improve 10M EEE waveform */ - r8168d_phy_param(phydev, 0x8b86, 0x0000, 0x0001); - - /* Improve 2-pair detection performance */ - r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x4000); - - rtl8168f_config_eee_phy(tp); - rtl_enable_eee(tp); - - /* Green feature */ - rtl_writephy(tp, 0x1f, 0x0003); - rtl_w0w1_phy(tp, 0x19, 0x0001, 0x0000); - rtl_w0w1_phy(tp, 0x10, 0x0400, 0x0000); - rtl_writephy(tp, 0x1f, 0x0000); - rtl_writephy(tp, 0x1f, 0x0005); - rtl_w0w1_phy(tp, 0x01, 0x0100, 0x0000); - rtl_writephy(tp, 0x1f, 0x0000); - - /* Broken BIOS workaround: feed GigaMAC registers with MAC address. */ - rtl_rar_exgmac_set(tp, tp->dev->dev_addr); -} - -static void rtl8168f_hw_phy_config(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - - /* For 4-corner performance improve */ - r8168d_phy_param(phydev, 0x8b80, 0x0000, 0x0006); - - /* PHY auto speed down */ - r8168d_modify_extpage(phydev, 0x002d, 0x18, 0x0000, 0x0010); - phy_set_bits(phydev, 0x14, BIT(15)); - - /* Improve 10M EEE waveform */ - r8168d_phy_param(phydev, 0x8b86, 0x0000, 0x0001); - - rtl8168f_config_eee_phy(tp); - rtl_enable_eee(tp); -} - -static void rtl8168f_1_hw_phy_config(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - - rtl_apply_firmware(tp); - - /* Channel estimation fine tune */ - phy_write_paged(phydev, 0x0003, 0x09, 0xa20f); - - /* Modify green table for giga & fnet */ - r8168d_phy_param(phydev, 0x8b55, 0xffff, 0x0000); - r8168d_phy_param(phydev, 0x8b5e, 0xffff, 0x0000); - r8168d_phy_param(phydev, 0x8b67, 0xffff, 0x0000); - r8168d_phy_param(phydev, 0x8b70, 0xffff, 0x0000); - r8168d_modify_extpage(phydev, 0x0078, 0x17, 0xffff, 
0x0000); - r8168d_modify_extpage(phydev, 0x0078, 0x19, 0xffff, 0x00fb); - - /* Modify green table for 10M */ - r8168d_phy_param(phydev, 0x8b79, 0xffff, 0xaa00); - - /* Disable hiimpedance detection (RTCT) */ - phy_write_paged(phydev, 0x0003, 0x01, 0x328a); - - rtl8168f_hw_phy_config(tp); - - /* Improve 2-pair detection performance */ - r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x4000); -} - -static void rtl8168f_2_hw_phy_config(struct rtl8169_private *tp) +u16 rtl8168h_2_get_adc_bias_ioffset(struct rtl8169_private *tp) { - rtl_apply_firmware(tp); - - rtl8168f_hw_phy_config(tp); -} - -static void rtl8411_hw_phy_config(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - - rtl_apply_firmware(tp); - - rtl8168f_hw_phy_config(tp); - - /* Improve 2-pair detection performance */ - r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x4000); - - /* Channel estimation fine tune */ - phy_write_paged(phydev, 0x0003, 0x09, 0xa20f); - - /* Modify green table for giga & fnet */ - r8168d_phy_param(phydev, 0x8b55, 0xffff, 0x0000); - r8168d_phy_param(phydev, 0x8b5e, 0xffff, 0x0000); - r8168d_phy_param(phydev, 0x8b67, 0xffff, 0x0000); - r8168d_phy_param(phydev, 0x8b70, 0xffff, 0x0000); - r8168d_modify_extpage(phydev, 0x0078, 0x17, 0xffff, 0x0000); - r8168d_modify_extpage(phydev, 0x0078, 0x19, 0xffff, 0x00aa); - - /* Modify green table for 10M */ - r8168d_phy_param(phydev, 0x8b79, 0xffff, 0xaa00); - - /* Disable hiimpedance detection (RTCT) */ - phy_write_paged(phydev, 0x0003, 0x01, 0x328a); - - /* Modify green table for giga */ - r8168d_phy_param(phydev, 0x8b54, 0x0800, 0x0000); - r8168d_phy_param(phydev, 0x8b5d, 0x0800, 0x0000); - r8168d_phy_param(phydev, 0x8a7c, 0x0100, 0x0000); - r8168d_phy_param(phydev, 0x8a7f, 0x0000, 0x0100); - r8168d_phy_param(phydev, 0x8a82, 0x0100, 0x0000); - r8168d_phy_param(phydev, 0x8a85, 0x0100, 0x0000); - r8168d_phy_param(phydev, 0x8a88, 0x0100, 0x0000); - - /* uc same-seed solution */ - r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x8000); - - 
/* Green feature */ - rtl_writephy(tp, 0x1f, 0x0003); - rtl_w0w1_phy(tp, 0x19, 0x0000, 0x0001); - rtl_w0w1_phy(tp, 0x10, 0x0000, 0x0400); - rtl_writephy(tp, 0x1f, 0x0000); -} - -static void rtl8168g_disable_aldps(struct rtl8169_private *tp) -{ - phy_modify_paged(tp->phydev, 0x0a43, 0x10, BIT(2), 0); -} - -static void rtl8168g_phy_adjust_10m_aldps(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - - phy_modify_paged(phydev, 0x0bcc, 0x14, BIT(8), 0); - phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(7) | BIT(6)); - r8168g_phy_param(phydev, 0x8084, 0x6000, 0x0000); - phy_modify_paged(phydev, 0x0a43, 0x10, 0x0000, 0x1003); -} - -static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp) -{ - int ret; - - rtl_apply_firmware(tp); - - ret = phy_read_paged(tp->phydev, 0x0a46, 0x10); - if (ret & BIT(8)) - phy_modify_paged(tp->phydev, 0x0bcc, 0x12, BIT(15), 0); - else - phy_modify_paged(tp->phydev, 0x0bcc, 0x12, 0, BIT(15)); - - ret = phy_read_paged(tp->phydev, 0x0a46, 0x13); - if (ret & BIT(8)) - phy_modify_paged(tp->phydev, 0x0c41, 0x15, 0, BIT(1)); - else - phy_modify_paged(tp->phydev, 0x0c41, 0x15, BIT(1), 0); - - /* Enable PHY auto speed down */ - phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(3) | BIT(2)); - - rtl8168g_phy_adjust_10m_aldps(tp); - - /* EEE auto-fallback function */ - phy_modify_paged(tp->phydev, 0x0a4b, 0x11, 0, BIT(2)); - - /* Enable UC LPF tune function */ - r8168g_phy_param(tp->phydev, 0x8012, 0x0000, 0x8000); - - phy_modify_paged(tp->phydev, 0x0c42, 0x11, BIT(13), BIT(14)); - - /* Improve SWR Efficiency */ - rtl_writephy(tp, 0x1f, 0x0bcd); - rtl_writephy(tp, 0x14, 0x5065); - rtl_writephy(tp, 0x14, 0xd065); - rtl_writephy(tp, 0x1f, 0x0bc8); - rtl_writephy(tp, 0x11, 0x5655); - rtl_writephy(tp, 0x1f, 0x0bcd); - rtl_writephy(tp, 0x14, 0x1065); - rtl_writephy(tp, 0x14, 0x9065); - rtl_writephy(tp, 0x14, 0x1065); - rtl_writephy(tp, 0x1f, 0x0000); - - rtl8168g_disable_aldps(tp); - rtl8168g_config_eee_phy(tp); - 
rtl_enable_eee(tp); -} - -static void rtl8168g_2_hw_phy_config(struct rtl8169_private *tp) -{ - rtl_apply_firmware(tp); - rtl8168g_config_eee_phy(tp); - rtl_enable_eee(tp); -} - -static void rtl8168h_1_hw_phy_config(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - u16 dout_tapbin; - u32 data; - - rtl_apply_firmware(tp); - - /* CHN EST parameters adjust - giga master */ - r8168g_phy_param(phydev, 0x809b, 0xf800, 0x8000); - r8168g_phy_param(phydev, 0x80a2, 0xff00, 0x8000); - r8168g_phy_param(phydev, 0x80a4, 0xff00, 0x8500); - r8168g_phy_param(phydev, 0x809c, 0xff00, 0xbd00); - - /* CHN EST parameters adjust - giga slave */ - r8168g_phy_param(phydev, 0x80ad, 0xf800, 0x7000); - r8168g_phy_param(phydev, 0x80b4, 0xff00, 0x5000); - r8168g_phy_param(phydev, 0x80ac, 0xff00, 0x4000); - - /* CHN EST parameters adjust - fnet */ - r8168g_phy_param(phydev, 0x808e, 0xff00, 0x1200); - r8168g_phy_param(phydev, 0x8090, 0xff00, 0xe500); - r8168g_phy_param(phydev, 0x8092, 0xff00, 0x9f00); - - /* enable R-tune & PGA-retune function */ - dout_tapbin = 0; - data = phy_read_paged(phydev, 0x0a46, 0x13); - data &= 3; - data <<= 2; - dout_tapbin |= data; - data = phy_read_paged(phydev, 0x0a46, 0x12); - data &= 0xc000; - data >>= 14; - dout_tapbin |= data; - dout_tapbin = ~(dout_tapbin^0x08); - dout_tapbin <<= 12; - dout_tapbin &= 0xf000; - - r8168g_phy_param(phydev, 0x827a, 0xf000, dout_tapbin); - r8168g_phy_param(phydev, 0x827b, 0xf000, dout_tapbin); - r8168g_phy_param(phydev, 0x827c, 0xf000, dout_tapbin); - r8168g_phy_param(phydev, 0x827d, 0xf000, dout_tapbin); - r8168g_phy_param(phydev, 0x0811, 0x0000, 0x0800); - phy_modify_paged(phydev, 0x0a42, 0x16, 0x0000, 0x0002); - - /* enable GPHY 10M */ - phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(11)); - - /* SAR ADC performance */ - phy_modify_paged(tp->phydev, 0x0bca, 0x17, BIT(12) | BIT(13), BIT(14)); - - r8168g_phy_param(phydev, 0x803f, 0x3000, 0x0000); - r8168g_phy_param(phydev, 0x8047, 0x3000, 0x0000); - 
r8168g_phy_param(phydev, 0x804f, 0x3000, 0x0000); - r8168g_phy_param(phydev, 0x8057, 0x3000, 0x0000); - r8168g_phy_param(phydev, 0x805f, 0x3000, 0x0000); - r8168g_phy_param(phydev, 0x8067, 0x3000, 0x0000); - r8168g_phy_param(phydev, 0x806f, 0x3000, 0x0000); - - /* disable phy pfm mode */ - phy_modify_paged(tp->phydev, 0x0a44, 0x11, BIT(7), 0); - - rtl8168g_disable_aldps(tp); - rtl8168h_config_eee_phy(tp); - rtl_enable_eee(tp); -} - -static void rtl8168h_2_hw_phy_config(struct rtl8169_private *tp) -{ - u16 ioffset_p3, ioffset_p2, ioffset_p1, ioffset_p0; - struct phy_device *phydev = tp->phydev; - u16 rlen; - u32 data; - - rtl_apply_firmware(tp); - - /* CHIN EST parameter update */ - r8168g_phy_param(phydev, 0x808a, 0x003f, 0x000a); - - /* enable R-tune & PGA-retune function */ - r8168g_phy_param(phydev, 0x0811, 0x0000, 0x0800); - phy_modify_paged(phydev, 0x0a42, 0x16, 0x0000, 0x0002); - - /* enable GPHY 10M */ - phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(11)); + u16 data1, data2, ioffset; r8168_mac_ocp_write(tp, 0xdd02, 0x807d); - data = r8168_mac_ocp_read(tp, 0xdd02); - ioffset_p3 = ((data & 0x80)>>7); - ioffset_p3 <<= 3; - - data = r8168_mac_ocp_read(tp, 0xdd00); - ioffset_p3 |= ((data & (0xe000))>>13); - ioffset_p2 = ((data & (0x1e00))>>9); - ioffset_p1 = ((data & (0x01e0))>>5); - ioffset_p0 = ((data & 0x0010)>>4); - ioffset_p0 <<= 3; - ioffset_p0 |= (data & (0x07)); - data = (ioffset_p3<<12)|(ioffset_p2<<8)|(ioffset_p1<<4)|(ioffset_p0); - - if ((ioffset_p3 != 0x0f) || (ioffset_p2 != 0x0f) || - (ioffset_p1 != 0x0f) || (ioffset_p0 != 0x0f)) - phy_write_paged(phydev, 0x0bcf, 0x16, data); - - /* Modify rlen (TX LPF corner frequency) level */ - data = phy_read_paged(phydev, 0x0bcd, 0x16); - data &= 0x000f; - rlen = 0; - if (data > 3) - rlen = data - 3; - data = rlen | (rlen<<4) | (rlen<<8) | (rlen<<12); - phy_write_paged(phydev, 0x0bcd, 0x17, data); - - /* disable phy pfm mode */ - phy_modify_paged(phydev, 0x0a44, 0x11, BIT(7), 0); - - 
rtl8168g_disable_aldps(tp); - rtl8168g_config_eee_phy(tp); - rtl_enable_eee(tp); -} - -static void rtl8168ep_1_hw_phy_config(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - - /* Enable PHY auto speed down */ - phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(3) | BIT(2)); - - rtl8168g_phy_adjust_10m_aldps(tp); - - /* Enable EEE auto-fallback function */ - phy_modify_paged(phydev, 0x0a4b, 0x11, 0, BIT(2)); - - /* Enable UC LPF tune function */ - r8168g_phy_param(phydev, 0x8012, 0x0000, 0x8000); - - /* set rg_sel_sdm_rate */ - phy_modify_paged(phydev, 0x0c42, 0x11, BIT(13), BIT(14)); - - rtl8168g_disable_aldps(tp); - rtl8168g_config_eee_phy(tp); - rtl_enable_eee(tp); -} - -static void rtl8168ep_2_hw_phy_config(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - - rtl8168g_phy_adjust_10m_aldps(tp); - - /* Enable UC LPF tune function */ - r8168g_phy_param(phydev, 0x8012, 0x0000, 0x8000); - - /* Set rg_sel_sdm_rate */ - phy_modify_paged(tp->phydev, 0x0c42, 0x11, BIT(13), BIT(14)); - - /* Channel estimation parameters */ - r8168g_phy_param(phydev, 0x80f3, 0xff00, 0x8b00); - r8168g_phy_param(phydev, 0x80f0, 0xff00, 0x3a00); - r8168g_phy_param(phydev, 0x80ef, 0xff00, 0x0500); - r8168g_phy_param(phydev, 0x80f6, 0xff00, 0x6e00); - r8168g_phy_param(phydev, 0x80ec, 0xff00, 0x6800); - r8168g_phy_param(phydev, 0x80ed, 0xff00, 0x7c00); - r8168g_phy_param(phydev, 0x80f2, 0xff00, 0xf400); - r8168g_phy_param(phydev, 0x80f4, 0xff00, 0x8500); - r8168g_phy_param(phydev, 0x8110, 0xff00, 0xa800); - r8168g_phy_param(phydev, 0x810f, 0xff00, 0x1d00); - r8168g_phy_param(phydev, 0x8111, 0xff00, 0xf500); - r8168g_phy_param(phydev, 0x8113, 0xff00, 0x6100); - r8168g_phy_param(phydev, 0x8115, 0xff00, 0x9200); - r8168g_phy_param(phydev, 0x810e, 0xff00, 0x0400); - r8168g_phy_param(phydev, 0x810c, 0xff00, 0x7c00); - r8168g_phy_param(phydev, 0x810b, 0xff00, 0x5a00); - r8168g_phy_param(phydev, 0x80d1, 0xff00, 0xff00); - r8168g_phy_param(phydev, 0x80cd, 
0xff00, 0x9e00); - r8168g_phy_param(phydev, 0x80d3, 0xff00, 0x0e00); - r8168g_phy_param(phydev, 0x80d5, 0xff00, 0xca00); - r8168g_phy_param(phydev, 0x80d7, 0xff00, 0x8400); - - /* Force PWM-mode */ - rtl_writephy(tp, 0x1f, 0x0bcd); - rtl_writephy(tp, 0x14, 0x5065); - rtl_writephy(tp, 0x14, 0xd065); - rtl_writephy(tp, 0x1f, 0x0bc8); - rtl_writephy(tp, 0x12, 0x00ed); - rtl_writephy(tp, 0x1f, 0x0bcd); - rtl_writephy(tp, 0x14, 0x1065); - rtl_writephy(tp, 0x14, 0x9065); - rtl_writephy(tp, 0x14, 0x1065); - rtl_writephy(tp, 0x1f, 0x0000); - - rtl8168g_disable_aldps(tp); - rtl8168g_config_eee_phy(tp); - rtl_enable_eee(tp); -} - -static void rtl8117_hw_phy_config(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - - /* CHN EST parameters adjust - fnet */ - r8168g_phy_param(phydev, 0x808e, 0xff00, 0x4800); - r8168g_phy_param(phydev, 0x8090, 0xff00, 0xcc00); - r8168g_phy_param(phydev, 0x8092, 0xff00, 0xb000); - - r8168g_phy_param(phydev, 0x8088, 0xff00, 0x6000); - r8168g_phy_param(phydev, 0x808b, 0x3f00, 0x0b00); - r8168g_phy_param(phydev, 0x808d, 0x1f00, 0x0600); - r8168g_phy_param(phydev, 0x808c, 0xff00, 0xb000); - r8168g_phy_param(phydev, 0x80a0, 0xff00, 0x2800); - r8168g_phy_param(phydev, 0x80a2, 0xff00, 0x5000); - r8168g_phy_param(phydev, 0x809b, 0xf800, 0xb000); - r8168g_phy_param(phydev, 0x809a, 0xff00, 0x4b00); - r8168g_phy_param(phydev, 0x809d, 0x3f00, 0x0800); - r8168g_phy_param(phydev, 0x80a1, 0xff00, 0x7000); - r8168g_phy_param(phydev, 0x809f, 0x1f00, 0x0300); - r8168g_phy_param(phydev, 0x809e, 0xff00, 0x8800); - r8168g_phy_param(phydev, 0x80b2, 0xff00, 0x2200); - r8168g_phy_param(phydev, 0x80ad, 0xf800, 0x9800); - r8168g_phy_param(phydev, 0x80af, 0x3f00, 0x0800); - r8168g_phy_param(phydev, 0x80b3, 0xff00, 0x6f00); - r8168g_phy_param(phydev, 0x80b1, 0x1f00, 0x0300); - r8168g_phy_param(phydev, 0x80b0, 0xff00, 0x9300); - - r8168g_phy_param(phydev, 0x8011, 0x0000, 0x0800); - - /* enable GPHY 10M */ - phy_modify_paged(tp->phydev, 0x0a44, 0x11, 
0, BIT(11)); - - r8168g_phy_param(phydev, 0x8016, 0x0000, 0x0400); - - rtl8168g_disable_aldps(tp); - rtl8168h_config_eee_phy(tp); - rtl_enable_eee(tp); -} - -static void rtl8102e_hw_phy_config(struct rtl8169_private *tp) -{ - static const struct phy_reg phy_reg_init[] = { - { 0x1f, 0x0003 }, - { 0x08, 0x441d }, - { 0x01, 0x9100 }, - { 0x1f, 0x0000 } - }; - - rtl_writephy(tp, 0x1f, 0x0000); - rtl_patchphy(tp, 0x11, 1 << 12); - rtl_patchphy(tp, 0x19, 1 << 13); - rtl_patchphy(tp, 0x10, 1 << 15); - - rtl_writephy_batch(tp, phy_reg_init); -} - -static void rtl8105e_hw_phy_config(struct rtl8169_private *tp) -{ - /* Disable ALDPS before ram code */ - phy_write(tp->phydev, 0x18, 0x0310); - msleep(100); - - rtl_apply_firmware(tp); - - phy_write_paged(tp->phydev, 0x0005, 0x1a, 0x0000); - phy_write_paged(tp->phydev, 0x0004, 0x1c, 0x0000); - phy_write_paged(tp->phydev, 0x0001, 0x15, 0x7701); -} - -static void rtl8402_hw_phy_config(struct rtl8169_private *tp) -{ - /* Disable ALDPS before setting firmware */ - phy_write(tp->phydev, 0x18, 0x0310); - msleep(20); - - rtl_apply_firmware(tp); - - /* EEE setting */ - rtl_eri_write(tp, 0x1b0, ERIAR_MASK_0011, 0x0000); - rtl_writephy(tp, 0x1f, 0x0004); - rtl_writephy(tp, 0x10, 0x401f); - rtl_writephy(tp, 0x19, 0x7030); - rtl_writephy(tp, 0x1f, 0x0000); -} - -static void rtl8106e_hw_phy_config(struct rtl8169_private *tp) -{ - static const struct phy_reg phy_reg_init[] = { - { 0x1f, 0x0004 }, - { 0x10, 0xc07f }, - { 0x19, 0x7030 }, - { 0x1f, 0x0000 } - }; + data1 = r8168_mac_ocp_read(tp, 0xdd02); + data2 = r8168_mac_ocp_read(tp, 0xdd00); - /* Disable ALDPS before ram code */ - phy_write(tp->phydev, 0x18, 0x0310); - msleep(100); + ioffset = (data2 >> 1) & 0x7ff8; + ioffset |= data2 & 0x0007; + if (data1 & BIT(7)) + ioffset |= BIT(15); - rtl_apply_firmware(tp); - - rtl_eri_write(tp, 0x1b0, ERIAR_MASK_0011, 0x0000); - rtl_writephy_batch(tp, phy_reg_init); - - rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x0000); -} - -static void 
rtl8125_1_hw_phy_config(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - - phy_modify_paged(phydev, 0xad4, 0x10, 0x03ff, 0x0084); - phy_modify_paged(phydev, 0xad4, 0x17, 0x0000, 0x0010); - phy_modify_paged(phydev, 0xad1, 0x13, 0x03ff, 0x0006); - phy_modify_paged(phydev, 0xad3, 0x11, 0x003f, 0x0006); - phy_modify_paged(phydev, 0xac0, 0x14, 0x0000, 0x1100); - phy_modify_paged(phydev, 0xac8, 0x15, 0xf000, 0x7000); - phy_modify_paged(phydev, 0xad1, 0x14, 0x0000, 0x0400); - phy_modify_paged(phydev, 0xad1, 0x15, 0x0000, 0x03ff); - phy_modify_paged(phydev, 0xad1, 0x16, 0x0000, 0x03ff); - - r8168g_phy_param(phydev, 0x80ea, 0xff00, 0xc400); - r8168g_phy_param(phydev, 0x80eb, 0x0700, 0x0300); - r8168g_phy_param(phydev, 0x80f8, 0xff00, 0x1c00); - r8168g_phy_param(phydev, 0x80f1, 0xff00, 0x3000); - r8168g_phy_param(phydev, 0x80fe, 0xff00, 0xa500); - r8168g_phy_param(phydev, 0x8102, 0xff00, 0x5000); - r8168g_phy_param(phydev, 0x8105, 0xff00, 0x3300); - r8168g_phy_param(phydev, 0x8100, 0xff00, 0x7000); - r8168g_phy_param(phydev, 0x8104, 0xff00, 0xf000); - r8168g_phy_param(phydev, 0x8106, 0xff00, 0x6500); - r8168g_phy_param(phydev, 0x80dc, 0xff00, 0xed00); - r8168g_phy_param(phydev, 0x80df, 0x0000, 0x0100); - r8168g_phy_param(phydev, 0x80e1, 0x0100, 0x0000); - - phy_modify_paged(phydev, 0xbf0, 0x13, 0x003f, 0x0038); - r8168g_phy_param(phydev, 0x819f, 0xffff, 0xd0b6); - - phy_write_paged(phydev, 0xbc3, 0x12, 0x5555); - phy_modify_paged(phydev, 0xbf0, 0x15, 0x0e00, 0x0a00); - phy_modify_paged(phydev, 0xa5c, 0x10, 0x0400, 0x0000); - phy_modify_paged(phydev, 0xa44, 0x11, 0x0000, 0x0800); - - rtl8125_config_eee_phy(tp); - rtl_enable_eee(tp); -} - -static void rtl8125_2_hw_phy_config(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - int i; - - phy_modify_paged(phydev, 0xad4, 0x17, 0x0000, 0x0010); - phy_modify_paged(phydev, 0xad1, 0x13, 0x03ff, 0x03ff); - phy_modify_paged(phydev, 0xad3, 0x11, 0x003f, 0x0006); - phy_modify_paged(phydev, 
0xac0, 0x14, 0x1100, 0x0000); - phy_modify_paged(phydev, 0xacc, 0x10, 0x0003, 0x0002); - phy_modify_paged(phydev, 0xad4, 0x10, 0x00e7, 0x0044); - phy_modify_paged(phydev, 0xac1, 0x12, 0x0080, 0x0000); - phy_modify_paged(phydev, 0xac8, 0x10, 0x0300, 0x0000); - phy_modify_paged(phydev, 0xac5, 0x17, 0x0007, 0x0002); - phy_write_paged(phydev, 0xad4, 0x16, 0x00a8); - phy_write_paged(phydev, 0xac5, 0x16, 0x01ff); - phy_modify_paged(phydev, 0xac8, 0x15, 0x00f0, 0x0030); - - phy_write(phydev, 0x1f, 0x0b87); - phy_write(phydev, 0x16, 0x80a2); - phy_write(phydev, 0x17, 0x0153); - phy_write(phydev, 0x16, 0x809c); - phy_write(phydev, 0x17, 0x0153); - phy_write(phydev, 0x1f, 0x0000); - - phy_write(phydev, 0x1f, 0x0a43); - phy_write(phydev, 0x13, 0x81B3); - phy_write(phydev, 0x14, 0x0043); - phy_write(phydev, 0x14, 0x00A7); - phy_write(phydev, 0x14, 0x00D6); - phy_write(phydev, 0x14, 0x00EC); - phy_write(phydev, 0x14, 0x00F6); - phy_write(phydev, 0x14, 0x00FB); - phy_write(phydev, 0x14, 0x00FD); - phy_write(phydev, 0x14, 0x00FF); - phy_write(phydev, 0x14, 0x00BB); - phy_write(phydev, 0x14, 0x0058); - phy_write(phydev, 0x14, 0x0029); - phy_write(phydev, 0x14, 0x0013); - phy_write(phydev, 0x14, 0x0009); - phy_write(phydev, 0x14, 0x0004); - phy_write(phydev, 0x14, 0x0002); - for (i = 0; i < 25; i++) - phy_write(phydev, 0x14, 0x0000); - phy_write(phydev, 0x1f, 0x0000); - - r8168g_phy_param(phydev, 0x8257, 0xffff, 0x020F); - r8168g_phy_param(phydev, 0x80ea, 0xffff, 0x7843); - - rtl_apply_firmware(tp); - - phy_modify_paged(phydev, 0xd06, 0x14, 0x0000, 0x2000); - - r8168g_phy_param(phydev, 0x81a2, 0x0000, 0x0100); - - phy_modify_paged(phydev, 0xb54, 0x16, 0xff00, 0xdb00); - phy_modify_paged(phydev, 0xa45, 0x12, 0x0001, 0x0000); - phy_modify_paged(phydev, 0xa5d, 0x12, 0x0000, 0x0020); - phy_modify_paged(phydev, 0xad4, 0x17, 0x0010, 0x0000); - phy_modify_paged(phydev, 0xa86, 0x15, 0x0001, 0x0000); - phy_modify_paged(phydev, 0xa44, 0x11, 0x0000, 0x0800); - - rtl8125_config_eee_phy(tp); - 
rtl_enable_eee(tp); -} - -static void rtl_hw_phy_config(struct net_device *dev) -{ - static const rtl_generic_fct phy_configs[] = { - /* PCI devices. */ - [RTL_GIGA_MAC_VER_02] = rtl8169s_hw_phy_config, - [RTL_GIGA_MAC_VER_03] = rtl8169s_hw_phy_config, - [RTL_GIGA_MAC_VER_04] = rtl8169sb_hw_phy_config, - [RTL_GIGA_MAC_VER_05] = rtl8169scd_hw_phy_config, - [RTL_GIGA_MAC_VER_06] = rtl8169sce_hw_phy_config, - /* PCI-E devices. */ - [RTL_GIGA_MAC_VER_07] = rtl8102e_hw_phy_config, - [RTL_GIGA_MAC_VER_08] = rtl8102e_hw_phy_config, - [RTL_GIGA_MAC_VER_09] = rtl8102e_hw_phy_config, - [RTL_GIGA_MAC_VER_10] = NULL, - [RTL_GIGA_MAC_VER_11] = rtl8168bb_hw_phy_config, - [RTL_GIGA_MAC_VER_12] = rtl8168bef_hw_phy_config, - [RTL_GIGA_MAC_VER_13] = NULL, - [RTL_GIGA_MAC_VER_14] = NULL, - [RTL_GIGA_MAC_VER_15] = NULL, - [RTL_GIGA_MAC_VER_16] = NULL, - [RTL_GIGA_MAC_VER_17] = rtl8168bef_hw_phy_config, - [RTL_GIGA_MAC_VER_18] = rtl8168cp_1_hw_phy_config, - [RTL_GIGA_MAC_VER_19] = rtl8168c_1_hw_phy_config, - [RTL_GIGA_MAC_VER_20] = rtl8168c_2_hw_phy_config, - [RTL_GIGA_MAC_VER_21] = rtl8168c_3_hw_phy_config, - [RTL_GIGA_MAC_VER_22] = rtl8168c_3_hw_phy_config, - [RTL_GIGA_MAC_VER_23] = rtl8168cp_2_hw_phy_config, - [RTL_GIGA_MAC_VER_24] = rtl8168cp_2_hw_phy_config, - [RTL_GIGA_MAC_VER_25] = rtl8168d_1_hw_phy_config, - [RTL_GIGA_MAC_VER_26] = rtl8168d_2_hw_phy_config, - [RTL_GIGA_MAC_VER_27] = rtl8168d_3_hw_phy_config, - [RTL_GIGA_MAC_VER_28] = rtl8168d_4_hw_phy_config, - [RTL_GIGA_MAC_VER_29] = rtl8105e_hw_phy_config, - [RTL_GIGA_MAC_VER_30] = rtl8105e_hw_phy_config, - [RTL_GIGA_MAC_VER_31] = NULL, - [RTL_GIGA_MAC_VER_32] = rtl8168e_1_hw_phy_config, - [RTL_GIGA_MAC_VER_33] = rtl8168e_1_hw_phy_config, - [RTL_GIGA_MAC_VER_34] = rtl8168e_2_hw_phy_config, - [RTL_GIGA_MAC_VER_35] = rtl8168f_1_hw_phy_config, - [RTL_GIGA_MAC_VER_36] = rtl8168f_2_hw_phy_config, - [RTL_GIGA_MAC_VER_37] = rtl8402_hw_phy_config, - [RTL_GIGA_MAC_VER_38] = rtl8411_hw_phy_config, - [RTL_GIGA_MAC_VER_39] = 
rtl8106e_hw_phy_config, - [RTL_GIGA_MAC_VER_40] = rtl8168g_1_hw_phy_config, - [RTL_GIGA_MAC_VER_41] = NULL, - [RTL_GIGA_MAC_VER_42] = rtl8168g_2_hw_phy_config, - [RTL_GIGA_MAC_VER_43] = rtl8168g_2_hw_phy_config, - [RTL_GIGA_MAC_VER_44] = rtl8168g_2_hw_phy_config, - [RTL_GIGA_MAC_VER_45] = rtl8168h_1_hw_phy_config, - [RTL_GIGA_MAC_VER_46] = rtl8168h_2_hw_phy_config, - [RTL_GIGA_MAC_VER_47] = rtl8168h_1_hw_phy_config, - [RTL_GIGA_MAC_VER_48] = rtl8168h_2_hw_phy_config, - [RTL_GIGA_MAC_VER_49] = rtl8168ep_1_hw_phy_config, - [RTL_GIGA_MAC_VER_50] = rtl8168ep_2_hw_phy_config, - [RTL_GIGA_MAC_VER_51] = rtl8168ep_2_hw_phy_config, - [RTL_GIGA_MAC_VER_52] = rtl8117_hw_phy_config, - [RTL_GIGA_MAC_VER_60] = rtl8125_1_hw_phy_config, - [RTL_GIGA_MAC_VER_61] = rtl8125_2_hw_phy_config, - }; - struct rtl8169_private *tp = netdev_priv(dev); - - if (phy_configs[tp->mac_version]) - phy_configs[tp->mac_version](tp); + return ioffset; } static void rtl_schedule_task(struct rtl8169_private *tp, enum rtl_flag flag) @@ -3617,21 +2233,28 @@ static void rtl_schedule_task(struct rtl8169_private *tp, enum rtl_flag flag) schedule_work(&tp->wk.work); } -static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp) +static void rtl8169_init_phy(struct rtl8169_private *tp) { - rtl_hw_phy_config(dev); + r8169_hw_phy_config(tp, tp->phydev, tp->mac_version); if (tp->mac_version <= RTL_GIGA_MAC_VER_06) { pci_write_config_byte(tp->pci_dev, PCI_LATENCY_TIMER, 0x40); pci_write_config_byte(tp->pci_dev, PCI_CACHE_LINE_SIZE, 0x08); - netif_dbg(tp, drv, dev, - "Set MAC Reg C+CR Offset 0x82h = 0x01h\n"); + /* set undocumented MAC Reg C+CR Offset 0x82h */ RTL_W8(tp, 0x82, 0x01); } + if (tp->mac_version == RTL_GIGA_MAC_VER_05 && + tp->pci_dev->subsystem_vendor == PCI_VENDOR_ID_GIGABYTE && + tp->pci_dev->subsystem_device == 0xe000) + phy_write_paged(tp->phydev, 0x0001, 0x10, 0xf01b); + /* We may have called phy_speed_down before */ phy_speed_up(tp->phydev); + if (rtl_supports_eee(tp)) + 
rtl_enable_eee(tp); + genphy_soft_reset(tp->phydev); } @@ -4710,9 +3333,7 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp) rtl_pcie_state_l2l3_disable(tp); - rtl_writephy(tp, 0x1f, 0x0c42); - rg_saw_cnt = (rtl_readphy(tp, 0x13) & 0x3fff); - rtl_writephy(tp, 0x1f, 0x0000); + rg_saw_cnt = phy_read_paged(tp->phydev, 0x0c42, 0x13) & 0x3fff; if (rg_saw_cnt > 0) { u16 sw_cnt_1ms_ini; @@ -4887,7 +3508,7 @@ static void rtl_hw_start_8117(struct rtl8169_private *tp) r8168_mac_ocp_write(tp, 0xc09e, 0x0000); /* firmware is for MAC only */ - rtl_apply_firmware(tp); + r8169_apply_firmware(tp); rtl_hw_aspm_clkreq_enable(tp, true); } @@ -4991,6 +3612,9 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp) rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000); rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0e00, 0xff00); + /* disable EEE */ + rtl_eri_write(tp, 0x1b0, ERIAR_MASK_0011, 0x0000); + rtl_pcie_state_l2l3_disable(tp); } @@ -5005,6 +3629,11 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp) RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET); RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN); + rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x0000); + + /* disable EEE */ + rtl_eri_write(tp, 0x1b0, ERIAR_MASK_0011, 0x0000); + rtl_pcie_state_l2l3_disable(tp); rtl_hw_aspm_clkreq_enable(tp, true); } @@ -5222,11 +3851,8 @@ static void rtl_hw_start_8169(struct rtl8169_private *tp) tp->cp_cmd |= PCIMulRW; if (tp->mac_version == RTL_GIGA_MAC_VER_02 || - tp->mac_version == RTL_GIGA_MAC_VER_03) { - netif_dbg(tp, drv, tp->dev, - "Set MAC Reg C+CR Offset 0xe0. 
Bit 3 and Bit 14 MUST be 1\n"); - tp->cp_cmd |= (1 << 14); - } + tp->mac_version == RTL_GIGA_MAC_VER_03) + tp->cp_cmd |= EnAnaPLL; RTL_W16(tp, CPlusCmd, tp->cp_cmd); @@ -5435,7 +4061,7 @@ static void rtl_reset_work(struct rtl8169_private *tp) netif_wake_queue(dev); } -static void rtl8169_tx_timeout(struct net_device *dev) +static void rtl8169_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct rtl8169_private *tp = netdev_priv(dev); @@ -6240,7 +4866,7 @@ static int rtl_open(struct net_device *dev) napi_enable(&tp->napi); - rtl8169_init_phy(dev, tp); + rtl8169_init_phy(tp); rtl_pll_power_up(tp); @@ -6371,7 +4997,7 @@ static void __rtl8169_resume(struct net_device *dev) netif_device_attach(dev); rtl_pll_power_up(tp); - rtl8169_init_phy(dev, tp); + rtl8169_init_phy(tp); phy_start(tp->phydev); @@ -6825,6 +5451,15 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) int chipset, region; int jumbo_max, rc; + /* Some tools for creating an initramfs don't consider softdeps, then + * r8169.ko may be in initramfs, but realtek.ko not. Then the generic + * PHY driver is used that doesn't work with most chip versions. + */ + if (!driver_find("RTL8201CP Ethernet", &mdio_bus_type)) { + dev_err(&pdev->dev, "realtek.ko not loaded, maybe it needs to be added to initramfs?\n"); + return -ENOENT; + } + dev = devm_alloc_etherdev(&pdev->dev, sizeof (*tp)); if (!dev) return -ENOMEM; diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c new file mode 100644 index 000000000000..e367e77c773b --- /dev/null +++ b/drivers/net/ethernet/realtek/r8169_phy_config.c @@ -0,0 +1,1307 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * r8169_phy_config.c: RealTek 8169/8168/8101 ethernet driver. + * + * Copyright (c) 2002 ShuChen <shuchen@realtek.com.tw> + * Copyright (c) 2003 - 2007 Francois Romieu <romieu@fr.zoreil.com> + * Copyright (c) a lot of people too. Please respect their work. 
+ * + * See MAINTAINERS file for support contact information. + */ + +#include <linux/delay.h> +#include <linux/phy.h> + +#include "r8169.h" + +typedef void (*rtl_phy_cfg_fct)(struct rtl8169_private *tp, + struct phy_device *phydev); + +static void r8168d_modify_extpage(struct phy_device *phydev, int extpage, + int reg, u16 mask, u16 val) +{ + int oldpage = phy_select_page(phydev, 0x0007); + + __phy_write(phydev, 0x1e, extpage); + __phy_modify(phydev, reg, mask, val); + + phy_restore_page(phydev, oldpage, 0); +} + +static void r8168d_phy_param(struct phy_device *phydev, u16 parm, + u16 mask, u16 val) +{ + int oldpage = phy_select_page(phydev, 0x0005); + + __phy_write(phydev, 0x05, parm); + __phy_modify(phydev, 0x06, mask, val); + + phy_restore_page(phydev, oldpage, 0); +} + +static void r8168g_phy_param(struct phy_device *phydev, u16 parm, + u16 mask, u16 val) +{ + int oldpage = phy_select_page(phydev, 0x0a43); + + __phy_write(phydev, 0x13, parm); + __phy_modify(phydev, 0x14, mask, val); + + phy_restore_page(phydev, oldpage, 0); +} + +struct phy_reg { + u16 reg; + u16 val; +}; + +static void __rtl_writephy_batch(struct phy_device *phydev, + const struct phy_reg *regs, int len) +{ + phy_lock_mdio_bus(phydev); + + while (len-- > 0) { + __phy_write(phydev, regs->reg, regs->val); + regs++; + } + + phy_unlock_mdio_bus(phydev); +} + +#define rtl_writephy_batch(p, a) __rtl_writephy_batch(p, a, ARRAY_SIZE(a)) + +static void rtl8168f_config_eee_phy(struct phy_device *phydev) +{ + r8168d_modify_extpage(phydev, 0x0020, 0x15, 0, BIT(8)); + r8168d_phy_param(phydev, 0x8b85, 0, BIT(13)); +} + +static void rtl8168g_config_eee_phy(struct phy_device *phydev) +{ + phy_modify_paged(phydev, 0x0a43, 0x11, 0, BIT(4)); +} + +static void rtl8168h_config_eee_phy(struct phy_device *phydev) +{ + rtl8168g_config_eee_phy(phydev); + + phy_modify_paged(phydev, 0xa4a, 0x11, 0x0000, 0x0200); + phy_modify_paged(phydev, 0xa42, 0x14, 0x0000, 0x0080); +} + +static void rtl8125_config_eee_phy(struct 
phy_device *phydev) +{ + rtl8168h_config_eee_phy(phydev); + + phy_modify_paged(phydev, 0xa6d, 0x12, 0x0001, 0x0000); + phy_modify_paged(phydev, 0xa6d, 0x14, 0x0010, 0x0000); +} + +static void rtl8169s_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + static const struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0001 }, + { 0x06, 0x006e }, + { 0x08, 0x0708 }, + { 0x15, 0x4000 }, + { 0x18, 0x65c7 }, + + { 0x1f, 0x0001 }, + { 0x03, 0x00a1 }, + { 0x02, 0x0008 }, + { 0x01, 0x0120 }, + { 0x00, 0x1000 }, + { 0x04, 0x0800 }, + { 0x04, 0x0000 }, + + { 0x03, 0xff41 }, + { 0x02, 0xdf60 }, + { 0x01, 0x0140 }, + { 0x00, 0x0077 }, + { 0x04, 0x7800 }, + { 0x04, 0x7000 }, + + { 0x03, 0x802f }, + { 0x02, 0x4f02 }, + { 0x01, 0x0409 }, + { 0x00, 0xf0f9 }, + { 0x04, 0x9800 }, + { 0x04, 0x9000 }, + + { 0x03, 0xdf01 }, + { 0x02, 0xdf20 }, + { 0x01, 0xff95 }, + { 0x00, 0xba00 }, + { 0x04, 0xa800 }, + { 0x04, 0xa000 }, + + { 0x03, 0xff41 }, + { 0x02, 0xdf20 }, + { 0x01, 0x0140 }, + { 0x00, 0x00bb }, + { 0x04, 0xb800 }, + { 0x04, 0xb000 }, + + { 0x03, 0xdf41 }, + { 0x02, 0xdc60 }, + { 0x01, 0x6340 }, + { 0x00, 0x007d }, + { 0x04, 0xd800 }, + { 0x04, 0xd000 }, + + { 0x03, 0xdf01 }, + { 0x02, 0xdf20 }, + { 0x01, 0x100a }, + { 0x00, 0xa0ff }, + { 0x04, 0xf800 }, + { 0x04, 0xf000 }, + + { 0x1f, 0x0000 }, + { 0x0b, 0x0000 }, + { 0x00, 0x9200 } + }; + + rtl_writephy_batch(phydev, phy_reg_init); +} + +static void rtl8169sb_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + phy_write_paged(phydev, 0x0002, 0x01, 0x90d0); +} + +static void rtl8169scd_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + static const struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0001 }, + { 0x04, 0x0000 }, + { 0x03, 0x00a1 }, + { 0x02, 0x0008 }, + { 0x01, 0x0120 }, + { 0x00, 0x1000 }, + { 0x04, 0x0800 }, + { 0x04, 0x9000 }, + { 0x03, 0x802f }, + { 0x02, 0x4f02 }, + { 0x01, 0x0409 }, + { 0x00, 0xf099 }, + { 0x04, 0x9800 }, + { 0x04, 0xa000 }, + { 
0x03, 0xdf01 }, + { 0x02, 0xdf20 }, + { 0x01, 0xff95 }, + { 0x00, 0xba00 }, + { 0x04, 0xa800 }, + { 0x04, 0xf000 }, + { 0x03, 0xdf01 }, + { 0x02, 0xdf20 }, + { 0x01, 0x101a }, + { 0x00, 0xa0ff }, + { 0x04, 0xf800 }, + { 0x04, 0x0000 }, + { 0x1f, 0x0000 }, + + { 0x1f, 0x0001 }, + { 0x10, 0xf41b }, + { 0x14, 0xfb54 }, + { 0x18, 0xf5c7 }, + { 0x1f, 0x0000 }, + + { 0x1f, 0x0001 }, + { 0x17, 0x0cc0 }, + { 0x1f, 0x0000 } + }; + + rtl_writephy_batch(phydev, phy_reg_init); +} + +static void rtl8169sce_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + static const struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0001 }, + { 0x04, 0x0000 }, + { 0x03, 0x00a1 }, + { 0x02, 0x0008 }, + { 0x01, 0x0120 }, + { 0x00, 0x1000 }, + { 0x04, 0x0800 }, + { 0x04, 0x9000 }, + { 0x03, 0x802f }, + { 0x02, 0x4f02 }, + { 0x01, 0x0409 }, + { 0x00, 0xf099 }, + { 0x04, 0x9800 }, + { 0x04, 0xa000 }, + { 0x03, 0xdf01 }, + { 0x02, 0xdf20 }, + { 0x01, 0xff95 }, + { 0x00, 0xba00 }, + { 0x04, 0xa800 }, + { 0x04, 0xf000 }, + { 0x03, 0xdf01 }, + { 0x02, 0xdf20 }, + { 0x01, 0x101a }, + { 0x00, 0xa0ff }, + { 0x04, 0xf800 }, + { 0x04, 0x0000 }, + { 0x1f, 0x0000 }, + + { 0x1f, 0x0001 }, + { 0x0b, 0x8480 }, + { 0x1f, 0x0000 }, + + { 0x1f, 0x0001 }, + { 0x18, 0x67c7 }, + { 0x04, 0x2000 }, + { 0x03, 0x002f }, + { 0x02, 0x4360 }, + { 0x01, 0x0109 }, + { 0x00, 0x3022 }, + { 0x04, 0x2800 }, + { 0x1f, 0x0000 }, + + { 0x1f, 0x0001 }, + { 0x17, 0x0cc0 }, + { 0x1f, 0x0000 } + }; + + rtl_writephy_batch(phydev, phy_reg_init); +} + +static void rtl8168bb_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + phy_write(phydev, 0x1f, 0x0001); + phy_set_bits(phydev, 0x16, BIT(0)); + phy_write(phydev, 0x10, 0xf41b); + phy_write(phydev, 0x1f, 0x0000); +} + +static void rtl8168bef_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + phy_write_paged(phydev, 0x0001, 0x10, 0xf41b); +} + +static void rtl8168cp_1_hw_phy_config(struct rtl8169_private *tp, + struct 
phy_device *phydev) +{ + phy_write(phydev, 0x1d, 0x0f00); + phy_write_paged(phydev, 0x0002, 0x0c, 0x1ec8); +} + +static void rtl8168cp_2_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + phy_set_bits(phydev, 0x14, BIT(5)); + phy_set_bits(phydev, 0x0d, BIT(5)); + phy_write_paged(phydev, 0x0001, 0x1d, 0x3d98); +} + +static void rtl8168c_1_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + static const struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0001 }, + { 0x12, 0x2300 }, + { 0x1f, 0x0002 }, + { 0x00, 0x88d4 }, + { 0x01, 0x82b1 }, + { 0x03, 0x7002 }, + { 0x08, 0x9e30 }, + { 0x09, 0x01f0 }, + { 0x0a, 0x5500 }, + { 0x0c, 0x00c8 }, + { 0x1f, 0x0003 }, + { 0x12, 0xc096 }, + { 0x16, 0x000a }, + { 0x1f, 0x0000 }, + { 0x1f, 0x0000 }, + { 0x09, 0x2000 }, + { 0x09, 0x0000 } + }; + + rtl_writephy_batch(phydev, phy_reg_init); + + phy_set_bits(phydev, 0x14, BIT(5)); + phy_set_bits(phydev, 0x0d, BIT(5)); +} + +static void rtl8168c_2_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + static const struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0001 }, + { 0x12, 0x2300 }, + { 0x03, 0x802f }, + { 0x02, 0x4f02 }, + { 0x01, 0x0409 }, + { 0x00, 0xf099 }, + { 0x04, 0x9800 }, + { 0x04, 0x9000 }, + { 0x1d, 0x3d98 }, + { 0x1f, 0x0002 }, + { 0x0c, 0x7eb8 }, + { 0x06, 0x0761 }, + { 0x1f, 0x0003 }, + { 0x16, 0x0f0a }, + { 0x1f, 0x0000 } + }; + + rtl_writephy_batch(phydev, phy_reg_init); + + phy_set_bits(phydev, 0x16, BIT(0)); + phy_set_bits(phydev, 0x14, BIT(5)); + phy_set_bits(phydev, 0x0d, BIT(5)); +} + +static void rtl8168c_3_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + static const struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0001 }, + { 0x12, 0x2300 }, + { 0x1d, 0x3d98 }, + { 0x1f, 0x0002 }, + { 0x0c, 0x7eb8 }, + { 0x06, 0x5461 }, + { 0x1f, 0x0003 }, + { 0x16, 0x0f0a }, + { 0x1f, 0x0000 } + }; + + rtl_writephy_batch(phydev, phy_reg_init); + + phy_set_bits(phydev, 0x16, BIT(0)); + 
phy_set_bits(phydev, 0x14, BIT(5)); + phy_set_bits(phydev, 0x0d, BIT(5)); +} + +static const struct phy_reg rtl8168d_1_phy_reg_init_0[] = { + /* Channel Estimation */ + { 0x1f, 0x0001 }, + { 0x06, 0x4064 }, + { 0x07, 0x2863 }, + { 0x08, 0x059c }, + { 0x09, 0x26b4 }, + { 0x0a, 0x6a19 }, + { 0x0b, 0xdcc8 }, + { 0x10, 0xf06d }, + { 0x14, 0x7f68 }, + { 0x18, 0x7fd9 }, + { 0x1c, 0xf0ff }, + { 0x1d, 0x3d9c }, + { 0x1f, 0x0003 }, + { 0x12, 0xf49f }, + { 0x13, 0x070b }, + { 0x1a, 0x05ad }, + { 0x14, 0x94c0 }, + + /* + * Tx Error Issue + * Enhance line driver power + */ + { 0x1f, 0x0002 }, + { 0x06, 0x5561 }, + { 0x1f, 0x0005 }, + { 0x05, 0x8332 }, + { 0x06, 0x5561 }, + + /* + * Can not link to 1Gbps with bad cable + * Decrease SNR threshold form 21.07dB to 19.04dB + */ + { 0x1f, 0x0001 }, + { 0x17, 0x0cc0 }, + + { 0x1f, 0x0000 }, + { 0x0d, 0xf880 } +}; + +static const struct phy_reg rtl8168d_1_phy_reg_init_1[] = { + { 0x1f, 0x0002 }, + { 0x05, 0x669a }, + { 0x1f, 0x0005 }, + { 0x05, 0x8330 }, + { 0x06, 0x669a }, + { 0x1f, 0x0002 } +}; + +static void rtl8168d_apply_firmware_cond(struct rtl8169_private *tp, + struct phy_device *phydev, + u16 val) +{ + u16 reg_val; + + phy_write(phydev, 0x1f, 0x0005); + phy_write(phydev, 0x05, 0x001b); + reg_val = phy_read(phydev, 0x06); + phy_write(phydev, 0x1f, 0x0000); + + if (reg_val != val) + phydev_warn(phydev, "chipset not ready for firmware\n"); + else + r8169_apply_firmware(tp); +} + +static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_0); + + /* + * Rx Error Issue + * Fine Tune Switching regulator parameter + */ + phy_write(phydev, 0x1f, 0x0002); + phy_modify(phydev, 0x0b, 0x00ef, 0x0010); + phy_modify(phydev, 0x0c, 0x5d00, 0xa200); + + if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) { + int val; + + rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_1); + + val = phy_read(phydev, 0x0d); + + if ((val & 0x00ff) != 0x006c) { + static const u32 
set[] = { + 0x0065, 0x0066, 0x0067, 0x0068, + 0x0069, 0x006a, 0x006b, 0x006c + }; + int i; + + phy_write(phydev, 0x1f, 0x0002); + + val &= 0xff00; + for (i = 0; i < ARRAY_SIZE(set); i++) + phy_write(phydev, 0x0d, val | set[i]); + } + } else { + phy_write_paged(phydev, 0x0002, 0x05, 0x6662); + r8168d_phy_param(phydev, 0x8330, 0xffff, 0x6662); + } + + /* RSET couple improve */ + phy_write(phydev, 0x1f, 0x0002); + phy_set_bits(phydev, 0x0d, 0x0300); + phy_set_bits(phydev, 0x0f, 0x0010); + + /* Fine tune PLL performance */ + phy_write(phydev, 0x1f, 0x0002); + phy_modify(phydev, 0x02, 0x0600, 0x0100); + phy_clear_bits(phydev, 0x03, 0xe000); + phy_write(phydev, 0x1f, 0x0000); + + rtl8168d_apply_firmware_cond(tp, phydev, 0xbf00); +} + +static void rtl8168d_2_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_0); + + if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) { + int val; + + rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_1); + + val = phy_read(phydev, 0x0d); + if ((val & 0x00ff) != 0x006c) { + static const u32 set[] = { + 0x0065, 0x0066, 0x0067, 0x0068, + 0x0069, 0x006a, 0x006b, 0x006c + }; + int i; + + phy_write(phydev, 0x1f, 0x0002); + + val &= 0xff00; + for (i = 0; i < ARRAY_SIZE(set); i++) + phy_write(phydev, 0x0d, val | set[i]); + } + } else { + phy_write_paged(phydev, 0x0002, 0x05, 0x2642); + r8168d_phy_param(phydev, 0x8330, 0xffff, 0x2642); + } + + /* Fine tune PLL performance */ + phy_write(phydev, 0x1f, 0x0002); + phy_modify(phydev, 0x02, 0x0600, 0x0100); + phy_clear_bits(phydev, 0x03, 0xe000); + phy_write(phydev, 0x1f, 0x0000); + + /* Switching regulator Slew rate */ + phy_modify_paged(phydev, 0x0002, 0x0f, 0x0000, 0x0017); + + rtl8168d_apply_firmware_cond(tp, phydev, 0xb300); +} + +static void rtl8168d_3_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + static const struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0002 }, + { 0x10, 0x0008 }, + { 0x0d, 0x006c 
}, + + { 0x1f, 0x0000 }, + { 0x0d, 0xf880 }, + + { 0x1f, 0x0001 }, + { 0x17, 0x0cc0 }, + + { 0x1f, 0x0001 }, + { 0x0b, 0xa4d8 }, + { 0x09, 0x281c }, + { 0x07, 0x2883 }, + { 0x0a, 0x6b35 }, + { 0x1d, 0x3da4 }, + { 0x1c, 0xeffd }, + { 0x14, 0x7f52 }, + { 0x18, 0x7fc6 }, + { 0x08, 0x0601 }, + { 0x06, 0x4063 }, + { 0x10, 0xf074 }, + { 0x1f, 0x0003 }, + { 0x13, 0x0789 }, + { 0x12, 0xf4bd }, + { 0x1a, 0x04fd }, + { 0x14, 0x84b0 }, + { 0x1f, 0x0000 }, + { 0x00, 0x9200 }, + + { 0x1f, 0x0005 }, + { 0x01, 0x0340 }, + { 0x1f, 0x0001 }, + { 0x04, 0x4000 }, + { 0x03, 0x1d21 }, + { 0x02, 0x0c32 }, + { 0x01, 0x0200 }, + { 0x00, 0x5554 }, + { 0x04, 0x4800 }, + { 0x04, 0x4000 }, + { 0x04, 0xf000 }, + { 0x03, 0xdf01 }, + { 0x02, 0xdf20 }, + { 0x01, 0x101a }, + { 0x00, 0xa0ff }, + { 0x04, 0xf800 }, + { 0x04, 0xf000 }, + { 0x1f, 0x0000 }, + }; + + rtl_writephy_batch(phydev, phy_reg_init); + r8168d_modify_extpage(phydev, 0x0023, 0x16, 0xffff, 0x0000); +} + +static void rtl8168d_4_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + phy_write_paged(phydev, 0x0001, 0x17, 0x0cc0); + r8168d_modify_extpage(phydev, 0x002d, 0x18, 0xffff, 0x0040); + phy_set_bits(phydev, 0x0d, BIT(5)); +} + +static void rtl8168e_1_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + static const struct phy_reg phy_reg_init[] = { + /* Channel estimation fine tune */ + { 0x1f, 0x0001 }, + { 0x0b, 0x6c20 }, + { 0x07, 0x2872 }, + { 0x1c, 0xefff }, + { 0x1f, 0x0003 }, + { 0x14, 0x6420 }, + { 0x1f, 0x0000 }, + }; + + r8169_apply_firmware(tp); + + /* Enable Delay cap */ + r8168d_phy_param(phydev, 0x8b80, 0xffff, 0xc896); + + rtl_writephy_batch(phydev, phy_reg_init); + + /* Update PFM & 10M TX idle timer */ + r8168d_modify_extpage(phydev, 0x002f, 0x15, 0xffff, 0x1919); + + r8168d_modify_extpage(phydev, 0x00ac, 0x18, 0xffff, 0x0006); + + /* DCO enable for 10M IDLE Power */ + r8168d_modify_extpage(phydev, 0x0023, 0x17, 0x0000, 0x0006); + + /* For impedance matching */ + 
phy_modify_paged(phydev, 0x0002, 0x08, 0x7f00, 0x8000); + + /* PHY auto speed down */ + r8168d_modify_extpage(phydev, 0x002d, 0x18, 0x0000, 0x0050); + phy_set_bits(phydev, 0x14, BIT(15)); + + r8168d_phy_param(phydev, 0x8b86, 0x0000, 0x0001); + r8168d_phy_param(phydev, 0x8b85, 0x2000, 0x0000); + + r8168d_modify_extpage(phydev, 0x0020, 0x15, 0x1100, 0x0000); + phy_write_paged(phydev, 0x0006, 0x00, 0x5a00); + + phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, 0x0000); +} + +static void rtl8168e_2_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + r8169_apply_firmware(tp); + + /* Enable Delay cap */ + r8168d_modify_extpage(phydev, 0x00ac, 0x18, 0xffff, 0x0006); + + /* Channel estimation fine tune */ + phy_write_paged(phydev, 0x0003, 0x09, 0xa20f); + + /* Green Setting */ + r8168d_phy_param(phydev, 0x8b5b, 0xffff, 0x9222); + r8168d_phy_param(phydev, 0x8b6d, 0xffff, 0x8000); + r8168d_phy_param(phydev, 0x8b76, 0xffff, 0x8000); + + /* For 4-corner performance improve */ + phy_write(phydev, 0x1f, 0x0005); + phy_write(phydev, 0x05, 0x8b80); + phy_set_bits(phydev, 0x17, 0x0006); + phy_write(phydev, 0x1f, 0x0000); + + /* PHY auto speed down */ + r8168d_modify_extpage(phydev, 0x002d, 0x18, 0x0000, 0x0010); + phy_set_bits(phydev, 0x14, BIT(15)); + + /* improve 10M EEE waveform */ + r8168d_phy_param(phydev, 0x8b86, 0x0000, 0x0001); + + /* Improve 2-pair detection performance */ + r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x4000); + + rtl8168f_config_eee_phy(phydev); + + /* Green feature */ + phy_write(phydev, 0x1f, 0x0003); + phy_set_bits(phydev, 0x19, BIT(0)); + phy_set_bits(phydev, 0x10, BIT(10)); + phy_write(phydev, 0x1f, 0x0000); + phy_modify_paged(phydev, 0x0005, 0x01, 0, BIT(8)); +} + +static void rtl8168f_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + /* For 4-corner performance improve */ + r8168d_phy_param(phydev, 0x8b80, 0x0000, 0x0006); + + /* PHY auto speed down */ + r8168d_modify_extpage(phydev, 0x002d, 0x18, 
0x0000, 0x0010); + phy_set_bits(phydev, 0x14, BIT(15)); + + /* Improve 10M EEE waveform */ + r8168d_phy_param(phydev, 0x8b86, 0x0000, 0x0001); + + rtl8168f_config_eee_phy(phydev); +} + +static void rtl8168f_1_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + r8169_apply_firmware(tp); + + /* Channel estimation fine tune */ + phy_write_paged(phydev, 0x0003, 0x09, 0xa20f); + + /* Modify green table for giga & fnet */ + r8168d_phy_param(phydev, 0x8b55, 0xffff, 0x0000); + r8168d_phy_param(phydev, 0x8b5e, 0xffff, 0x0000); + r8168d_phy_param(phydev, 0x8b67, 0xffff, 0x0000); + r8168d_phy_param(phydev, 0x8b70, 0xffff, 0x0000); + r8168d_modify_extpage(phydev, 0x0078, 0x17, 0xffff, 0x0000); + r8168d_modify_extpage(phydev, 0x0078, 0x19, 0xffff, 0x00fb); + + /* Modify green table for 10M */ + r8168d_phy_param(phydev, 0x8b79, 0xffff, 0xaa00); + + /* Disable hiimpedance detection (RTCT) */ + phy_write_paged(phydev, 0x0003, 0x01, 0x328a); + + rtl8168f_hw_phy_config(tp, phydev); + + /* Improve 2-pair detection performance */ + r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x4000); +} + +static void rtl8168f_2_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + r8169_apply_firmware(tp); + + rtl8168f_hw_phy_config(tp, phydev); +} + +static void rtl8411_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + r8169_apply_firmware(tp); + + rtl8168f_hw_phy_config(tp, phydev); + + /* Improve 2-pair detection performance */ + r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x4000); + + /* Channel estimation fine tune */ + phy_write_paged(phydev, 0x0003, 0x09, 0xa20f); + + /* Modify green table for giga & fnet */ + r8168d_phy_param(phydev, 0x8b55, 0xffff, 0x0000); + r8168d_phy_param(phydev, 0x8b5e, 0xffff, 0x0000); + r8168d_phy_param(phydev, 0x8b67, 0xffff, 0x0000); + r8168d_phy_param(phydev, 0x8b70, 0xffff, 0x0000); + r8168d_modify_extpage(phydev, 0x0078, 0x17, 0xffff, 0x0000); + r8168d_modify_extpage(phydev, 0x0078, 0x19, 0xffff, 
0x00aa); + + /* Modify green table for 10M */ + r8168d_phy_param(phydev, 0x8b79, 0xffff, 0xaa00); + + /* Disable hiimpedance detection (RTCT) */ + phy_write_paged(phydev, 0x0003, 0x01, 0x328a); + + /* Modify green table for giga */ + r8168d_phy_param(phydev, 0x8b54, 0x0800, 0x0000); + r8168d_phy_param(phydev, 0x8b5d, 0x0800, 0x0000); + r8168d_phy_param(phydev, 0x8a7c, 0x0100, 0x0000); + r8168d_phy_param(phydev, 0x8a7f, 0x0000, 0x0100); + r8168d_phy_param(phydev, 0x8a82, 0x0100, 0x0000); + r8168d_phy_param(phydev, 0x8a85, 0x0100, 0x0000); + r8168d_phy_param(phydev, 0x8a88, 0x0100, 0x0000); + + /* uc same-seed solution */ + r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x8000); + + /* Green feature */ + phy_write(phydev, 0x1f, 0x0003); + phy_clear_bits(phydev, 0x19, BIT(0)); + phy_clear_bits(phydev, 0x10, BIT(10)); + phy_write(phydev, 0x1f, 0x0000); +} + +static void rtl8168g_disable_aldps(struct phy_device *phydev) +{ + phy_modify_paged(phydev, 0x0a43, 0x10, BIT(2), 0); +} + +static void rtl8168g_phy_adjust_10m_aldps(struct phy_device *phydev) +{ + phy_modify_paged(phydev, 0x0bcc, 0x14, BIT(8), 0); + phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(7) | BIT(6)); + r8168g_phy_param(phydev, 0x8084, 0x6000, 0x0000); + phy_modify_paged(phydev, 0x0a43, 0x10, 0x0000, 0x1003); +} + +static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + int ret; + + r8169_apply_firmware(tp); + + ret = phy_read_paged(phydev, 0x0a46, 0x10); + if (ret & BIT(8)) + phy_modify_paged(phydev, 0x0bcc, 0x12, BIT(15), 0); + else + phy_modify_paged(phydev, 0x0bcc, 0x12, 0, BIT(15)); + + ret = phy_read_paged(phydev, 0x0a46, 0x13); + if (ret & BIT(8)) + phy_modify_paged(phydev, 0x0c41, 0x15, 0, BIT(1)); + else + phy_modify_paged(phydev, 0x0c41, 0x15, BIT(1), 0); + + /* Enable PHY auto speed down */ + phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(3) | BIT(2)); + + rtl8168g_phy_adjust_10m_aldps(phydev); + + /* EEE auto-fallback function */ + phy_modify_paged(phydev, 
0x0a4b, 0x11, 0, BIT(2)); + + /* Enable UC LPF tune function */ + r8168g_phy_param(phydev, 0x8012, 0x0000, 0x8000); + + phy_modify_paged(phydev, 0x0c42, 0x11, BIT(13), BIT(14)); + + /* Improve SWR Efficiency */ + phy_write(phydev, 0x1f, 0x0bcd); + phy_write(phydev, 0x14, 0x5065); + phy_write(phydev, 0x14, 0xd065); + phy_write(phydev, 0x1f, 0x0bc8); + phy_write(phydev, 0x11, 0x5655); + phy_write(phydev, 0x1f, 0x0bcd); + phy_write(phydev, 0x14, 0x1065); + phy_write(phydev, 0x14, 0x9065); + phy_write(phydev, 0x14, 0x1065); + phy_write(phydev, 0x1f, 0x0000); + + rtl8168g_disable_aldps(phydev); + rtl8168g_config_eee_phy(phydev); +} + +static void rtl8168g_2_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + r8169_apply_firmware(tp); + rtl8168g_config_eee_phy(phydev); +} + +static void rtl8168h_1_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + u16 dout_tapbin; + u32 data; + + r8169_apply_firmware(tp); + + /* CHN EST parameters adjust - giga master */ + r8168g_phy_param(phydev, 0x809b, 0xf800, 0x8000); + r8168g_phy_param(phydev, 0x80a2, 0xff00, 0x8000); + r8168g_phy_param(phydev, 0x80a4, 0xff00, 0x8500); + r8168g_phy_param(phydev, 0x809c, 0xff00, 0xbd00); + + /* CHN EST parameters adjust - giga slave */ + r8168g_phy_param(phydev, 0x80ad, 0xf800, 0x7000); + r8168g_phy_param(phydev, 0x80b4, 0xff00, 0x5000); + r8168g_phy_param(phydev, 0x80ac, 0xff00, 0x4000); + + /* CHN EST parameters adjust - fnet */ + r8168g_phy_param(phydev, 0x808e, 0xff00, 0x1200); + r8168g_phy_param(phydev, 0x8090, 0xff00, 0xe500); + r8168g_phy_param(phydev, 0x8092, 0xff00, 0x9f00); + + /* enable R-tune & PGA-retune function */ + dout_tapbin = 0; + data = phy_read_paged(phydev, 0x0a46, 0x13); + data &= 3; + data <<= 2; + dout_tapbin |= data; + data = phy_read_paged(phydev, 0x0a46, 0x12); + data &= 0xc000; + data >>= 14; + dout_tapbin |= data; + dout_tapbin = ~(dout_tapbin ^ 0x08); + dout_tapbin <<= 12; + dout_tapbin &= 0xf000; + + 
r8168g_phy_param(phydev, 0x827a, 0xf000, dout_tapbin); + r8168g_phy_param(phydev, 0x827b, 0xf000, dout_tapbin); + r8168g_phy_param(phydev, 0x827c, 0xf000, dout_tapbin); + r8168g_phy_param(phydev, 0x827d, 0xf000, dout_tapbin); + r8168g_phy_param(phydev, 0x0811, 0x0000, 0x0800); + phy_modify_paged(phydev, 0x0a42, 0x16, 0x0000, 0x0002); + + /* enable GPHY 10M */ + phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(11)); + + /* SAR ADC performance */ + phy_modify_paged(phydev, 0x0bca, 0x17, BIT(12) | BIT(13), BIT(14)); + + r8168g_phy_param(phydev, 0x803f, 0x3000, 0x0000); + r8168g_phy_param(phydev, 0x8047, 0x3000, 0x0000); + r8168g_phy_param(phydev, 0x804f, 0x3000, 0x0000); + r8168g_phy_param(phydev, 0x8057, 0x3000, 0x0000); + r8168g_phy_param(phydev, 0x805f, 0x3000, 0x0000); + r8168g_phy_param(phydev, 0x8067, 0x3000, 0x0000); + r8168g_phy_param(phydev, 0x806f, 0x3000, 0x0000); + + /* disable phy pfm mode */ + phy_modify_paged(phydev, 0x0a44, 0x11, BIT(7), 0); + + rtl8168g_disable_aldps(phydev); + rtl8168h_config_eee_phy(phydev); +} + +static void rtl8168h_2_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + u16 ioffset, rlen; + u32 data; + + r8169_apply_firmware(tp); + + /* CHIN EST parameter update */ + r8168g_phy_param(phydev, 0x808a, 0x003f, 0x000a); + + /* enable R-tune & PGA-retune function */ + r8168g_phy_param(phydev, 0x0811, 0x0000, 0x0800); + phy_modify_paged(phydev, 0x0a42, 0x16, 0x0000, 0x0002); + + /* enable GPHY 10M */ + phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(11)); + + ioffset = rtl8168h_2_get_adc_bias_ioffset(tp); + if (ioffset != 0xffff) + phy_write_paged(phydev, 0x0bcf, 0x16, ioffset); + + /* Modify rlen (TX LPF corner frequency) level */ + data = phy_read_paged(phydev, 0x0bcd, 0x16); + data &= 0x000f; + rlen = 0; + if (data > 3) + rlen = data - 3; + data = rlen | (rlen << 4) | (rlen << 8) | (rlen << 12); + phy_write_paged(phydev, 0x0bcd, 0x17, data); + + /* disable phy pfm mode */ + phy_modify_paged(phydev, 0x0a44, 0x11, 
BIT(7), 0); + + rtl8168g_disable_aldps(phydev); + rtl8168g_config_eee_phy(phydev); +} + +static void rtl8168ep_1_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + /* Enable PHY auto speed down */ + phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(3) | BIT(2)); + + rtl8168g_phy_adjust_10m_aldps(phydev); + + /* Enable EEE auto-fallback function */ + phy_modify_paged(phydev, 0x0a4b, 0x11, 0, BIT(2)); + + /* Enable UC LPF tune function */ + r8168g_phy_param(phydev, 0x8012, 0x0000, 0x8000); + + /* set rg_sel_sdm_rate */ + phy_modify_paged(phydev, 0x0c42, 0x11, BIT(13), BIT(14)); + + rtl8168g_disable_aldps(phydev); + rtl8168g_config_eee_phy(phydev); +} + +static void rtl8168ep_2_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + rtl8168g_phy_adjust_10m_aldps(phydev); + + /* Enable UC LPF tune function */ + r8168g_phy_param(phydev, 0x8012, 0x0000, 0x8000); + + /* Set rg_sel_sdm_rate */ + phy_modify_paged(phydev, 0x0c42, 0x11, BIT(13), BIT(14)); + + /* Channel estimation parameters */ + r8168g_phy_param(phydev, 0x80f3, 0xff00, 0x8b00); + r8168g_phy_param(phydev, 0x80f0, 0xff00, 0x3a00); + r8168g_phy_param(phydev, 0x80ef, 0xff00, 0x0500); + r8168g_phy_param(phydev, 0x80f6, 0xff00, 0x6e00); + r8168g_phy_param(phydev, 0x80ec, 0xff00, 0x6800); + r8168g_phy_param(phydev, 0x80ed, 0xff00, 0x7c00); + r8168g_phy_param(phydev, 0x80f2, 0xff00, 0xf400); + r8168g_phy_param(phydev, 0x80f4, 0xff00, 0x8500); + r8168g_phy_param(phydev, 0x8110, 0xff00, 0xa800); + r8168g_phy_param(phydev, 0x810f, 0xff00, 0x1d00); + r8168g_phy_param(phydev, 0x8111, 0xff00, 0xf500); + r8168g_phy_param(phydev, 0x8113, 0xff00, 0x6100); + r8168g_phy_param(phydev, 0x8115, 0xff00, 0x9200); + r8168g_phy_param(phydev, 0x810e, 0xff00, 0x0400); + r8168g_phy_param(phydev, 0x810c, 0xff00, 0x7c00); + r8168g_phy_param(phydev, 0x810b, 0xff00, 0x5a00); + r8168g_phy_param(phydev, 0x80d1, 0xff00, 0xff00); + r8168g_phy_param(phydev, 0x80cd, 0xff00, 0x9e00); + 
r8168g_phy_param(phydev, 0x80d3, 0xff00, 0x0e00); + r8168g_phy_param(phydev, 0x80d5, 0xff00, 0xca00); + r8168g_phy_param(phydev, 0x80d7, 0xff00, 0x8400); + + /* Force PWM-mode */ + phy_write(phydev, 0x1f, 0x0bcd); + phy_write(phydev, 0x14, 0x5065); + phy_write(phydev, 0x14, 0xd065); + phy_write(phydev, 0x1f, 0x0bc8); + phy_write(phydev, 0x12, 0x00ed); + phy_write(phydev, 0x1f, 0x0bcd); + phy_write(phydev, 0x14, 0x1065); + phy_write(phydev, 0x14, 0x9065); + phy_write(phydev, 0x14, 0x1065); + phy_write(phydev, 0x1f, 0x0000); + + rtl8168g_disable_aldps(phydev); + rtl8168g_config_eee_phy(phydev); +} + +static void rtl8117_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + /* CHN EST parameters adjust - fnet */ + r8168g_phy_param(phydev, 0x808e, 0xff00, 0x4800); + r8168g_phy_param(phydev, 0x8090, 0xff00, 0xcc00); + r8168g_phy_param(phydev, 0x8092, 0xff00, 0xb000); + + r8168g_phy_param(phydev, 0x8088, 0xff00, 0x6000); + r8168g_phy_param(phydev, 0x808b, 0x3f00, 0x0b00); + r8168g_phy_param(phydev, 0x808d, 0x1f00, 0x0600); + r8168g_phy_param(phydev, 0x808c, 0xff00, 0xb000); + r8168g_phy_param(phydev, 0x80a0, 0xff00, 0x2800); + r8168g_phy_param(phydev, 0x80a2, 0xff00, 0x5000); + r8168g_phy_param(phydev, 0x809b, 0xf800, 0xb000); + r8168g_phy_param(phydev, 0x809a, 0xff00, 0x4b00); + r8168g_phy_param(phydev, 0x809d, 0x3f00, 0x0800); + r8168g_phy_param(phydev, 0x80a1, 0xff00, 0x7000); + r8168g_phy_param(phydev, 0x809f, 0x1f00, 0x0300); + r8168g_phy_param(phydev, 0x809e, 0xff00, 0x8800); + r8168g_phy_param(phydev, 0x80b2, 0xff00, 0x2200); + r8168g_phy_param(phydev, 0x80ad, 0xf800, 0x9800); + r8168g_phy_param(phydev, 0x80af, 0x3f00, 0x0800); + r8168g_phy_param(phydev, 0x80b3, 0xff00, 0x6f00); + r8168g_phy_param(phydev, 0x80b1, 0x1f00, 0x0300); + r8168g_phy_param(phydev, 0x80b0, 0xff00, 0x9300); + + r8168g_phy_param(phydev, 0x8011, 0x0000, 0x0800); + + /* enable GPHY 10M */ + phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(11)); + + r8168g_phy_param(phydev, 
0x8016, 0x0000, 0x0400); + + rtl8168g_disable_aldps(phydev); + rtl8168h_config_eee_phy(phydev); +} + +static void rtl8102e_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + static const struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0003 }, + { 0x08, 0x441d }, + { 0x01, 0x9100 }, + { 0x1f, 0x0000 } + }; + + phy_set_bits(phydev, 0x11, BIT(12)); + phy_set_bits(phydev, 0x19, BIT(13)); + phy_set_bits(phydev, 0x10, BIT(15)); + + rtl_writephy_batch(phydev, phy_reg_init); +} + +static void rtl8105e_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + /* Disable ALDPS before ram code */ + phy_write(phydev, 0x18, 0x0310); + msleep(100); + + r8169_apply_firmware(tp); + + phy_write_paged(phydev, 0x0005, 0x1a, 0x0000); + phy_write_paged(phydev, 0x0004, 0x1c, 0x0000); + phy_write_paged(phydev, 0x0001, 0x15, 0x7701); +} + +static void rtl8402_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + /* Disable ALDPS before setting firmware */ + phy_write(phydev, 0x18, 0x0310); + msleep(20); + + r8169_apply_firmware(tp); + + /* EEE setting */ + phy_write(phydev, 0x1f, 0x0004); + phy_write(phydev, 0x10, 0x401f); + phy_write(phydev, 0x19, 0x7030); + phy_write(phydev, 0x1f, 0x0000); +} + +static void rtl8106e_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + static const struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0004 }, + { 0x10, 0xc07f }, + { 0x19, 0x7030 }, + { 0x1f, 0x0000 } + }; + + /* Disable ALDPS before ram code */ + phy_write(phydev, 0x18, 0x0310); + msleep(100); + + r8169_apply_firmware(tp); + + rtl_writephy_batch(phydev, phy_reg_init); +} + +static void rtl8125_1_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + phy_modify_paged(phydev, 0xad4, 0x10, 0x03ff, 0x0084); + phy_modify_paged(phydev, 0xad4, 0x17, 0x0000, 0x0010); + phy_modify_paged(phydev, 0xad1, 0x13, 0x03ff, 0x0006); + phy_modify_paged(phydev, 0xad3, 0x11, 0x003f, 0x0006); + 
phy_modify_paged(phydev, 0xac0, 0x14, 0x0000, 0x1100); + phy_modify_paged(phydev, 0xac8, 0x15, 0xf000, 0x7000); + phy_modify_paged(phydev, 0xad1, 0x14, 0x0000, 0x0400); + phy_modify_paged(phydev, 0xad1, 0x15, 0x0000, 0x03ff); + phy_modify_paged(phydev, 0xad1, 0x16, 0x0000, 0x03ff); + + r8168g_phy_param(phydev, 0x80ea, 0xff00, 0xc400); + r8168g_phy_param(phydev, 0x80eb, 0x0700, 0x0300); + r8168g_phy_param(phydev, 0x80f8, 0xff00, 0x1c00); + r8168g_phy_param(phydev, 0x80f1, 0xff00, 0x3000); + r8168g_phy_param(phydev, 0x80fe, 0xff00, 0xa500); + r8168g_phy_param(phydev, 0x8102, 0xff00, 0x5000); + r8168g_phy_param(phydev, 0x8105, 0xff00, 0x3300); + r8168g_phy_param(phydev, 0x8100, 0xff00, 0x7000); + r8168g_phy_param(phydev, 0x8104, 0xff00, 0xf000); + r8168g_phy_param(phydev, 0x8106, 0xff00, 0x6500); + r8168g_phy_param(phydev, 0x80dc, 0xff00, 0xed00); + r8168g_phy_param(phydev, 0x80df, 0x0000, 0x0100); + r8168g_phy_param(phydev, 0x80e1, 0x0100, 0x0000); + + phy_modify_paged(phydev, 0xbf0, 0x13, 0x003f, 0x0038); + r8168g_phy_param(phydev, 0x819f, 0xffff, 0xd0b6); + + phy_write_paged(phydev, 0xbc3, 0x12, 0x5555); + phy_modify_paged(phydev, 0xbf0, 0x15, 0x0e00, 0x0a00); + phy_modify_paged(phydev, 0xa5c, 0x10, 0x0400, 0x0000); + phy_modify_paged(phydev, 0xa44, 0x11, 0x0000, 0x0800); + + rtl8125_config_eee_phy(phydev); +} + +static void rtl8125_2_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + int i; + + phy_modify_paged(phydev, 0xad4, 0x17, 0x0000, 0x0010); + phy_modify_paged(phydev, 0xad1, 0x13, 0x03ff, 0x03ff); + phy_modify_paged(phydev, 0xad3, 0x11, 0x003f, 0x0006); + phy_modify_paged(phydev, 0xac0, 0x14, 0x1100, 0x0000); + phy_modify_paged(phydev, 0xacc, 0x10, 0x0003, 0x0002); + phy_modify_paged(phydev, 0xad4, 0x10, 0x00e7, 0x0044); + phy_modify_paged(phydev, 0xac1, 0x12, 0x0080, 0x0000); + phy_modify_paged(phydev, 0xac8, 0x10, 0x0300, 0x0000); + phy_modify_paged(phydev, 0xac5, 0x17, 0x0007, 0x0002); + phy_write_paged(phydev, 0xad4, 0x16, 
0x00a8); + phy_write_paged(phydev, 0xac5, 0x16, 0x01ff); + phy_modify_paged(phydev, 0xac8, 0x15, 0x00f0, 0x0030); + + phy_write(phydev, 0x1f, 0x0b87); + phy_write(phydev, 0x16, 0x80a2); + phy_write(phydev, 0x17, 0x0153); + phy_write(phydev, 0x16, 0x809c); + phy_write(phydev, 0x17, 0x0153); + phy_write(phydev, 0x1f, 0x0000); + + phy_write(phydev, 0x1f, 0x0a43); + phy_write(phydev, 0x13, 0x81B3); + phy_write(phydev, 0x14, 0x0043); + phy_write(phydev, 0x14, 0x00A7); + phy_write(phydev, 0x14, 0x00D6); + phy_write(phydev, 0x14, 0x00EC); + phy_write(phydev, 0x14, 0x00F6); + phy_write(phydev, 0x14, 0x00FB); + phy_write(phydev, 0x14, 0x00FD); + phy_write(phydev, 0x14, 0x00FF); + phy_write(phydev, 0x14, 0x00BB); + phy_write(phydev, 0x14, 0x0058); + phy_write(phydev, 0x14, 0x0029); + phy_write(phydev, 0x14, 0x0013); + phy_write(phydev, 0x14, 0x0009); + phy_write(phydev, 0x14, 0x0004); + phy_write(phydev, 0x14, 0x0002); + for (i = 0; i < 25; i++) + phy_write(phydev, 0x14, 0x0000); + phy_write(phydev, 0x1f, 0x0000); + + r8168g_phy_param(phydev, 0x8257, 0xffff, 0x020F); + r8168g_phy_param(phydev, 0x80ea, 0xffff, 0x7843); + + r8169_apply_firmware(tp); + + phy_modify_paged(phydev, 0xd06, 0x14, 0x0000, 0x2000); + + r8168g_phy_param(phydev, 0x81a2, 0x0000, 0x0100); + + phy_modify_paged(phydev, 0xb54, 0x16, 0xff00, 0xdb00); + phy_modify_paged(phydev, 0xa45, 0x12, 0x0001, 0x0000); + phy_modify_paged(phydev, 0xa5d, 0x12, 0x0000, 0x0020); + phy_modify_paged(phydev, 0xad4, 0x17, 0x0010, 0x0000); + phy_modify_paged(phydev, 0xa86, 0x15, 0x0001, 0x0000); + phy_modify_paged(phydev, 0xa44, 0x11, 0x0000, 0x0800); + + rtl8125_config_eee_phy(phydev); +} + +void r8169_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev, + enum mac_version ver) +{ + static const rtl_phy_cfg_fct phy_configs[] = { + /* PCI devices. 
*/ + [RTL_GIGA_MAC_VER_02] = rtl8169s_hw_phy_config, + [RTL_GIGA_MAC_VER_03] = rtl8169s_hw_phy_config, + [RTL_GIGA_MAC_VER_04] = rtl8169sb_hw_phy_config, + [RTL_GIGA_MAC_VER_05] = rtl8169scd_hw_phy_config, + [RTL_GIGA_MAC_VER_06] = rtl8169sce_hw_phy_config, + /* PCI-E devices. */ + [RTL_GIGA_MAC_VER_07] = rtl8102e_hw_phy_config, + [RTL_GIGA_MAC_VER_08] = rtl8102e_hw_phy_config, + [RTL_GIGA_MAC_VER_09] = rtl8102e_hw_phy_config, + [RTL_GIGA_MAC_VER_10] = NULL, + [RTL_GIGA_MAC_VER_11] = rtl8168bb_hw_phy_config, + [RTL_GIGA_MAC_VER_12] = rtl8168bef_hw_phy_config, + [RTL_GIGA_MAC_VER_13] = NULL, + [RTL_GIGA_MAC_VER_14] = NULL, + [RTL_GIGA_MAC_VER_15] = NULL, + [RTL_GIGA_MAC_VER_16] = NULL, + [RTL_GIGA_MAC_VER_17] = rtl8168bef_hw_phy_config, + [RTL_GIGA_MAC_VER_18] = rtl8168cp_1_hw_phy_config, + [RTL_GIGA_MAC_VER_19] = rtl8168c_1_hw_phy_config, + [RTL_GIGA_MAC_VER_20] = rtl8168c_2_hw_phy_config, + [RTL_GIGA_MAC_VER_21] = rtl8168c_3_hw_phy_config, + [RTL_GIGA_MAC_VER_22] = rtl8168c_3_hw_phy_config, + [RTL_GIGA_MAC_VER_23] = rtl8168cp_2_hw_phy_config, + [RTL_GIGA_MAC_VER_24] = rtl8168cp_2_hw_phy_config, + [RTL_GIGA_MAC_VER_25] = rtl8168d_1_hw_phy_config, + [RTL_GIGA_MAC_VER_26] = rtl8168d_2_hw_phy_config, + [RTL_GIGA_MAC_VER_27] = rtl8168d_3_hw_phy_config, + [RTL_GIGA_MAC_VER_28] = rtl8168d_4_hw_phy_config, + [RTL_GIGA_MAC_VER_29] = rtl8105e_hw_phy_config, + [RTL_GIGA_MAC_VER_30] = rtl8105e_hw_phy_config, + [RTL_GIGA_MAC_VER_31] = NULL, + [RTL_GIGA_MAC_VER_32] = rtl8168e_1_hw_phy_config, + [RTL_GIGA_MAC_VER_33] = rtl8168e_1_hw_phy_config, + [RTL_GIGA_MAC_VER_34] = rtl8168e_2_hw_phy_config, + [RTL_GIGA_MAC_VER_35] = rtl8168f_1_hw_phy_config, + [RTL_GIGA_MAC_VER_36] = rtl8168f_2_hw_phy_config, + [RTL_GIGA_MAC_VER_37] = rtl8402_hw_phy_config, + [RTL_GIGA_MAC_VER_38] = rtl8411_hw_phy_config, + [RTL_GIGA_MAC_VER_39] = rtl8106e_hw_phy_config, + [RTL_GIGA_MAC_VER_40] = rtl8168g_1_hw_phy_config, + [RTL_GIGA_MAC_VER_41] = NULL, + [RTL_GIGA_MAC_VER_42] = rtl8168g_2_hw_phy_config, + 
[RTL_GIGA_MAC_VER_43] = rtl8168g_2_hw_phy_config, + [RTL_GIGA_MAC_VER_44] = rtl8168g_2_hw_phy_config, + [RTL_GIGA_MAC_VER_45] = rtl8168h_1_hw_phy_config, + [RTL_GIGA_MAC_VER_46] = rtl8168h_2_hw_phy_config, + [RTL_GIGA_MAC_VER_47] = rtl8168h_1_hw_phy_config, + [RTL_GIGA_MAC_VER_48] = rtl8168h_2_hw_phy_config, + [RTL_GIGA_MAC_VER_49] = rtl8168ep_1_hw_phy_config, + [RTL_GIGA_MAC_VER_50] = rtl8168ep_2_hw_phy_config, + [RTL_GIGA_MAC_VER_51] = rtl8168ep_2_hw_phy_config, + [RTL_GIGA_MAC_VER_52] = rtl8117_hw_phy_config, + [RTL_GIGA_MAC_VER_60] = rtl8125_1_hw_phy_config, + [RTL_GIGA_MAC_VER_61] = rtl8125_2_hw_phy_config, + }; + + if (phy_configs[ver]) + phy_configs[ver](tp, phydev); +} diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 4b13a184bfc7..067ad25553b9 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1425,7 +1425,7 @@ out_napi_off: } /* Timeout function for Ethernet AVB */ -static void ravb_tx_timeout(struct net_device *ndev) +static void ravb_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct ravb_private *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 3591285250e1..c922d7a553f2 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2482,7 +2482,7 @@ out_napi_off: } /* Timeout function */ -static void sh_eth_tx_timeout(struct net_device *ndev) +static void sh_eth_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct sh_eth_private *mdp = netdev_priv(ndev); struct sh_eth_rxdesc *rxdesc; diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index bc4f951315da..7585cd2270ba 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2159,7 +2159,7 @@ static void rocker_router_fib_event_work(struct work_struct *work) /* 
Protect internal structures from changes */ rtnl_lock(); switch (fib_work->event) { - case FIB_EVENT_ENTRY_ADD: + case FIB_EVENT_ENTRY_REPLACE: err = rocker_world_fib4_add(rocker, &fib_work->fen_info); if (err) rocker_world_fib4_abort(rocker); @@ -2201,7 +2201,7 @@ static int rocker_router_fib_event(struct notifier_block *nb, fib_work->event = event; switch (event) { - case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_DEL: if (info->family == AF_INET) { struct fib_entry_notifier_info *fen_info = ptr; diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index 52ed111d98f4..7d3a1c0df09c 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -1572,7 +1572,7 @@ static int sxgbe_poll(struct napi_struct *napi, int budget) * netdev structure and arrange for the device to be reset to a sane state * in order to transmit a new packet. 
*/ -static void sxgbe_tx_timeout(struct net_device *dev) +static void sxgbe_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sxgbe_priv_data *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c index 632a7c85964d..128ee7cda1ed 100644 --- a/drivers/net/ethernet/seeq/ether3.c +++ b/drivers/net/ethernet/seeq/ether3.c @@ -79,7 +79,7 @@ static netdev_tx_t ether3_sendpacket(struct sk_buff *skb, static irqreturn_t ether3_interrupt (int irq, void *dev_id); static int ether3_close (struct net_device *dev); static void ether3_setmulticastlist (struct net_device *dev); -static void ether3_timeout(struct net_device *dev); +static void ether3_timeout(struct net_device *dev, unsigned int txqueue); #define BUS_16 2 #define BUS_8 1 @@ -450,7 +450,7 @@ static void ether3_setmulticastlist(struct net_device *dev) ether3_outw(priv(dev)->regs.config1 | CFG1_LOCBUFMEM, REG_CONFIG1); } -static void ether3_timeout(struct net_device *dev) +static void ether3_timeout(struct net_device *dev, unsigned int txqueue) { unsigned long flags; diff --git a/drivers/net/ethernet/seeq/sgiseeq.c b/drivers/net/ethernet/seeq/sgiseeq.c index 276c7cae7cee..8507ff242014 100644 --- a/drivers/net/ethernet/seeq/sgiseeq.c +++ b/drivers/net/ethernet/seeq/sgiseeq.c @@ -645,7 +645,7 @@ sgiseeq_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static void timeout(struct net_device *dev) +static void timeout(struct net_device *dev, unsigned int txqueue) { printk(KERN_NOTICE "%s: transmit timed out, resetting\n", dev->name); sgiseeq_reset(dev); diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig index 5f36774bf4b8..ea5a9220196c 100644 --- a/drivers/net/ethernet/sfc/Kconfig +++ b/drivers/net/ethernet/sfc/Kconfig @@ -21,8 +21,6 @@ config SFC depends on PCI select MDIO select CRC32 - select I2C - select I2C_ALGOBIT imply PTP_1588_CLOCK ---help--- This driver supports 10/40-gigabit Ethernet 
cards based on diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile index c5c297e78d06..890fd65caa2d 100644 --- a/drivers/net/ethernet/sfc/Makefile +++ b/drivers/net/ethernet/sfc/Makefile @@ -1,7 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 -sfc-y += efx.o nic.o farch.o siena.o ef10.o tx.o rx.o \ - selftest.o ethtool.o ptp.o tx_tso.o \ - mcdi.o mcdi_port.o mcdi_mon.o +sfc-y += efx.o efx_common.o efx_channels.o nic.o \ + farch.o siena.o ef10.o \ + tx.o tx_common.o tx_tso.o rx.o rx_common.o \ + selftest.o ethtool.o ethtool_common.o ptp.o \ + mcdi.o mcdi_port.o mcdi_port_common.o \ + mcdi_functions.o mcdi_mon.o sfc-$(CONFIG_SFC_MTD) += mtd.o sfc-$(CONFIG_SFC_SRIOV) += sriov.o siena_sriov.o ef10_sriov.o diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 4d9bbccc6f89..4997f61de3d6 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -5,10 +5,13 @@ */ #include "net_driver.h" +#include "rx_common.h" #include "ef10_regs.h" #include "io.h" #include "mcdi.h" #include "mcdi_pcol.h" +#include "mcdi_port_common.h" +#include "mcdi_functions.h" #include "nic.h" #include "workarounds.h" #include "selftest.h" @@ -185,24 +188,6 @@ static bool efx_ef10_is_vf(struct efx_nic *efx) return efx->type->is_vf; } -static int efx_ef10_get_pf_index(struct efx_nic *efx) -{ - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_FUNCTION_INFO_OUT_LEN); - struct efx_ef10_nic_data *nic_data = efx->nic_data; - size_t outlen; - int rc; - - rc = efx_mcdi_rpc(efx, MC_CMD_GET_FUNCTION_INFO, NULL, 0, outbuf, - sizeof(outbuf), &outlen); - if (rc) - return rc; - if (outlen < sizeof(outbuf)) - return -EIO; - - nic_data->pf_index = MCDI_DWORD(outbuf, GET_FUNCTION_INFO_OUT_PF); - return 0; -} - #ifdef CONFIG_SFC_SRIOV static int efx_ef10_get_vf_index(struct efx_nic *efx) { @@ -273,24 +258,9 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx) u8 vi_window_mode = MCDI_BYTE(outbuf, GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE); - 
switch (vi_window_mode) { - case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K: - efx->vi_stride = 8192; - break; - case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K: - efx->vi_stride = 16384; - break; - case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K: - efx->vi_stride = 65536; - break; - default: - netif_err(efx, probe, efx->net_dev, - "Unrecognised VI window mode %d\n", - vi_window_mode); - return -EIO; - } - netif_dbg(efx, probe, efx->net_dev, "vi_stride = %u\n", - efx->vi_stride); + rc = efx_mcdi_window_mode_to_stride(efx, vi_window_mode); + if (rc) + return rc; } else { /* keep default VI stride */ netif_dbg(efx, probe, efx->net_dev, @@ -689,7 +659,7 @@ static int efx_ef10_probe(struct efx_nic *efx) } nic_data->warm_boot_count = rc; - efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID; + efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID; nic_data->vport_id = EVB_PORT_ID_ASSIGNED; @@ -725,7 +695,7 @@ static int efx_ef10_probe(struct efx_nic *efx) if (rc) goto fail4; - rc = efx_ef10_get_pf_index(efx); + rc = efx_get_pf_index(efx, &nic_data->pf_index); if (rc) goto fail5; @@ -831,22 +801,6 @@ fail1: return rc; } -static int efx_ef10_free_vis(struct efx_nic *efx) -{ - MCDI_DECLARE_BUF_ERR(outbuf); - size_t outlen; - int rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FREE_VIS, NULL, 0, - outbuf, sizeof(outbuf), &outlen); - - /* -EALREADY means nothing to free, so ignore */ - if (rc == -EALREADY) - rc = 0; - if (rc) - efx_mcdi_display_error(efx, MC_CMD_FREE_VIS, 0, outbuf, outlen, - rc); - return rc; -} - #ifdef EFX_USE_PIO static void efx_ef10_free_piobufs(struct efx_nic *efx) @@ -1089,7 +1043,7 @@ static void efx_ef10_remove(struct efx_nic *efx) if (nic_data->wc_membase) iounmap(nic_data->wc_membase); - rc = efx_ef10_free_vis(efx); + rc = efx_mcdi_free_vis(efx); WARN_ON(rc != 0); if (!nic_data->must_restore_piobufs) @@ -1260,28 +1214,10 @@ static int efx_ef10_probe_vf(struct efx_nic *efx __attribute__ ((unused))) static int 
efx_ef10_alloc_vis(struct efx_nic *efx, unsigned int min_vis, unsigned int max_vis) { - MCDI_DECLARE_BUF(inbuf, MC_CMD_ALLOC_VIS_IN_LEN); - MCDI_DECLARE_BUF(outbuf, MC_CMD_ALLOC_VIS_OUT_LEN); struct efx_ef10_nic_data *nic_data = efx->nic_data; - size_t outlen; - int rc; - - MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MIN_VI_COUNT, min_vis); - MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MAX_VI_COUNT, max_vis); - rc = efx_mcdi_rpc(efx, MC_CMD_ALLOC_VIS, inbuf, sizeof(inbuf), - outbuf, sizeof(outbuf), &outlen); - if (rc != 0) - return rc; - - if (outlen < MC_CMD_ALLOC_VIS_OUT_LEN) - return -EIO; - netif_dbg(efx, drv, efx->net_dev, "base VI is A0x%03x\n", - MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE)); - - nic_data->vi_base = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE); - nic_data->n_allocated_vis = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_COUNT); - return 0; + return efx_mcdi_alloc_vis(efx, min_vis, max_vis, &nic_data->vi_base, + &nic_data->n_allocated_vis); } /* Note that the failure path of this function does not free @@ -1363,7 +1299,7 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx) } /* In case the last attached driver failed to free VIs, do it now */ - rc = efx_ef10_free_vis(efx); + rc = efx_mcdi_free_vis(efx); if (rc != 0) return rc; @@ -1384,7 +1320,7 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx) efx->max_tx_channels = nic_data->n_allocated_vis / EFX_TXQ_TYPES; - efx_ef10_free_vis(efx); + efx_mcdi_free_vis(efx); return -EAGAIN; } @@ -1503,7 +1439,7 @@ static void efx_ef10_reset_mc_allocations(struct efx_nic *efx) nic_data->must_restore_filters = true; nic_data->must_restore_piobufs = true; efx_ef10_forget_old_piobufs(efx); - efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID; + efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID; /* Driver-created vswitches and vports must be re-created */ nic_data->must_probe_vswitching = true; @@ -2408,20 +2344,15 @@ static u32 efx_ef10_tso_versions(struct efx_nic *efx) static void 
efx_ef10_tx_init(struct efx_tx_queue *tx_queue) { - MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 / - EFX_BUF_SIZE)); bool csum_offload = tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD; - size_t entries = tx_queue->txd.buf.len / EFX_BUF_SIZE; struct efx_channel *channel = tx_queue->channel; struct efx_nic *efx = tx_queue->efx; - struct efx_ef10_nic_data *nic_data = efx->nic_data; + struct efx_ef10_nic_data *nic_data; bool tso_v2 = false; - size_t inlen; - dma_addr_t dma_addr; efx_qword_t *txd; int rc; - int i; - BUILD_BUG_ON(MC_CMD_INIT_TXQ_OUT_LEN != 0); + + nic_data = efx->nic_data; /* Only attempt to enable TX timestamping if we have the license for it, * otherwise TXQ init will fail @@ -2448,51 +2379,9 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) channel->channel); } - MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_SIZE, tx_queue->ptr_mask + 1); - MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_TARGET_EVQ, channel->channel); - MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_LABEL, tx_queue->queue); - MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_INSTANCE, tx_queue->queue); - MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_OWNER_ID, 0); - MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, nic_data->vport_id); - - dma_addr = tx_queue->txd.buf.dma_addr; - - netif_dbg(efx, hw, efx->net_dev, "pushing TXQ %d. %zu entries (%llx)\n", - tx_queue->queue, entries, (u64)dma_addr); - - for (i = 0; i < entries; ++i) { - MCDI_SET_ARRAY_QWORD(inbuf, INIT_TXQ_IN_DMA_ADDR, i, dma_addr); - dma_addr += EFX_BUF_SIZE; - } - - inlen = MC_CMD_INIT_TXQ_IN_LEN(entries); - - do { - MCDI_POPULATE_DWORD_4(inbuf, INIT_TXQ_IN_FLAGS, - /* This flag was removed from mcdi_pcol.h for - * the non-_EXT version of INIT_TXQ. However, - * firmware still honours it. 
- */ - INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, tso_v2, - INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload, - INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload, - INIT_TXQ_EXT_IN_FLAG_TIMESTAMP, - tx_queue->timestamping); - - rc = efx_mcdi_rpc_quiet(efx, MC_CMD_INIT_TXQ, inbuf, inlen, - NULL, 0, NULL); - if (rc == -ENOSPC && tso_v2) { - /* Retry without TSOv2 if we're short on contexts. */ - tso_v2 = false; - netif_warn(efx, probe, efx->net_dev, - "TSOv2 context not available to segment in hardware. TCP performance may be reduced.\n"); - } else if (rc) { - efx_mcdi_display_error(efx, MC_CMD_INIT_TXQ, - MC_CMD_INIT_TXQ_EXT_IN_LEN, - NULL, 0, rc); - goto fail; - } - } while (rc); + rc = efx_mcdi_tx_init(tx_queue, tso_v2); + if (rc) + goto fail; /* A previous user of this TX queue might have set us up the * bomb by writing a descriptor to the TX push collector but @@ -2530,35 +2419,6 @@ fail: tx_queue->queue); } -static void efx_ef10_tx_fini(struct efx_tx_queue *tx_queue) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_TXQ_IN_LEN); - MCDI_DECLARE_BUF_ERR(outbuf); - struct efx_nic *efx = tx_queue->efx; - size_t outlen; - int rc; - - MCDI_SET_DWORD(inbuf, FINI_TXQ_IN_INSTANCE, - tx_queue->queue); - - rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_TXQ, inbuf, sizeof(inbuf), - outbuf, sizeof(outbuf), &outlen); - - if (rc && rc != -EALREADY) - goto fail; - - return; - -fail: - efx_mcdi_display_error(efx, MC_CMD_FINI_TXQ, MC_CMD_FINI_TXQ_IN_LEN, - outbuf, outlen, rc); -} - -static void efx_ef10_tx_remove(struct efx_tx_queue *tx_queue) -{ - efx_nic_free_buffer(tx_queue->efx, &tx_queue->txd.buf); -} - /* This writes to the TX_DESC_WPTR; write pointer for TX descriptor ring */ static inline void efx_ef10_notify_tx_desc(struct efx_tx_queue *tx_queue) { @@ -2737,7 +2597,7 @@ static int efx_ef10_alloc_rss_context(struct efx_nic *efx, bool exclusive, EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE); if (!exclusive && rss_spread == 1) { - ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID; + ctx->context_id = 
EFX_MCDI_RSS_CONTEXT_INVALID; if (context_size) *context_size = 1; return 0; @@ -2824,11 +2684,11 @@ static void efx_ef10_rx_free_indir_table(struct efx_nic *efx) { int rc; - if (efx->rss_context.context_id != EFX_EF10_RSS_CONTEXT_INVALID) { + if (efx->rss_context.context_id != EFX_MCDI_RSS_CONTEXT_INVALID) { rc = efx_ef10_free_rss_context(efx, efx->rss_context.context_id); WARN_ON(rc != 0); } - efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID; + efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID; } static int efx_ef10_rx_push_shared_rss_config(struct efx_nic *efx, @@ -2854,7 +2714,7 @@ static int efx_ef10_rx_push_exclusive_rss_config(struct efx_nic *efx, struct efx_ef10_nic_data *nic_data = efx->nic_data; int rc; - if (efx->rss_context.context_id == EFX_EF10_RSS_CONTEXT_INVALID || + if (efx->rss_context.context_id == EFX_MCDI_RSS_CONTEXT_INVALID || !nic_data->rx_rss_context_exclusive) { rc = efx_ef10_alloc_rss_context(efx, true, &efx->rss_context, NULL); @@ -2870,7 +2730,7 @@ static int efx_ef10_rx_push_exclusive_rss_config(struct efx_nic *efx, goto fail2; if (efx->rss_context.context_id != old_rx_rss_context && - old_rx_rss_context != EFX_EF10_RSS_CONTEXT_INVALID) + old_rx_rss_context != EFX_MCDI_RSS_CONTEXT_INVALID) WARN_ON(efx_ef10_free_rss_context(efx, old_rx_rss_context) != 0); nic_data->rx_rss_context_exclusive = true; if (rx_indir_table != efx->rss_context.rx_indir_table) @@ -2901,7 +2761,7 @@ static int efx_ef10_rx_push_rss_context_config(struct efx_nic *efx, WARN_ON(!mutex_is_locked(&efx->rss_lock)); - if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) { + if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) { rc = efx_ef10_alloc_rss_context(efx, true, ctx, NULL); if (rc) return rc; @@ -2936,7 +2796,7 @@ static int efx_ef10_rx_pull_rss_context_config(struct efx_nic *efx, BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN != MC_CMD_RSS_CONTEXT_GET_KEY_IN_LEN); - if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) + if (ctx->context_id == 
EFX_MCDI_RSS_CONTEXT_INVALID) return -ENOENT; MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_TABLE_IN_RSS_CONTEXT_ID, @@ -2997,7 +2857,7 @@ static void efx_ef10_rx_restore_rss_contexts(struct efx_nic *efx) list_for_each_entry(ctx, &efx->rss_context.list, list) { /* previous NIC RSS context is gone */ - ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID; + ctx->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; /* so try to allocate a new one */ rc = efx_ef10_rx_push_rss_context_config(efx, ctx, ctx->rx_indir_table, @@ -3068,96 +2928,11 @@ static int efx_ef10_vf_rx_push_rss_config(struct efx_nic *efx, bool user, { if (user) return -EOPNOTSUPP; - if (efx->rss_context.context_id != EFX_EF10_RSS_CONTEXT_INVALID) + if (efx->rss_context.context_id != EFX_MCDI_RSS_CONTEXT_INVALID) return 0; return efx_ef10_rx_push_shared_rss_config(efx, NULL); } -static int efx_ef10_rx_probe(struct efx_rx_queue *rx_queue) -{ - return efx_nic_alloc_buffer(rx_queue->efx, &rx_queue->rxd.buf, - (rx_queue->ptr_mask + 1) * - sizeof(efx_qword_t), - GFP_KERNEL); -} - -static void efx_ef10_rx_init(struct efx_rx_queue *rx_queue) -{ - MCDI_DECLARE_BUF(inbuf, - MC_CMD_INIT_RXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 / - EFX_BUF_SIZE)); - struct efx_channel *channel = efx_rx_queue_channel(rx_queue); - size_t entries = rx_queue->rxd.buf.len / EFX_BUF_SIZE; - struct efx_nic *efx = rx_queue->efx; - struct efx_ef10_nic_data *nic_data = efx->nic_data; - size_t inlen; - dma_addr_t dma_addr; - int rc; - int i; - BUILD_BUG_ON(MC_CMD_INIT_RXQ_OUT_LEN != 0); - - rx_queue->scatter_n = 0; - rx_queue->scatter_len = 0; - - MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_SIZE, rx_queue->ptr_mask + 1); - MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_TARGET_EVQ, channel->channel); - MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_LABEL, efx_rx_queue_index(rx_queue)); - MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_INSTANCE, - efx_rx_queue_index(rx_queue)); - MCDI_POPULATE_DWORD_2(inbuf, INIT_RXQ_IN_FLAGS, - INIT_RXQ_IN_FLAG_PREFIX, 1, - INIT_RXQ_IN_FLAG_TIMESTAMP, 1); - MCDI_SET_DWORD(inbuf, 
INIT_RXQ_IN_OWNER_ID, 0); - MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_PORT_ID, nic_data->vport_id); - - dma_addr = rx_queue->rxd.buf.dma_addr; - - netif_dbg(efx, hw, efx->net_dev, "pushing RXQ %d. %zu entries (%llx)\n", - efx_rx_queue_index(rx_queue), entries, (u64)dma_addr); - - for (i = 0; i < entries; ++i) { - MCDI_SET_ARRAY_QWORD(inbuf, INIT_RXQ_IN_DMA_ADDR, i, dma_addr); - dma_addr += EFX_BUF_SIZE; - } - - inlen = MC_CMD_INIT_RXQ_IN_LEN(entries); - - rc = efx_mcdi_rpc(efx, MC_CMD_INIT_RXQ, inbuf, inlen, - NULL, 0, NULL); - if (rc) - netdev_WARN(efx->net_dev, "failed to initialise RXQ %d\n", - efx_rx_queue_index(rx_queue)); -} - -static void efx_ef10_rx_fini(struct efx_rx_queue *rx_queue) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_RXQ_IN_LEN); - MCDI_DECLARE_BUF_ERR(outbuf); - struct efx_nic *efx = rx_queue->efx; - size_t outlen; - int rc; - - MCDI_SET_DWORD(inbuf, FINI_RXQ_IN_INSTANCE, - efx_rx_queue_index(rx_queue)); - - rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_RXQ, inbuf, sizeof(inbuf), - outbuf, sizeof(outbuf), &outlen); - - if (rc && rc != -EALREADY) - goto fail; - - return; - -fail: - efx_mcdi_display_error(efx, MC_CMD_FINI_RXQ, MC_CMD_FINI_RXQ_IN_LEN, - outbuf, outlen, rc); -} - -static void efx_ef10_rx_remove(struct efx_rx_queue *rx_queue) -{ - efx_nic_free_buffer(rx_queue->efx, &rx_queue->rxd.buf); -} - /* This creates an entry in the RX descriptor queue */ static inline void efx_ef10_build_rx_desc(struct efx_rx_queue *rx_queue, unsigned int index) @@ -3229,106 +3004,20 @@ efx_ef10_rx_defer_refill_complete(struct efx_nic *efx, unsigned long cookie, /* nothing to do */ } -static int efx_ef10_ev_probe(struct efx_channel *channel) -{ - return efx_nic_alloc_buffer(channel->efx, &channel->eventq.buf, - (channel->eventq_mask + 1) * - sizeof(efx_qword_t), - GFP_KERNEL); -} - -static void efx_ef10_ev_fini(struct efx_channel *channel) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_EVQ_IN_LEN); - MCDI_DECLARE_BUF_ERR(outbuf); - struct efx_nic *efx = channel->efx; - size_t 
outlen; - int rc; - - MCDI_SET_DWORD(inbuf, FINI_EVQ_IN_INSTANCE, channel->channel); - - rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_EVQ, inbuf, sizeof(inbuf), - outbuf, sizeof(outbuf), &outlen); - - if (rc && rc != -EALREADY) - goto fail; - - return; - -fail: - efx_mcdi_display_error(efx, MC_CMD_FINI_EVQ, MC_CMD_FINI_EVQ_IN_LEN, - outbuf, outlen, rc); -} - static int efx_ef10_ev_init(struct efx_channel *channel) { - MCDI_DECLARE_BUF(inbuf, - MC_CMD_INIT_EVQ_V2_IN_LEN(EFX_MAX_EVQ_SIZE * 8 / - EFX_BUF_SIZE)); - MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_EVQ_V2_OUT_LEN); - size_t entries = channel->eventq.buf.len / EFX_BUF_SIZE; struct efx_nic *efx = channel->efx; struct efx_ef10_nic_data *nic_data; - size_t inlen, outlen; unsigned int enabled, implemented; - dma_addr_t dma_addr; + bool use_v2, cut_thru; int rc; - int i; nic_data = efx->nic_data; - - /* Fill event queue with all ones (i.e. empty events) */ - memset(channel->eventq.buf.addr, 0xff, channel->eventq.buf.len); - - MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_SIZE, channel->eventq_mask + 1); - MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_INSTANCE, channel->channel); - /* INIT_EVQ expects index in vector table, not absolute */ - MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_IRQ_NUM, channel->channel); - MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_MODE, - MC_CMD_INIT_EVQ_IN_TMR_MODE_DIS); - MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_LOAD, 0); - MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_RELOAD, 0); - MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_MODE, - MC_CMD_INIT_EVQ_IN_COUNT_MODE_DIS); - MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_THRSHLD, 0); - - if (nic_data->datapath_caps2 & - 1 << MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_LBN) { - /* Use the new generic approach to specifying event queue - * configuration, requesting lower latency or higher throughput. - * The options that actually get used appear in the output. 
- */ - MCDI_POPULATE_DWORD_2(inbuf, INIT_EVQ_V2_IN_FLAGS, - INIT_EVQ_V2_IN_FLAG_INTERRUPTING, 1, - INIT_EVQ_V2_IN_FLAG_TYPE, - MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_AUTO); - } else { - bool cut_thru = !(nic_data->datapath_caps & - 1 << MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_LBN); - - MCDI_POPULATE_DWORD_4(inbuf, INIT_EVQ_IN_FLAGS, - INIT_EVQ_IN_FLAG_INTERRUPTING, 1, - INIT_EVQ_IN_FLAG_RX_MERGE, 1, - INIT_EVQ_IN_FLAG_TX_MERGE, 1, - INIT_EVQ_IN_FLAG_CUT_THRU, cut_thru); - } - - dma_addr = channel->eventq.buf.dma_addr; - for (i = 0; i < entries; ++i) { - MCDI_SET_ARRAY_QWORD(inbuf, INIT_EVQ_IN_DMA_ADDR, i, dma_addr); - dma_addr += EFX_BUF_SIZE; - } - - inlen = MC_CMD_INIT_EVQ_IN_LEN(entries); - - rc = efx_mcdi_rpc(efx, MC_CMD_INIT_EVQ, inbuf, inlen, - outbuf, sizeof(outbuf), &outlen); - - if (outlen >= MC_CMD_INIT_EVQ_V2_OUT_LEN) - netif_dbg(efx, drv, efx->net_dev, - "Channel %d using event queue flags %08x\n", - channel->channel, - MCDI_DWORD(outbuf, INIT_EVQ_V2_OUT_FLAGS)); + use_v2 = nic_data->datapath_caps2 & + 1 << MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_LBN; + cut_thru = !(nic_data->datapath_caps & + 1 << MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_LBN); + rc = efx_mcdi_ev_init(channel, cut_thru, use_v2); /* IRQ return is ignored */ if (channel->channel || rc) @@ -3386,15 +3075,10 @@ static int efx_ef10_ev_init(struct efx_channel *channel) return 0; fail: - efx_ef10_ev_fini(channel); + efx_mcdi_ev_fini(channel); return rc; } -static void efx_ef10_ev_remove(struct efx_channel *channel) -{ - efx_nic_free_buffer(channel->efx, &channel->eventq.buf); -} - static void efx_ef10_handle_rx_wrong_queue(struct efx_rx_queue *rx_queue, unsigned int rx_queue_label) { @@ -3976,9 +3660,9 @@ static int efx_ef10_fini_dmaq(struct efx_nic *efx) if (efx->state != STATE_RECOVERY) { efx_for_each_channel(channel, efx) { efx_for_each_channel_rx_queue(rx_queue, channel) - efx_ef10_rx_fini(rx_queue); + efx_mcdi_rx_fini(rx_queue); efx_for_each_channel_tx_queue(tx_queue, channel) - 
efx_ef10_tx_fini(tx_queue); + efx_mcdi_tx_fini(tx_queue); } wait_event_timeout(efx->flush_wq, @@ -4165,7 +3849,7 @@ static void efx_ef10_filter_push_prep(struct efx_nic *efx, */ if (WARN_ON_ONCE(!ctx)) flags &= ~EFX_FILTER_FLAG_RX_RSS; - else if (WARN_ON_ONCE(ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID)) + else if (WARN_ON_ONCE(ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID)) flags &= ~EFX_FILTER_FLAG_RX_RSS; } @@ -4344,7 +4028,7 @@ static s32 efx_ef10_filter_insert_locked(struct efx_nic *efx, rc = -ENOENT; goto out_unlock; } - if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) { + if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) { rc = -EOPNOTSUPP; goto out_unlock; } @@ -5085,7 +4769,7 @@ static void efx_ef10_filter_table_restore(struct efx_nic *efx) invalid_filters++; goto not_restored; } - if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) { + if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) { netif_warn(efx, drv, efx->net_dev, "Warning: unable to restore a filter with RSS context %u as it was not created.\n", spec->rss_context); @@ -6650,20 +6334,20 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = { .irq_handle_legacy = efx_ef10_legacy_interrupt, .tx_probe = efx_ef10_tx_probe, .tx_init = efx_ef10_tx_init, - .tx_remove = efx_ef10_tx_remove, + .tx_remove = efx_mcdi_tx_remove, .tx_write = efx_ef10_tx_write, .tx_limit_len = efx_ef10_tx_limit_len, .rx_push_rss_config = efx_ef10_vf_rx_push_rss_config, .rx_pull_rss_config = efx_ef10_rx_pull_rss_config, - .rx_probe = efx_ef10_rx_probe, - .rx_init = efx_ef10_rx_init, - .rx_remove = efx_ef10_rx_remove, + .rx_probe = efx_mcdi_rx_probe, + .rx_init = efx_mcdi_rx_init, + .rx_remove = efx_mcdi_rx_remove, .rx_write = efx_ef10_rx_write, .rx_defer_refill = efx_ef10_rx_defer_refill, - .ev_probe = efx_ef10_ev_probe, + .ev_probe = efx_mcdi_ev_probe, .ev_init = efx_ef10_ev_init, - .ev_fini = efx_ef10_ev_fini, - .ev_remove = efx_ef10_ev_remove, + .ev_fini = efx_mcdi_ev_fini, + .ev_remove = efx_mcdi_ev_remove, 
.ev_process = efx_ef10_ev_process, .ev_read_ack = efx_ef10_ev_read_ack, .ev_test_generate = efx_ef10_ev_test_generate, @@ -6759,7 +6443,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = { .irq_handle_legacy = efx_ef10_legacy_interrupt, .tx_probe = efx_ef10_tx_probe, .tx_init = efx_ef10_tx_init, - .tx_remove = efx_ef10_tx_remove, + .tx_remove = efx_mcdi_tx_remove, .tx_write = efx_ef10_tx_write, .tx_limit_len = efx_ef10_tx_limit_len, .rx_push_rss_config = efx_ef10_pf_rx_push_rss_config, @@ -6767,15 +6451,15 @@ const struct efx_nic_type efx_hunt_a0_nic_type = { .rx_push_rss_context_config = efx_ef10_rx_push_rss_context_config, .rx_pull_rss_context_config = efx_ef10_rx_pull_rss_context_config, .rx_restore_rss_contexts = efx_ef10_rx_restore_rss_contexts, - .rx_probe = efx_ef10_rx_probe, - .rx_init = efx_ef10_rx_init, - .rx_remove = efx_ef10_rx_remove, + .rx_probe = efx_mcdi_rx_probe, + .rx_init = efx_mcdi_rx_init, + .rx_remove = efx_mcdi_rx_remove, .rx_write = efx_ef10_rx_write, .rx_defer_refill = efx_ef10_rx_defer_refill, - .ev_probe = efx_ef10_ev_probe, + .ev_probe = efx_mcdi_ev_probe, .ev_init = efx_ef10_ev_init, - .ev_fini = efx_ef10_ev_fini, - .ev_remove = efx_ef10_ev_remove, + .ev_fini = efx_mcdi_ev_fini, + .ev_remove = efx_mcdi_ev_remove, .ev_process = efx_ef10_ev_process, .ev_read_ack = efx_ef10_ev_read_ack, .ev_test_generate = efx_ef10_ev_test_generate, diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c index 52bd43f45761..14393767ef9f 100644 --- a/drivers/net/ethernet/sfc/ef10_sriov.c +++ b/drivers/net/ethernet/sfc/ef10_sriov.c @@ -522,10 +522,9 @@ int efx_ef10_sriov_set_vf_mac(struct efx_nic *efx, int vf_i, u8 *mac) if (!is_zero_ether_addr(mac)) { rc = efx_ef10_vport_add_mac(efx, vf->vport_id, mac); - if (rc) { - eth_zero_addr(vf->mac); + if (rc) goto fail; - } + if (vf->efx) ether_addr_copy(vf->efx->net_dev->dev_addr, mac); } diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 
7a38d7f282a1..4481f21a1f43 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -23,6 +23,10 @@ #include <net/gre.h> #include <net/udp_tunnel.h> #include "efx.h" +#include "efx_common.h" +#include "efx_channels.h" +#include "rx_common.h" +#include "tx_common.h" #include "nic.h" #include "io.h" #include "selftest.h" @@ -39,56 +43,6 @@ ************************************************************************** */ -/* Loopback mode names (see LOOPBACK_MODE()) */ -const unsigned int efx_loopback_mode_max = LOOPBACK_MAX; -const char *const efx_loopback_mode_names[] = { - [LOOPBACK_NONE] = "NONE", - [LOOPBACK_DATA] = "DATAPATH", - [LOOPBACK_GMAC] = "GMAC", - [LOOPBACK_XGMII] = "XGMII", - [LOOPBACK_XGXS] = "XGXS", - [LOOPBACK_XAUI] = "XAUI", - [LOOPBACK_GMII] = "GMII", - [LOOPBACK_SGMII] = "SGMII", - [LOOPBACK_XGBR] = "XGBR", - [LOOPBACK_XFI] = "XFI", - [LOOPBACK_XAUI_FAR] = "XAUI_FAR", - [LOOPBACK_GMII_FAR] = "GMII_FAR", - [LOOPBACK_SGMII_FAR] = "SGMII_FAR", - [LOOPBACK_XFI_FAR] = "XFI_FAR", - [LOOPBACK_GPHY] = "GPHY", - [LOOPBACK_PHYXS] = "PHYXS", - [LOOPBACK_PCS] = "PCS", - [LOOPBACK_PMAPMD] = "PMA/PMD", - [LOOPBACK_XPORT] = "XPORT", - [LOOPBACK_XGMII_WS] = "XGMII_WS", - [LOOPBACK_XAUI_WS] = "XAUI_WS", - [LOOPBACK_XAUI_WS_FAR] = "XAUI_WS_FAR", - [LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR", - [LOOPBACK_GMII_WS] = "GMII_WS", - [LOOPBACK_XFI_WS] = "XFI_WS", - [LOOPBACK_XFI_WS_FAR] = "XFI_WS_FAR", - [LOOPBACK_PHYXS_WS] = "PHYXS_WS", -}; - -const unsigned int efx_reset_type_max = RESET_TYPE_MAX; -const char *const efx_reset_type_names[] = { - [RESET_TYPE_INVISIBLE] = "INVISIBLE", - [RESET_TYPE_ALL] = "ALL", - [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL", - [RESET_TYPE_WORLD] = "WORLD", - [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE", - [RESET_TYPE_DATAPATH] = "DATAPATH", - [RESET_TYPE_MC_BIST] = "MC_BIST", - [RESET_TYPE_DISABLE] = "DISABLE", - [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", - [RESET_TYPE_INT_ERROR] = "INT_ERROR", - 
[RESET_TYPE_DMA_ERROR] = "DMA_ERROR", - [RESET_TYPE_TX_SKIP] = "TX_SKIP", - [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", - [RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)", -}; - /* UDP tunnel type names */ static const char *const efx_udp_tunnel_type_names[] = { [TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN] = "vxlan", @@ -104,18 +58,6 @@ void efx_get_udp_tunnel_type_name(u16 type, char *buf, size_t buflen) snprintf(buf, buflen, "type %d", type); } -/* Reset workqueue. If any NIC has a hardware failure then a reset will be - * queued onto this work queue. This is not a per-nic work queue, because - * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised. - */ -static struct workqueue_struct *reset_workqueue; - -/* How often and how many times to poll for a reset while waiting for a - * BIST that another function started to complete. - */ -#define BIST_WAIT_DELAY_MS 100 -#define BIST_WAIT_DELAY_COUNT 100 - /************************************************************************** * * Configurable values @@ -135,21 +77,6 @@ module_param(efx_separate_tx_channels, bool, 0444); MODULE_PARM_DESC(efx_separate_tx_channels, "Use separate channels for TX and RX"); -/* This is the weight assigned to each of the (per-channel) virtual - * NAPI devices. - */ -static int napi_weight = 64; - -/* This is the time (in jiffies) between invocations of the hardware - * monitor. - * On Falcon-based NICs, this will: - * - Check the on-board hardware monitor; - * - Poll the link state and reconfigure the hardware as necessary. - * On Siena-based NICs for power systems with EEH support, this will give EEH a - * chance to start. - */ -static unsigned int efx_monitor_interval = 1 * HZ; - /* Initial interrupt moderation settings. They can be modified after * module load with ethtool. 
* @@ -169,38 +96,10 @@ static unsigned int rx_irq_mod_usec = 60; */ static unsigned int tx_irq_mod_usec = 150; -/* This is the first interrupt mode to try out of: - * 0 => MSI-X - * 1 => MSI - * 2 => legacy - */ -static unsigned int interrupt_mode; - -/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS), - * i.e. the number of CPUs among which we may distribute simultaneous - * interrupt handling. - * - * Cards without MSI-X will only target one CPU via legacy or MSI interrupt. - * The default (0) means to assign an interrupt to each core. - */ -static unsigned int rss_cpus; -module_param(rss_cpus, uint, 0444); -MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling"); - static bool phy_flash_cfg; module_param(phy_flash_cfg, bool, 0644); MODULE_PARM_DESC(phy_flash_cfg, "Set PHYs into reflash mode initially"); -static unsigned irq_adapt_low_thresh = 8000; -module_param(irq_adapt_low_thresh, uint, 0644); -MODULE_PARM_DESC(irq_adapt_low_thresh, - "Threshold score for reducing IRQ moderation"); - -static unsigned irq_adapt_high_thresh = 16000; -module_param(irq_adapt_high_thresh, uint, 0644); -MODULE_PARM_DESC(irq_adapt_high_thresh, - "Threshold score for increasing IRQ moderation"); - static unsigned debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP | NETIF_MSG_RX_ERR | @@ -214,18 +113,8 @@ MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value"); * *************************************************************************/ -static int efx_soft_enable_interrupts(struct efx_nic *efx); -static void efx_soft_disable_interrupts(struct efx_nic *efx); -static void efx_remove_channel(struct efx_channel *channel); -static void efx_remove_channels(struct efx_nic *efx); static const struct efx_channel_type efx_default_channel_type; static void efx_remove_port(struct efx_nic *efx); -static void efx_init_napi_channel(struct efx_channel *channel); -static void efx_fini_napi(struct 
efx_nic *efx); -static void efx_fini_napi_channel(struct efx_channel *channel); -static void efx_fini_struct(struct efx_nic *efx); -static void efx_start_all(struct efx_nic *efx); -static void efx_stop_all(struct efx_nic *efx); static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog); static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp); static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs, @@ -239,776 +128,12 @@ static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs, ASSERT_RTNL(); \ } while (0) -static int efx_check_disabled(struct efx_nic *efx) -{ - if (efx->state == STATE_DISABLED || efx->state == STATE_RECOVERY) { - netif_err(efx, drv, efx->net_dev, - "device is disabled due to earlier errors\n"); - return -EIO; - } - return 0; -} - -/************************************************************************** - * - * Event queue processing - * - *************************************************************************/ - -/* Process channel's event queue - * - * This function is responsible for processing the event queue of a - * single channel. The caller must guarantee that this function will - * never be concurrently called more than once on the same channel, - * though different channels may be being processed concurrently. 
- */ -static int efx_process_channel(struct efx_channel *channel, int budget) -{ - struct efx_tx_queue *tx_queue; - struct list_head rx_list; - int spent; - - if (unlikely(!channel->enabled)) - return 0; - - /* Prepare the batch receive list */ - EFX_WARN_ON_PARANOID(channel->rx_list != NULL); - INIT_LIST_HEAD(&rx_list); - channel->rx_list = &rx_list; - - efx_for_each_channel_tx_queue(tx_queue, channel) { - tx_queue->pkts_compl = 0; - tx_queue->bytes_compl = 0; - } - - spent = efx_nic_process_eventq(channel, budget); - if (spent && efx_channel_has_rx_queue(channel)) { - struct efx_rx_queue *rx_queue = - efx_channel_get_rx_queue(channel); - - efx_rx_flush_packet(channel); - efx_fast_push_rx_descriptors(rx_queue, true); - } - - /* Update BQL */ - efx_for_each_channel_tx_queue(tx_queue, channel) { - if (tx_queue->bytes_compl) { - netdev_tx_completed_queue(tx_queue->core_txq, - tx_queue->pkts_compl, tx_queue->bytes_compl); - } - } - - /* Receive any packets we queued up */ - netif_receive_skb_list(channel->rx_list); - channel->rx_list = NULL; - - return spent; -} - -/* NAPI poll handler - * - * NAPI guarantees serialisation of polls of the same device, which - * provides the guarantee required by efx_process_channel(). 
- */ -static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel) -{ - int step = efx->irq_mod_step_us; - - if (channel->irq_mod_score < irq_adapt_low_thresh) { - if (channel->irq_moderation_us > step) { - channel->irq_moderation_us -= step; - efx->type->push_irq_moderation(channel); - } - } else if (channel->irq_mod_score > irq_adapt_high_thresh) { - if (channel->irq_moderation_us < - efx->irq_rx_moderation_us) { - channel->irq_moderation_us += step; - efx->type->push_irq_moderation(channel); - } - } - - channel->irq_count = 0; - channel->irq_mod_score = 0; -} - -static int efx_poll(struct napi_struct *napi, int budget) -{ - struct efx_channel *channel = - container_of(napi, struct efx_channel, napi_str); - struct efx_nic *efx = channel->efx; - int spent; - - netif_vdbg(efx, intr, efx->net_dev, - "channel %d NAPI poll executing on CPU %d\n", - channel->channel, raw_smp_processor_id()); - - spent = efx_process_channel(channel, budget); - - xdp_do_flush_map(); - - if (spent < budget) { - if (efx_channel_has_rx_queue(channel) && - efx->irq_rx_adaptive && - unlikely(++channel->irq_count == 1000)) { - efx_update_irq_mod(efx, channel); - } - -#ifdef CONFIG_RFS_ACCEL - /* Perhaps expire some ARFS filters */ - mod_delayed_work(system_wq, &channel->filter_work, 0); -#endif - - /* There is no race here; although napi_disable() will - * only wait for napi_complete(), this isn't a problem - * since efx_nic_eventq_read_ack() will have no effect if - * interrupts have already been disabled. - */ - if (napi_complete_done(napi, spent)) - efx_nic_eventq_read_ack(channel); - } - - return spent; -} - -/* Create event queue - * Event queue memory allocations are done only once. If the channel - * is reset, the memory buffer will be reused; this guards against - * errors during channel reset and also simplifies interrupt handling. 
- */ -static int efx_probe_eventq(struct efx_channel *channel) -{ - struct efx_nic *efx = channel->efx; - unsigned long entries; - - netif_dbg(efx, probe, efx->net_dev, - "chan %d create event queue\n", channel->channel); - - /* Build an event queue with room for one event per tx and rx buffer, - * plus some extra for link state events and MCDI completions. */ - entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128); - EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE); - channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1; - - return efx_nic_probe_eventq(channel); -} - -/* Prepare channel's event queue */ -static int efx_init_eventq(struct efx_channel *channel) -{ - struct efx_nic *efx = channel->efx; - int rc; - - EFX_WARN_ON_PARANOID(channel->eventq_init); - - netif_dbg(efx, drv, efx->net_dev, - "chan %d init event queue\n", channel->channel); - - rc = efx_nic_init_eventq(channel); - if (rc == 0) { - efx->type->push_irq_moderation(channel); - channel->eventq_read_ptr = 0; - channel->eventq_init = true; - } - return rc; -} - -/* Enable event queue processing and NAPI */ -void efx_start_eventq(struct efx_channel *channel) -{ - netif_dbg(channel->efx, ifup, channel->efx->net_dev, - "chan %d start event queue\n", channel->channel); - - /* Make sure the NAPI handler sees the enabled flag set */ - channel->enabled = true; - smp_wmb(); - - napi_enable(&channel->napi_str); - efx_nic_eventq_read_ack(channel); -} - -/* Disable event queue processing and NAPI */ -void efx_stop_eventq(struct efx_channel *channel) -{ - if (!channel->enabled) - return; - - napi_disable(&channel->napi_str); - channel->enabled = false; -} - -static void efx_fini_eventq(struct efx_channel *channel) -{ - if (!channel->eventq_init) - return; - - netif_dbg(channel->efx, drv, channel->efx->net_dev, - "chan %d fini event queue\n", channel->channel); - - efx_nic_fini_eventq(channel); - channel->eventq_init = false; -} - -static void efx_remove_eventq(struct efx_channel *channel) 
-{ - netif_dbg(channel->efx, drv, channel->efx->net_dev, - "chan %d remove event queue\n", channel->channel); - - efx_nic_remove_eventq(channel); -} - -/************************************************************************** - * - * Channel handling - * - *************************************************************************/ - -/* Allocate and initialise a channel structure. */ -static struct efx_channel * -efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel) -{ - struct efx_channel *channel; - struct efx_rx_queue *rx_queue; - struct efx_tx_queue *tx_queue; - int j; - - channel = kzalloc(sizeof(*channel), GFP_KERNEL); - if (!channel) - return NULL; - - channel->efx = efx; - channel->channel = i; - channel->type = &efx_default_channel_type; - - for (j = 0; j < EFX_TXQ_TYPES; j++) { - tx_queue = &channel->tx_queue[j]; - tx_queue->efx = efx; - tx_queue->queue = i * EFX_TXQ_TYPES + j; - tx_queue->channel = channel; - } - -#ifdef CONFIG_RFS_ACCEL - INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire); -#endif - - rx_queue = &channel->rx_queue; - rx_queue->efx = efx; - timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); - - return channel; -} - -/* Allocate and initialise a channel structure, copying parameters - * (but not resources) from an old channel structure. 
- */ -static struct efx_channel * -efx_copy_channel(const struct efx_channel *old_channel) -{ - struct efx_channel *channel; - struct efx_rx_queue *rx_queue; - struct efx_tx_queue *tx_queue; - int j; - - channel = kmalloc(sizeof(*channel), GFP_KERNEL); - if (!channel) - return NULL; - - *channel = *old_channel; - - channel->napi_dev = NULL; - INIT_HLIST_NODE(&channel->napi_str.napi_hash_node); - channel->napi_str.napi_id = 0; - channel->napi_str.state = 0; - memset(&channel->eventq, 0, sizeof(channel->eventq)); - - for (j = 0; j < EFX_TXQ_TYPES; j++) { - tx_queue = &channel->tx_queue[j]; - if (tx_queue->channel) - tx_queue->channel = channel; - tx_queue->buffer = NULL; - memset(&tx_queue->txd, 0, sizeof(tx_queue->txd)); - } - - rx_queue = &channel->rx_queue; - rx_queue->buffer = NULL; - memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd)); - timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); -#ifdef CONFIG_RFS_ACCEL - INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire); -#endif - - return channel; -} - -static int efx_probe_channel(struct efx_channel *channel) -{ - struct efx_tx_queue *tx_queue; - struct efx_rx_queue *rx_queue; - int rc; - - netif_dbg(channel->efx, probe, channel->efx->net_dev, - "creating channel %d\n", channel->channel); - - rc = channel->type->pre_probe(channel); - if (rc) - goto fail; - - rc = efx_probe_eventq(channel); - if (rc) - goto fail; - - efx_for_each_channel_tx_queue(tx_queue, channel) { - rc = efx_probe_tx_queue(tx_queue); - if (rc) - goto fail; - } - - efx_for_each_channel_rx_queue(rx_queue, channel) { - rc = efx_probe_rx_queue(rx_queue); - if (rc) - goto fail; - } - - channel->rx_list = NULL; - - return 0; - -fail: - efx_remove_channel(channel); - return rc; -} - -static void -efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len) -{ - struct efx_nic *efx = channel->efx; - const char *type; - int number; - - number = channel->channel; - - if (number >= efx->xdp_channel_offset && - 
!WARN_ON_ONCE(!efx->n_xdp_channels)) { - type = "-xdp"; - number -= efx->xdp_channel_offset; - } else if (efx->tx_channel_offset == 0) { - type = ""; - } else if (number < efx->tx_channel_offset) { - type = "-rx"; - } else { - type = "-tx"; - number -= efx->tx_channel_offset; - } - snprintf(buf, len, "%s%s-%d", efx->name, type, number); -} - -static void efx_set_channel_names(struct efx_nic *efx) -{ - struct efx_channel *channel; - - efx_for_each_channel(channel, efx) - channel->type->get_name(channel, - efx->msi_context[channel->channel].name, - sizeof(efx->msi_context[0].name)); -} - -static int efx_probe_channels(struct efx_nic *efx) -{ - struct efx_channel *channel; - int rc; - - /* Restart special buffer allocation */ - efx->next_buffer_table = 0; - - /* Probe channels in reverse, so that any 'extra' channels - * use the start of the buffer table. This allows the traffic - * channels to be resized without moving them or wasting the - * entries before them. - */ - efx_for_each_channel_rev(channel, efx) { - rc = efx_probe_channel(channel); - if (rc) { - netif_err(efx, probe, efx->net_dev, - "failed to create channel %d\n", - channel->channel); - goto fail; - } - } - efx_set_channel_names(efx); - - return 0; - -fail: - efx_remove_channels(efx); - return rc; -} - -/* Channels are shutdown and reinitialised whilst the NIC is running - * to propagate configuration changes (mtu, checksum offload), or - * to clear hardware error conditions - */ -static void efx_start_datapath(struct efx_nic *efx) -{ - netdev_features_t old_features = efx->net_dev->features; - bool old_rx_scatter = efx->rx_scatter; - struct efx_tx_queue *tx_queue; - struct efx_rx_queue *rx_queue; - struct efx_channel *channel; - size_t rx_buf_len; - - /* Calculate the rx buffer allocation parameters required to - * support the current MTU, including padding for header - * alignment and overruns. 
- */ - efx->rx_dma_len = (efx->rx_prefix_size + - EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + - efx->type->rx_buffer_padding); - rx_buf_len = (sizeof(struct efx_rx_page_state) + XDP_PACKET_HEADROOM + - efx->rx_ip_align + efx->rx_dma_len); - if (rx_buf_len <= PAGE_SIZE) { - efx->rx_scatter = efx->type->always_rx_scatter; - efx->rx_buffer_order = 0; - } else if (efx->type->can_rx_scatter) { - BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES); - BUILD_BUG_ON(sizeof(struct efx_rx_page_state) + - 2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE, - EFX_RX_BUF_ALIGNMENT) > - PAGE_SIZE); - efx->rx_scatter = true; - efx->rx_dma_len = EFX_RX_USR_BUF_SIZE; - efx->rx_buffer_order = 0; - } else { - efx->rx_scatter = false; - efx->rx_buffer_order = get_order(rx_buf_len); - } - - efx_rx_config_page_split(efx); - if (efx->rx_buffer_order) - netif_dbg(efx, drv, efx->net_dev, - "RX buf len=%u; page order=%u batch=%u\n", - efx->rx_dma_len, efx->rx_buffer_order, - efx->rx_pages_per_batch); - else - netif_dbg(efx, drv, efx->net_dev, - "RX buf len=%u step=%u bpp=%u; page batch=%u\n", - efx->rx_dma_len, efx->rx_page_buf_step, - efx->rx_bufs_per_page, efx->rx_pages_per_batch); - - /* Restore previously fixed features in hw_features and remove - * features which are fixed now - */ - efx->net_dev->hw_features |= efx->net_dev->features; - efx->net_dev->hw_features &= ~efx->fixed_features; - efx->net_dev->features |= efx->fixed_features; - if (efx->net_dev->features != old_features) - netdev_features_change(efx->net_dev); - - /* RX filters may also have scatter-enabled flags */ - if (efx->rx_scatter != old_rx_scatter) - efx->type->filter_update_rx_scatter(efx); - - /* We must keep at least one descriptor in a TX ring empty. - * We could avoid this when the queue size does not exactly - * match the hardware ring size, but it's not that important. - * Therefore we stop the queue when one more skb might fill - * the ring completely. We wake it when half way back to - * empty. 
- */ - efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx); - efx->txq_wake_thresh = efx->txq_stop_thresh / 2; - - /* Initialise the channels */ - efx_for_each_channel(channel, efx) { - efx_for_each_channel_tx_queue(tx_queue, channel) { - efx_init_tx_queue(tx_queue); - atomic_inc(&efx->active_queues); - } - - efx_for_each_channel_rx_queue(rx_queue, channel) { - efx_init_rx_queue(rx_queue); - atomic_inc(&efx->active_queues); - efx_stop_eventq(channel); - efx_fast_push_rx_descriptors(rx_queue, false); - efx_start_eventq(channel); - } - - WARN_ON(channel->rx_pkt_n_frags); - } - - efx_ptp_start_datapath(efx); - - if (netif_device_present(efx->net_dev)) - netif_tx_wake_all_queues(efx->net_dev); -} - -static void efx_stop_datapath(struct efx_nic *efx) -{ - struct efx_channel *channel; - struct efx_tx_queue *tx_queue; - struct efx_rx_queue *rx_queue; - int rc; - - EFX_ASSERT_RESET_SERIALISED(efx); - BUG_ON(efx->port_enabled); - - efx_ptp_stop_datapath(efx); - - /* Stop RX refill */ - efx_for_each_channel(channel, efx) { - efx_for_each_channel_rx_queue(rx_queue, channel) - rx_queue->refill_enabled = false; - } - - efx_for_each_channel(channel, efx) { - /* RX packet processing is pipelined, so wait for the - * NAPI handler to complete. At least event queue 0 - * might be kept active by non-data events, so don't - * use napi_synchronize() but actually disable NAPI - * temporarily. 
- */ - if (efx_channel_has_rx_queue(channel)) { - efx_stop_eventq(channel); - efx_start_eventq(channel); - } - } - - rc = efx->type->fini_dmaq(efx); - if (rc) { - netif_err(efx, drv, efx->net_dev, "failed to flush queues\n"); - } else { - netif_dbg(efx, drv, efx->net_dev, - "successfully flushed all queues\n"); - } - - efx_for_each_channel(channel, efx) { - efx_for_each_channel_rx_queue(rx_queue, channel) - efx_fini_rx_queue(rx_queue); - efx_for_each_possible_channel_tx_queue(tx_queue, channel) - efx_fini_tx_queue(tx_queue); - } - efx->xdp_rxq_info_failed = false; -} - -static void efx_remove_channel(struct efx_channel *channel) -{ - struct efx_tx_queue *tx_queue; - struct efx_rx_queue *rx_queue; - - netif_dbg(channel->efx, drv, channel->efx->net_dev, - "destroy chan %d\n", channel->channel); - - efx_for_each_channel_rx_queue(rx_queue, channel) - efx_remove_rx_queue(rx_queue); - efx_for_each_possible_channel_tx_queue(tx_queue, channel) - efx_remove_tx_queue(tx_queue); - efx_remove_eventq(channel); - channel->type->post_remove(channel); -} - -static void efx_remove_channels(struct efx_nic *efx) -{ - struct efx_channel *channel; - - efx_for_each_channel(channel, efx) - efx_remove_channel(channel); - - kfree(efx->xdp_tx_queues); -} - -int -efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) -{ - struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel; - u32 old_rxq_entries, old_txq_entries; - unsigned i, next_buffer_table = 0; - int rc, rc2; - - rc = efx_check_disabled(efx); - if (rc) - return rc; - - /* Not all channels should be reallocated. We must avoid - * reallocating their buffer table entries. 
- */ - efx_for_each_channel(channel, efx) { - struct efx_rx_queue *rx_queue; - struct efx_tx_queue *tx_queue; - - if (channel->type->copy) - continue; - next_buffer_table = max(next_buffer_table, - channel->eventq.index + - channel->eventq.entries); - efx_for_each_channel_rx_queue(rx_queue, channel) - next_buffer_table = max(next_buffer_table, - rx_queue->rxd.index + - rx_queue->rxd.entries); - efx_for_each_channel_tx_queue(tx_queue, channel) - next_buffer_table = max(next_buffer_table, - tx_queue->txd.index + - tx_queue->txd.entries); - } - - efx_device_detach_sync(efx); - efx_stop_all(efx); - efx_soft_disable_interrupts(efx); - - /* Clone channels (where possible) */ - memset(other_channel, 0, sizeof(other_channel)); - for (i = 0; i < efx->n_channels; i++) { - channel = efx->channel[i]; - if (channel->type->copy) - channel = channel->type->copy(channel); - if (!channel) { - rc = -ENOMEM; - goto out; - } - other_channel[i] = channel; - } - - /* Swap entry counts and channel pointers */ - old_rxq_entries = efx->rxq_entries; - old_txq_entries = efx->txq_entries; - efx->rxq_entries = rxq_entries; - efx->txq_entries = txq_entries; - for (i = 0; i < efx->n_channels; i++) { - channel = efx->channel[i]; - efx->channel[i] = other_channel[i]; - other_channel[i] = channel; - } - - /* Restart buffer table allocation */ - efx->next_buffer_table = next_buffer_table; - - for (i = 0; i < efx->n_channels; i++) { - channel = efx->channel[i]; - if (!channel->type->copy) - continue; - rc = efx_probe_channel(channel); - if (rc) - goto rollback; - efx_init_napi_channel(efx->channel[i]); - } - -out: - /* Destroy unused channel structures */ - for (i = 0; i < efx->n_channels; i++) { - channel = other_channel[i]; - if (channel && channel->type->copy) { - efx_fini_napi_channel(channel); - efx_remove_channel(channel); - kfree(channel); - } - } - - rc2 = efx_soft_enable_interrupts(efx); - if (rc2) { - rc = rc ? 
rc : rc2; - netif_err(efx, drv, efx->net_dev, - "unable to restart interrupts on channel reallocation\n"); - efx_schedule_reset(efx, RESET_TYPE_DISABLE); - } else { - efx_start_all(efx); - efx_device_attach_if_not_resetting(efx); - } - return rc; - -rollback: - /* Swap back */ - efx->rxq_entries = old_rxq_entries; - efx->txq_entries = old_txq_entries; - for (i = 0; i < efx->n_channels; i++) { - channel = efx->channel[i]; - efx->channel[i] = other_channel[i]; - other_channel[i] = channel; - } - goto out; -} - -void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue) -{ - mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(10)); -} - -static bool efx_default_channel_want_txqs(struct efx_channel *channel) -{ - return channel->channel - channel->efx->tx_channel_offset < - channel->efx->n_tx_channels; -} - -static const struct efx_channel_type efx_default_channel_type = { - .pre_probe = efx_channel_dummy_op_int, - .post_remove = efx_channel_dummy_op_void, - .get_name = efx_get_channel_name, - .copy = efx_copy_channel, - .want_txqs = efx_default_channel_want_txqs, - .keep_eventq = false, - .want_pio = true, -}; - -int efx_channel_dummy_op_int(struct efx_channel *channel) -{ - return 0; -} - -void efx_channel_dummy_op_void(struct efx_channel *channel) -{ -} - /************************************************************************** * * Port handling * **************************************************************************/ -/* This ensures that the kernel is kept informed (via - * netif_carrier_on/off) of the link status, and also maintains the - * link status's stop on the port's TX queue. - */ -void efx_link_status_changed(struct efx_nic *efx) -{ - struct efx_link_state *link_state = &efx->link_state; - - /* SFC Bug 5356: A net_dev notifier is registered, so we must ensure - * that no events are triggered between unregister_netdev() and the - * driver unloading. 
A more general condition is that NETDEV_CHANGE - * can only be generated between NETDEV_UP and NETDEV_DOWN */ - if (!netif_running(efx->net_dev)) - return; - - if (link_state->up != netif_carrier_ok(efx->net_dev)) { - efx->n_link_state_changes++; - - if (link_state->up) - netif_carrier_on(efx->net_dev); - else - netif_carrier_off(efx->net_dev); - } - - /* Status message for kernel log */ - if (link_state->up) - netif_info(efx, link, efx->net_dev, - "link up at %uMbps %s-duplex (MTU %d)\n", - link_state->speed, link_state->fd ? "full" : "half", - efx->net_dev->mtu); - else - netif_info(efx, link, efx->net_dev, "link down\n"); -} - -void efx_link_set_advertising(struct efx_nic *efx, - const unsigned long *advertising) -{ - memcpy(efx->link_advertising, advertising, - sizeof(__ETHTOOL_DECLARE_LINK_MODE_MASK())); - - efx->link_advertising[0] |= ADVERTISED_Autoneg; - if (advertising[0] & ADVERTISED_Pause) - efx->wanted_fc |= (EFX_FC_TX | EFX_FC_RX); - else - efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX); - if (advertising[0] & ADVERTISED_Asym_Pause) - efx->wanted_fc ^= EFX_FC_TX; -} - /* Equivalent to efx_link_set_advertising with all-zeroes, except does not * force the Autoneg bit on. */ @@ -1035,73 +160,6 @@ void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc) static void efx_fini_port(struct efx_nic *efx); -/* We assume that efx->type->reconfigure_mac will always try to sync RX - * filters and therefore needs to read-lock the filter table against freeing - */ -void efx_mac_reconfigure(struct efx_nic *efx) -{ - down_read(&efx->filter_sem); - efx->type->reconfigure_mac(efx); - up_read(&efx->filter_sem); -} - -/* Push loopback/power/transmit disable settings to the PHY, and reconfigure - * the MAC appropriately. All other PHY configuration changes are pushed - * through phy_op->set_settings(), and pushed asynchronously to the MAC - * through efx_monitor(). 
- * - * Callers must hold the mac_lock - */ -int __efx_reconfigure_port(struct efx_nic *efx) -{ - enum efx_phy_mode phy_mode; - int rc; - - WARN_ON(!mutex_is_locked(&efx->mac_lock)); - - /* Disable PHY transmit in mac level loopbacks */ - phy_mode = efx->phy_mode; - if (LOOPBACK_INTERNAL(efx)) - efx->phy_mode |= PHY_MODE_TX_DISABLED; - else - efx->phy_mode &= ~PHY_MODE_TX_DISABLED; - - rc = efx->type->reconfigure_port(efx); - - if (rc) - efx->phy_mode = phy_mode; - - return rc; -} - -/* Reinitialise the MAC to pick up new PHY settings, even if the port is - * disabled. */ -int efx_reconfigure_port(struct efx_nic *efx) -{ - int rc; - - EFX_ASSERT_RESET_SERIALISED(efx); - - mutex_lock(&efx->mac_lock); - rc = __efx_reconfigure_port(efx); - mutex_unlock(&efx->mac_lock); - - return rc; -} - -/* Asynchronous work item for changing MAC promiscuity and multicast - * hash. Avoid a drain/rx_ingress enable by reconfiguring the current - * MAC directly. */ -static void efx_mac_work(struct work_struct *data) -{ - struct efx_nic *efx = container_of(data, struct efx_nic, mac_work); - - mutex_lock(&efx->mac_lock); - if (efx->port_enabled) - efx_mac_reconfigure(efx); - mutex_unlock(&efx->mac_lock); -} - static int efx_probe_port(struct efx_nic *efx) { int rc; @@ -1155,44 +213,6 @@ fail1: return rc; } -static void efx_start_port(struct efx_nic *efx) -{ - netif_dbg(efx, ifup, efx->net_dev, "start port\n"); - BUG_ON(efx->port_enabled); - - mutex_lock(&efx->mac_lock); - efx->port_enabled = true; - - /* Ensure MAC ingress/egress is enabled */ - efx_mac_reconfigure(efx); - - mutex_unlock(&efx->mac_lock); -} - -/* Cancel work for MAC reconfiguration, periodic hardware monitoring - * and the async self-test, wait for them to finish and prevent them - * being scheduled again. This doesn't cover online resets, which - * should only be cancelled when removing the device. 
- */ -static void efx_stop_port(struct efx_nic *efx) -{ - netif_dbg(efx, ifdown, efx->net_dev, "stop port\n"); - - EFX_ASSERT_RESET_SERIALISED(efx); - - mutex_lock(&efx->mac_lock); - efx->port_enabled = false; - mutex_unlock(&efx->mac_lock); - - /* Serialise against efx_set_multicast_list() */ - netif_addr_lock_bh(efx->net_dev); - netif_addr_unlock_bh(efx->net_dev); - - cancel_delayed_work_sync(&efx->monitor_work); - efx_selftest_async_cancel(efx); - cancel_work_sync(&efx->mac_work); -} - static void efx_fini_port(struct efx_nic *efx) { netif_dbg(efx, drv, efx->net_dev, "shut down port\n"); @@ -1291,582 +311,6 @@ static void efx_dissociate(struct efx_nic *efx) } } -/* This configures the PCI device to enable I/O and DMA. */ -static int efx_init_io(struct efx_nic *efx) -{ - struct pci_dev *pci_dev = efx->pci_dev; - dma_addr_t dma_mask = efx->type->max_dma_mask; - unsigned int mem_map_size = efx->type->mem_map_size(efx); - int rc, bar; - - netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n"); - - bar = efx->type->mem_bar(efx); - - rc = pci_enable_device(pci_dev); - if (rc) { - netif_err(efx, probe, efx->net_dev, - "failed to enable PCI device\n"); - goto fail1; - } - - pci_set_master(pci_dev); - - /* Set the PCI DMA mask. Try all possibilities from our genuine mask - * down to 32 bits, because some architectures will allow 40 bit - * masks event though they reject 46 bit masks. 
- */ - while (dma_mask > 0x7fffffffUL) { - rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask); - if (rc == 0) - break; - dma_mask >>= 1; - } - if (rc) { - netif_err(efx, probe, efx->net_dev, - "could not find a suitable DMA mask\n"); - goto fail2; - } - netif_dbg(efx, probe, efx->net_dev, - "using DMA mask %llx\n", (unsigned long long) dma_mask); - - efx->membase_phys = pci_resource_start(efx->pci_dev, bar); - rc = pci_request_region(pci_dev, bar, "sfc"); - if (rc) { - netif_err(efx, probe, efx->net_dev, - "request for memory BAR failed\n"); - rc = -EIO; - goto fail3; - } - efx->membase = ioremap_nocache(efx->membase_phys, mem_map_size); - if (!efx->membase) { - netif_err(efx, probe, efx->net_dev, - "could not map memory BAR at %llx+%x\n", - (unsigned long long)efx->membase_phys, mem_map_size); - rc = -ENOMEM; - goto fail4; - } - netif_dbg(efx, probe, efx->net_dev, - "memory BAR at %llx+%x (virtual %p)\n", - (unsigned long long)efx->membase_phys, mem_map_size, - efx->membase); - - return 0; - - fail4: - pci_release_region(efx->pci_dev, bar); - fail3: - efx->membase_phys = 0; - fail2: - pci_disable_device(efx->pci_dev); - fail1: - return rc; -} - -static void efx_fini_io(struct efx_nic *efx) -{ - int bar; - - netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n"); - - if (efx->membase) { - iounmap(efx->membase); - efx->membase = NULL; - } - - if (efx->membase_phys) { - bar = efx->type->mem_bar(efx); - pci_release_region(efx->pci_dev, bar); - efx->membase_phys = 0; - } - - /* Don't disable bus-mastering if VFs are assigned */ - if (!pci_vfs_assigned(efx->pci_dev)) - pci_disable_device(efx->pci_dev); -} - -void efx_set_default_rx_indir_table(struct efx_nic *efx, - struct efx_rss_context *ctx) -{ - size_t i; - - for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++) - ctx->rx_indir_table[i] = - ethtool_rxfh_indir_default(i, efx->rss_spread); -} - -static unsigned int efx_wanted_parallelism(struct efx_nic *efx) -{ - cpumask_var_t thread_mask; - unsigned int count; 
- int cpu; - - if (rss_cpus) { - count = rss_cpus; - } else { - if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) { - netif_warn(efx, probe, efx->net_dev, - "RSS disabled due to allocation failure\n"); - return 1; - } - - count = 0; - for_each_online_cpu(cpu) { - if (!cpumask_test_cpu(cpu, thread_mask)) { - ++count; - cpumask_or(thread_mask, thread_mask, - topology_sibling_cpumask(cpu)); - } - } - - free_cpumask_var(thread_mask); - } - - if (count > EFX_MAX_RX_QUEUES) { - netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn, - "Reducing number of rx queues from %u to %u.\n", - count, EFX_MAX_RX_QUEUES); - count = EFX_MAX_RX_QUEUES; - } - - /* If RSS is requested for the PF *and* VFs then we can't write RSS - * table entries that are inaccessible to VFs - */ -#ifdef CONFIG_SFC_SRIOV - if (efx->type->sriov_wanted) { - if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 && - count > efx_vf_size(efx)) { - netif_warn(efx, probe, efx->net_dev, - "Reducing number of RSS channels from %u to %u for " - "VF support. Increase vf-msix-limit to use more " - "channels on the PF.\n", - count, efx_vf_size(efx)); - count = efx_vf_size(efx); - } - } -#endif - - return count; -} - -static int efx_allocate_msix_channels(struct efx_nic *efx, - unsigned int max_channels, - unsigned int extra_channels, - unsigned int parallelism) -{ - unsigned int n_channels = parallelism; - int vec_count; - int n_xdp_tx; - int n_xdp_ev; - - if (efx_separate_tx_channels) - n_channels *= 2; - n_channels += extra_channels; - - /* To allow XDP transmit to happen from arbitrary NAPI contexts - * we allocate a TX queue per CPU. We share event queues across - * multiple tx queues, assuming tx and ev queues are both - * maximum size. 
- */ - - n_xdp_tx = num_possible_cpus(); - n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_TXQ_TYPES); - - vec_count = pci_msix_vec_count(efx->pci_dev); - if (vec_count < 0) - return vec_count; - - max_channels = min_t(unsigned int, vec_count, max_channels); - - /* Check resources. - * We need a channel per event queue, plus a VI per tx queue. - * This may be more pessimistic than it needs to be. - */ - if (n_channels + n_xdp_ev > max_channels) { - netif_err(efx, drv, efx->net_dev, - "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n", - n_xdp_ev, n_channels, max_channels); - efx->n_xdp_channels = 0; - efx->xdp_tx_per_channel = 0; - efx->xdp_tx_queue_count = 0; - } else { - efx->n_xdp_channels = n_xdp_ev; - efx->xdp_tx_per_channel = EFX_TXQ_TYPES; - efx->xdp_tx_queue_count = n_xdp_tx; - n_channels += n_xdp_ev; - netif_dbg(efx, drv, efx->net_dev, - "Allocating %d TX and %d event queues for XDP\n", - n_xdp_tx, n_xdp_ev); - } - - if (vec_count < n_channels) { - netif_err(efx, drv, efx->net_dev, - "WARNING: Insufficient MSI-X vectors available (%d < %u).\n", - vec_count, n_channels); - netif_err(efx, drv, efx->net_dev, - "WARNING: Performance may be reduced.\n"); - n_channels = vec_count; - } - - n_channels = min(n_channels, max_channels); - - efx->n_channels = n_channels; - - /* Ignore XDP tx channels when creating rx channels. 
*/ - n_channels -= efx->n_xdp_channels; - - if (efx_separate_tx_channels) { - efx->n_tx_channels = - min(max(n_channels / 2, 1U), - efx->max_tx_channels); - efx->tx_channel_offset = - n_channels - efx->n_tx_channels; - efx->n_rx_channels = - max(n_channels - - efx->n_tx_channels, 1U); - } else { - efx->n_tx_channels = min(n_channels, efx->max_tx_channels); - efx->tx_channel_offset = 0; - efx->n_rx_channels = n_channels; - } - - efx->n_rx_channels = min(efx->n_rx_channels, parallelism); - efx->n_tx_channels = min(efx->n_tx_channels, parallelism); - - efx->xdp_channel_offset = n_channels; - - netif_dbg(efx, drv, efx->net_dev, - "Allocating %u RX channels\n", - efx->n_rx_channels); - - return efx->n_channels; -} - -/* Probe the number and type of interrupts we are able to obtain, and - * the resulting numbers of channels and RX queues. - */ -static int efx_probe_interrupts(struct efx_nic *efx) -{ - unsigned int extra_channels = 0; - unsigned int rss_spread; - unsigned int i, j; - int rc; - - for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) - if (efx->extra_channel_type[i]) - ++extra_channels; - - if (efx->interrupt_mode == EFX_INT_MODE_MSIX) { - unsigned int parallelism = efx_wanted_parallelism(efx); - struct msix_entry xentries[EFX_MAX_CHANNELS]; - unsigned int n_channels; - - rc = efx_allocate_msix_channels(efx, efx->max_channels, - extra_channels, parallelism); - if (rc >= 0) { - n_channels = rc; - for (i = 0; i < n_channels; i++) - xentries[i].entry = i; - rc = pci_enable_msix_range(efx->pci_dev, xentries, 1, - n_channels); - } - if (rc < 0) { - /* Fall back to single channel MSI */ - netif_err(efx, drv, efx->net_dev, - "could not enable MSI-X\n"); - if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI) - efx->interrupt_mode = EFX_INT_MODE_MSI; - else - return rc; - } else if (rc < n_channels) { - netif_err(efx, drv, efx->net_dev, - "WARNING: Insufficient MSI-X vectors" - " available (%d < %u).\n", rc, n_channels); - netif_err(efx, drv, efx->net_dev, - "WARNING: 
Performance may be reduced.\n"); - n_channels = rc; - } - - if (rc > 0) { - for (i = 0; i < efx->n_channels; i++) - efx_get_channel(efx, i)->irq = - xentries[i].vector; - } - } - - /* Try single interrupt MSI */ - if (efx->interrupt_mode == EFX_INT_MODE_MSI) { - efx->n_channels = 1; - efx->n_rx_channels = 1; - efx->n_tx_channels = 1; - efx->n_xdp_channels = 0; - efx->xdp_channel_offset = efx->n_channels; - rc = pci_enable_msi(efx->pci_dev); - if (rc == 0) { - efx_get_channel(efx, 0)->irq = efx->pci_dev->irq; - } else { - netif_err(efx, drv, efx->net_dev, - "could not enable MSI\n"); - if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY) - efx->interrupt_mode = EFX_INT_MODE_LEGACY; - else - return rc; - } - } - - /* Assume legacy interrupts */ - if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) { - efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0); - efx->n_rx_channels = 1; - efx->n_tx_channels = 1; - efx->n_xdp_channels = 0; - efx->xdp_channel_offset = efx->n_channels; - efx->legacy_irq = efx->pci_dev->irq; - } - - /* Assign extra channels if possible, before XDP channels */ - efx->n_extra_tx_channels = 0; - j = efx->xdp_channel_offset; - for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) { - if (!efx->extra_channel_type[i]) - continue; - if (j <= efx->tx_channel_offset + efx->n_tx_channels) { - efx->extra_channel_type[i]->handle_no_channel(efx); - } else { - --j; - efx_get_channel(efx, j)->type = - efx->extra_channel_type[i]; - if (efx_channel_has_tx_queues(efx_get_channel(efx, j))) - efx->n_extra_tx_channels++; - } - } - - rss_spread = efx->n_rx_channels; - /* RSS might be usable on VFs even if it is disabled on the PF */ -#ifdef CONFIG_SFC_SRIOV - if (efx->type->sriov_wanted) { - efx->rss_spread = ((rss_spread > 1 || - !efx->type->sriov_wanted(efx)) ? 
- rss_spread : efx_vf_size(efx)); - return 0; - } -#endif - efx->rss_spread = rss_spread; - - return 0; -} - -#if defined(CONFIG_SMP) -static void efx_set_interrupt_affinity(struct efx_nic *efx) -{ - struct efx_channel *channel; - unsigned int cpu; - - efx_for_each_channel(channel, efx) { - cpu = cpumask_local_spread(channel->channel, - pcibus_to_node(efx->pci_dev->bus)); - irq_set_affinity_hint(channel->irq, cpumask_of(cpu)); - } -} - -static void efx_clear_interrupt_affinity(struct efx_nic *efx) -{ - struct efx_channel *channel; - - efx_for_each_channel(channel, efx) - irq_set_affinity_hint(channel->irq, NULL); -} -#else -static void -efx_set_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused))) -{ -} - -static void -efx_clear_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused))) -{ -} -#endif /* CONFIG_SMP */ - -static int efx_soft_enable_interrupts(struct efx_nic *efx) -{ - struct efx_channel *channel, *end_channel; - int rc; - - BUG_ON(efx->state == STATE_DISABLED); - - efx->irq_soft_enabled = true; - smp_wmb(); - - efx_for_each_channel(channel, efx) { - if (!channel->type->keep_eventq) { - rc = efx_init_eventq(channel); - if (rc) - goto fail; - } - efx_start_eventq(channel); - } - - efx_mcdi_mode_event(efx); - - return 0; -fail: - end_channel = channel; - efx_for_each_channel(channel, efx) { - if (channel == end_channel) - break; - efx_stop_eventq(channel); - if (!channel->type->keep_eventq) - efx_fini_eventq(channel); - } - - return rc; -} - -static void efx_soft_disable_interrupts(struct efx_nic *efx) -{ - struct efx_channel *channel; - - if (efx->state == STATE_DISABLED) - return; - - efx_mcdi_mode_poll(efx); - - efx->irq_soft_enabled = false; - smp_wmb(); - - if (efx->legacy_irq) - synchronize_irq(efx->legacy_irq); - - efx_for_each_channel(channel, efx) { - if (channel->irq) - synchronize_irq(channel->irq); - - efx_stop_eventq(channel); - if (!channel->type->keep_eventq) - efx_fini_eventq(channel); - } - - /* Flush the asynchronous 
MCDI request queue */ - efx_mcdi_flush_async(efx); -} - -static int efx_enable_interrupts(struct efx_nic *efx) -{ - struct efx_channel *channel, *end_channel; - int rc; - - BUG_ON(efx->state == STATE_DISABLED); - - if (efx->eeh_disabled_legacy_irq) { - enable_irq(efx->legacy_irq); - efx->eeh_disabled_legacy_irq = false; - } - - efx->type->irq_enable_master(efx); - - efx_for_each_channel(channel, efx) { - if (channel->type->keep_eventq) { - rc = efx_init_eventq(channel); - if (rc) - goto fail; - } - } - - rc = efx_soft_enable_interrupts(efx); - if (rc) - goto fail; - - return 0; - -fail: - end_channel = channel; - efx_for_each_channel(channel, efx) { - if (channel == end_channel) - break; - if (channel->type->keep_eventq) - efx_fini_eventq(channel); - } - - efx->type->irq_disable_non_ev(efx); - - return rc; -} - -static void efx_disable_interrupts(struct efx_nic *efx) -{ - struct efx_channel *channel; - - efx_soft_disable_interrupts(efx); - - efx_for_each_channel(channel, efx) { - if (channel->type->keep_eventq) - efx_fini_eventq(channel); - } - - efx->type->irq_disable_non_ev(efx); -} - -static void efx_remove_interrupts(struct efx_nic *efx) -{ - struct efx_channel *channel; - - /* Remove MSI/MSI-X interrupts */ - efx_for_each_channel(channel, efx) - channel->irq = 0; - pci_disable_msi(efx->pci_dev); - pci_disable_msix(efx->pci_dev); - - /* Remove legacy interrupt */ - efx->legacy_irq = 0; -} - -static int efx_set_channels(struct efx_nic *efx) -{ - struct efx_channel *channel; - struct efx_tx_queue *tx_queue; - int xdp_queue_number; - - efx->tx_channel_offset = - efx_separate_tx_channels ? - efx->n_channels - efx->n_tx_channels : 0; - - if (efx->xdp_tx_queue_count) { - EFX_WARN_ON_PARANOID(efx->xdp_tx_queues); - - /* Allocate array for XDP TX queue lookup. 
*/ - efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count, - sizeof(*efx->xdp_tx_queues), - GFP_KERNEL); - if (!efx->xdp_tx_queues) - return -ENOMEM; - } - - /* We need to mark which channels really have RX and TX - * queues, and adjust the TX queue numbers if we have separate - * RX-only and TX-only channels. - */ - xdp_queue_number = 0; - efx_for_each_channel(channel, efx) { - if (channel->channel < efx->n_rx_channels) - channel->rx_queue.core_index = channel->channel; - else - channel->rx_queue.core_index = -1; - - efx_for_each_channel_tx_queue(tx_queue, channel) { - tx_queue->queue -= (efx->tx_channel_offset * - EFX_TXQ_TYPES); - - if (efx_channel_is_xdp_tx(channel) && - xdp_queue_number < efx->xdp_tx_queue_count) { - efx->xdp_tx_queues[xdp_queue_number] = tx_queue; - xdp_queue_number++; - } - } - } - return 0; -} - static int efx_probe_nic(struct efx_nic *efx) { int rc; @@ -1939,70 +383,6 @@ static void efx_remove_nic(struct efx_nic *efx) efx->type->remove(efx); } -static int efx_probe_filters(struct efx_nic *efx) -{ - int rc; - - init_rwsem(&efx->filter_sem); - mutex_lock(&efx->mac_lock); - down_write(&efx->filter_sem); - rc = efx->type->filter_table_probe(efx); - if (rc) - goto out_unlock; - -#ifdef CONFIG_RFS_ACCEL - if (efx->type->offload_features & NETIF_F_NTUPLE) { - struct efx_channel *channel; - int i, success = 1; - - efx_for_each_channel(channel, efx) { - channel->rps_flow_id = - kcalloc(efx->type->max_rx_ip_filters, - sizeof(*channel->rps_flow_id), - GFP_KERNEL); - if (!channel->rps_flow_id) - success = 0; - else - for (i = 0; - i < efx->type->max_rx_ip_filters; - ++i) - channel->rps_flow_id[i] = - RPS_FLOW_ID_INVALID; - channel->rfs_expire_index = 0; - channel->rfs_filter_count = 0; - } - - if (!success) { - efx_for_each_channel(channel, efx) - kfree(channel->rps_flow_id); - efx->type->filter_table_remove(efx); - rc = -ENOMEM; - goto out_unlock; - } - } -#endif -out_unlock: - up_write(&efx->filter_sem); - mutex_unlock(&efx->mac_lock); - return rc; 
-} - -static void efx_remove_filters(struct efx_nic *efx) -{ -#ifdef CONFIG_RFS_ACCEL - struct efx_channel *channel; - - efx_for_each_channel(channel, efx) { - cancel_delayed_work_sync(&channel->filter_work); - kfree(channel->rps_flow_id); - } -#endif - down_write(&efx->filter_sem); - efx->type->filter_table_remove(efx); - up_write(&efx->filter_sem); -} - - /************************************************************************** * * NIC startup/shutdown @@ -2067,81 +447,6 @@ static int efx_probe_all(struct efx_nic *efx) return rc; } -/* If the interface is supposed to be running but is not, start - * the hardware and software data path, regular activity for the port - * (MAC statistics, link polling, etc.) and schedule the port to be - * reconfigured. Interrupts must already be enabled. This function - * is safe to call multiple times, so long as the NIC is not disabled. - * Requires the RTNL lock. - */ -static void efx_start_all(struct efx_nic *efx) -{ - EFX_ASSERT_RESET_SERIALISED(efx); - BUG_ON(efx->state == STATE_DISABLED); - - /* Check that it is appropriate to restart the interface. 
All - * of these flags are safe to read under just the rtnl lock */ - if (efx->port_enabled || !netif_running(efx->net_dev) || - efx->reset_pending) - return; - - efx_start_port(efx); - efx_start_datapath(efx); - - /* Start the hardware monitor if there is one */ - if (efx->type->monitor != NULL) - queue_delayed_work(efx->workqueue, &efx->monitor_work, - efx_monitor_interval); - - /* Link state detection is normally event-driven; we have - * to poll now because we could have missed a change - */ - mutex_lock(&efx->mac_lock); - if (efx->phy_op->poll(efx)) - efx_link_status_changed(efx); - mutex_unlock(&efx->mac_lock); - - efx->type->start_stats(efx); - efx->type->pull_stats(efx); - spin_lock_bh(&efx->stats_lock); - efx->type->update_stats(efx, NULL, NULL); - spin_unlock_bh(&efx->stats_lock); -} - -/* Quiesce the hardware and software data path, and regular activity - * for the port without bringing the link down. Safe to call multiple - * times with the NIC in almost any state, but interrupts should be - * enabled. Requires the RTNL lock. - */ -static void efx_stop_all(struct efx_nic *efx) -{ - EFX_ASSERT_RESET_SERIALISED(efx); - - /* port_enabled can be read safely under the rtnl lock */ - if (!efx->port_enabled) - return; - - /* update stats before we go down so we can accurately count - * rx_nodesc_drops - */ - efx->type->pull_stats(efx); - spin_lock_bh(&efx->stats_lock); - efx->type->update_stats(efx, NULL, NULL); - spin_unlock_bh(&efx->stats_lock); - efx->type->stop_stats(efx); - efx_stop_port(efx); - - /* Stop the kernel transmit interface. This is only valid if - * the device is stopped or detached; otherwise the watchdog - * may fire immediately. 
- */ - WARN_ON(netif_running(efx->net_dev) && - netif_device_present(efx->net_dev)); - netif_tx_disable(efx->net_dev); - - efx_stop_datapath(efx); -} - static void efx_remove_all(struct efx_nic *efx) { rtnl_lock(); @@ -2237,36 +542,6 @@ void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs, /************************************************************************** * - * Hardware monitor - * - **************************************************************************/ - -/* Run periodically off the general workqueue */ -static void efx_monitor(struct work_struct *data) -{ - struct efx_nic *efx = container_of(data, struct efx_nic, - monitor_work.work); - - netif_vdbg(efx, timer, efx->net_dev, - "hardware monitor executing on CPU %d\n", - raw_smp_processor_id()); - BUG_ON(efx->type->monitor == NULL); - - /* If the mac_lock is already held then it is likely a port - * reconfiguration is already in place, which will likely do - * most of the work of monitor() anyway. */ - if (mutex_trylock(&efx->mac_lock)) { - if (efx->port_enabled) - efx->type->monitor(efx); - mutex_unlock(&efx->mac_lock); - } - - queue_delayed_work(efx->workqueue, &efx->monitor_work, - efx_monitor_interval); -} - -/************************************************************************** - * * ioctls * *************************************************************************/ @@ -2294,45 +569,6 @@ static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd) /************************************************************************** * - * NAPI interface - * - **************************************************************************/ - -static void efx_init_napi_channel(struct efx_channel *channel) -{ - struct efx_nic *efx = channel->efx; - - channel->napi_dev = efx->net_dev; - netif_napi_add(channel->napi_dev, &channel->napi_str, - efx_poll, napi_weight); -} - -static void efx_init_napi(struct efx_nic *efx) -{ - struct efx_channel *channel; - - 
efx_for_each_channel(channel, efx) - efx_init_napi_channel(channel); -} - -static void efx_fini_napi_channel(struct efx_channel *channel) -{ - if (channel->napi_dev) - netif_napi_del(&channel->napi_str); - - channel->napi_dev = NULL; -} - -static void efx_fini_napi(struct efx_nic *efx) -{ - struct efx_channel *channel; - - efx_for_each_channel(channel, efx) - efx_fini_napi_channel(channel); -} - -/************************************************************************** - * * Kernel net device interface * *************************************************************************/ @@ -2382,19 +618,8 @@ int efx_net_stop(struct net_device *net_dev) return 0; } -/* Context: process, dev_base_lock or RTNL held, non-blocking. */ -static void efx_net_stats(struct net_device *net_dev, - struct rtnl_link_stats64 *stats) -{ - struct efx_nic *efx = netdev_priv(net_dev); - - spin_lock_bh(&efx->stats_lock); - efx->type->update_stats(efx, NULL, stats); - spin_unlock_bh(&efx->stats_lock); -} - /* Context: netif_tx_lock held, BHs disabled. */ -static void efx_watchdog(struct net_device *net_dev) +static void efx_watchdog(struct net_device *net_dev, unsigned int txqueue) { struct efx_nic *efx = netdev_priv(net_dev); @@ -2405,51 +630,6 @@ static void efx_watchdog(struct net_device *net_dev) efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG); } -static unsigned int efx_xdp_max_mtu(struct efx_nic *efx) -{ - /* The maximum MTU that we can fit in a single page, allowing for - * framing, overhead and XDP headroom. - */ - int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) + - efx->rx_prefix_size + efx->type->rx_buffer_padding + - efx->rx_ip_align + XDP_PACKET_HEADROOM; - - return PAGE_SIZE - overhead; -} - -/* Context: process, rtnl_lock() held. 
*/ -static int efx_change_mtu(struct net_device *net_dev, int new_mtu) -{ - struct efx_nic *efx = netdev_priv(net_dev); - int rc; - - rc = efx_check_disabled(efx); - if (rc) - return rc; - - if (rtnl_dereference(efx->xdp_prog) && - new_mtu > efx_xdp_max_mtu(efx)) { - netif_err(efx, drv, efx->net_dev, - "Requested MTU of %d too big for XDP (max: %d)\n", - new_mtu, efx_xdp_max_mtu(efx)); - return -EINVAL; - } - - netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu); - - efx_device_detach_sync(efx); - efx_stop_all(efx); - - mutex_lock(&efx->mac_lock); - net_dev->mtu = new_mtu; - efx_mac_reconfigure(efx); - mutex_unlock(&efx->mac_lock); - - efx_start_all(efx); - efx_device_attach_if_not_resetting(efx); - return 0; -} - static int efx_set_mac_address(struct net_device *net_dev, void *data) { struct efx_nic *efx = netdev_priv(net_dev); @@ -2726,28 +906,6 @@ show_phy_type(struct device *dev, struct device_attribute *attr, char *buf) } static DEVICE_ATTR(phy_type, 0444, show_phy_type, NULL); -#ifdef CONFIG_SFC_MCDI_LOGGING -static ssize_t show_mcdi_log(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct efx_nic *efx = dev_get_drvdata(dev); - struct efx_mcdi_iface *mcdi = efx_mcdi(efx); - - return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled); -} -static ssize_t set_mcdi_log(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct efx_nic *efx = dev_get_drvdata(dev); - struct efx_mcdi_iface *mcdi = efx_mcdi(efx); - bool enable = count > 0 && *buf != '0'; - - mcdi->logging_enabled = enable; - return count; -} -static DEVICE_ATTR(mcdi_logging, 0644, show_mcdi_log, set_mcdi_log); -#endif - static int efx_register_netdev(struct efx_nic *efx) { struct net_device *net_dev = efx->net_dev; @@ -2807,21 +965,11 @@ static int efx_register_netdev(struct efx_nic *efx) "failed to init net dev attributes\n"); goto fail_registered; } -#ifdef CONFIG_SFC_MCDI_LOGGING - rc = 
device_create_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); - if (rc) { - netif_err(efx, drv, efx->net_dev, - "failed to init net dev attributes\n"); - goto fail_attr_mcdi_logging; - } -#endif + + efx_init_mcdi_logging(efx); return 0; -#ifdef CONFIG_SFC_MCDI_LOGGING -fail_attr_mcdi_logging: - device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type); -#endif fail_registered: rtnl_lock(); efx_dissociate(efx); @@ -2842,9 +990,7 @@ static void efx_unregister_netdev(struct efx_nic *efx) if (efx_dev_registered(efx)) { strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name)); -#ifdef CONFIG_SFC_MCDI_LOGGING - device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); -#endif + efx_fini_mcdi_logging(efx); device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type); unregister_netdev(efx->net_dev); } @@ -2852,292 +998,6 @@ static void efx_unregister_netdev(struct efx_nic *efx) /************************************************************************** * - * Device reset and suspend - * - **************************************************************************/ - -/* Tears down the entire software state and most of the hardware state - * before reset. */ -void efx_reset_down(struct efx_nic *efx, enum reset_type method) -{ - EFX_ASSERT_RESET_SERIALISED(efx); - - if (method == RESET_TYPE_MCDI_TIMEOUT) - efx->type->prepare_flr(efx); - - efx_stop_all(efx); - efx_disable_interrupts(efx); - - mutex_lock(&efx->mac_lock); - down_write(&efx->filter_sem); - mutex_lock(&efx->rss_lock); - if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && - method != RESET_TYPE_DATAPATH) - efx->phy_op->fini(efx); - efx->type->fini(efx); -} - -/* This function will always ensure that the locks acquired in - * efx_reset_down() are released. A failure return code indicates - * that we were unable to reinitialise the hardware, and the - * driver should be disabled. If ok is false, then the rx and tx - * engines are not restarted, pending a RESET_DISABLE. 
*/ -int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) -{ - int rc; - - EFX_ASSERT_RESET_SERIALISED(efx); - - if (method == RESET_TYPE_MCDI_TIMEOUT) - efx->type->finish_flr(efx); - - /* Ensure that SRAM is initialised even if we're disabling the device */ - rc = efx->type->init(efx); - if (rc) { - netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n"); - goto fail; - } - - if (!ok) - goto fail; - - if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && - method != RESET_TYPE_DATAPATH) { - rc = efx->phy_op->init(efx); - if (rc) - goto fail; - rc = efx->phy_op->reconfigure(efx); - if (rc && rc != -EPERM) - netif_err(efx, drv, efx->net_dev, - "could not restore PHY settings\n"); - } - - rc = efx_enable_interrupts(efx); - if (rc) - goto fail; - -#ifdef CONFIG_SFC_SRIOV - rc = efx->type->vswitching_restore(efx); - if (rc) /* not fatal; the PF will still work fine */ - netif_warn(efx, probe, efx->net_dev, - "failed to restore vswitching rc=%d;" - " VFs may not function\n", rc); -#endif - - if (efx->type->rx_restore_rss_contexts) - efx->type->rx_restore_rss_contexts(efx); - mutex_unlock(&efx->rss_lock); - efx->type->filter_table_restore(efx); - up_write(&efx->filter_sem); - if (efx->type->sriov_reset) - efx->type->sriov_reset(efx); - - mutex_unlock(&efx->mac_lock); - - efx_start_all(efx); - - if (efx->type->udp_tnl_push_ports) - efx->type->udp_tnl_push_ports(efx); - - return 0; - -fail: - efx->port_initialized = false; - - mutex_unlock(&efx->rss_lock); - up_write(&efx->filter_sem); - mutex_unlock(&efx->mac_lock); - - return rc; -} - -/* Reset the NIC using the specified method. Note that the reset may - * fail, in which case the card will be left in an unusable state. - * - * Caller must hold the rtnl_lock. 
- */ -int efx_reset(struct efx_nic *efx, enum reset_type method) -{ - int rc, rc2; - bool disabled; - - netif_info(efx, drv, efx->net_dev, "resetting (%s)\n", - RESET_TYPE(method)); - - efx_device_detach_sync(efx); - efx_reset_down(efx, method); - - rc = efx->type->reset(efx, method); - if (rc) { - netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n"); - goto out; - } - - /* Clear flags for the scopes we covered. We assume the NIC and - * driver are now quiescent so that there is no race here. - */ - if (method < RESET_TYPE_MAX_METHOD) - efx->reset_pending &= -(1 << (method + 1)); - else /* it doesn't fit into the well-ordered scope hierarchy */ - __clear_bit(method, &efx->reset_pending); - - /* Reinitialise bus-mastering, which may have been turned off before - * the reset was scheduled. This is still appropriate, even in the - * RESET_TYPE_DISABLE since this driver generally assumes the hardware - * can respond to requests. */ - pci_set_master(efx->pci_dev); - -out: - /* Leave device stopped if necessary */ - disabled = rc || - method == RESET_TYPE_DISABLE || - method == RESET_TYPE_RECOVER_OR_DISABLE; - rc2 = efx_reset_up(efx, method, !disabled); - if (rc2) { - disabled = true; - if (!rc) - rc = rc2; - } - - if (disabled) { - dev_close(efx->net_dev); - netif_err(efx, drv, efx->net_dev, "has been disabled\n"); - efx->state = STATE_DISABLED; - } else { - netif_dbg(efx, drv, efx->net_dev, "reset complete\n"); - efx_device_attach_if_not_resetting(efx); - } - return rc; -} - -/* Try recovery mechanisms. - * For now only EEH is supported. - * Returns 0 if the recovery mechanisms are unsuccessful. - * Returns a non-zero value otherwise. - */ -int efx_try_recovery(struct efx_nic *efx) -{ -#ifdef CONFIG_EEH - /* A PCI error can occur and not be seen by EEH because nothing - * happens on the PCI bus. In this case the driver may fail and - * schedule a 'recover or reset', leading to this recovery handler. - * Manually call the eeh failure check function. 
- */ - struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev); - if (eeh_dev_check_failure(eehdev)) { - /* The EEH mechanisms will handle the error and reset the - * device if necessary. - */ - return 1; - } -#endif - return 0; -} - -static void efx_wait_for_bist_end(struct efx_nic *efx) -{ - int i; - - for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) { - if (efx_mcdi_poll_reboot(efx)) - goto out; - msleep(BIST_WAIT_DELAY_MS); - } - - netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n"); -out: - /* Either way unset the BIST flag. If we found no reboot we probably - * won't recover, but we should try. - */ - efx->mc_bist_for_other_fn = false; -} - -/* The worker thread exists so that code that cannot sleep can - * schedule a reset for later. - */ -static void efx_reset_work(struct work_struct *data) -{ - struct efx_nic *efx = container_of(data, struct efx_nic, reset_work); - unsigned long pending; - enum reset_type method; - - pending = READ_ONCE(efx->reset_pending); - method = fls(pending) - 1; - - if (method == RESET_TYPE_MC_BIST) - efx_wait_for_bist_end(efx); - - if ((method == RESET_TYPE_RECOVER_OR_DISABLE || - method == RESET_TYPE_RECOVER_OR_ALL) && - efx_try_recovery(efx)) - return; - - if (!pending) - return; - - rtnl_lock(); - - /* We checked the state in efx_schedule_reset() but it may - * have changed by now. Now that we have the RTNL lock, - * it cannot change again. 
- */ - if (efx->state == STATE_READY) - (void)efx_reset(efx, method); - - rtnl_unlock(); -} - -void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) -{ - enum reset_type method; - - if (efx->state == STATE_RECOVERY) { - netif_dbg(efx, drv, efx->net_dev, - "recovering: skip scheduling %s reset\n", - RESET_TYPE(type)); - return; - } - - switch (type) { - case RESET_TYPE_INVISIBLE: - case RESET_TYPE_ALL: - case RESET_TYPE_RECOVER_OR_ALL: - case RESET_TYPE_WORLD: - case RESET_TYPE_DISABLE: - case RESET_TYPE_RECOVER_OR_DISABLE: - case RESET_TYPE_DATAPATH: - case RESET_TYPE_MC_BIST: - case RESET_TYPE_MCDI_TIMEOUT: - method = type; - netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", - RESET_TYPE(method)); - break; - default: - method = efx->type->map_reset_reason(type); - netif_dbg(efx, drv, efx->net_dev, - "scheduling %s reset for %s\n", - RESET_TYPE(method), RESET_TYPE(type)); - break; - } - - set_bit(method, &efx->reset_pending); - smp_mb(); /* ensure we change reset_pending before checking state */ - - /* If we're not READY then just leave the flags set as the cue - * to abort probing or reschedule the reset later. - */ - if (READ_ONCE(efx->state) != STATE_READY) - return; - - /* efx_process_channel() will no longer read events once a - * reset is scheduled. So switch back to poll'd MCDI completions. 
*/ - efx_mcdi_mode_poll(efx); - - queue_work(reset_workqueue, &efx->reset_work); -} - -/************************************************************************** - * * List of NICs we support * **************************************************************************/ @@ -3169,139 +1029,10 @@ static const struct pci_device_id efx_pci_table[] = { /************************************************************************** * - * Dummy PHY/MAC operations - * - * Can be used for some unimplemented operations - * Needed so all function pointers are valid and do not have to be tested - * before use - * - **************************************************************************/ -int efx_port_dummy_op_int(struct efx_nic *efx) -{ - return 0; -} -void efx_port_dummy_op_void(struct efx_nic *efx) {} - -static bool efx_port_dummy_op_poll(struct efx_nic *efx) -{ - return false; -} - -static const struct efx_phy_operations efx_dummy_phy_operations = { - .init = efx_port_dummy_op_int, - .reconfigure = efx_port_dummy_op_int, - .poll = efx_port_dummy_op_poll, - .fini = efx_port_dummy_op_void, -}; - -/************************************************************************** - * * Data housekeeping * **************************************************************************/ -/* This zeroes out and then fills in the invariants in a struct - * efx_nic (including all sub-structures). 
- */ -static int efx_init_struct(struct efx_nic *efx, - struct pci_dev *pci_dev, struct net_device *net_dev) -{ - int rc = -ENOMEM, i; - - /* Initialise common structures */ - INIT_LIST_HEAD(&efx->node); - INIT_LIST_HEAD(&efx->secondary_list); - spin_lock_init(&efx->biu_lock); -#ifdef CONFIG_SFC_MTD - INIT_LIST_HEAD(&efx->mtd_list); -#endif - INIT_WORK(&efx->reset_work, efx_reset_work); - INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor); - INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work); - efx->pci_dev = pci_dev; - efx->msg_enable = debug; - efx->state = STATE_UNINIT; - strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name)); - - efx->net_dev = net_dev; - efx->rx_prefix_size = efx->type->rx_prefix_size; - efx->rx_ip_align = - NET_IP_ALIGN ? (efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0; - efx->rx_packet_hash_offset = - efx->type->rx_hash_offset - efx->type->rx_prefix_size; - efx->rx_packet_ts_offset = - efx->type->rx_ts_offset - efx->type->rx_prefix_size; - INIT_LIST_HEAD(&efx->rss_context.list); - mutex_init(&efx->rss_lock); - spin_lock_init(&efx->stats_lock); - efx->vi_stride = EFX_DEFAULT_VI_STRIDE; - efx->num_mac_stats = MC_CMD_MAC_NSTATS; - BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END); - mutex_init(&efx->mac_lock); -#ifdef CONFIG_RFS_ACCEL - mutex_init(&efx->rps_mutex); - spin_lock_init(&efx->rps_hash_lock); - /* Failure to allocate is not fatal, but may degrade ARFS performance */ - efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE, - sizeof(*efx->rps_hash_table), GFP_KERNEL); -#endif - efx->phy_op = &efx_dummy_phy_operations; - efx->mdio.dev = net_dev; - INIT_WORK(&efx->mac_work, efx_mac_work); - init_waitqueue_head(&efx->flush_wq); - - for (i = 0; i < EFX_MAX_CHANNELS; i++) { - efx->channel[i] = efx_alloc_channel(efx, i, NULL); - if (!efx->channel[i]) - goto fail; - efx->msi_context[i].efx = efx; - efx->msi_context[i].index = i; - } - - /* Higher numbered interrupt modes are less capable! 
*/ - if (WARN_ON_ONCE(efx->type->max_interrupt_mode > - efx->type->min_interrupt_mode)) { - rc = -EIO; - goto fail; - } - efx->interrupt_mode = max(efx->type->max_interrupt_mode, - interrupt_mode); - efx->interrupt_mode = min(efx->type->min_interrupt_mode, - interrupt_mode); - - /* Would be good to use the net_dev name, but we're too early */ - snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s", - pci_name(pci_dev)); - efx->workqueue = create_singlethread_workqueue(efx->workqueue_name); - if (!efx->workqueue) - goto fail; - - return 0; - -fail: - efx_fini_struct(efx); - return rc; -} - -static void efx_fini_struct(struct efx_nic *efx) -{ - int i; - -#ifdef CONFIG_RFS_ACCEL - kfree(efx->rps_hash_table); -#endif - - for (i = 0; i < EFX_MAX_CHANNELS; i++) - kfree(efx->channel[i]); - - kfree(efx->vpd_sn); - - if (efx->workqueue) { - destroy_workqueue(efx->workqueue); - efx->workqueue = NULL; - } -} - void efx_update_sw_stats(struct efx_nic *efx, u64 *stats) { u64 n_rx_nodesc_trunc = 0; @@ -3313,197 +1044,6 @@ void efx_update_sw_stats(struct efx_nic *efx, u64 *stats) stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops); } -bool efx_filter_spec_equal(const struct efx_filter_spec *left, - const struct efx_filter_spec *right) -{ - if ((left->match_flags ^ right->match_flags) | - ((left->flags ^ right->flags) & - (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX))) - return false; - - return memcmp(&left->outer_vid, &right->outer_vid, - sizeof(struct efx_filter_spec) - - offsetof(struct efx_filter_spec, outer_vid)) == 0; -} - -u32 efx_filter_spec_hash(const struct efx_filter_spec *spec) -{ - BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3); - return jhash2((const u32 *)&spec->outer_vid, - (sizeof(struct efx_filter_spec) - - offsetof(struct efx_filter_spec, outer_vid)) / 4, - 0); -} - -#ifdef CONFIG_RFS_ACCEL -bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx, - bool *force) -{ - if (rule->filter_id == 
EFX_ARFS_FILTER_ID_PENDING) { - /* ARFS is currently updating this entry, leave it */ - return false; - } - if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) { - /* ARFS tried and failed to update this, so it's probably out - * of date. Remove the filter and the ARFS rule entry. - */ - rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING; - *force = true; - return true; - } else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */ - /* ARFS has moved on, so old filter is not needed. Since we did - * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will - * not be removed by efx_rps_hash_del() subsequently. - */ - *force = true; - return true; - } - /* Remove it iff ARFS wants to. */ - return true; -} - -static -struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx, - const struct efx_filter_spec *spec) -{ - u32 hash = efx_filter_spec_hash(spec); - - lockdep_assert_held(&efx->rps_hash_lock); - if (!efx->rps_hash_table) - return NULL; - return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE]; -} - -struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx, - const struct efx_filter_spec *spec) -{ - struct efx_arfs_rule *rule; - struct hlist_head *head; - struct hlist_node *node; - - head = efx_rps_hash_bucket(efx, spec); - if (!head) - return NULL; - hlist_for_each(node, head) { - rule = container_of(node, struct efx_arfs_rule, node); - if (efx_filter_spec_equal(spec, &rule->spec)) - return rule; - } - return NULL; -} - -struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx, - const struct efx_filter_spec *spec, - bool *new) -{ - struct efx_arfs_rule *rule; - struct hlist_head *head; - struct hlist_node *node; - - head = efx_rps_hash_bucket(efx, spec); - if (!head) - return NULL; - hlist_for_each(node, head) { - rule = container_of(node, struct efx_arfs_rule, node); - if (efx_filter_spec_equal(spec, &rule->spec)) { - *new = false; - return rule; - } - } - rule = kmalloc(sizeof(*rule), GFP_ATOMIC); - *new = true; - if (rule) { - 
memcpy(&rule->spec, spec, sizeof(rule->spec)); - hlist_add_head(&rule->node, head); - } - return rule; -} - -void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec) -{ - struct efx_arfs_rule *rule; - struct hlist_head *head; - struct hlist_node *node; - - head = efx_rps_hash_bucket(efx, spec); - if (WARN_ON(!head)) - return; - hlist_for_each(node, head) { - rule = container_of(node, struct efx_arfs_rule, node); - if (efx_filter_spec_equal(spec, &rule->spec)) { - /* Someone already reused the entry. We know that if - * this check doesn't fire (i.e. filter_id == REMOVING) - * then the REMOVING mark was put there by our caller, - * because caller is holding a lock on filter table and - * only holders of that lock set REMOVING. - */ - if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING) - return; - hlist_del(node); - kfree(rule); - return; - } - } - /* We didn't find it. */ - WARN_ON(1); -} -#endif - -/* RSS contexts. We're using linked lists and crappy O(n) algorithms, because - * (a) this is an infrequent control-plane operation and (b) n is small (max 64) - */ -struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx) -{ - struct list_head *head = &efx->rss_context.list; - struct efx_rss_context *ctx, *new; - u32 id = 1; /* Don't use zero, that refers to the master RSS context */ - - WARN_ON(!mutex_is_locked(&efx->rss_lock)); - - /* Search for first gap in the numbering */ - list_for_each_entry(ctx, head, list) { - if (ctx->user_id != id) - break; - id++; - /* Check for wrap. If this happens, we have nearly 2^32 - * allocated RSS contexts, which seems unlikely. 
- */ - if (WARN_ON_ONCE(!id)) - return NULL; - } - - /* Create the new entry */ - new = kmalloc(sizeof(struct efx_rss_context), GFP_KERNEL); - if (!new) - return NULL; - new->context_id = EFX_EF10_RSS_CONTEXT_INVALID; - new->rx_hash_udp_4tuple = false; - - /* Insert the new entry into the gap */ - new->user_id = id; - list_add_tail(&new->list, &ctx->list); - return new; -} - -struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id) -{ - struct list_head *head = &efx->rss_context.list; - struct efx_rss_context *ctx; - - WARN_ON(!mutex_is_locked(&efx->rss_lock)); - - list_for_each_entry(ctx, head, list) - if (ctx->user_id == id) - return ctx; - return NULL; -} - -void efx_free_rss_context_entry(struct efx_rss_context *ctx) -{ - list_del(&ctx->list); - kfree(ctx); -} - /************************************************************************** * * PCI interface @@ -3519,7 +1059,7 @@ static void efx_pci_remove_main(struct efx_nic *efx) * are not READY. */ BUG_ON(efx->state == STATE_READY); - cancel_work_sync(&efx->reset_work); + efx_flush_reset_workqueue(efx); efx_disable_interrupts(efx); efx_clear_interrupt_affinity(efx); @@ -3559,7 +1099,7 @@ static void efx_pci_remove(struct pci_dev *pci_dev) efx_pci_remove_main(efx); - efx_fini_io(efx); + efx_fini_io(efx, efx->type->mem_bar(efx)); netif_dbg(efx, drv, efx->net_dev, "shutdown successful\n"); efx_fini_struct(efx); @@ -3782,7 +1322,8 @@ static int efx_pci_probe(struct pci_dev *pci_dev, efx_probe_vpd_strings(efx); /* Set up basic I/O (BAR mappings etc) */ - rc = efx_init_io(efx); + rc = efx_init_io(efx, efx->type->mem_bar(efx), efx->type->max_dma_mask, + efx->type->mem_map_size(efx)); if (rc) goto fail2; @@ -3826,7 +1367,7 @@ static int efx_pci_probe(struct pci_dev *pci_dev, return 0; fail3: - efx_fini_io(efx); + efx_fini_io(efx, efx->type->mem_bar(efx)); fail2: efx_fini_struct(efx); fail1: @@ -3904,7 +1445,7 @@ static int efx_pm_thaw(struct device *dev) rtnl_unlock(); /* Reschedule any quenched 
resets scheduled during efx_pm_freeze() */ - queue_work(reset_workqueue, &efx->reset_work); + efx_queue_reset_work(efx); return 0; @@ -4083,10 +1624,6 @@ static struct pci_driver efx_pci_driver = { * *************************************************************************/ -module_param(interrupt_mode, uint, 0444); -MODULE_PARM_DESC(interrupt_mode, - "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)"); - static int __init efx_init_module(void) { int rc; @@ -4103,11 +1640,9 @@ static int __init efx_init_module(void) goto err_sriov; #endif - reset_workqueue = create_singlethread_workqueue("sfc_reset"); - if (!reset_workqueue) { - rc = -ENOMEM; + rc = efx_create_reset_workqueue(); + if (rc) goto err_reset; - } rc = pci_register_driver(&efx_pci_driver); if (rc < 0) @@ -4116,7 +1651,7 @@ static int __init efx_init_module(void) return 0; err_pci: - destroy_workqueue(reset_workqueue); + efx_destroy_reset_workqueue(); err_reset: #ifdef CONFIG_SFC_SRIOV efx_fini_sriov(); @@ -4132,7 +1667,7 @@ static void __exit efx_exit_module(void) printk(KERN_INFO "Solarflare NET driver unloading\n"); pci_unregister_driver(&efx_pci_driver); - destroy_workqueue(reset_workqueue); + efx_destroy_reset_workqueue(); #ifdef CONFIG_SFC_SRIOV efx_fini_sriov(); #endif diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index 2dd8d5002315..f1bdb04efbe4 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -15,31 +15,17 @@ int efx_net_open(struct net_device *net_dev); int efx_net_stop(struct net_device *net_dev); /* TX */ -int efx_probe_tx_queue(struct efx_tx_queue *tx_queue); -void efx_remove_tx_queue(struct efx_tx_queue *tx_queue); -void efx_init_tx_queue(struct efx_tx_queue *tx_queue); void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue); -void efx_fini_tx_queue(struct efx_tx_queue *tx_queue); netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev); netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct 
sk_buff *skb); void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index); int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type, void *type_data); -unsigned int efx_tx_max_skb_descs(struct efx_nic *efx); extern unsigned int efx_piobuf_size; extern bool efx_separate_tx_channels; /* RX */ -void efx_set_default_rx_indir_table(struct efx_nic *efx, - struct efx_rss_context *ctx); -void efx_rx_config_page_split(struct efx_nic *efx); -int efx_probe_rx_queue(struct efx_rx_queue *rx_queue); -void efx_remove_rx_queue(struct efx_rx_queue *rx_queue); -void efx_init_rx_queue(struct efx_rx_queue *rx_queue); -void efx_fini_rx_queue(struct efx_rx_queue *rx_queue); -void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic); -void efx_rx_slow_fill(struct timer_list *t); void __efx_rx_packet(struct efx_channel *channel); void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, unsigned int n_frags, unsigned int len, u16 flags); @@ -48,7 +34,6 @@ static inline void efx_rx_flush_packet(struct efx_channel *channel) if (channel->rx_pkt_n_frags) __efx_rx_packet(channel); } -void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue); #define EFX_MAX_DMAQ_SIZE 4096UL #define EFX_DEFAULT_DMAQ_SIZE 1024UL @@ -80,8 +65,6 @@ static inline bool efx_rss_enabled(struct efx_nic *efx) /* Filters */ -void efx_mac_reconfigure(struct efx_nic *efx); - /** * efx_filter_insert_filter - add or replace a filter * @efx: NIC in which to insert the filter @@ -186,58 +169,17 @@ static inline void efx_filter_rfs_expire(struct work_struct *data) static inline void efx_filter_rfs_expire(struct work_struct *data) {} #define efx_filter_rfs_enabled() 0 #endif -bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec); - -bool efx_filter_spec_equal(const struct efx_filter_spec *left, - const struct efx_filter_spec *right); -u32 efx_filter_spec_hash(const struct efx_filter_spec *spec); - -#ifdef CONFIG_RFS_ACCEL -bool efx_rps_check_rule(struct 
efx_arfs_rule *rule, unsigned int filter_idx, - bool *force); - -struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx, - const struct efx_filter_spec *spec); - -/* @new is written to indicate if entry was newly added (true) or if an old - * entry was found and returned (false). - */ -struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx, - const struct efx_filter_spec *spec, - bool *new); - -void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec); -#endif /* RSS contexts */ -struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx); -struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id); -void efx_free_rss_context_entry(struct efx_rss_context *ctx); static inline bool efx_rss_active(struct efx_rss_context *ctx) { - return ctx->context_id != EFX_EF10_RSS_CONTEXT_INVALID; + return ctx->context_id != EFX_MCDI_RSS_CONTEXT_INVALID; } -/* Channels */ -int efx_channel_dummy_op_int(struct efx_channel *channel); -void efx_channel_dummy_op_void(struct efx_channel *channel); -int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries); - -/* Ports */ -int efx_reconfigure_port(struct efx_nic *efx); -int __efx_reconfigure_port(struct efx_nic *efx); - /* Ethtool support */ extern const struct ethtool_ops efx_ethtool_ops; -/* Reset handling */ -int efx_reset(struct efx_nic *efx, enum reset_type method); -void efx_reset_down(struct efx_nic *efx, enum reset_type method); -int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok); -int efx_try_recovery(struct efx_nic *efx); - /* Global */ -void efx_schedule_reset(struct efx_nic *efx, enum reset_type type); unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs); unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks); int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs, @@ -245,8 +187,6 @@ int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs, bool 
rx_may_override_tx); void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs, unsigned int *rx_usecs, bool *rx_adaptive); -void efx_stop_eventq(struct efx_channel *channel); -void efx_start_eventq(struct efx_channel *channel); /* Dummy PHY ops for PHY drivers */ int efx_port_dummy_op_int(struct efx_nic *efx); @@ -293,9 +233,6 @@ static inline void efx_schedule_channel_irq(struct efx_channel *channel) efx_schedule_channel(channel); } -void efx_link_status_changed(struct efx_nic *efx); -void efx_link_set_advertising(struct efx_nic *efx, - const unsigned long *advertising); void efx_link_clear_advertising(struct efx_nic *efx); void efx_link_set_wanted_fc(struct efx_nic *efx, u8); diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c new file mode 100644 index 000000000000..aeb5e8aa2f2a --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -0,0 +1,1234 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include "net_driver.h" +#include <linux/module.h> +#include "efx_channels.h" +#include "efx.h" +#include "efx_common.h" +#include "tx_common.h" +#include "rx_common.h" +#include "nic.h" +#include "sriov.h" + +/* This is the first interrupt mode to try out of: + * 0 => MSI-X + * 1 => MSI + * 2 => legacy + */ +static unsigned int interrupt_mode; +module_param(interrupt_mode, uint, 0444); +MODULE_PARM_DESC(interrupt_mode, + "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)"); + +/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS), + * i.e. 
the number of CPUs among which we may distribute simultaneous + * interrupt handling. + * + * Cards without MSI-X will only target one CPU via legacy or MSI interrupt. + * The default (0) means to assign an interrupt to each core. + */ +static unsigned int rss_cpus; +module_param(rss_cpus, uint, 0444); +MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling"); + +static unsigned int irq_adapt_low_thresh = 8000; +module_param(irq_adapt_low_thresh, uint, 0644); +MODULE_PARM_DESC(irq_adapt_low_thresh, + "Threshold score for reducing IRQ moderation"); + +static unsigned int irq_adapt_high_thresh = 16000; +module_param(irq_adapt_high_thresh, uint, 0644); +MODULE_PARM_DESC(irq_adapt_high_thresh, + "Threshold score for increasing IRQ moderation"); + +/* This is the weight assigned to each of the (per-channel) virtual + * NAPI devices. + */ +static int napi_weight = 64; + +/*************** + * Housekeeping + ***************/ + +int efx_channel_dummy_op_int(struct efx_channel *channel) +{ + return 0; +} + +void efx_channel_dummy_op_void(struct efx_channel *channel) +{ +} + +static const struct efx_channel_type efx_default_channel_type = { + .pre_probe = efx_channel_dummy_op_int, + .post_remove = efx_channel_dummy_op_void, + .get_name = efx_get_channel_name, + .copy = efx_copy_channel, + .want_txqs = efx_default_channel_want_txqs, + .keep_eventq = false, + .want_pio = true, +}; + +/************* + * INTERRUPTS + *************/ + +static unsigned int efx_wanted_parallelism(struct efx_nic *efx) +{ + cpumask_var_t thread_mask; + unsigned int count; + int cpu; + + if (rss_cpus) { + count = rss_cpus; + } else { + if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) { + netif_warn(efx, probe, efx->net_dev, + "RSS disabled due to allocation failure\n"); + return 1; + } + + count = 0; + for_each_online_cpu(cpu) { + if (!cpumask_test_cpu(cpu, thread_mask)) { + ++count; + cpumask_or(thread_mask, thread_mask, + topology_sibling_cpumask(cpu)); + } + } + + 
free_cpumask_var(thread_mask); + } + + if (count > EFX_MAX_RX_QUEUES) { + netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn, + "Reducing number of rx queues from %u to %u.\n", + count, EFX_MAX_RX_QUEUES); + count = EFX_MAX_RX_QUEUES; + } + + /* If RSS is requested for the PF *and* VFs then we can't write RSS + * table entries that are inaccessible to VFs + */ +#ifdef CONFIG_SFC_SRIOV + if (efx->type->sriov_wanted) { + if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 && + count > efx_vf_size(efx)) { + netif_warn(efx, probe, efx->net_dev, + "Reducing number of RSS channels from %u to %u for " + "VF support. Increase vf-msix-limit to use more " + "channels on the PF.\n", + count, efx_vf_size(efx)); + count = efx_vf_size(efx); + } + } +#endif + + return count; +} + +static int efx_allocate_msix_channels(struct efx_nic *efx, + unsigned int max_channels, + unsigned int extra_channels, + unsigned int parallelism) +{ + unsigned int n_channels = parallelism; + int vec_count; + int n_xdp_tx; + int n_xdp_ev; + + if (efx_separate_tx_channels) + n_channels *= 2; + n_channels += extra_channels; + + /* To allow XDP transmit to happen from arbitrary NAPI contexts + * we allocate a TX queue per CPU. We share event queues across + * multiple tx queues, assuming tx and ev queues are both + * maximum size. + */ + + n_xdp_tx = num_possible_cpus(); + n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_TXQ_TYPES); + + vec_count = pci_msix_vec_count(efx->pci_dev); + if (vec_count < 0) + return vec_count; + + max_channels = min_t(unsigned int, vec_count, max_channels); + + /* Check resources. + * We need a channel per event queue, plus a VI per tx queue. + * This may be more pessimistic than it needs to be. 
+ */ + if (n_channels + n_xdp_ev > max_channels) { + netif_err(efx, drv, efx->net_dev, + "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n", + n_xdp_ev, n_channels, max_channels); + efx->n_xdp_channels = 0; + efx->xdp_tx_per_channel = 0; + efx->xdp_tx_queue_count = 0; + } else { + efx->n_xdp_channels = n_xdp_ev; + efx->xdp_tx_per_channel = EFX_TXQ_TYPES; + efx->xdp_tx_queue_count = n_xdp_tx; + n_channels += n_xdp_ev; + netif_dbg(efx, drv, efx->net_dev, + "Allocating %d TX and %d event queues for XDP\n", + n_xdp_tx, n_xdp_ev); + } + + if (vec_count < n_channels) { + netif_err(efx, drv, efx->net_dev, + "WARNING: Insufficient MSI-X vectors available (%d < %u).\n", + vec_count, n_channels); + netif_err(efx, drv, efx->net_dev, + "WARNING: Performance may be reduced.\n"); + n_channels = vec_count; + } + + n_channels = min(n_channels, max_channels); + + efx->n_channels = n_channels; + + /* Ignore XDP tx channels when creating rx channels. */ + n_channels -= efx->n_xdp_channels; + + if (efx_separate_tx_channels) { + efx->n_tx_channels = + min(max(n_channels / 2, 1U), + efx->max_tx_channels); + efx->tx_channel_offset = + n_channels - efx->n_tx_channels; + efx->n_rx_channels = + max(n_channels - + efx->n_tx_channels, 1U); + } else { + efx->n_tx_channels = min(n_channels, efx->max_tx_channels); + efx->tx_channel_offset = 0; + efx->n_rx_channels = n_channels; + } + + efx->n_rx_channels = min(efx->n_rx_channels, parallelism); + efx->n_tx_channels = min(efx->n_tx_channels, parallelism); + + efx->xdp_channel_offset = n_channels; + + netif_dbg(efx, drv, efx->net_dev, + "Allocating %u RX channels\n", + efx->n_rx_channels); + + return efx->n_channels; +} + +/* Probe the number and type of interrupts we are able to obtain, and + * the resulting numbers of channels and RX queues. 
/* Probe the number and type of interrupts we are able to obtain, and
 * the resulting numbers of channels and RX queues.
 *
 * Starting from efx->interrupt_mode, MSI-X is tried first, then single
 * vector MSI, then legacy interrupts.  A failed mode falls through to the
 * next one only when efx->type->min_interrupt_mode allows it; otherwise the
 * error code of the failed setup call is returned.  Afterwards the extra
 * channel types are given channels (or told there is none) and
 * efx->rss_spread is set.
 *
 * Returns 0 on success or the error code from the failed MSI-X/MSI setup.
 */
int efx_probe_interrupts(struct efx_nic *efx)
{
	unsigned int extra_channels = 0;
	unsigned int rss_spread;
	unsigned int i, j;
	int rc;

	/* Count the extra channel types that have been registered */
	for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++)
		if (efx->extra_channel_type[i])
			++extra_channels;

	if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
		unsigned int parallelism = efx_wanted_parallelism(efx);
		struct msix_entry xentries[EFX_MAX_CHANNELS];
		unsigned int n_channels;

		/* Decides the channel layout and stores it in efx; the
		 * return value is the number of channels (vectors) to ask for.
		 */
		rc = efx_allocate_msix_channels(efx, efx->max_channels,
						extra_channels, parallelism);
		if (rc >= 0) {
			n_channels = rc;
			for (i = 0; i < n_channels; i++)
				xentries[i].entry = i;
			rc = pci_enable_msix_range(efx->pci_dev, xentries, 1,
						   n_channels);
		}
		if (rc < 0) {
			/* Fall back to single channel MSI */
			netif_err(efx, drv, efx->net_dev,
				  "could not enable MSI-X\n");
			if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI)
				efx->interrupt_mode = EFX_INT_MODE_MSI;
			else
				return rc;
		} else if (rc < n_channels) {
			/* NOTE(review): only the local n_channels is lowered
			 * here.  efx->n_channels, set earlier by
			 * efx_allocate_msix_channels(), keeps the larger value
			 * and is what the loop below iterates over, so
			 * xentries[i].vector is read for i >= rc, where this
			 * function only ever wrote .entry.  Confirm whether a
			 * shortfall can really happen and how it should be
			 * handled.
			 */
			netif_err(efx, drv, efx->net_dev,
				  "WARNING: Insufficient MSI-X vectors"
				  " available (%d < %u).\n", rc, n_channels);
			netif_err(efx, drv, efx->net_dev,
				  "WARNING: Performance may be reduced.\n");
			n_channels = rc;
		}

		/* Record the vector obtained for each channel */
		if (rc > 0) {
			for (i = 0; i < efx->n_channels; i++)
				efx_get_channel(efx, i)->irq =
					xentries[i].vector;
		}
	}

	/* Try single interrupt MSI */
	if (efx->interrupt_mode == EFX_INT_MODE_MSI) {
		efx->n_channels = 1;
		efx->n_rx_channels = 1;
		efx->n_tx_channels = 1;
		efx->n_xdp_channels = 0;
		efx->xdp_channel_offset = efx->n_channels;
		rc = pci_enable_msi(efx->pci_dev);
		if (rc == 0) {
			efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
		} else {
			netif_err(efx, drv, efx->net_dev,
				  "could not enable MSI\n");
			if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY)
				efx->interrupt_mode = EFX_INT_MODE_LEGACY;
			else
				return rc;
		}
	}

	/* Assume legacy interrupts */
	if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) {
		efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0);
		efx->n_rx_channels = 1;
		efx->n_tx_channels = 1;
		efx->n_xdp_channels = 0;
		efx->xdp_channel_offset = efx->n_channels;
		efx->legacy_irq = efx->pci_dev->irq;
	}

	/* Assign extra channels if possible, before XDP channels */
	efx->n_extra_tx_channels = 0;
	/* j walks downwards from the first XDP channel index */
	j = efx->xdp_channel_offset;
	for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) {
		if (!efx->extra_channel_type[i])
			continue;
		if (j <= efx->tx_channel_offset + efx->n_tx_channels) {
			/* No channel index left above the TX channels */
			efx->extra_channel_type[i]->handle_no_channel(efx);
		} else {
			--j;
			efx_get_channel(efx, j)->type =
				efx->extra_channel_type[i];
			if (efx_channel_has_tx_queues(efx_get_channel(efx, j)))
				efx->n_extra_tx_channels++;
		}
	}

	rss_spread = efx->n_rx_channels;
	/* RSS might be usable on VFs even if it is disabled on the PF */
#ifdef CONFIG_SFC_SRIOV
	if (efx->type->sriov_wanted) {
		/* Use the VF size only when the PF has a single RX channel
		 * and SR-IOV is wanted.
		 */
		efx->rss_spread = ((rss_spread > 1 ||
				    !efx->type->sriov_wanted(efx)) ?
				   rss_spread : efx_vf_size(efx));
		return 0;
	}
#endif
	efx->rss_spread = rss_spread;

	return 0;
}
pci_disable_msix(efx->pci_dev); + + /* Remove legacy interrupt */ + efx->legacy_irq = 0; +} + +/*************** + * EVENT QUEUES + ***************/ + +/* Create event queue + * Event queue memory allocations are done only once. If the channel + * is reset, the memory buffer will be reused; this guards against + * errors during channel reset and also simplifies interrupt handling. + */ +int efx_probe_eventq(struct efx_channel *channel) +{ + struct efx_nic *efx = channel->efx; + unsigned long entries; + + netif_dbg(efx, probe, efx->net_dev, + "chan %d create event queue\n", channel->channel); + + /* Build an event queue with room for one event per tx and rx buffer, + * plus some extra for link state events and MCDI completions. + */ + entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128); + EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE); + channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1; + + return efx_nic_probe_eventq(channel); +} + +/* Prepare channel's event queue */ +int efx_init_eventq(struct efx_channel *channel) +{ + struct efx_nic *efx = channel->efx; + int rc; + + EFX_WARN_ON_PARANOID(channel->eventq_init); + + netif_dbg(efx, drv, efx->net_dev, + "chan %d init event queue\n", channel->channel); + + rc = efx_nic_init_eventq(channel); + if (rc == 0) { + efx->type->push_irq_moderation(channel); + channel->eventq_read_ptr = 0; + channel->eventq_init = true; + } + return rc; +} + +/* Enable event queue processing and NAPI */ +void efx_start_eventq(struct efx_channel *channel) +{ + netif_dbg(channel->efx, ifup, channel->efx->net_dev, + "chan %d start event queue\n", channel->channel); + + /* Make sure the NAPI handler sees the enabled flag set */ + channel->enabled = true; + smp_wmb(); + + napi_enable(&channel->napi_str); + efx_nic_eventq_read_ack(channel); +} + +/* Disable event queue processing and NAPI */ +void efx_stop_eventq(struct efx_channel *channel) +{ + if (!channel->enabled) + return; + + napi_disable(&channel->napi_str); + 
channel->enabled = false; +} + +void efx_fini_eventq(struct efx_channel *channel) +{ + if (!channel->eventq_init) + return; + + netif_dbg(channel->efx, drv, channel->efx->net_dev, + "chan %d fini event queue\n", channel->channel); + + efx_nic_fini_eventq(channel); + channel->eventq_init = false; +} + +void efx_remove_eventq(struct efx_channel *channel) +{ + netif_dbg(channel->efx, drv, channel->efx->net_dev, + "chan %d remove event queue\n", channel->channel); + + efx_nic_remove_eventq(channel); +} + +/************************************************************************** + * + * Channel handling + * + *************************************************************************/ + +/* Allocate and initialise a channel structure. */ +struct efx_channel * +efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel) +{ + struct efx_rx_queue *rx_queue; + struct efx_tx_queue *tx_queue; + struct efx_channel *channel; + int j; + + channel = kzalloc(sizeof(*channel), GFP_KERNEL); + if (!channel) + return NULL; + + channel->efx = efx; + channel->channel = i; + channel->type = &efx_default_channel_type; + + for (j = 0; j < EFX_TXQ_TYPES; j++) { + tx_queue = &channel->tx_queue[j]; + tx_queue->efx = efx; + tx_queue->queue = i * EFX_TXQ_TYPES + j; + tx_queue->channel = channel; + } + +#ifdef CONFIG_RFS_ACCEL + INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire); +#endif + + rx_queue = &channel->rx_queue; + rx_queue->efx = efx; + timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); + + return channel; +} + +int efx_init_channels(struct efx_nic *efx) +{ + unsigned int i; + + for (i = 0; i < EFX_MAX_CHANNELS; i++) { + efx->channel[i] = efx_alloc_channel(efx, i, NULL); + if (!efx->channel[i]) + return -ENOMEM; + efx->msi_context[i].efx = efx; + efx->msi_context[i].index = i; + } + + /* Higher numbered interrupt modes are less capable! 
*/ + if (WARN_ON_ONCE(efx->type->max_interrupt_mode > + efx->type->min_interrupt_mode)) { + return -EIO; + } + efx->interrupt_mode = max(efx->type->max_interrupt_mode, + interrupt_mode); + efx->interrupt_mode = min(efx->type->min_interrupt_mode, + interrupt_mode); + + return 0; +} + +void efx_fini_channels(struct efx_nic *efx) +{ + unsigned int i; + + for (i = 0; i < EFX_MAX_CHANNELS; i++) + if (efx->channel[i]) { + kfree(efx->channel[i]); + efx->channel[i] = NULL; + } +} + +/* Allocate and initialise a channel structure, copying parameters + * (but not resources) from an old channel structure. + */ +struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel) +{ + struct efx_rx_queue *rx_queue; + struct efx_tx_queue *tx_queue; + struct efx_channel *channel; + int j; + + channel = kmalloc(sizeof(*channel), GFP_KERNEL); + if (!channel) + return NULL; + + *channel = *old_channel; + + channel->napi_dev = NULL; + INIT_HLIST_NODE(&channel->napi_str.napi_hash_node); + channel->napi_str.napi_id = 0; + channel->napi_str.state = 0; + memset(&channel->eventq, 0, sizeof(channel->eventq)); + + for (j = 0; j < EFX_TXQ_TYPES; j++) { + tx_queue = &channel->tx_queue[j]; + if (tx_queue->channel) + tx_queue->channel = channel; + tx_queue->buffer = NULL; + memset(&tx_queue->txd, 0, sizeof(tx_queue->txd)); + } + + rx_queue = &channel->rx_queue; + rx_queue->buffer = NULL; + memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd)); + timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); +#ifdef CONFIG_RFS_ACCEL + INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire); +#endif + + return channel; +} + +static int efx_probe_channel(struct efx_channel *channel) +{ + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + int rc; + + netif_dbg(channel->efx, probe, channel->efx->net_dev, + "creating channel %d\n", channel->channel); + + rc = channel->type->pre_probe(channel); + if (rc) + goto fail; + + rc = efx_probe_eventq(channel); + if (rc) + goto fail; + + 
efx_for_each_channel_tx_queue(tx_queue, channel) { + rc = efx_probe_tx_queue(tx_queue); + if (rc) + goto fail; + } + + efx_for_each_channel_rx_queue(rx_queue, channel) { + rc = efx_probe_rx_queue(rx_queue); + if (rc) + goto fail; + } + + channel->rx_list = NULL; + + return 0; + +fail: + efx_remove_channel(channel); + return rc; +} + +void efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len) +{ + struct efx_nic *efx = channel->efx; + const char *type; + int number; + + number = channel->channel; + + if (number >= efx->xdp_channel_offset && + !WARN_ON_ONCE(!efx->n_xdp_channels)) { + type = "-xdp"; + number -= efx->xdp_channel_offset; + } else if (efx->tx_channel_offset == 0) { + type = ""; + } else if (number < efx->tx_channel_offset) { + type = "-rx"; + } else { + type = "-tx"; + number -= efx->tx_channel_offset; + } + snprintf(buf, len, "%s%s-%d", efx->name, type, number); +} + +void efx_set_channel_names(struct efx_nic *efx) +{ + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) + channel->type->get_name(channel, + efx->msi_context[channel->channel].name, + sizeof(efx->msi_context[0].name)); +} + +int efx_probe_channels(struct efx_nic *efx) +{ + struct efx_channel *channel; + int rc; + + /* Restart special buffer allocation */ + efx->next_buffer_table = 0; + + /* Probe channels in reverse, so that any 'extra' channels + * use the start of the buffer table. This allows the traffic + * channels to be resized without moving them or wasting the + * entries before them. 
+ */ + efx_for_each_channel_rev(channel, efx) { + rc = efx_probe_channel(channel); + if (rc) { + netif_err(efx, probe, efx->net_dev, + "failed to create channel %d\n", + channel->channel); + goto fail; + } + } + efx_set_channel_names(efx); + + return 0; + +fail: + efx_remove_channels(efx); + return rc; +} + +void efx_remove_channel(struct efx_channel *channel) +{ + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + + netif_dbg(channel->efx, drv, channel->efx->net_dev, + "destroy chan %d\n", channel->channel); + + efx_for_each_channel_rx_queue(rx_queue, channel) + efx_remove_rx_queue(rx_queue); + efx_for_each_possible_channel_tx_queue(tx_queue, channel) + efx_remove_tx_queue(tx_queue); + efx_remove_eventq(channel); + channel->type->post_remove(channel); +} + +void efx_remove_channels(struct efx_nic *efx) +{ + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) + efx_remove_channel(channel); + + kfree(efx->xdp_tx_queues); +} + +int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) +{ + struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel; + unsigned int i, next_buffer_table = 0; + u32 old_rxq_entries, old_txq_entries; + int rc, rc2; + + rc = efx_check_disabled(efx); + if (rc) + return rc; + + /* Not all channels should be reallocated. We must avoid + * reallocating their buffer table entries. 
+ */ + efx_for_each_channel(channel, efx) { + struct efx_rx_queue *rx_queue; + struct efx_tx_queue *tx_queue; + + if (channel->type->copy) + continue; + next_buffer_table = max(next_buffer_table, + channel->eventq.index + + channel->eventq.entries); + efx_for_each_channel_rx_queue(rx_queue, channel) + next_buffer_table = max(next_buffer_table, + rx_queue->rxd.index + + rx_queue->rxd.entries); + efx_for_each_channel_tx_queue(tx_queue, channel) + next_buffer_table = max(next_buffer_table, + tx_queue->txd.index + + tx_queue->txd.entries); + } + + efx_device_detach_sync(efx); + efx_stop_all(efx); + efx_soft_disable_interrupts(efx); + + /* Clone channels (where possible) */ + memset(other_channel, 0, sizeof(other_channel)); + for (i = 0; i < efx->n_channels; i++) { + channel = efx->channel[i]; + if (channel->type->copy) + channel = channel->type->copy(channel); + if (!channel) { + rc = -ENOMEM; + goto out; + } + other_channel[i] = channel; + } + + /* Swap entry counts and channel pointers */ + old_rxq_entries = efx->rxq_entries; + old_txq_entries = efx->txq_entries; + efx->rxq_entries = rxq_entries; + efx->txq_entries = txq_entries; + for (i = 0; i < efx->n_channels; i++) { + channel = efx->channel[i]; + efx->channel[i] = other_channel[i]; + other_channel[i] = channel; + } + + /* Restart buffer table allocation */ + efx->next_buffer_table = next_buffer_table; + + for (i = 0; i < efx->n_channels; i++) { + channel = efx->channel[i]; + if (!channel->type->copy) + continue; + rc = efx_probe_channel(channel); + if (rc) + goto rollback; + efx_init_napi_channel(efx->channel[i]); + } + +out: + /* Destroy unused channel structures */ + for (i = 0; i < efx->n_channels; i++) { + channel = other_channel[i]; + if (channel && channel->type->copy) { + efx_fini_napi_channel(channel); + efx_remove_channel(channel); + kfree(channel); + } + } + + rc2 = efx_soft_enable_interrupts(efx); + if (rc2) { + rc = rc ? 
rc : rc2; + netif_err(efx, drv, efx->net_dev, + "unable to restart interrupts on channel reallocation\n"); + efx_schedule_reset(efx, RESET_TYPE_DISABLE); + } else { + efx_start_all(efx); + efx_device_attach_if_not_resetting(efx); + } + return rc; + +rollback: + /* Swap back */ + efx->rxq_entries = old_rxq_entries; + efx->txq_entries = old_txq_entries; + for (i = 0; i < efx->n_channels; i++) { + channel = efx->channel[i]; + efx->channel[i] = other_channel[i]; + other_channel[i] = channel; + } + goto out; +} + +int efx_set_channels(struct efx_nic *efx) +{ + struct efx_channel *channel; + struct efx_tx_queue *tx_queue; + int xdp_queue_number; + + efx->tx_channel_offset = + efx_separate_tx_channels ? + efx->n_channels - efx->n_tx_channels : 0; + + if (efx->xdp_tx_queue_count) { + EFX_WARN_ON_PARANOID(efx->xdp_tx_queues); + + /* Allocate array for XDP TX queue lookup. */ + efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count, + sizeof(*efx->xdp_tx_queues), + GFP_KERNEL); + if (!efx->xdp_tx_queues) + return -ENOMEM; + } + + /* We need to mark which channels really have RX and TX + * queues, and adjust the TX queue numbers if we have separate + * RX-only and TX-only channels. 
+ */ + xdp_queue_number = 0; + efx_for_each_channel(channel, efx) { + if (channel->channel < efx->n_rx_channels) + channel->rx_queue.core_index = channel->channel; + else + channel->rx_queue.core_index = -1; + + efx_for_each_channel_tx_queue(tx_queue, channel) { + tx_queue->queue -= (efx->tx_channel_offset * + EFX_TXQ_TYPES); + + if (efx_channel_is_xdp_tx(channel) && + xdp_queue_number < efx->xdp_tx_queue_count) { + efx->xdp_tx_queues[xdp_queue_number] = tx_queue; + xdp_queue_number++; + } + } + } + return 0; +} + +bool efx_default_channel_want_txqs(struct efx_channel *channel) +{ + return channel->channel - channel->efx->tx_channel_offset < + channel->efx->n_tx_channels; +} + +/************* + * START/STOP + *************/ + +int efx_soft_enable_interrupts(struct efx_nic *efx) +{ + struct efx_channel *channel, *end_channel; + int rc; + + BUG_ON(efx->state == STATE_DISABLED); + + efx->irq_soft_enabled = true; + smp_wmb(); + + efx_for_each_channel(channel, efx) { + if (!channel->type->keep_eventq) { + rc = efx_init_eventq(channel); + if (rc) + goto fail; + } + efx_start_eventq(channel); + } + + efx_mcdi_mode_event(efx); + + return 0; +fail: + end_channel = channel; + efx_for_each_channel(channel, efx) { + if (channel == end_channel) + break; + efx_stop_eventq(channel); + if (!channel->type->keep_eventq) + efx_fini_eventq(channel); + } + + return rc; +} + +void efx_soft_disable_interrupts(struct efx_nic *efx) +{ + struct efx_channel *channel; + + if (efx->state == STATE_DISABLED) + return; + + efx_mcdi_mode_poll(efx); + + efx->irq_soft_enabled = false; + smp_wmb(); + + if (efx->legacy_irq) + synchronize_irq(efx->legacy_irq); + + efx_for_each_channel(channel, efx) { + if (channel->irq) + synchronize_irq(channel->irq); + + efx_stop_eventq(channel); + if (!channel->type->keep_eventq) + efx_fini_eventq(channel); + } + + /* Flush the asynchronous MCDI request queue */ + efx_mcdi_flush_async(efx); +} + +int efx_enable_interrupts(struct efx_nic *efx) +{ + struct efx_channel 
/* Initialise the TX and RX queues of every channel of @efx.
 *
 * efx->active_queues is incremented once for each queue initialised.  For
 * RX queues the channel's event queue is stopped while the first batch of
 * RX descriptors is pushed and is started again afterwards.
 */
void efx_start_channels(struct efx_nic *efx)
{
	struct efx_tx_queue *tx_queue;
	struct efx_rx_queue *rx_queue;
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx) {
		efx_for_each_channel_tx_queue(tx_queue, channel) {
			efx_init_tx_queue(tx_queue);
			atomic_inc(&efx->active_queues);
		}

		efx_for_each_channel_rx_queue(rx_queue, channel) {
			efx_init_rx_queue(rx_queue);
			atomic_inc(&efx->active_queues);
			/* NOTE(review): presumably the event queue is paused so
			 * the initial fill cannot run concurrently with the
			 * NAPI handler - confirm.
			 */
			efx_stop_eventq(channel);
			efx_fast_push_rx_descriptors(rx_queue, false);
			efx_start_eventq(channel);
		}

		/* No partially received packet should be pending here */
		WARN_ON(channel->rx_pkt_n_frags);
	}
}
* NAPI handler to complete. At least event queue 0 + * might be kept active by non-data events, so don't + * use napi_synchronize() but actually disable NAPI + * temporarily. + */ + if (efx_channel_has_rx_queue(channel)) { + efx_stop_eventq(channel); + efx_start_eventq(channel); + } + } + + if (efx->type->fini_dmaq) + rc = efx->type->fini_dmaq(efx); + + if (rc) { + netif_err(efx, drv, efx->net_dev, "failed to flush queues\n"); + } else { + netif_dbg(efx, drv, efx->net_dev, + "successfully flushed all queues\n"); + } + + efx_for_each_channel(channel, efx) { + efx_for_each_channel_rx_queue(rx_queue, channel) + efx_fini_rx_queue(rx_queue); + efx_for_each_possible_channel_tx_queue(tx_queue, channel) + efx_fini_tx_queue(tx_queue); + } +} + +/************************************************************************** + * + * NAPI interface + * + *************************************************************************/ + +/* Process channel's event queue + * + * This function is responsible for processing the event queue of a + * single channel. The caller must guarantee that this function will + * never be concurrently called more than once on the same channel, + * though different channels may be being processed concurrently. 
+ */ +static int efx_process_channel(struct efx_channel *channel, int budget) +{ + struct efx_tx_queue *tx_queue; + struct list_head rx_list; + int spent; + + /* Nothing is processed for a channel that is not enabled */ + if (unlikely(!channel->enabled)) + return 0; + + /* Prepare the batch receive list */ + EFX_WARN_ON_PARANOID(channel->rx_list != NULL); + INIT_LIST_HEAD(&rx_list); + channel->rx_list = &rx_list; + + /* Zero the per-poll TX completion counters reported to BQL below */ + efx_for_each_channel_tx_queue(tx_queue, channel) { + tx_queue->pkts_compl = 0; + tx_queue->bytes_compl = 0; + } + + spent = efx_nic_process_eventq(channel, budget); + if (spent && efx_channel_has_rx_queue(channel)) { + struct efx_rx_queue *rx_queue = + efx_channel_get_rx_queue(channel); + + efx_rx_flush_packet(channel); + efx_fast_push_rx_descriptors(rx_queue, true); + } + + /* Update BQL */ + efx_for_each_channel_tx_queue(tx_queue, channel) { + if (tx_queue->bytes_compl) { + netdev_tx_completed_queue(tx_queue->core_txq, + tx_queue->pkts_compl, + tx_queue->bytes_compl); + } + } + + /* Receive any packets we queued up */ + netif_receive_skb_list(channel->rx_list); + channel->rx_list = NULL; + + return spent; +} + +/* Adjust a channel's interrupt moderation by one irq_mod_step_us step: + * down when irq_mod_score is below irq_adapt_low_thresh (and the current + * value exceeds one step), up when it is above irq_adapt_high_thresh (and + * the current value is below efx->irq_rx_moderation_us). The channel's + * irq_count and irq_mod_score are always reset afterwards. + */ +static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel) +{ + int step = efx->irq_mod_step_us; + + if (channel->irq_mod_score < irq_adapt_low_thresh) { + if (channel->irq_moderation_us > step) { + channel->irq_moderation_us -= step; + efx->type->push_irq_moderation(channel); + } + } else if (channel->irq_mod_score > irq_adapt_high_thresh) { + if (channel->irq_moderation_us < + efx->irq_rx_moderation_us) { + channel->irq_moderation_us += step; + efx->type->push_irq_moderation(channel); + } + } + + channel->irq_count = 0; + channel->irq_mod_score = 0; +} + +/* NAPI poll handler + * + * NAPI guarantees serialisation of polls of the same device, which + * provides the guarantee required by efx_process_channel(). 
+ */ +static int efx_poll(struct napi_struct *napi, int budget) +{ + struct efx_channel *channel = + container_of(napi, struct efx_channel, napi_str); + struct efx_nic *efx = channel->efx; + int spent; + + netif_vdbg(efx, intr, efx->net_dev, + "channel %d NAPI poll executing on CPU %d\n", + channel->channel, raw_smp_processor_id()); + + spent = efx_process_channel(channel, budget); + + xdp_do_flush_map(); + + /* The remaining work only runs when the budget was not exhausted */ + if (spent < budget) { + /* Re-evaluate IRQ moderation on every 1000th such poll */ + if (efx_channel_has_rx_queue(channel) && + efx->irq_rx_adaptive && + unlikely(++channel->irq_count == 1000)) { + efx_update_irq_mod(efx, channel); + } + +#ifdef CONFIG_RFS_ACCEL + /* Perhaps expire some ARFS filters */ + mod_delayed_work(system_wq, &channel->filter_work, 0); +#endif + + /* There is no race here; although napi_disable() will + * only wait for napi_complete(), this isn't a problem + * since efx_nic_eventq_read_ack() will have no effect if + * interrupts have already been disabled. + */ + if (napi_complete_done(napi, spent)) + efx_nic_eventq_read_ack(channel); + } + + return spent; +} + +/* Register efx_poll() as the channel's NAPI handler on efx->net_dev, + * recording that device in channel->napi_dev. + */ +void efx_init_napi_channel(struct efx_channel *channel) +{ + struct efx_nic *efx = channel->efx; + + channel->napi_dev = efx->net_dev; + netif_napi_add(channel->napi_dev, &channel->napi_str, + efx_poll, napi_weight); +} + +/* Call efx_init_napi_channel() for every channel */ +void efx_init_napi(struct efx_nic *efx) +{ + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) + efx_init_napi_channel(channel); +} + +/* Remove the channel's NAPI context if napi_dev is set, then clear + * napi_dev; a second call therefore skips netif_napi_del(). + */ +void efx_fini_napi_channel(struct efx_channel *channel) +{ + if (channel->napi_dev) + netif_napi_del(&channel->napi_str); + + channel->napi_dev = NULL; +} + +/* Call efx_fini_napi_channel() for every channel */ +void efx_fini_napi(struct efx_nic *efx) +{ + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) + efx_fini_napi_channel(channel); +} diff --git a/drivers/net/ethernet/sfc/efx_channels.h b/drivers/net/ethernet/sfc/efx_channels.h new file mode 100644 index 000000000000..8d7b8c4142d7 --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_channels.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-only 
*/ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_CHANNELS_H +#define EFX_CHANNELS_H + +int efx_probe_interrupts(struct efx_nic *efx); +void efx_remove_interrupts(struct efx_nic *efx); +int efx_soft_enable_interrupts(struct efx_nic *efx); +void efx_soft_disable_interrupts(struct efx_nic *efx); +int efx_enable_interrupts(struct efx_nic *efx); +void efx_disable_interrupts(struct efx_nic *efx); + +void efx_set_interrupt_affinity(struct efx_nic *efx); +void efx_clear_interrupt_affinity(struct efx_nic *efx); + +int efx_probe_eventq(struct efx_channel *channel); +int efx_init_eventq(struct efx_channel *channel); +void efx_start_eventq(struct efx_channel *channel); +void efx_stop_eventq(struct efx_channel *channel); +void efx_fini_eventq(struct efx_channel *channel); +void efx_remove_eventq(struct efx_channel *channel); + +struct efx_channel * +efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel); +int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries); +void efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len); +void efx_set_channel_names(struct efx_nic *efx); +int efx_init_channels(struct efx_nic *efx); +int efx_probe_channels(struct efx_nic *efx); +int efx_set_channels(struct efx_nic *efx); +bool efx_default_channel_want_txqs(struct efx_channel *channel); +void efx_remove_channel(struct efx_channel *channel); +void efx_remove_channels(struct efx_nic *efx); +void efx_fini_channels(struct efx_nic *efx); +struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel); +void efx_start_channels(struct efx_nic 
*efx); +void efx_stop_channels(struct efx_nic *efx); + +void efx_init_napi_channel(struct efx_channel *channel); +void efx_init_napi(struct efx_nic *efx); +void efx_fini_napi_channel(struct efx_channel *channel); +void efx_fini_napi(struct efx_nic *efx); + +int efx_channel_dummy_op_int(struct efx_channel *channel); +void efx_channel_dummy_op_void(struct efx_channel *channel); + +#endif diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c new file mode 100644 index 000000000000..ab0ce62f81c1 --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_common.c @@ -0,0 +1,1102 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include "net_driver.h" +#include <linux/module.h> +#include <linux/netdevice.h> +#include "efx_common.h" +#include "efx_channels.h" +#include "efx.h" +#include "mcdi.h" +#include "selftest.h" +#include "rx_common.h" +#include "tx_common.h" +#include "nic.h" +#include "io.h" +#include "mcdi_pcol.h" + +static unsigned int debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE | + NETIF_MSG_LINK | NETIF_MSG_IFDOWN | + NETIF_MSG_IFUP | NETIF_MSG_RX_ERR | + NETIF_MSG_TX_ERR | NETIF_MSG_HW); +module_param(debug, uint, 0); +MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value"); + +/* This is the time (in jiffies) between invocations of the hardware + * monitor. + * On Falcon-based NICs, this will: + * - Check the on-board hardware monitor; + * - Poll the link state and reconfigure the hardware as necessary. + * On Siena-based NICs for power systems with EEH support, this will give EEH a + * chance to start. 
+ */ +static unsigned int efx_monitor_interval = 1 * HZ; + +/* How often and how many times to poll for a reset while waiting for a + * BIST that another function started to complete. + */ +#define BIST_WAIT_DELAY_MS 100 +#define BIST_WAIT_DELAY_COUNT 100 + +/* Default stats update time */ +#define STATS_PERIOD_MS_DEFAULT 1000 + +/* Reset type names (see RESET_TYPE()) */ +const unsigned int efx_reset_type_max = RESET_TYPE_MAX; +const char *const efx_reset_type_names[] = { + [RESET_TYPE_INVISIBLE] = "INVISIBLE", + [RESET_TYPE_ALL] = "ALL", + [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL", + [RESET_TYPE_WORLD] = "WORLD", + [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE", + [RESET_TYPE_DATAPATH] = "DATAPATH", + [RESET_TYPE_MC_BIST] = "MC_BIST", + [RESET_TYPE_DISABLE] = "DISABLE", + [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", + [RESET_TYPE_INT_ERROR] = "INT_ERROR", + [RESET_TYPE_DMA_ERROR] = "DMA_ERROR", + [RESET_TYPE_TX_SKIP] = "TX_SKIP", + [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", + [RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)", +}; + +#define RESET_TYPE(type) \ + STRING_TABLE_LOOKUP(type, efx_reset_type) + +/* Loopback mode names (see LOOPBACK_MODE()) */ +const unsigned int efx_loopback_mode_max = LOOPBACK_MAX; +const char *const efx_loopback_mode_names[] = { + [LOOPBACK_NONE] = "NONE", + [LOOPBACK_DATA] = "DATAPATH", + [LOOPBACK_GMAC] = "GMAC", + [LOOPBACK_XGMII] = "XGMII", + [LOOPBACK_XGXS] = "XGXS", + [LOOPBACK_XAUI] = "XAUI", + [LOOPBACK_GMII] = "GMII", + [LOOPBACK_SGMII] = "SGMII", + [LOOPBACK_XGBR] = "XGBR", + [LOOPBACK_XFI] = "XFI", + [LOOPBACK_XAUI_FAR] = "XAUI_FAR", + [LOOPBACK_GMII_FAR] = "GMII_FAR", + [LOOPBACK_SGMII_FAR] = "SGMII_FAR", + [LOOPBACK_XFI_FAR] = "XFI_FAR", + [LOOPBACK_GPHY] = "GPHY", + [LOOPBACK_PHYXS] = "PHYXS", + [LOOPBACK_PCS] = "PCS", + [LOOPBACK_PMAPMD] = "PMA/PMD", + [LOOPBACK_XPORT] = "XPORT", + [LOOPBACK_XGMII_WS] = "XGMII_WS", + [LOOPBACK_XAUI_WS] = "XAUI_WS", + [LOOPBACK_XAUI_WS_FAR] = "XAUI_WS_FAR", + [LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR", + [LOOPBACK_GMII_WS] = 
"GMII_WS", + [LOOPBACK_XFI_WS] = "XFI_WS", + [LOOPBACK_XFI_WS_FAR] = "XFI_WS_FAR", + [LOOPBACK_PHYXS_WS] = "PHYXS_WS", +}; + +/* Reset workqueue. If any NIC has a hardware failure then a reset will be + * queued onto this work queue. This is not a per-nic work queue, because + * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised. + */ +static struct workqueue_struct *reset_workqueue; + +/* Create the single-threaded "sfc_reset" workqueue. + * Returns 0 on success or -ENOMEM if the workqueue could not be created. + */ +int efx_create_reset_workqueue(void) +{ + reset_workqueue = create_singlethread_workqueue("sfc_reset"); + if (!reset_workqueue) { + printk(KERN_ERR "Failed to create reset workqueue\n"); + return -ENOMEM; + } + + return 0; +} + +/* Queue this NIC's reset_work on the shared reset workqueue */ +void efx_queue_reset_work(struct efx_nic *efx) +{ + queue_work(reset_workqueue, &efx->reset_work); +} + +/* NOTE(review): despite the name this does not flush the workqueue; it + * cancels this NIC's reset_work and waits for it via cancel_work_sync(). + */ +void efx_flush_reset_workqueue(struct efx_nic *efx) +{ + cancel_work_sync(&efx->reset_work); +} + +/* Destroy the reset workqueue if it exists and clear the pointer, so a + * repeated call does nothing. + */ +void efx_destroy_reset_workqueue(void) +{ + if (reset_workqueue) { + destroy_workqueue(reset_workqueue); + reset_workqueue = NULL; + } +} + +/* We assume that efx->type->reconfigure_mac will always try to sync RX + * filters and therefore needs to read-lock the filter table against freeing + */ +void efx_mac_reconfigure(struct efx_nic *efx) +{ + if (efx->type->reconfigure_mac) { + down_read(&efx->filter_sem); + efx->type->reconfigure_mac(efx); + up_read(&efx->filter_sem); + } +} + +/* Asynchronous work item for changing MAC promiscuity and multicast + * hash. Avoid a drain/rx_ingress enable by reconfiguring the current + * MAC directly. + */ +static void efx_mac_work(struct work_struct *data) +{ + struct efx_nic *efx = container_of(data, struct efx_nic, mac_work); + + mutex_lock(&efx->mac_lock); + if (efx->port_enabled) + efx_mac_reconfigure(efx); + mutex_unlock(&efx->mac_lock); +} + +/* This ensures that the kernel is kept informed (via + * netif_carrier_on/off) of the link status, and also maintains the + * link status's stop on the port's TX queue. 
+ */ +void efx_link_status_changed(struct efx_nic *efx) +{ + struct efx_link_state *link_state = &efx->link_state; + + /* SFC Bug 5356: A net_dev notifier is registered, so we must ensure + * that no events are triggered between unregister_netdev() and the + * driver unloading. A more general condition is that NETDEV_CHANGE + * can only be generated between NETDEV_UP and NETDEV_DOWN + */ + if (!netif_running(efx->net_dev)) + return; + + /* Only touch the carrier, and count a change, when the recorded + * link state differs from the current carrier state + */ + if (link_state->up != netif_carrier_ok(efx->net_dev)) { + efx->n_link_state_changes++; + + if (link_state->up) + netif_carrier_on(efx->net_dev); + else + netif_carrier_off(efx->net_dev); + } + + /* Status message for kernel log */ + if (link_state->up) + netif_info(efx, link, efx->net_dev, + "link up at %uMbps %s-duplex (MTU %d)\n", + link_state->speed, link_state->fd ? "full" : "half", + efx->net_dev->mtu); + else + netif_info(efx, link, efx->net_dev, "link down\n"); +} + +/* Returns PAGE_SIZE minus the per-buffer overhead computed below */ +unsigned int efx_xdp_max_mtu(struct efx_nic *efx) +{ + /* The maximum MTU that we can fit in a single page, allowing for + * framing, overhead and XDP headroom. + */ + int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) + + efx->rx_prefix_size + efx->type->rx_buffer_padding + + efx->rx_ip_align + XDP_PACKET_HEADROOM; + + return PAGE_SIZE - overhead; +} + +/* Context: process, rtnl_lock() held. 
*/ +int efx_change_mtu(struct net_device *net_dev, int new_mtu) +{ + struct efx_nic *efx = netdev_priv(net_dev); + int rc; + + rc = efx_check_disabled(efx); + if (rc) + return rc; + + /* NOTE(review): efx_xdp_max_mtu() returns unsigned int but is + * compared against the signed new_mtu and printed with %d - confirm + * this is intended. + */ + if (rtnl_dereference(efx->xdp_prog) && + new_mtu > efx_xdp_max_mtu(efx)) { + netif_err(efx, drv, efx->net_dev, + "Requested MTU of %d too big for XDP (max: %d)\n", + new_mtu, efx_xdp_max_mtu(efx)); + return -EINVAL; + } + + netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu); + + /* Detach and stop the device, change the MTU and reconfigure the + * MAC under mac_lock, then start and re-attach it + */ + efx_device_detach_sync(efx); + efx_stop_all(efx); + + mutex_lock(&efx->mac_lock); + net_dev->mtu = new_mtu; + efx_mac_reconfigure(efx); + mutex_unlock(&efx->mac_lock); + + efx_start_all(efx); + efx_device_attach_if_not_resetting(efx); + return 0; +} + +/************************************************************************** + * + * Hardware monitor + * + **************************************************************************/ + +/* Run periodically off the general workqueue */ +static void efx_monitor(struct work_struct *data) +{ + struct efx_nic *efx = container_of(data, struct efx_nic, + monitor_work.work); + + netif_vdbg(efx, timer, efx->net_dev, + "hardware monitor executing on CPU %d\n", + raw_smp_processor_id()); + /* NOTE(review): after this BUG_ON the monitor pointer is tested + * again below, which looks redundant. + */ + BUG_ON(efx->type->monitor == NULL); + + /* If the mac_lock is already held then it is likely a port + * reconfiguration is already in place, which will likely do + * most of the work of monitor() anyway. 
+ */ + if (mutex_trylock(&efx->mac_lock)) { + if (efx->port_enabled && efx->type->monitor) + efx->type->monitor(efx); + mutex_unlock(&efx->mac_lock); + } + + /* Queue the next run */ + efx_start_monitor(efx); +} + +/* Queue monitor_work on efx->workqueue to run after efx_monitor_interval, + * but only if the NIC type provides a monitor hook. + */ +void efx_start_monitor(struct efx_nic *efx) +{ + if (efx->type->monitor) + queue_delayed_work(efx->workqueue, &efx->monitor_work, + efx_monitor_interval); +} + +/************************************************************************** + * + * Event queue processing + * + *************************************************************************/ + +/* Channels are shutdown and reinitialised whilst the NIC is running + * to propagate configuration changes (mtu, checksum offload), or + * to clear hardware error conditions + */ +static void efx_start_datapath(struct efx_nic *efx) +{ + netdev_features_t old_features = efx->net_dev->features; + bool old_rx_scatter = efx->rx_scatter; + size_t rx_buf_len; + + /* Calculate the rx buffer allocation parameters required to + * support the current MTU, including padding for header + * alignment and overruns. 
+ */ + efx->rx_dma_len = (efx->rx_prefix_size + + EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + + efx->type->rx_buffer_padding); + rx_buf_len = (sizeof(struct efx_rx_page_state) + XDP_PACKET_HEADROOM + + efx->rx_ip_align + efx->rx_dma_len); + /* Three cases: the buffer fits in one page; it does not fit but the + * NIC type can scatter, so EFX_RX_USR_BUF_SIZE buffers are used; or + * a higher page order is needed. + */ + if (rx_buf_len <= PAGE_SIZE) { + efx->rx_scatter = efx->type->always_rx_scatter; + efx->rx_buffer_order = 0; + } else if (efx->type->can_rx_scatter) { + BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES); + BUILD_BUG_ON(sizeof(struct efx_rx_page_state) + + 2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE, + EFX_RX_BUF_ALIGNMENT) > + PAGE_SIZE); + efx->rx_scatter = true; + efx->rx_dma_len = EFX_RX_USR_BUF_SIZE; + efx->rx_buffer_order = 0; + } else { + efx->rx_scatter = false; + efx->rx_buffer_order = get_order(rx_buf_len); + } + + efx_rx_config_page_split(efx); + if (efx->rx_buffer_order) + netif_dbg(efx, drv, efx->net_dev, + "RX buf len=%u; page order=%u batch=%u\n", + efx->rx_dma_len, efx->rx_buffer_order, + efx->rx_pages_per_batch); + else + netif_dbg(efx, drv, efx->net_dev, + "RX buf len=%u step=%u bpp=%u; page batch=%u\n", + efx->rx_dma_len, efx->rx_page_buf_step, + efx->rx_bufs_per_page, efx->rx_pages_per_batch); + + /* Restore previously fixed features in hw_features and remove + * features which are fixed now + */ + efx->net_dev->hw_features |= efx->net_dev->features; + efx->net_dev->hw_features &= ~efx->fixed_features; + efx->net_dev->features |= efx->fixed_features; + if (efx->net_dev->features != old_features) + netdev_features_change(efx->net_dev); + + /* RX filters may also have scatter-enabled flags */ + if ((efx->rx_scatter != old_rx_scatter) && + efx->type->filter_update_rx_scatter) + efx->type->filter_update_rx_scatter(efx); + + /* We must keep at least one descriptor in a TX ring empty. + * We could avoid this when the queue size does not exactly + * match the hardware ring size, but it's not that important. + * Therefore we stop the queue when one more skb might fill + * the ring completely. 
We wake it when half way back to + * empty. + */ + efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx); + efx->txq_wake_thresh = efx->txq_stop_thresh / 2; + + /* Initialise the channels */ + efx_start_channels(efx); + + efx_ptp_start_datapath(efx); + + if (netif_device_present(efx->net_dev)) + netif_tx_wake_all_queues(efx->net_dev); +} + +/* Stop the PTP datapath and then the channels. The port must already be + * disabled (BUG_ON otherwise). + */ +static void efx_stop_datapath(struct efx_nic *efx) +{ + EFX_ASSERT_RESET_SERIALISED(efx); + BUG_ON(efx->port_enabled); + + efx_ptp_stop_datapath(efx); + + efx_stop_channels(efx); +} + +/************************************************************************** + * + * Port handling + * + **************************************************************************/ + +/* Mark the port enabled and reconfigure the MAC, both under mac_lock. + * The port must not already be enabled (BUG_ON otherwise). + */ +static void efx_start_port(struct efx_nic *efx) +{ + netif_dbg(efx, ifup, efx->net_dev, "start port\n"); + BUG_ON(efx->port_enabled); + + mutex_lock(&efx->mac_lock); + efx->port_enabled = true; + + /* Ensure MAC ingress/egress is enabled */ + efx_mac_reconfigure(efx); + + mutex_unlock(&efx->mac_lock); +} + +/* Cancel work for MAC reconfiguration, periodic hardware monitoring + * and the async self-test, wait for them to finish and prevent them + * being scheduled again. This doesn't cover online resets, which + * should only be cancelled when removing the device. + */ +static void efx_stop_port(struct efx_nic *efx) +{ + netif_dbg(efx, ifdown, efx->net_dev, "stop port\n"); + + EFX_ASSERT_RESET_SERIALISED(efx); + + mutex_lock(&efx->mac_lock); + efx->port_enabled = false; + mutex_unlock(&efx->mac_lock); + + /* Serialise against efx_set_multicast_list() */ + netif_addr_lock_bh(efx->net_dev); + netif_addr_unlock_bh(efx->net_dev); + + cancel_delayed_work_sync(&efx->monitor_work); + efx_selftest_async_cancel(efx); + cancel_work_sync(&efx->mac_work); +} + +/* If the interface is supposed to be running but is not, start + * the hardware and software data path, regular activity for the port + * (MAC statistics, link polling, etc.) 
and schedule the port to be + * reconfigured. Interrupts must already be enabled. This function + * is safe to call multiple times, so long as the NIC is not disabled. + * Requires the RTNL lock. + */ +void efx_start_all(struct efx_nic *efx) +{ + EFX_ASSERT_RESET_SERIALISED(efx); + BUG_ON(efx->state == STATE_DISABLED); + + /* Check that it is appropriate to restart the interface. All + * of these flags are safe to read under just the rtnl lock + */ + if (efx->port_enabled || !netif_running(efx->net_dev) || + efx->reset_pending) + return; + + efx_start_port(efx); + efx_start_datapath(efx); + + /* Start the hardware monitor if there is one */ + efx_start_monitor(efx); + + /* Link state detection is normally event-driven; we have + * to poll now because we could have missed a change + */ + mutex_lock(&efx->mac_lock); + if (efx->phy_op->poll(efx)) + efx_link_status_changed(efx); + mutex_unlock(&efx->mac_lock); + + /* NOTE(review): only start_stats is NULL-checked; pull_stats and + * update_stats are called unconditionally - presumably every NIC + * type that sets start_stats sets those too; confirm. + */ + if (efx->type->start_stats) { + efx->type->start_stats(efx); + efx->type->pull_stats(efx); + spin_lock_bh(&efx->stats_lock); + efx->type->update_stats(efx, NULL, NULL); + spin_unlock_bh(&efx->stats_lock); + } +} + +/* Quiesce the hardware and software data path, and regular activity + * for the port without bringing the link down. Safe to call multiple + * times with the NIC in almost any state, but interrupts should be + * enabled. Requires the RTNL lock. + */ +void efx_stop_all(struct efx_nic *efx) +{ + EFX_ASSERT_RESET_SERIALISED(efx); + + /* port_enabled can be read safely under the rtnl lock */ + if (!efx->port_enabled) + return; + + /* NOTE(review): guarded by update_stats here but by start_stats in + * efx_start_all(); pull_stats and stop_stats are not checked. + */ + if (efx->type->update_stats) { + /* update stats before we go down so we can accurately count + * rx_nodesc_drops + */ + efx->type->pull_stats(efx); + spin_lock_bh(&efx->stats_lock); + efx->type->update_stats(efx, NULL, NULL); + spin_unlock_bh(&efx->stats_lock); + efx->type->stop_stats(efx); + } + + efx_stop_port(efx); + + /* Stop the kernel transmit interface. 
This is only valid if + * the device is stopped or detached; otherwise the watchdog + * may fire immediately. + */ + WARN_ON(netif_running(efx->net_dev) && + netif_device_present(efx->net_dev)); + netif_tx_disable(efx->net_dev); + + efx_stop_datapath(efx); +} + +/* Context: process, dev_base_lock or RTNL held, non-blocking. */ +void efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + /* The NIC type's update_stats() fills *stats under stats_lock */ + spin_lock_bh(&efx->stats_lock); + efx->type->update_stats(efx, NULL, stats); + spin_unlock_bh(&efx->stats_lock); +} + +/* Push loopback/power/transmit disable settings to the PHY, and reconfigure + * the MAC appropriately. All other PHY configuration changes are pushed + * through phy_op->set_settings(), and pushed asynchronously to the MAC + * through efx_monitor(). + * + * Callers must hold the mac_lock + */ +int __efx_reconfigure_port(struct efx_nic *efx) +{ + enum efx_phy_mode phy_mode; + int rc = 0; + + WARN_ON(!mutex_is_locked(&efx->mac_lock)); + + /* Disable PHY transmit in mac level loopbacks */ + phy_mode = efx->phy_mode; + if (LOOPBACK_INTERNAL(efx)) + efx->phy_mode |= PHY_MODE_TX_DISABLED; + else + efx->phy_mode &= ~PHY_MODE_TX_DISABLED; + + /* reconfigure_port is optional; rc stays 0 without it */ + if (efx->type->reconfigure_port) + rc = efx->type->reconfigure_port(efx); + + /* On failure restore the PHY mode saved above */ + if (rc) + efx->phy_mode = phy_mode; + + return rc; +} + +/* Reinitialise the MAC to pick up new PHY settings, even if the port is + * disabled. 
+ */ +int efx_reconfigure_port(struct efx_nic *efx) +{ + int rc; + + EFX_ASSERT_RESET_SERIALISED(efx); + + mutex_lock(&efx->mac_lock); + rc = __efx_reconfigure_port(efx); + mutex_unlock(&efx->mac_lock); + + return rc; +} + +/************************************************************************** + * + * Device reset and suspend + * + **************************************************************************/ + +/* Poll efx_mcdi_poll_reboot() up to BIST_WAIT_DELAY_COUNT times, sleeping + * BIST_WAIT_DELAY_MS between polls, and log an error if no reboot was + * seen. efx->mc_bist_for_other_fn is cleared in either case. + */ +static void efx_wait_for_bist_end(struct efx_nic *efx) +{ + int i; + + for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) { + if (efx_mcdi_poll_reboot(efx)) + goto out; + msleep(BIST_WAIT_DELAY_MS); + } + + netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n"); +out: + /* Either way unset the BIST flag. If we found no reboot we probably + * won't recover, but we should try. + */ + efx->mc_bist_for_other_fn = false; +} + +/* Try recovery mechanisms. + * For now only EEH is supported. + * Returns 0 if the recovery mechanisms are unsuccessful. + * Returns a non-zero value otherwise. + */ +int efx_try_recovery(struct efx_nic *efx) +{ +#ifdef CONFIG_EEH + /* A PCI error can occur and not be seen by EEH because nothing + * happens on the PCI bus. In this case the driver may fail and + * schedule a 'recover or reset', leading to this recovery handler. + * Manually call the eeh failure check function. + */ + struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev); + if (eeh_dev_check_failure(eehdev)) { + /* The EEH mechanisms will handle the error and reset the + * device if necessary. + */ + return 1; + } +#endif + /* Without CONFIG_EEH this is the only statement compiled in */ + return 0; +} + +/* Tears down the entire software state and most of the hardware state + * before reset. 
+ */ +void efx_reset_down(struct efx_nic *efx, enum reset_type method) +{ + EFX_ASSERT_RESET_SERIALISED(efx); + + if (method == RESET_TYPE_MCDI_TIMEOUT) + efx->type->prepare_flr(efx); + + efx_stop_all(efx); + efx_disable_interrupts(efx); + + /* mac_lock, filter_sem (write) and rss_lock are taken here and are + * still held on return; efx_reset_up() releases them. + */ + mutex_lock(&efx->mac_lock); + down_write(&efx->filter_sem); + mutex_lock(&efx->rss_lock); + if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && + method != RESET_TYPE_DATAPATH) + efx->phy_op->fini(efx); + efx->type->fini(efx); +} + +/* This function will always ensure that the locks acquired in + * efx_reset_down() are released. A failure return code indicates + * that we were unable to reinitialise the hardware, and the + * driver should be disabled. If ok is false, then the rx and tx + * engines are not restarted, pending a RESET_DISABLE. + */ +int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) +{ + int rc; + + EFX_ASSERT_RESET_SERIALISED(efx); + + if (method == RESET_TYPE_MCDI_TIMEOUT) + efx->type->finish_flr(efx); + + /* Ensure that SRAM is initialised even if we're disabling the device */ + rc = efx->type->init(efx); + if (rc) { + netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n"); + goto fail; + } + + /* With !ok, rc is 0 here, so the fail path returns 0 */ + if (!ok) + goto fail; + + if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && + method != RESET_TYPE_DATAPATH) { + rc = efx->phy_op->init(efx); + if (rc) + goto fail; + rc = efx->phy_op->reconfigure(efx); + if (rc && rc != -EPERM) + netif_err(efx, drv, efx->net_dev, + "could not restore PHY settings\n"); + } + + rc = efx_enable_interrupts(efx); + if (rc) + goto fail; + +#ifdef CONFIG_SFC_SRIOV + rc = efx->type->vswitching_restore(efx); + if (rc) /* not fatal; the PF will still work fine */ + netif_warn(efx, probe, efx->net_dev, + "failed to restore vswitching rc=%d;" + " VFs may not function\n", rc); +#endif + + if (efx->type->rx_restore_rss_contexts) + efx->type->rx_restore_rss_contexts(efx); + mutex_unlock(&efx->rss_lock); + efx->type->filter_table_restore(efx); + 
up_write(&efx->filter_sem); + if (efx->type->sriov_reset) + efx->type->sriov_reset(efx); + + mutex_unlock(&efx->mac_lock); + + efx_start_all(efx); + + if (efx->type->udp_tnl_push_ports) + efx->type->udp_tnl_push_ports(efx); + + return 0; + +fail: + efx->port_initialized = false; + + /* Release all three locks taken in efx_reset_down() */ + mutex_unlock(&efx->rss_lock); + up_write(&efx->filter_sem); + mutex_unlock(&efx->mac_lock); + + return rc; +} + +/* Reset the NIC using the specified method. Note that the reset may + * fail, in which case the card will be left in an unusable state. + * + * Caller must hold the rtnl_lock. + */ +int efx_reset(struct efx_nic *efx, enum reset_type method) +{ + bool disabled; + int rc, rc2; + + netif_info(efx, drv, efx->net_dev, "resetting (%s)\n", + RESET_TYPE(method)); + + efx_device_detach_sync(efx); + efx_reset_down(efx, method); + + rc = efx->type->reset(efx, method); + if (rc) { + netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n"); + goto out; + } + + /* Clear flags for the scopes we covered. We assume the NIC and + * driver are now quiescent so that there is no race here. + */ + /* -(1 << (method + 1)) keeps only the bits above 'method', so this + * clears the flag for 'method' and every lower-numbered one + */ + if (method < RESET_TYPE_MAX_METHOD) + efx->reset_pending &= -(1 << (method + 1)); + else /* it doesn't fit into the well-ordered scope hierarchy */ + __clear_bit(method, &efx->reset_pending); + + /* Reinitialise bus-mastering, which may have been turned off before + * the reset was scheduled. This is still appropriate, even in the + * RESET_TYPE_DISABLE since this driver generally assumes the hardware + * can respond to requests. 
+ */ + pci_set_master(efx->pci_dev); + +out: + /* Leave device stopped if necessary */ + disabled = rc || + method == RESET_TYPE_DISABLE || + method == RESET_TYPE_RECOVER_OR_DISABLE; + /* efx_reset_up() runs on the failure path too */ + rc2 = efx_reset_up(efx, method, !disabled); + if (rc2) { + disabled = true; + if (!rc) + rc = rc2; + } + + if (disabled) { + dev_close(efx->net_dev); + netif_err(efx, drv, efx->net_dev, "has been disabled\n"); + efx->state = STATE_DISABLED; + } else { + netif_dbg(efx, drv, efx->net_dev, "reset complete\n"); + efx_device_attach_if_not_resetting(efx); + } + return rc; +} + +/* The worker thread exists so that code that cannot sleep can + * schedule a reset for later. + */ +static void efx_reset_work(struct work_struct *data) +{ + struct efx_nic *efx = container_of(data, struct efx_nic, reset_work); + unsigned long pending; + enum reset_type method; + + /* The highest set bit of reset_pending selects the method. + * NOTE(review): method is derived before the !pending check below, + * so with pending == 0 it is fls(0) - 1; none of the comparisons + * that follow are expected to match that value - confirm. + */ + pending = READ_ONCE(efx->reset_pending); + method = fls(pending) - 1; + + if (method == RESET_TYPE_MC_BIST) + efx_wait_for_bist_end(efx); + + if ((method == RESET_TYPE_RECOVER_OR_DISABLE || + method == RESET_TYPE_RECOVER_OR_ALL) && + efx_try_recovery(efx)) + return; + + if (!pending) + return; + + rtnl_lock(); + + /* We checked the state in efx_schedule_reset() but it may + * have changed by now. Now that we have the RTNL lock, + * it cannot change again. 
+ */ + if (efx->state == STATE_READY) + (void)efx_reset(efx, method); + + rtnl_unlock(); +} + +/* Record a pending reset of the given type and, if the NIC is in + * STATE_READY, queue the reset work. Nothing is scheduled while the NIC + * is in STATE_RECOVERY. + */ +void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) +{ + enum reset_type method; + + if (efx->state == STATE_RECOVERY) { + netif_dbg(efx, drv, efx->net_dev, + "recovering: skip scheduling %s reset\n", + RESET_TYPE(type)); + return; + } + + /* The listed types are used as the method directly; any other + * type is mapped by the NIC type's map_reset_reason() + */ + switch (type) { + case RESET_TYPE_INVISIBLE: + case RESET_TYPE_ALL: + case RESET_TYPE_RECOVER_OR_ALL: + case RESET_TYPE_WORLD: + case RESET_TYPE_DISABLE: + case RESET_TYPE_RECOVER_OR_DISABLE: + case RESET_TYPE_DATAPATH: + case RESET_TYPE_MC_BIST: + case RESET_TYPE_MCDI_TIMEOUT: + method = type; + netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", + RESET_TYPE(method)); + break; + default: + method = efx->type->map_reset_reason(type); + netif_dbg(efx, drv, efx->net_dev, + "scheduling %s reset for %s\n", + RESET_TYPE(method), RESET_TYPE(type)); + break; + } + + set_bit(method, &efx->reset_pending); + smp_mb(); /* ensure we change reset_pending before checking state */ + + /* If we're not READY then just leave the flags set as the cue + * to abort probing or reschedule the reset later. + */ + if (READ_ONCE(efx->state) != STATE_READY) + return; + + /* efx_process_channel() will no longer read events once a + * reset is scheduled. So switch back to poll'd MCDI completions. 
+ */ + efx_mcdi_mode_poll(efx); + + efx_queue_reset_work(efx); +} + +/************************************************************************** + * + * Dummy PHY/MAC operations + * + * Can be used for some unimplemented operations + * Needed so all function pointers are valid and do not have to be tested + * before use + * + **************************************************************************/ +/* Dummy operation that does nothing and returns 0 */ +int efx_port_dummy_op_int(struct efx_nic *efx) +{ + return 0; +} +/* Dummy operation that does nothing */ +void efx_port_dummy_op_void(struct efx_nic *efx) {} + +/* Dummy poll operation: always returns false */ +static bool efx_port_dummy_op_poll(struct efx_nic *efx) +{ + return false; +} + +/* PHY operations made up entirely of the dummy operations above; + * efx_init_struct() installs this table as efx->phy_op. + */ +static const struct efx_phy_operations efx_dummy_phy_operations = { + .init = efx_port_dummy_op_int, + .reconfigure = efx_port_dummy_op_int, + .poll = efx_port_dummy_op_poll, + .fini = efx_port_dummy_op_void, +}; + +/************************************************************************** + * + * Data housekeeping + * + **************************************************************************/ + +/* This zeroes out and then fills in the invariants in a struct + * efx_nic (including all sub-structures). + */ +/* NOTE(review): no zeroing is visible in this function - presumably the + * caller passes zeroed memory; confirm. efx->type is read below, so it + * must already be set on entry. + */ +int efx_init_struct(struct efx_nic *efx, + struct pci_dev *pci_dev, struct net_device *net_dev) +{ + /* NOTE(review): this initial value is overwritten by + * efx_init_channels() before rc is ever read + */ + int rc = -ENOMEM; + + /* Initialise common structures */ + INIT_LIST_HEAD(&efx->node); + INIT_LIST_HEAD(&efx->secondary_list); + spin_lock_init(&efx->biu_lock); +#ifdef CONFIG_SFC_MTD + INIT_LIST_HEAD(&efx->mtd_list); +#endif + INIT_WORK(&efx->reset_work, efx_reset_work); + INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor); + efx_selftest_async_init(efx); + efx->pci_dev = pci_dev; + efx->msg_enable = debug; + efx->state = STATE_UNINIT; + strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name)); + + efx->net_dev = net_dev; + efx->rx_prefix_size = efx->type->rx_prefix_size; + efx->rx_ip_align = + NET_IP_ALIGN ? 
(efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0; + efx->rx_packet_hash_offset = + efx->type->rx_hash_offset - efx->type->rx_prefix_size; + efx->rx_packet_ts_offset = + efx->type->rx_ts_offset - efx->type->rx_prefix_size; + INIT_LIST_HEAD(&efx->rss_context.list); + mutex_init(&efx->rss_lock); + spin_lock_init(&efx->stats_lock); + efx->vi_stride = EFX_DEFAULT_VI_STRIDE; + efx->num_mac_stats = MC_CMD_MAC_NSTATS; + BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END); + mutex_init(&efx->mac_lock); +#ifdef CONFIG_RFS_ACCEL + mutex_init(&efx->rps_mutex); + spin_lock_init(&efx->rps_hash_lock); + /* Failure to allocate is not fatal, but may degrade ARFS performance */ + efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE, + sizeof(*efx->rps_hash_table), GFP_KERNEL); +#endif + /* Start with the dummy PHY operations */ + efx->phy_op = &efx_dummy_phy_operations; + efx->mdio.dev = net_dev; + INIT_WORK(&efx->mac_work, efx_mac_work); + init_waitqueue_head(&efx->flush_wq); + + rc = efx_init_channels(efx); + if (rc) + goto fail; + + /* Would be good to use the net_dev name, but we're too early */ + snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s", + pci_name(pci_dev)); + efx->workqueue = create_singlethread_workqueue(efx->workqueue_name); + if (!efx->workqueue) { + rc = -ENOMEM; + goto fail; + } + + return 0; + +fail: + /* Any failure undoes everything via efx_fini_struct() */ + efx_fini_struct(efx); + return rc; +} + +/* Free what efx_init_struct() set up: the ARFS hash table (when + * CONFIG_RFS_ACCEL is set), the channels, vpd_sn and the workqueue. + * Only the workqueue pointer is reset to NULL afterwards. + */ +void efx_fini_struct(struct efx_nic *efx) +{ +#ifdef CONFIG_RFS_ACCEL + kfree(efx->rps_hash_table); +#endif + + efx_fini_channels(efx); + + kfree(efx->vpd_sn); + + if (efx->workqueue) { + destroy_workqueue(efx->workqueue); + efx->workqueue = NULL; + } +} + +/* This configures the PCI device to enable I/O and DMA. 
*/ +int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask, + unsigned int mem_map_size) +{ + struct pci_dev *pci_dev = efx->pci_dev; + int rc; + + netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n"); + + rc = pci_enable_device(pci_dev); + if (rc) { + netif_err(efx, probe, efx->net_dev, + "failed to enable PCI device\n"); + goto fail1; + } + + pci_set_master(pci_dev); + + /* Set the PCI DMA mask. Try all possibilities from our + * genuine mask down to 32 bits, because some architectures + * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit + * masks event though they reject 46 bit masks. + */ + while (dma_mask > 0x7fffffffUL) { + rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask); + if (rc == 0) + break; + dma_mask >>= 1; + } + if (rc) { + netif_err(efx, probe, efx->net_dev, + "could not find a suitable DMA mask\n"); + goto fail2; + } + netif_dbg(efx, probe, efx->net_dev, + "using DMA mask %llx\n", (unsigned long long)dma_mask); + + efx->membase_phys = pci_resource_start(efx->pci_dev, bar); + if (!efx->membase_phys) { + netif_err(efx, probe, efx->net_dev, + "ERROR: No BAR%d mapping from the BIOS. 
" + "Try pci=realloc on the kernel command line\n", bar); + rc = -ENODEV; + goto fail3; + } + + rc = pci_request_region(pci_dev, bar, "sfc"); + if (rc) { + netif_err(efx, probe, efx->net_dev, + "request for memory BAR failed\n"); + rc = -EIO; + goto fail3; + } + + efx->membase = ioremap_nocache(efx->membase_phys, mem_map_size); + if (!efx->membase) { + netif_err(efx, probe, efx->net_dev, + "could not map memory BAR at %llx+%x\n", + (unsigned long long)efx->membase_phys, mem_map_size); + rc = -ENOMEM; + goto fail4; + } + netif_dbg(efx, probe, efx->net_dev, + "memory BAR at %llx+%x (virtual %p)\n", + (unsigned long long)efx->membase_phys, mem_map_size, + efx->membase); + + return 0; + +fail4: + pci_release_region(efx->pci_dev, bar); +fail3: + efx->membase_phys = 0; +fail2: + pci_disable_device(efx->pci_dev); +fail1: + return rc; +} + +void efx_fini_io(struct efx_nic *efx, int bar) +{ + netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n"); + + if (efx->membase) { + iounmap(efx->membase); + efx->membase = NULL; + } + + if (efx->membase_phys) { + pci_release_region(efx->pci_dev, bar); + efx->membase_phys = 0; + } + + /* Don't disable bus-mastering if VFs are assigned */ + if (!pci_vfs_assigned(efx->pci_dev)) + pci_disable_device(efx->pci_dev); +} + +#ifdef CONFIG_SFC_MCDI_LOGGING +static ssize_t show_mcdi_log(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct efx_nic *efx = dev_get_drvdata(dev); + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + + return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled); +} + +static ssize_t set_mcdi_log(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct efx_nic *efx = dev_get_drvdata(dev); + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + bool enable = count > 0 && *buf != '0'; + + mcdi->logging_enabled = enable; + return count; +} + +static DEVICE_ATTR(mcdi_logging, 0644, show_mcdi_log, set_mcdi_log); + +void efx_init_mcdi_logging(struct efx_nic *efx) +{ + 
int rc = device_create_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); + + if (rc) { + netif_warn(efx, drv, efx->net_dev, + "failed to init net dev attributes\n"); + } +} + +void efx_fini_mcdi_logging(struct efx_nic *efx) +{ + device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); +} +#endif diff --git a/drivers/net/ethernet/sfc/efx_common.h b/drivers/net/ethernet/sfc/efx_common.h new file mode 100644 index 000000000000..fa2fc681e7f9 --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_common.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_COMMON_H +#define EFX_COMMON_H + +int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask, + unsigned int mem_map_size); +void efx_fini_io(struct efx_nic *efx, int bar); +int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev, + struct net_device *net_dev); +void efx_fini_struct(struct efx_nic *efx); + +void efx_start_all(struct efx_nic *efx); +void efx_stop_all(struct efx_nic *efx); + +void efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats); + +int efx_create_reset_workqueue(void); +void efx_queue_reset_work(struct efx_nic *efx); +void efx_flush_reset_workqueue(struct efx_nic *efx); +void efx_destroy_reset_workqueue(void); + +void efx_start_monitor(struct efx_nic *efx); + +int __efx_reconfigure_port(struct efx_nic *efx); +int efx_reconfigure_port(struct efx_nic *efx); + +#define EFX_ASSERT_RESET_SERIALISED(efx) \ + do { \ + if ((efx->state == STATE_READY) || \ + (efx->state == STATE_RECOVERY) || \ + (efx->state == STATE_DISABLED)) \ + 
ASSERT_RTNL(); \ + } while (0) + +int efx_try_recovery(struct efx_nic *efx); +void efx_reset_down(struct efx_nic *efx, enum reset_type method); +int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok); +int efx_reset(struct efx_nic *efx, enum reset_type method); +void efx_schedule_reset(struct efx_nic *efx, enum reset_type type); + +static inline int efx_check_disabled(struct efx_nic *efx) +{ + if (efx->state == STATE_DISABLED || efx->state == STATE_RECOVERY) { + netif_err(efx, drv, efx->net_dev, + "device is disabled due to earlier errors\n"); + return -EIO; + } + return 0; +} + +#ifdef CONFIG_SFC_MCDI_LOGGING +void efx_init_mcdi_logging(struct efx_nic *efx); +void efx_fini_mcdi_logging(struct efx_nic *efx); +#else +static inline void efx_init_mcdi_logging(struct efx_nic *efx) {} +static inline void efx_fini_mcdi_logging(struct efx_nic *efx) {} +#endif + +void efx_mac_reconfigure(struct efx_nic *efx); +void efx_link_status_changed(struct efx_nic *efx); +unsigned int efx_xdp_max_mtu(struct efx_nic *efx); +int efx_change_mtu(struct net_device *net_dev, int new_mtu); + +#endif diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index b31032da4bcb..993b5769525b 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -13,92 +13,13 @@ #include "workarounds.h" #include "selftest.h" #include "efx.h" +#include "efx_channels.h" +#include "rx_common.h" +#include "tx_common.h" +#include "ethtool_common.h" #include "filter.h" #include "nic.h" -struct efx_sw_stat_desc { - const char *name; - enum { - EFX_ETHTOOL_STAT_SOURCE_nic, - EFX_ETHTOOL_STAT_SOURCE_channel, - EFX_ETHTOOL_STAT_SOURCE_tx_queue - } source; - unsigned offset; - u64(*get_stat) (void *field); /* Reader function */ -}; - -/* Initialiser for a struct efx_sw_stat_desc with type-checking */ -#define EFX_ETHTOOL_STAT(stat_name, source_name, field, field_type, \ - get_stat_function) { \ - .name = #stat_name, \ - .source = 
EFX_ETHTOOL_STAT_SOURCE_##source_name, \ - .offset = ((((field_type *) 0) == \ - &((struct efx_##source_name *)0)->field) ? \ - offsetof(struct efx_##source_name, field) : \ - offsetof(struct efx_##source_name, field)), \ - .get_stat = get_stat_function, \ -} - -static u64 efx_get_uint_stat(void *field) -{ - return *(unsigned int *)field; -} - -static u64 efx_get_atomic_stat(void *field) -{ - return atomic_read((atomic_t *) field); -} - -#define EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(field) \ - EFX_ETHTOOL_STAT(field, nic, field, \ - atomic_t, efx_get_atomic_stat) - -#define EFX_ETHTOOL_UINT_CHANNEL_STAT(field) \ - EFX_ETHTOOL_STAT(field, channel, n_##field, \ - unsigned int, efx_get_uint_stat) -#define EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(field) \ - EFX_ETHTOOL_STAT(field, channel, field, \ - unsigned int, efx_get_uint_stat) - -#define EFX_ETHTOOL_UINT_TXQ_STAT(field) \ - EFX_ETHTOOL_STAT(tx_##field, tx_queue, field, \ - unsigned int, efx_get_uint_stat) - -static const struct efx_sw_stat_desc efx_sw_stat_desc[] = { - EFX_ETHTOOL_UINT_TXQ_STAT(merge_events), - EFX_ETHTOOL_UINT_TXQ_STAT(tso_bursts), - EFX_ETHTOOL_UINT_TXQ_STAT(tso_long_headers), - EFX_ETHTOOL_UINT_TXQ_STAT(tso_packets), - EFX_ETHTOOL_UINT_TXQ_STAT(tso_fallbacks), - EFX_ETHTOOL_UINT_TXQ_STAT(pushes), - EFX_ETHTOOL_UINT_TXQ_STAT(pio_packets), - EFX_ETHTOOL_UINT_TXQ_STAT(cb_packets), - EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(rx_reset), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tobe_disc), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_ip_hdr_chksum_err), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tcp_udp_chksum_err), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_ip_hdr_chksum_err), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_tcp_udp_chksum_err), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_ip_hdr_chksum_err), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_tcp_udp_chksum_err), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_eth_crc_err), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_mcast_mismatch), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc), - 
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_events), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_packets), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_drops), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_bad_drops), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_tx), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_redirect), -#ifdef CONFIG_RFS_ACCEL - EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(rfs_filter_count), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_succeeded), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_failed), -#endif -}; - -#define EFX_ETHTOOL_SW_STAT_COUNT ARRAY_SIZE(efx_sw_stat_desc) - #define EFX_ETHTOOL_EEPROM_MAGIC 0xEFAB /************************************************************************** @@ -185,18 +106,6 @@ efx_ethtool_set_link_ksettings(struct net_device *net_dev, return rc; } -static void efx_ethtool_get_drvinfo(struct net_device *net_dev, - struct ethtool_drvinfo *info) -{ - struct efx_nic *efx = netdev_priv(net_dev); - - strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); - strlcpy(info->version, EFX_DRIVER_VERSION, sizeof(info->version)); - efx_mcdi_print_fwver(efx, info->fw_version, - sizeof(info->fw_version)); - strlcpy(info->bus_info, pci_name(efx->pci_dev), sizeof(info->bus_info)); -} - static int efx_ethtool_get_regs_len(struct net_device *net_dev) { return efx_nic_get_regs_len(netdev_priv(net_dev)); @@ -211,341 +120,6 @@ static void efx_ethtool_get_regs(struct net_device *net_dev, efx_nic_get_regs(efx, buf); } -static u32 efx_ethtool_get_msglevel(struct net_device *net_dev) -{ - struct efx_nic *efx = netdev_priv(net_dev); - return efx->msg_enable; -} - -static void efx_ethtool_set_msglevel(struct net_device *net_dev, u32 msg_enable) -{ - struct efx_nic *efx = netdev_priv(net_dev); - efx->msg_enable = msg_enable; -} - -/** - * efx_fill_test - fill in an individual self-test entry - * @test_index: Index of the test - * @strings: Ethtool strings, or %NULL - * @data: Ethtool test results, or %NULL - * @test: Pointer to test result (used only if data != %NULL) - * @unit_format: Unit 
name format (e.g. "chan\%d") - * @unit_id: Unit id (e.g. 0 for "chan0") - * @test_format: Test name format (e.g. "loopback.\%s.tx.sent") - * @test_id: Test id (e.g. "PHYXS" for "loopback.PHYXS.tx_sent") - * - * Fill in an individual self-test entry. - */ -static void efx_fill_test(unsigned int test_index, u8 *strings, u64 *data, - int *test, const char *unit_format, int unit_id, - const char *test_format, const char *test_id) -{ - char unit_str[ETH_GSTRING_LEN], test_str[ETH_GSTRING_LEN]; - - /* Fill data value, if applicable */ - if (data) - data[test_index] = *test; - - /* Fill string, if applicable */ - if (strings) { - if (strchr(unit_format, '%')) - snprintf(unit_str, sizeof(unit_str), - unit_format, unit_id); - else - strcpy(unit_str, unit_format); - snprintf(test_str, sizeof(test_str), test_format, test_id); - snprintf(strings + test_index * ETH_GSTRING_LEN, - ETH_GSTRING_LEN, - "%-6s %-24s", unit_str, test_str); - } -} - -#define EFX_CHANNEL_NAME(_channel) "chan%d", _channel->channel -#define EFX_TX_QUEUE_NAME(_tx_queue) "txq%d", _tx_queue->queue -#define EFX_RX_QUEUE_NAME(_rx_queue) "rxq%d", _rx_queue->queue -#define EFX_LOOPBACK_NAME(_mode, _counter) \ - "loopback.%s." _counter, STRING_TABLE_LOOKUP(_mode, efx_loopback_mode) - -/** - * efx_fill_loopback_test - fill in a block of loopback self-test entries - * @efx: Efx NIC - * @lb_tests: Efx loopback self-test results structure - * @mode: Loopback test mode - * @test_index: Starting index of the test - * @strings: Ethtool strings, or %NULL - * @data: Ethtool test results, or %NULL - * - * Fill in a block of loopback self-test entries. Return new test - * index. 
- */ -static int efx_fill_loopback_test(struct efx_nic *efx, - struct efx_loopback_self_tests *lb_tests, - enum efx_loopback_mode mode, - unsigned int test_index, - u8 *strings, u64 *data) -{ - struct efx_channel *channel = - efx_get_channel(efx, efx->tx_channel_offset); - struct efx_tx_queue *tx_queue; - - efx_for_each_channel_tx_queue(tx_queue, channel) { - efx_fill_test(test_index++, strings, data, - &lb_tests->tx_sent[tx_queue->queue], - EFX_TX_QUEUE_NAME(tx_queue), - EFX_LOOPBACK_NAME(mode, "tx_sent")); - efx_fill_test(test_index++, strings, data, - &lb_tests->tx_done[tx_queue->queue], - EFX_TX_QUEUE_NAME(tx_queue), - EFX_LOOPBACK_NAME(mode, "tx_done")); - } - efx_fill_test(test_index++, strings, data, - &lb_tests->rx_good, - "rx", 0, - EFX_LOOPBACK_NAME(mode, "rx_good")); - efx_fill_test(test_index++, strings, data, - &lb_tests->rx_bad, - "rx", 0, - EFX_LOOPBACK_NAME(mode, "rx_bad")); - - return test_index; -} - -/** - * efx_ethtool_fill_self_tests - get self-test details - * @efx: Efx NIC - * @tests: Efx self-test results structure, or %NULL - * @strings: Ethtool strings, or %NULL - * @data: Ethtool test results, or %NULL - * - * Get self-test number of strings, strings, and/or test results. - * Return number of strings (== number of test results). - * - * The reason for merging these three functions is to make sure that - * they can never be inconsistent. 
- */ -static int efx_ethtool_fill_self_tests(struct efx_nic *efx, - struct efx_self_tests *tests, - u8 *strings, u64 *data) -{ - struct efx_channel *channel; - unsigned int n = 0, i; - enum efx_loopback_mode mode; - - efx_fill_test(n++, strings, data, &tests->phy_alive, - "phy", 0, "alive", NULL); - efx_fill_test(n++, strings, data, &tests->nvram, - "core", 0, "nvram", NULL); - efx_fill_test(n++, strings, data, &tests->interrupt, - "core", 0, "interrupt", NULL); - - /* Event queues */ - efx_for_each_channel(channel, efx) { - efx_fill_test(n++, strings, data, - &tests->eventq_dma[channel->channel], - EFX_CHANNEL_NAME(channel), - "eventq.dma", NULL); - efx_fill_test(n++, strings, data, - &tests->eventq_int[channel->channel], - EFX_CHANNEL_NAME(channel), - "eventq.int", NULL); - } - - efx_fill_test(n++, strings, data, &tests->memory, - "core", 0, "memory", NULL); - efx_fill_test(n++, strings, data, &tests->registers, - "core", 0, "registers", NULL); - - if (efx->phy_op->run_tests != NULL) { - EFX_WARN_ON_PARANOID(efx->phy_op->test_name == NULL); - - for (i = 0; true; ++i) { - const char *name; - - EFX_WARN_ON_PARANOID(i >= EFX_MAX_PHY_TESTS); - name = efx->phy_op->test_name(efx, i); - if (name == NULL) - break; - - efx_fill_test(n++, strings, data, &tests->phy_ext[i], - "phy", 0, name, NULL); - } - } - - /* Loopback tests */ - for (mode = LOOPBACK_NONE; mode <= LOOPBACK_TEST_MAX; mode++) { - if (!(efx->loopback_modes & (1 << mode))) - continue; - n = efx_fill_loopback_test(efx, - &tests->loopback[mode], mode, n, - strings, data); - } - - return n; -} - -static size_t efx_describe_per_queue_stats(struct efx_nic *efx, u8 *strings) -{ - size_t n_stats = 0; - struct efx_channel *channel; - - efx_for_each_channel(channel, efx) { - if (efx_channel_has_tx_queues(channel)) { - n_stats++; - if (strings != NULL) { - snprintf(strings, ETH_GSTRING_LEN, - "tx-%u.tx_packets", - channel->tx_queue[0].queue / - EFX_TXQ_TYPES); - - strings += ETH_GSTRING_LEN; - } - } - } - 
efx_for_each_channel(channel, efx) { - if (efx_channel_has_rx_queue(channel)) { - n_stats++; - if (strings != NULL) { - snprintf(strings, ETH_GSTRING_LEN, - "rx-%d.rx_packets", channel->channel); - strings += ETH_GSTRING_LEN; - } - } - } - if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) { - unsigned short xdp; - - for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) { - n_stats++; - if (strings) { - snprintf(strings, ETH_GSTRING_LEN, - "tx-xdp-cpu-%hu.tx_packets", xdp); - strings += ETH_GSTRING_LEN; - } - } - } - - return n_stats; -} - -static int efx_ethtool_get_sset_count(struct net_device *net_dev, - int string_set) -{ - struct efx_nic *efx = netdev_priv(net_dev); - - switch (string_set) { - case ETH_SS_STATS: - return efx->type->describe_stats(efx, NULL) + - EFX_ETHTOOL_SW_STAT_COUNT + - efx_describe_per_queue_stats(efx, NULL) + - efx_ptp_describe_stats(efx, NULL); - case ETH_SS_TEST: - return efx_ethtool_fill_self_tests(efx, NULL, NULL, NULL); - default: - return -EINVAL; - } -} - -static void efx_ethtool_get_strings(struct net_device *net_dev, - u32 string_set, u8 *strings) -{ - struct efx_nic *efx = netdev_priv(net_dev); - int i; - - switch (string_set) { - case ETH_SS_STATS: - strings += (efx->type->describe_stats(efx, strings) * - ETH_GSTRING_LEN); - for (i = 0; i < EFX_ETHTOOL_SW_STAT_COUNT; i++) - strlcpy(strings + i * ETH_GSTRING_LEN, - efx_sw_stat_desc[i].name, ETH_GSTRING_LEN); - strings += EFX_ETHTOOL_SW_STAT_COUNT * ETH_GSTRING_LEN; - strings += (efx_describe_per_queue_stats(efx, strings) * - ETH_GSTRING_LEN); - efx_ptp_describe_stats(efx, strings); - break; - case ETH_SS_TEST: - efx_ethtool_fill_self_tests(efx, NULL, strings, NULL); - break; - default: - /* No other string sets */ - break; - } -} - -static void efx_ethtool_get_stats(struct net_device *net_dev, - struct ethtool_stats *stats, - u64 *data) -{ - struct efx_nic *efx = netdev_priv(net_dev); - const struct efx_sw_stat_desc *stat; - struct efx_channel *channel; - struct efx_tx_queue 
*tx_queue; - struct efx_rx_queue *rx_queue; - int i; - - spin_lock_bh(&efx->stats_lock); - - /* Get NIC statistics */ - data += efx->type->update_stats(efx, data, NULL); - - /* Get software statistics */ - for (i = 0; i < EFX_ETHTOOL_SW_STAT_COUNT; i++) { - stat = &efx_sw_stat_desc[i]; - switch (stat->source) { - case EFX_ETHTOOL_STAT_SOURCE_nic: - data[i] = stat->get_stat((void *)efx + stat->offset); - break; - case EFX_ETHTOOL_STAT_SOURCE_channel: - data[i] = 0; - efx_for_each_channel(channel, efx) - data[i] += stat->get_stat((void *)channel + - stat->offset); - break; - case EFX_ETHTOOL_STAT_SOURCE_tx_queue: - data[i] = 0; - efx_for_each_channel(channel, efx) { - efx_for_each_channel_tx_queue(tx_queue, channel) - data[i] += - stat->get_stat((void *)tx_queue - + stat->offset); - } - break; - } - } - data += EFX_ETHTOOL_SW_STAT_COUNT; - - spin_unlock_bh(&efx->stats_lock); - - efx_for_each_channel(channel, efx) { - if (efx_channel_has_tx_queues(channel)) { - *data = 0; - efx_for_each_channel_tx_queue(tx_queue, channel) { - *data += tx_queue->tx_packets; - } - data++; - } - } - efx_for_each_channel(channel, efx) { - if (efx_channel_has_rx_queue(channel)) { - *data = 0; - efx_for_each_channel_rx_queue(rx_queue, channel) { - *data += rx_queue->rx_packets; - } - data++; - } - } - if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) { - int xdp; - - for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) { - data[0] = efx->xdp_tx_queues[xdp]->tx_packets; - data++; - } - } - - efx_ptp_update_stats(efx, data); -} - static void efx_ethtool_self_test(struct net_device *net_dev, struct ethtool_test *test, u64 *data) { @@ -787,16 +361,6 @@ out: return rc; } -static void efx_ethtool_get_pauseparam(struct net_device *net_dev, - struct ethtool_pauseparam *pause) -{ - struct efx_nic *efx = netdev_priv(net_dev); - - pause->rx_pause = !!(efx->wanted_fc & EFX_FC_RX); - pause->tx_pause = !!(efx->wanted_fc & EFX_FC_TX); - pause->autoneg = !!(efx->wanted_fc & EFX_FC_AUTO); -} - static void 
efx_ethtool_get_wol(struct net_device *net_dev, struct ethtool_wolinfo *wol) { @@ -1456,7 +1020,7 @@ static int efx_ethtool_set_rxfh_context(struct net_device *net_dev, rc = -ENOMEM; goto out_unlock; } - ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID; + ctx->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; /* Initialise indir table and key to defaults */ efx_set_default_rx_indir_table(efx, ctx); netdev_rss_key_fill(ctx->rx_hash_key, sizeof(ctx->rx_hash_key)); diff --git a/drivers/net/ethernet/sfc/ethtool_common.c b/drivers/net/ethernet/sfc/ethtool_common.c new file mode 100644 index 000000000000..b8d281ab6c7a --- /dev/null +++ b/drivers/net/ethernet/sfc/ethtool_common.c @@ -0,0 +1,457 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2019 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ +#include <linux/module.h> +#include <linux/netdevice.h> +#include "net_driver.h" +#include "mcdi.h" +#include "nic.h" +#include "selftest.h" +#include "ethtool_common.h" + +struct efx_sw_stat_desc { + const char *name; + enum { + EFX_ETHTOOL_STAT_SOURCE_nic, + EFX_ETHTOOL_STAT_SOURCE_channel, + EFX_ETHTOOL_STAT_SOURCE_tx_queue + } source; + unsigned int offset; + u64 (*get_stat)(void *field); /* Reader function */ +}; + +/* Initialiser for a struct efx_sw_stat_desc with type-checking */ +#define EFX_ETHTOOL_STAT(stat_name, source_name, field, field_type, \ + get_stat_function) { \ + .name = #stat_name, \ + .source = EFX_ETHTOOL_STAT_SOURCE_##source_name, \ + .offset = ((((field_type *) 0) == \ + &((struct efx_##source_name *)0)->field) ? 
\ + offsetof(struct efx_##source_name, field) : \ + offsetof(struct efx_##source_name, field)), \ + .get_stat = get_stat_function, \ +} + +static u64 efx_get_uint_stat(void *field) +{ + return *(unsigned int *)field; +} + +static u64 efx_get_atomic_stat(void *field) +{ + return atomic_read((atomic_t *) field); +} + +#define EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(field) \ + EFX_ETHTOOL_STAT(field, nic, field, \ + atomic_t, efx_get_atomic_stat) + +#define EFX_ETHTOOL_UINT_CHANNEL_STAT(field) \ + EFX_ETHTOOL_STAT(field, channel, n_##field, \ + unsigned int, efx_get_uint_stat) +#define EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(field) \ + EFX_ETHTOOL_STAT(field, channel, field, \ + unsigned int, efx_get_uint_stat) + +#define EFX_ETHTOOL_UINT_TXQ_STAT(field) \ + EFX_ETHTOOL_STAT(tx_##field, tx_queue, field, \ + unsigned int, efx_get_uint_stat) + +static const struct efx_sw_stat_desc efx_sw_stat_desc[] = { + EFX_ETHTOOL_UINT_TXQ_STAT(merge_events), + EFX_ETHTOOL_UINT_TXQ_STAT(tso_bursts), + EFX_ETHTOOL_UINT_TXQ_STAT(tso_long_headers), + EFX_ETHTOOL_UINT_TXQ_STAT(tso_packets), + EFX_ETHTOOL_UINT_TXQ_STAT(tso_fallbacks), + EFX_ETHTOOL_UINT_TXQ_STAT(pushes), + EFX_ETHTOOL_UINT_TXQ_STAT(pio_packets), + EFX_ETHTOOL_UINT_TXQ_STAT(cb_packets), + EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(rx_reset), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tobe_disc), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_ip_hdr_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tcp_udp_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_ip_hdr_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_tcp_udp_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_ip_hdr_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_tcp_udp_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_eth_crc_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_mcast_mismatch), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_events), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_packets), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_drops), + 
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_bad_drops), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_tx), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_redirect), +#ifdef CONFIG_RFS_ACCEL + EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(rfs_filter_count), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_succeeded), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_failed), +#endif +}; + +#define EFX_ETHTOOL_SW_STAT_COUNT ARRAY_SIZE(efx_sw_stat_desc) + +void efx_ethtool_get_drvinfo(struct net_device *net_dev, + struct ethtool_drvinfo *info) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); + strlcpy(info->version, EFX_DRIVER_VERSION, sizeof(info->version)); + efx_mcdi_print_fwver(efx, info->fw_version, + sizeof(info->fw_version)); + strlcpy(info->bus_info, pci_name(efx->pci_dev), sizeof(info->bus_info)); +} + +u32 efx_ethtool_get_msglevel(struct net_device *net_dev) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + return efx->msg_enable; +} + +void efx_ethtool_set_msglevel(struct net_device *net_dev, u32 msg_enable) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + efx->msg_enable = msg_enable; +} + +void efx_ethtool_get_pauseparam(struct net_device *net_dev, + struct ethtool_pauseparam *pause) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + pause->rx_pause = !!(efx->wanted_fc & EFX_FC_RX); + pause->tx_pause = !!(efx->wanted_fc & EFX_FC_TX); + pause->autoneg = !!(efx->wanted_fc & EFX_FC_AUTO); +} + +/** + * efx_fill_test - fill in an individual self-test entry + * @test_index: Index of the test + * @strings: Ethtool strings, or %NULL + * @data: Ethtool test results, or %NULL + * @test: Pointer to test result (used only if data != %NULL) + * @unit_format: Unit name format (e.g. "chan\%d") + * @unit_id: Unit id (e.g. 0 for "chan0") + * @test_format: Test name format (e.g. "loopback.\%s.tx.sent") + * @test_id: Test id (e.g. "PHYXS" for "loopback.PHYXS.tx_sent") + * + * Fill in an individual self-test entry. 
+ */ +static void efx_fill_test(unsigned int test_index, u8 *strings, u64 *data, + int *test, const char *unit_format, int unit_id, + const char *test_format, const char *test_id) +{ + char unit_str[ETH_GSTRING_LEN], test_str[ETH_GSTRING_LEN]; + + /* Fill data value, if applicable */ + if (data) + data[test_index] = *test; + + /* Fill string, if applicable */ + if (strings) { + if (strchr(unit_format, '%')) + snprintf(unit_str, sizeof(unit_str), + unit_format, unit_id); + else + strcpy(unit_str, unit_format); + snprintf(test_str, sizeof(test_str), test_format, test_id); + snprintf(strings + test_index * ETH_GSTRING_LEN, + ETH_GSTRING_LEN, + "%-6s %-24s", unit_str, test_str); + } +} + +#define EFX_CHANNEL_NAME(_channel) "chan%d", _channel->channel +#define EFX_TX_QUEUE_NAME(_tx_queue) "txq%d", _tx_queue->queue +#define EFX_RX_QUEUE_NAME(_rx_queue) "rxq%d", _rx_queue->queue +#define EFX_LOOPBACK_NAME(_mode, _counter) \ + "loopback.%s." _counter, STRING_TABLE_LOOKUP(_mode, efx_loopback_mode) + +/** + * efx_fill_loopback_test - fill in a block of loopback self-test entries + * @efx: Efx NIC + * @lb_tests: Efx loopback self-test results structure + * @mode: Loopback test mode + * @test_index: Starting index of the test + * @strings: Ethtool strings, or %NULL + * @data: Ethtool test results, or %NULL + * + * Fill in a block of loopback self-test entries. Return new test + * index. 
+ */ +static int efx_fill_loopback_test(struct efx_nic *efx, + struct efx_loopback_self_tests *lb_tests, + enum efx_loopback_mode mode, + unsigned int test_index, + u8 *strings, u64 *data) +{ + struct efx_channel *channel = + efx_get_channel(efx, efx->tx_channel_offset); + struct efx_tx_queue *tx_queue; + + efx_for_each_channel_tx_queue(tx_queue, channel) { + efx_fill_test(test_index++, strings, data, + &lb_tests->tx_sent[tx_queue->queue], + EFX_TX_QUEUE_NAME(tx_queue), + EFX_LOOPBACK_NAME(mode, "tx_sent")); + efx_fill_test(test_index++, strings, data, + &lb_tests->tx_done[tx_queue->queue], + EFX_TX_QUEUE_NAME(tx_queue), + EFX_LOOPBACK_NAME(mode, "tx_done")); + } + efx_fill_test(test_index++, strings, data, + &lb_tests->rx_good, + "rx", 0, + EFX_LOOPBACK_NAME(mode, "rx_good")); + efx_fill_test(test_index++, strings, data, + &lb_tests->rx_bad, + "rx", 0, + EFX_LOOPBACK_NAME(mode, "rx_bad")); + + return test_index; +} + +/** + * efx_ethtool_fill_self_tests - get self-test details + * @efx: Efx NIC + * @tests: Efx self-test results structure, or %NULL + * @strings: Ethtool strings, or %NULL + * @data: Ethtool test results, or %NULL + * + * Get self-test number of strings, strings, and/or test results. + * Return number of strings (== number of test results). + * + * The reason for merging these three functions is to make sure that + * they can never be inconsistent. 
+ */ +int efx_ethtool_fill_self_tests(struct efx_nic *efx, + struct efx_self_tests *tests, + u8 *strings, u64 *data) +{ + struct efx_channel *channel; + unsigned int n = 0, i; + enum efx_loopback_mode mode; + + efx_fill_test(n++, strings, data, &tests->phy_alive, + "phy", 0, "alive", NULL); + efx_fill_test(n++, strings, data, &tests->nvram, + "core", 0, "nvram", NULL); + efx_fill_test(n++, strings, data, &tests->interrupt, + "core", 0, "interrupt", NULL); + + /* Event queues */ + efx_for_each_channel(channel, efx) { + efx_fill_test(n++, strings, data, + &tests->eventq_dma[channel->channel], + EFX_CHANNEL_NAME(channel), + "eventq.dma", NULL); + efx_fill_test(n++, strings, data, + &tests->eventq_int[channel->channel], + EFX_CHANNEL_NAME(channel), + "eventq.int", NULL); + } + + efx_fill_test(n++, strings, data, &tests->memory, + "core", 0, "memory", NULL); + efx_fill_test(n++, strings, data, &tests->registers, + "core", 0, "registers", NULL); + + if (efx->phy_op->run_tests != NULL) { + EFX_WARN_ON_PARANOID(efx->phy_op->test_name == NULL); + + for (i = 0; true; ++i) { + const char *name; + + EFX_WARN_ON_PARANOID(i >= EFX_MAX_PHY_TESTS); + name = efx->phy_op->test_name(efx, i); + if (name == NULL) + break; + + efx_fill_test(n++, strings, data, &tests->phy_ext[i], + "phy", 0, name, NULL); + } + } + + /* Loopback tests */ + for (mode = LOOPBACK_NONE; mode <= LOOPBACK_TEST_MAX; mode++) { + if (!(efx->loopback_modes & (1 << mode))) + continue; + n = efx_fill_loopback_test(efx, + &tests->loopback[mode], mode, n, + strings, data); + } + + return n; +} + +static size_t efx_describe_per_queue_stats(struct efx_nic *efx, u8 *strings) +{ + size_t n_stats = 0; + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) { + if (efx_channel_has_tx_queues(channel)) { + n_stats++; + if (strings != NULL) { + snprintf(strings, ETH_GSTRING_LEN, + "tx-%u.tx_packets", + channel->tx_queue[0].queue / + EFX_TXQ_TYPES); + + strings += ETH_GSTRING_LEN; + } + } + } + 
efx_for_each_channel(channel, efx) { + if (efx_channel_has_rx_queue(channel)) { + n_stats++; + if (strings != NULL) { + snprintf(strings, ETH_GSTRING_LEN, + "rx-%d.rx_packets", channel->channel); + strings += ETH_GSTRING_LEN; + } + } + } + if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) { + unsigned short xdp; + + for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) { + n_stats++; + if (strings) { + snprintf(strings, ETH_GSTRING_LEN, + "tx-xdp-cpu-%hu.tx_packets", xdp); + strings += ETH_GSTRING_LEN; + } + } + } + + return n_stats; +} + +int efx_ethtool_get_sset_count(struct net_device *net_dev, int string_set) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + switch (string_set) { + case ETH_SS_STATS: + return efx->type->describe_stats(efx, NULL) + + EFX_ETHTOOL_SW_STAT_COUNT + + efx_describe_per_queue_stats(efx, NULL) + + efx_ptp_describe_stats(efx, NULL); + case ETH_SS_TEST: + return efx_ethtool_fill_self_tests(efx, NULL, NULL, NULL); + default: + return -EINVAL; + } +} + +void efx_ethtool_get_strings(struct net_device *net_dev, + u32 string_set, u8 *strings) +{ + struct efx_nic *efx = netdev_priv(net_dev); + int i; + + switch (string_set) { + case ETH_SS_STATS: + strings += (efx->type->describe_stats(efx, strings) * + ETH_GSTRING_LEN); + for (i = 0; i < EFX_ETHTOOL_SW_STAT_COUNT; i++) + strlcpy(strings + i * ETH_GSTRING_LEN, + efx_sw_stat_desc[i].name, ETH_GSTRING_LEN); + strings += EFX_ETHTOOL_SW_STAT_COUNT * ETH_GSTRING_LEN; + strings += (efx_describe_per_queue_stats(efx, strings) * + ETH_GSTRING_LEN); + efx_ptp_describe_stats(efx, strings); + break; + case ETH_SS_TEST: + efx_ethtool_fill_self_tests(efx, NULL, strings, NULL); + break; + default: + /* No other string sets */ + break; + } +} + +void efx_ethtool_get_stats(struct net_device *net_dev, + struct ethtool_stats *stats, + u64 *data) +{ + struct efx_nic *efx = netdev_priv(net_dev); + const struct efx_sw_stat_desc *stat; + struct efx_channel *channel; + struct efx_tx_queue *tx_queue; + struct 
efx_rx_queue *rx_queue; + int i; + + spin_lock_bh(&efx->stats_lock); + + /* Get NIC statistics */ + data += efx->type->update_stats(efx, data, NULL); + + /* Get software statistics */ + for (i = 0; i < EFX_ETHTOOL_SW_STAT_COUNT; i++) { + stat = &efx_sw_stat_desc[i]; + switch (stat->source) { + case EFX_ETHTOOL_STAT_SOURCE_nic: + data[i] = stat->get_stat((void *)efx + stat->offset); + break; + case EFX_ETHTOOL_STAT_SOURCE_channel: + data[i] = 0; + efx_for_each_channel(channel, efx) + data[i] += stat->get_stat((void *)channel + + stat->offset); + break; + case EFX_ETHTOOL_STAT_SOURCE_tx_queue: + data[i] = 0; + efx_for_each_channel(channel, efx) { + efx_for_each_channel_tx_queue(tx_queue, channel) + data[i] += + stat->get_stat((void *)tx_queue + + stat->offset); + } + break; + } + } + data += EFX_ETHTOOL_SW_STAT_COUNT; + + spin_unlock_bh(&efx->stats_lock); + + efx_for_each_channel(channel, efx) { + if (efx_channel_has_tx_queues(channel)) { + *data = 0; + efx_for_each_channel_tx_queue(tx_queue, channel) { + *data += tx_queue->tx_packets; + } + data++; + } + } + efx_for_each_channel(channel, efx) { + if (efx_channel_has_rx_queue(channel)) { + *data = 0; + efx_for_each_channel_rx_queue(rx_queue, channel) { + *data += rx_queue->rx_packets; + } + data++; + } + } + if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) { + int xdp; + + for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) { + data[0] = efx->xdp_tx_queues[xdp]->tx_packets; + data++; + } + } + + efx_ptp_update_stats(efx, data); +} diff --git a/drivers/net/ethernet/sfc/ethtool_common.h b/drivers/net/ethernet/sfc/ethtool_common.h new file mode 100644 index 000000000000..fa624313f330 --- /dev/null +++ b/drivers/net/ethernet/sfc/ethtool_common.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2019 Solarflare Communications Inc. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_ETHTOOL_COMMON_H +#define EFX_ETHTOOL_COMMON_H + +void efx_ethtool_get_drvinfo(struct net_device *net_dev, + struct ethtool_drvinfo *info); +u32 efx_ethtool_get_msglevel(struct net_device *net_dev); +void efx_ethtool_set_msglevel(struct net_device *net_dev, u32 msg_enable); +void efx_ethtool_get_pauseparam(struct net_device *net_dev, + struct ethtool_pauseparam *pause); +int efx_ethtool_fill_self_tests(struct efx_nic *efx, + struct efx_self_tests *tests, + u8 *strings, u64 *data); +int efx_ethtool_get_sset_count(struct net_device *net_dev, int string_set); +void efx_ethtool_get_strings(struct net_device *net_dev, u32 string_set, + u8 *strings); +void efx_ethtool_get_stats(struct net_device *net_dev, + struct ethtool_stats *stats __attribute__ ((unused)), + u64 *data); + +#endif diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index eecc348b1c32..bee4cd9d7135 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -2108,7 +2108,7 @@ static void ef4_net_stats(struct net_device *net_dev, } /* Context: netif_tx_lock held, BHs disabled. 
*/ -static void ef4_watchdog(struct net_device *net_dev) +static void ef4_watchdog(struct net_device *net_dev, unsigned int txqueue) { struct ef4_nic *efx = netdev_priv(net_dev); diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index eedd32e2bfcb..dbbb898adddb 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -15,6 +15,7 @@ #include "net_driver.h" #include "bitfield.h" #include "efx.h" +#include "rx_common.h" #include "nic.h" #include "farch_regs.h" #include "sriov.h" diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h index 9081f84a2604..54a45010b576 100644 --- a/drivers/net/ethernet/sfc/mcdi.h +++ b/drivers/net/ethernet/sfc/mcdi.h @@ -346,11 +346,8 @@ int efx_mcdi_flush_rxqs(struct efx_nic *efx); int efx_mcdi_port_probe(struct efx_nic *efx); void efx_mcdi_port_remove(struct efx_nic *efx); int efx_mcdi_port_reconfigure(struct efx_nic *efx); -int efx_mcdi_port_get_number(struct efx_nic *efx); u32 efx_mcdi_phy_get_caps(struct efx_nic *efx); void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev); -int efx_mcdi_set_mac(struct efx_nic *efx); -#define EFX_MC_STATS_GENERATION_INVALID ((__force __le64)(-1)) void efx_mcdi_mac_start_stats(struct efx_nic *efx); void efx_mcdi_mac_stop_stats(struct efx_nic *efx); void efx_mcdi_mac_pull_stats(struct efx_nic *efx); diff --git a/drivers/net/ethernet/sfc/mcdi_functions.c b/drivers/net/ethernet/sfc/mcdi_functions.c new file mode 100644 index 000000000000..dcfe78b0fa5a --- /dev/null +++ b/drivers/net/ethernet/sfc/mcdi_functions.c @@ -0,0 +1,386 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2019 Solarflare Communications Inc. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include "net_driver.h" +#include "efx.h" +#include "nic.h" +#include "mcdi_functions.h" +#include "mcdi.h" +#include "mcdi_pcol.h" + +int efx_mcdi_free_vis(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF_ERR(outbuf); + size_t outlen; + int rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FREE_VIS, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + + /* -EALREADY means nothing to free, so ignore */ + if (rc == -EALREADY) + rc = 0; + if (rc) + efx_mcdi_display_error(efx, MC_CMD_FREE_VIS, 0, outbuf, outlen, + rc); + return rc; +} + +int efx_mcdi_alloc_vis(struct efx_nic *efx, unsigned int min_vis, + unsigned int max_vis, unsigned int *vi_base, + unsigned int *allocated_vis) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_ALLOC_VIS_OUT_LEN); + MCDI_DECLARE_BUF(inbuf, MC_CMD_ALLOC_VIS_IN_LEN); + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MIN_VI_COUNT, min_vis); + MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MAX_VI_COUNT, max_vis); + rc = efx_mcdi_rpc(efx, MC_CMD_ALLOC_VIS, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc != 0) + return rc; + + if (outlen < MC_CMD_ALLOC_VIS_OUT_LEN) + return -EIO; + + netif_dbg(efx, drv, efx->net_dev, "base VI is A0x%03x\n", + MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE)); + + if (vi_base) + *vi_base = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE); + if (allocated_vis) + *allocated_vis = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_COUNT); + return 0; +} + +int efx_mcdi_ev_probe(struct efx_channel *channel) +{ + return efx_nic_alloc_buffer(channel->efx, &channel->eventq.buf, + (channel->eventq_mask + 1) * + sizeof(efx_qword_t), + GFP_KERNEL); +} + +int efx_mcdi_ev_init(struct efx_channel *channel, bool v1_cut_thru, bool v2) +{ + MCDI_DECLARE_BUF(inbuf, + MC_CMD_INIT_EVQ_V2_IN_LEN(EFX_MAX_EVQ_SIZE * 8 / + EFX_BUF_SIZE)); + 
MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_EVQ_V2_OUT_LEN); + size_t entries = channel->eventq.buf.len / EFX_BUF_SIZE; + struct efx_nic *efx = channel->efx; + size_t inlen, outlen; + dma_addr_t dma_addr; + int rc, i; + + /* Fill event queue with all ones (i.e. empty events) */ + memset(channel->eventq.buf.addr, 0xff, channel->eventq.buf.len); + + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_SIZE, channel->eventq_mask + 1); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_INSTANCE, channel->channel); + /* INIT_EVQ expects index in vector table, not absolute */ + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_IRQ_NUM, channel->channel); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_MODE, + MC_CMD_INIT_EVQ_IN_TMR_MODE_DIS); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_LOAD, 0); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_RELOAD, 0); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_MODE, + MC_CMD_INIT_EVQ_IN_COUNT_MODE_DIS); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_THRSHLD, 0); + + if (v2) { + /* Use the new generic approach to specifying event queue + * configuration, requesting lower latency or higher throughput. + * The options that actually get used appear in the output. 
+ */ + MCDI_POPULATE_DWORD_2(inbuf, INIT_EVQ_V2_IN_FLAGS, + INIT_EVQ_V2_IN_FLAG_INTERRUPTING, 1, + INIT_EVQ_V2_IN_FLAG_TYPE, + MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_AUTO); + } else { + MCDI_POPULATE_DWORD_4(inbuf, INIT_EVQ_IN_FLAGS, + INIT_EVQ_IN_FLAG_INTERRUPTING, 1, + INIT_EVQ_IN_FLAG_RX_MERGE, 1, + INIT_EVQ_IN_FLAG_TX_MERGE, 1, + INIT_EVQ_IN_FLAG_CUT_THRU, v1_cut_thru); + } + + dma_addr = channel->eventq.buf.dma_addr; + for (i = 0; i < entries; ++i) { + MCDI_SET_ARRAY_QWORD(inbuf, INIT_EVQ_IN_DMA_ADDR, i, dma_addr); + dma_addr += EFX_BUF_SIZE; + } + + inlen = MC_CMD_INIT_EVQ_IN_LEN(entries); + + rc = efx_mcdi_rpc(efx, MC_CMD_INIT_EVQ, inbuf, inlen, + outbuf, sizeof(outbuf), &outlen); + + if (outlen >= MC_CMD_INIT_EVQ_V2_OUT_LEN) + netif_dbg(efx, drv, efx->net_dev, + "Channel %d using event queue flags %08x\n", + channel->channel, + MCDI_DWORD(outbuf, INIT_EVQ_V2_OUT_FLAGS)); + + return rc; +} + +void efx_mcdi_ev_remove(struct efx_channel *channel) +{ + efx_nic_free_buffer(channel->efx, &channel->eventq.buf); +} + +void efx_mcdi_ev_fini(struct efx_channel *channel) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_EVQ_IN_LEN); + MCDI_DECLARE_BUF_ERR(outbuf); + struct efx_nic *efx = channel->efx; + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, FINI_EVQ_IN_INSTANCE, channel->channel); + + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_EVQ, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + + if (rc && rc != -EALREADY) + goto fail; + + return; + +fail: + efx_mcdi_display_error(efx, MC_CMD_FINI_EVQ, MC_CMD_FINI_EVQ_IN_LEN, + outbuf, outlen, rc); +} + +int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue, bool tso_v2) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 / + EFX_BUF_SIZE)); + bool csum_offload = tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD; + size_t entries = tx_queue->txd.buf.len / EFX_BUF_SIZE; + struct efx_channel *channel = tx_queue->channel; + struct efx_nic *efx = tx_queue->efx; + struct efx_ef10_nic_data *nic_data; + dma_addr_t dma_addr; + 
size_t inlen; + int rc, i; + + BUILD_BUG_ON(MC_CMD_INIT_TXQ_OUT_LEN != 0); + + nic_data = efx->nic_data; + + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_SIZE, tx_queue->ptr_mask + 1); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_TARGET_EVQ, channel->channel); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_LABEL, tx_queue->queue); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_INSTANCE, tx_queue->queue); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_OWNER_ID, 0); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, nic_data->vport_id); + + dma_addr = tx_queue->txd.buf.dma_addr; + + netif_dbg(efx, hw, efx->net_dev, "pushing TXQ %d. %zu entries (%llx)\n", + tx_queue->queue, entries, (u64)dma_addr); + + for (i = 0; i < entries; ++i) { + MCDI_SET_ARRAY_QWORD(inbuf, INIT_TXQ_IN_DMA_ADDR, i, dma_addr); + dma_addr += EFX_BUF_SIZE; + } + + inlen = MC_CMD_INIT_TXQ_IN_LEN(entries); + + do { + MCDI_POPULATE_DWORD_4(inbuf, INIT_TXQ_IN_FLAGS, + /* This flag was removed from mcdi_pcol.h for + * the non-_EXT version of INIT_TXQ. However, + * firmware still honours it. + */ + INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, tso_v2, + INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload, + INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload, + INIT_TXQ_EXT_IN_FLAG_TIMESTAMP, + tx_queue->timestamping); + + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_INIT_TXQ, inbuf, inlen, + NULL, 0, NULL); + if (rc == -ENOSPC && tso_v2) { + /* Retry without TSOv2 if we're short on contexts. */ + tso_v2 = false; + netif_warn(efx, probe, efx->net_dev, + "TSOv2 context not available to segment in " + "hardware. 
TCP performance may be reduced.\n" + ); + } else if (rc) { + efx_mcdi_display_error(efx, MC_CMD_INIT_TXQ, + MC_CMD_INIT_TXQ_EXT_IN_LEN, + NULL, 0, rc); + goto fail; + } + } while (rc); + + return 0; + +fail: + return rc; +} + +void efx_mcdi_tx_remove(struct efx_tx_queue *tx_queue) +{ + efx_nic_free_buffer(tx_queue->efx, &tx_queue->txd.buf); +} + +void efx_mcdi_tx_fini(struct efx_tx_queue *tx_queue) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_TXQ_IN_LEN); + MCDI_DECLARE_BUF_ERR(outbuf); + struct efx_nic *efx = tx_queue->efx; + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, FINI_TXQ_IN_INSTANCE, + tx_queue->queue); + + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_TXQ, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + + if (rc && rc != -EALREADY) + goto fail; + + return; + +fail: + efx_mcdi_display_error(efx, MC_CMD_FINI_TXQ, MC_CMD_FINI_TXQ_IN_LEN, + outbuf, outlen, rc); +} + +int efx_mcdi_rx_probe(struct efx_rx_queue *rx_queue) +{ + return efx_nic_alloc_buffer(rx_queue->efx, &rx_queue->rxd.buf, + (rx_queue->ptr_mask + 1) * + sizeof(efx_qword_t), + GFP_KERNEL); +} + +void efx_mcdi_rx_init(struct efx_rx_queue *rx_queue) +{ + MCDI_DECLARE_BUF(inbuf, + MC_CMD_INIT_RXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 / + EFX_BUF_SIZE)); + struct efx_channel *channel = efx_rx_queue_channel(rx_queue); + size_t entries = rx_queue->rxd.buf.len / EFX_BUF_SIZE; + struct efx_nic *efx = rx_queue->efx; + struct efx_ef10_nic_data *nic_data = efx->nic_data; + dma_addr_t dma_addr; + size_t inlen; + int rc; + int i; + BUILD_BUG_ON(MC_CMD_INIT_RXQ_OUT_LEN != 0); + + rx_queue->scatter_n = 0; + rx_queue->scatter_len = 0; + + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_SIZE, rx_queue->ptr_mask + 1); + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_TARGET_EVQ, channel->channel); + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_LABEL, efx_rx_queue_index(rx_queue)); + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_INSTANCE, + efx_rx_queue_index(rx_queue)); + MCDI_POPULATE_DWORD_2(inbuf, INIT_RXQ_IN_FLAGS, + INIT_RXQ_IN_FLAG_PREFIX, 1, + 
INIT_RXQ_IN_FLAG_TIMESTAMP, 1); + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_OWNER_ID, 0); + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_PORT_ID, nic_data->vport_id); + + dma_addr = rx_queue->rxd.buf.dma_addr; + + netif_dbg(efx, hw, efx->net_dev, "pushing RXQ %d. %zu entries (%llx)\n", + efx_rx_queue_index(rx_queue), entries, (u64)dma_addr); + + for (i = 0; i < entries; ++i) { + MCDI_SET_ARRAY_QWORD(inbuf, INIT_RXQ_IN_DMA_ADDR, i, dma_addr); + dma_addr += EFX_BUF_SIZE; + } + + inlen = MC_CMD_INIT_RXQ_IN_LEN(entries); + + rc = efx_mcdi_rpc(efx, MC_CMD_INIT_RXQ, inbuf, inlen, + NULL, 0, NULL); + if (rc) + netdev_WARN(efx->net_dev, "failed to initialise RXQ %d\n", + efx_rx_queue_index(rx_queue)); +} + +void efx_mcdi_rx_remove(struct efx_rx_queue *rx_queue) +{ + efx_nic_free_buffer(rx_queue->efx, &rx_queue->rxd.buf); +} + +void efx_mcdi_rx_fini(struct efx_rx_queue *rx_queue) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_RXQ_IN_LEN); + MCDI_DECLARE_BUF_ERR(outbuf); + struct efx_nic *efx = rx_queue->efx; + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, FINI_RXQ_IN_INSTANCE, + efx_rx_queue_index(rx_queue)); + + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_RXQ, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + + if (rc && rc != -EALREADY) + goto fail; + + return; + +fail: + efx_mcdi_display_error(efx, MC_CMD_FINI_RXQ, MC_CMD_FINI_RXQ_IN_LEN, + outbuf, outlen, rc); +} + +int efx_mcdi_window_mode_to_stride(struct efx_nic *efx, u8 vi_window_mode) +{ + switch (vi_window_mode) { + case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K: + efx->vi_stride = 8192; + break; + case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K: + efx->vi_stride = 16384; + break; + case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K: + efx->vi_stride = 65536; + break; + default: + netif_err(efx, probe, efx->net_dev, + "Unrecognised VI window mode %d\n", + vi_window_mode); + return -EIO; + } + netif_dbg(efx, probe, efx->net_dev, "vi_stride = %u\n", + efx->vi_stride); + return 0; +} + +int 
efx_get_pf_index(struct efx_nic *efx, unsigned int *pf_index) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_FUNCTION_INFO_OUT_LEN); + size_t outlen; + int rc; + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_FUNCTION_INFO, NULL, 0, outbuf, + sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < sizeof(outbuf)) + return -EIO; + + *pf_index = MCDI_DWORD(outbuf, GET_FUNCTION_INFO_OUT_PF); + return 0; +} diff --git a/drivers/net/ethernet/sfc/mcdi_functions.h b/drivers/net/ethernet/sfc/mcdi_functions.h new file mode 100644 index 000000000000..ca4a5ac1a66b --- /dev/null +++ b/drivers/net/ethernet/sfc/mcdi_functions.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. 
+ */ +#ifndef EFX_MCDI_FUNCTIONS_H +#define EFX_MCDI_FUNCTIONS_H + +int efx_mcdi_alloc_vis(struct efx_nic *efx, unsigned int min_vis, + unsigned int max_vis, unsigned int *vi_base, + unsigned int *allocated_vis); +int efx_mcdi_free_vis(struct efx_nic *efx); + +int efx_mcdi_ev_probe(struct efx_channel *channel); +int efx_mcdi_ev_init(struct efx_channel *channel, bool v1_cut_thru, bool v2); +void efx_mcdi_ev_remove(struct efx_channel *channel); +void efx_mcdi_ev_fini(struct efx_channel *channel); +int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue, bool tso_v2); +void efx_mcdi_tx_remove(struct efx_tx_queue *tx_queue); +void efx_mcdi_tx_fini(struct efx_tx_queue *tx_queue); +int efx_mcdi_rx_probe(struct efx_rx_queue *rx_queue); +void efx_mcdi_rx_init(struct efx_rx_queue *rx_queue); +void efx_mcdi_rx_remove(struct efx_rx_queue *rx_queue); +void efx_mcdi_rx_fini(struct efx_rx_queue *rx_queue); +int efx_mcdi_window_mode_to_stride(struct efx_nic *efx, u8 vi_window_mode); +int efx_get_pf_index(struct efx_nic *efx, unsigned int *pf_index); + +#endif diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c index fb7cde4980ed..ab5227b13ae6 100644 --- a/drivers/net/ethernet/sfc/mcdi_port.c +++ b/drivers/net/ethernet/sfc/mcdi_port.c @@ -14,106 +14,7 @@ #include "mcdi_pcol.h" #include "nic.h" #include "selftest.h" - -struct efx_mcdi_phy_data { - u32 flags; - u32 type; - u32 supported_cap; - u32 channel; - u32 port; - u32 stats_mask; - u8 name[20]; - u32 media; - u32 mmd_mask; - u8 revision[20]; - u32 forced_cap; -}; - -static int -efx_mcdi_get_phy_cfg(struct efx_nic *efx, struct efx_mcdi_phy_data *cfg) -{ - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_CFG_OUT_LEN); - size_t outlen; - int rc; - - BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_IN_LEN != 0); - BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_OUT_NAME_LEN != sizeof(cfg->name)); - - rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_CFG, NULL, 0, - outbuf, sizeof(outbuf), &outlen); - if (rc) - goto fail; - - if (outlen < 
MC_CMD_GET_PHY_CFG_OUT_LEN) { - rc = -EIO; - goto fail; - } - - cfg->flags = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_FLAGS); - cfg->type = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_TYPE); - cfg->supported_cap = - MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_SUPPORTED_CAP); - cfg->channel = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_CHANNEL); - cfg->port = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_PRT); - cfg->stats_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_STATS_MASK); - memcpy(cfg->name, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_NAME), - sizeof(cfg->name)); - cfg->media = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MEDIA_TYPE); - cfg->mmd_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MMD_MASK); - memcpy(cfg->revision, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_REVISION), - sizeof(cfg->revision)); - - return 0; - -fail: - netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); - return rc; -} - -static int efx_mcdi_set_link(struct efx_nic *efx, u32 capabilities, - u32 flags, u32 loopback_mode, - u32 loopback_speed) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_LINK_IN_LEN); - int rc; - - BUILD_BUG_ON(MC_CMD_SET_LINK_OUT_LEN != 0); - - MCDI_SET_DWORD(inbuf, SET_LINK_IN_CAP, capabilities); - MCDI_SET_DWORD(inbuf, SET_LINK_IN_FLAGS, flags); - MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_MODE, loopback_mode); - MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_SPEED, loopback_speed); - - rc = efx_mcdi_rpc(efx, MC_CMD_SET_LINK, inbuf, sizeof(inbuf), - NULL, 0, NULL); - return rc; -} - -static int efx_mcdi_loopback_modes(struct efx_nic *efx, u64 *loopback_modes) -{ - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LOOPBACK_MODES_OUT_LEN); - size_t outlen; - int rc; - - rc = efx_mcdi_rpc(efx, MC_CMD_GET_LOOPBACK_MODES, NULL, 0, - outbuf, sizeof(outbuf), &outlen); - if (rc) - goto fail; - - if (outlen < (MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_OFST + - MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_LEN)) { - rc = -EIO; - goto fail; - } - - *loopback_modes = MCDI_QWORD(outbuf, GET_LOOPBACK_MODES_OUT_SUGGESTED); - - return 0; - -fail: - netif_err(efx, hw, 
efx->net_dev, "%s: failed rc=%d\n", __func__, rc); - return rc; -} +#include "mcdi_port_common.h" static int efx_mcdi_mdio_read(struct net_device *net_dev, int prtad, int devad, u16 addr) @@ -168,246 +69,6 @@ static int efx_mcdi_mdio_write(struct net_device *net_dev, return 0; } -static void mcdi_to_ethtool_linkset(u32 media, u32 cap, unsigned long *linkset) -{ - #define SET_BIT(name) __set_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \ - linkset) - - bitmap_zero(linkset, __ETHTOOL_LINK_MODE_MASK_NBITS); - switch (media) { - case MC_CMD_MEDIA_KX4: - SET_BIT(Backplane); - if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) - SET_BIT(1000baseKX_Full); - if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) - SET_BIT(10000baseKX4_Full); - if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) - SET_BIT(40000baseKR4_Full); - break; - - case MC_CMD_MEDIA_XFP: - case MC_CMD_MEDIA_SFP_PLUS: - case MC_CMD_MEDIA_QSFP_PLUS: - SET_BIT(FIBRE); - if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) - SET_BIT(1000baseT_Full); - if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) - SET_BIT(10000baseT_Full); - if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) - SET_BIT(40000baseCR4_Full); - if (cap & (1 << MC_CMD_PHY_CAP_100000FDX_LBN)) - SET_BIT(100000baseCR4_Full); - if (cap & (1 << MC_CMD_PHY_CAP_25000FDX_LBN)) - SET_BIT(25000baseCR_Full); - if (cap & (1 << MC_CMD_PHY_CAP_50000FDX_LBN)) - SET_BIT(50000baseCR2_Full); - break; - - case MC_CMD_MEDIA_BASE_T: - SET_BIT(TP); - if (cap & (1 << MC_CMD_PHY_CAP_10HDX_LBN)) - SET_BIT(10baseT_Half); - if (cap & (1 << MC_CMD_PHY_CAP_10FDX_LBN)) - SET_BIT(10baseT_Full); - if (cap & (1 << MC_CMD_PHY_CAP_100HDX_LBN)) - SET_BIT(100baseT_Half); - if (cap & (1 << MC_CMD_PHY_CAP_100FDX_LBN)) - SET_BIT(100baseT_Full); - if (cap & (1 << MC_CMD_PHY_CAP_1000HDX_LBN)) - SET_BIT(1000baseT_Half); - if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) - SET_BIT(1000baseT_Full); - if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) - SET_BIT(10000baseT_Full); - break; - } - - if (cap & (1 << 
MC_CMD_PHY_CAP_PAUSE_LBN)) - SET_BIT(Pause); - if (cap & (1 << MC_CMD_PHY_CAP_ASYM_LBN)) - SET_BIT(Asym_Pause); - if (cap & (1 << MC_CMD_PHY_CAP_AN_LBN)) - SET_BIT(Autoneg); - - #undef SET_BIT -} - -static u32 ethtool_linkset_to_mcdi_cap(const unsigned long *linkset) -{ - u32 result = 0; - - #define TEST_BIT(name) test_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \ - linkset) - - if (TEST_BIT(10baseT_Half)) - result |= (1 << MC_CMD_PHY_CAP_10HDX_LBN); - if (TEST_BIT(10baseT_Full)) - result |= (1 << MC_CMD_PHY_CAP_10FDX_LBN); - if (TEST_BIT(100baseT_Half)) - result |= (1 << MC_CMD_PHY_CAP_100HDX_LBN); - if (TEST_BIT(100baseT_Full)) - result |= (1 << MC_CMD_PHY_CAP_100FDX_LBN); - if (TEST_BIT(1000baseT_Half)) - result |= (1 << MC_CMD_PHY_CAP_1000HDX_LBN); - if (TEST_BIT(1000baseT_Full) || TEST_BIT(1000baseKX_Full)) - result |= (1 << MC_CMD_PHY_CAP_1000FDX_LBN); - if (TEST_BIT(10000baseT_Full) || TEST_BIT(10000baseKX4_Full)) - result |= (1 << MC_CMD_PHY_CAP_10000FDX_LBN); - if (TEST_BIT(40000baseCR4_Full) || TEST_BIT(40000baseKR4_Full)) - result |= (1 << MC_CMD_PHY_CAP_40000FDX_LBN); - if (TEST_BIT(100000baseCR4_Full)) - result |= (1 << MC_CMD_PHY_CAP_100000FDX_LBN); - if (TEST_BIT(25000baseCR_Full)) - result |= (1 << MC_CMD_PHY_CAP_25000FDX_LBN); - if (TEST_BIT(50000baseCR2_Full)) - result |= (1 << MC_CMD_PHY_CAP_50000FDX_LBN); - if (TEST_BIT(Pause)) - result |= (1 << MC_CMD_PHY_CAP_PAUSE_LBN); - if (TEST_BIT(Asym_Pause)) - result |= (1 << MC_CMD_PHY_CAP_ASYM_LBN); - if (TEST_BIT(Autoneg)) - result |= (1 << MC_CMD_PHY_CAP_AN_LBN); - - #undef TEST_BIT - - return result; -} - -static u32 efx_get_mcdi_phy_flags(struct efx_nic *efx) -{ - struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; - enum efx_phy_mode mode, supported; - u32 flags; - - /* TODO: Advertise the capabilities supported by this PHY */ - supported = 0; - if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_TXDIS_LBN)) - supported |= PHY_MODE_TX_DISABLED; - if (phy_cfg->flags & (1 << 
MC_CMD_GET_PHY_CFG_OUT_LOWPOWER_LBN)) - supported |= PHY_MODE_LOW_POWER; - if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_POWEROFF_LBN)) - supported |= PHY_MODE_OFF; - - mode = efx->phy_mode & supported; - - flags = 0; - if (mode & PHY_MODE_TX_DISABLED) - flags |= (1 << MC_CMD_SET_LINK_IN_TXDIS_LBN); - if (mode & PHY_MODE_LOW_POWER) - flags |= (1 << MC_CMD_SET_LINK_IN_LOWPOWER_LBN); - if (mode & PHY_MODE_OFF) - flags |= (1 << MC_CMD_SET_LINK_IN_POWEROFF_LBN); - - return flags; -} - -static u8 mcdi_to_ethtool_media(u32 media) -{ - switch (media) { - case MC_CMD_MEDIA_XAUI: - case MC_CMD_MEDIA_CX4: - case MC_CMD_MEDIA_KX4: - return PORT_OTHER; - - case MC_CMD_MEDIA_XFP: - case MC_CMD_MEDIA_SFP_PLUS: - case MC_CMD_MEDIA_QSFP_PLUS: - return PORT_FIBRE; - - case MC_CMD_MEDIA_BASE_T: - return PORT_TP; - - default: - return PORT_OTHER; - } -} - -static void efx_mcdi_phy_decode_link(struct efx_nic *efx, - struct efx_link_state *link_state, - u32 speed, u32 flags, u32 fcntl) -{ - switch (fcntl) { - case MC_CMD_FCNTL_AUTO: - WARN_ON(1); /* This is not a link mode */ - link_state->fc = EFX_FC_AUTO | EFX_FC_TX | EFX_FC_RX; - break; - case MC_CMD_FCNTL_BIDIR: - link_state->fc = EFX_FC_TX | EFX_FC_RX; - break; - case MC_CMD_FCNTL_RESPOND: - link_state->fc = EFX_FC_RX; - break; - default: - WARN_ON(1); - /* Fall through */ - case MC_CMD_FCNTL_OFF: - link_state->fc = 0; - break; - } - - link_state->up = !!(flags & (1 << MC_CMD_GET_LINK_OUT_LINK_UP_LBN)); - link_state->fd = !!(flags & (1 << MC_CMD_GET_LINK_OUT_FULL_DUPLEX_LBN)); - link_state->speed = speed; -} - -/* The semantics of the ethtool FEC mode bitmask are not well defined, - * particularly the meaning of combinations of bits. Which means we get to - * define our own semantics, as follows: - * OFF overrides any other bits, and means "disable all FEC" (with the - * exception of 25G KR4/CR4, where it is not possible to reject it if AN - * partner requests it). 
- * AUTO on its own means use cable requirements and link partner autoneg with - * fw-default preferences for the cable type. - * AUTO and either RS or BASER means use the specified FEC type if cable and - * link partner support it, otherwise autoneg/fw-default. - * RS or BASER alone means use the specified FEC type if cable and link partner - * support it and either requests it, otherwise no FEC. - * Both RS and BASER (whether AUTO or not) means use FEC if cable and link - * partner support it, preferring RS to BASER. - */ -static u32 ethtool_fec_caps_to_mcdi(u32 ethtool_cap) -{ - u32 ret = 0; - - if (ethtool_cap & ETHTOOL_FEC_OFF) - return 0; - - if (ethtool_cap & ETHTOOL_FEC_AUTO) - ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) | - (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) | - (1 << MC_CMD_PHY_CAP_RS_FEC_LBN); - if (ethtool_cap & ETHTOOL_FEC_RS) - ret |= (1 << MC_CMD_PHY_CAP_RS_FEC_LBN) | - (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN); - if (ethtool_cap & ETHTOOL_FEC_BASER) - ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) | - (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) | - (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN) | - (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN); - return ret; -} - -/* Invert ethtool_fec_caps_to_mcdi. There are two combinations that function - * can never produce, (baser xor rs) and neither req; the implementation below - * maps both of those to AUTO. This should never matter, and it's not clear - * what a better mapping would be anyway. - */ -static u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g) -{ - bool rs = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_LBN), - rs_req = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN), - baser = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) - : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN), - baser_req = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN) - : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN); - - if (!baser && !rs) - return ETHTOOL_FEC_OFF; - return (rs_req ? 
ETHTOOL_FEC_RS : 0) | - (baser_req ? ETHTOOL_FEC_BASER : 0) | - (baser == baser_req && rs == rs_req ? 0 : ETHTOOL_FEC_AUTO); -} - static int efx_mcdi_phy_probe(struct efx_nic *efx) { struct efx_mcdi_phy_data *phy_data; @@ -527,58 +188,6 @@ int efx_mcdi_port_reconfigure(struct efx_nic *efx) efx->loopback_mode, 0); } -/* Verify that the forced flow control settings (!EFX_FC_AUTO) are - * supported by the link partner. Warn the user if this isn't the case - */ -static void efx_mcdi_phy_check_fcntl(struct efx_nic *efx, u32 lpa) -{ - struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; - u32 rmtadv; - - /* The link partner capabilities are only relevant if the - * link supports flow control autonegotiation */ - if (~phy_cfg->supported_cap & (1 << MC_CMD_PHY_CAP_AN_LBN)) - return; - - /* If flow control autoneg is supported and enabled, then fine */ - if (efx->wanted_fc & EFX_FC_AUTO) - return; - - rmtadv = 0; - if (lpa & (1 << MC_CMD_PHY_CAP_PAUSE_LBN)) - rmtadv |= ADVERTISED_Pause; - if (lpa & (1 << MC_CMD_PHY_CAP_ASYM_LBN)) - rmtadv |= ADVERTISED_Asym_Pause; - - if ((efx->wanted_fc & EFX_FC_TX) && rmtadv == ADVERTISED_Asym_Pause) - netif_err(efx, link, efx->net_dev, - "warning: link partner doesn't support pause frames"); -} - -static bool efx_mcdi_phy_poll(struct efx_nic *efx) -{ - struct efx_link_state old_state = efx->link_state; - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN); - int rc; - - WARN_ON(!mutex_is_locked(&efx->mac_lock)); - - BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0); - - rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0, - outbuf, sizeof(outbuf), NULL); - if (rc) - efx->link_state.up = false; - else - efx_mcdi_phy_decode_link( - efx, &efx->link_state, - MCDI_DWORD(outbuf, GET_LINK_OUT_LINK_SPEED), - MCDI_DWORD(outbuf, GET_LINK_OUT_FLAGS), - MCDI_DWORD(outbuf, GET_LINK_OUT_FCNTL)); - - return !efx_link_state_equal(&efx->link_state, &old_state); -} - static void efx_mcdi_phy_remove(struct efx_nic *efx) { struct efx_mcdi_phy_data *phy_data = 
efx->phy_data; @@ -666,58 +275,6 @@ efx_mcdi_phy_set_link_ksettings(struct efx_nic *efx, return 0; } -static int efx_mcdi_phy_get_fecparam(struct efx_nic *efx, - struct ethtool_fecparam *fec) -{ - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_V2_LEN); - u32 caps, active, speed; /* MCDI format */ - bool is_25g = false; - size_t outlen; - int rc; - - BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0); - rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0, - outbuf, sizeof(outbuf), &outlen); - if (rc) - return rc; - if (outlen < MC_CMD_GET_LINK_OUT_V2_LEN) - return -EOPNOTSUPP; - - /* behaviour for 25G/50G links depends on 25G BASER bit */ - speed = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_LINK_SPEED); - is_25g = speed == 25000 || speed == 50000; - - caps = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_CAP); - fec->fec = mcdi_fec_caps_to_ethtool(caps, is_25g); - /* BASER is never supported on 100G */ - if (speed == 100000) - fec->fec &= ~ETHTOOL_FEC_BASER; - - active = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_FEC_TYPE); - switch (active) { - case MC_CMD_FEC_NONE: - fec->active_fec = ETHTOOL_FEC_OFF; - break; - case MC_CMD_FEC_BASER: - fec->active_fec = ETHTOOL_FEC_BASER; - break; - case MC_CMD_FEC_RS: - fec->active_fec = ETHTOOL_FEC_RS; - break; - default: - netif_warn(efx, hw, efx->net_dev, - "Firmware reports unrecognised FEC_TYPE %u\n", - active); - /* We don't know what firmware has picked. AUTO is as good a - * "can't happen" value as any other. 
- */ - fec->active_fec = ETHTOOL_FEC_AUTO; - break; - } - - return 0; -} - static int efx_mcdi_phy_set_fecparam(struct efx_nic *efx, const struct ethtool_fecparam *fec) { @@ -745,27 +302,6 @@ static int efx_mcdi_phy_set_fecparam(struct efx_nic *efx, return 0; } -static int efx_mcdi_phy_test_alive(struct efx_nic *efx) -{ - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_STATE_OUT_LEN); - size_t outlen; - int rc; - - BUILD_BUG_ON(MC_CMD_GET_PHY_STATE_IN_LEN != 0); - - rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_STATE, NULL, 0, - outbuf, sizeof(outbuf), &outlen); - if (rc) - return rc; - - if (outlen < MC_CMD_GET_PHY_STATE_OUT_LEN) - return -EIO; - if (MCDI_DWORD(outbuf, GET_PHY_STATE_OUT_STATE) != MC_CMD_PHY_STATE_OK) - return -EINVAL; - - return 0; -} - static const char *const mcdi_sft9001_cable_diag_names[] = { "cable.pairA.length", "cable.pairB.length", @@ -1139,84 +675,6 @@ u32 efx_mcdi_phy_get_caps(struct efx_nic *efx) return phy_data->supported_cap; } -static unsigned int efx_mcdi_event_link_speed[] = { - [MCDI_EVENT_LINKCHANGE_SPEED_100M] = 100, - [MCDI_EVENT_LINKCHANGE_SPEED_1G] = 1000, - [MCDI_EVENT_LINKCHANGE_SPEED_10G] = 10000, - [MCDI_EVENT_LINKCHANGE_SPEED_40G] = 40000, - [MCDI_EVENT_LINKCHANGE_SPEED_25G] = 25000, - [MCDI_EVENT_LINKCHANGE_SPEED_50G] = 50000, - [MCDI_EVENT_LINKCHANGE_SPEED_100G] = 100000, -}; - -void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev) -{ - u32 flags, fcntl, speed, lpa; - - speed = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_SPEED); - EFX_WARN_ON_PARANOID(speed >= ARRAY_SIZE(efx_mcdi_event_link_speed)); - speed = efx_mcdi_event_link_speed[speed]; - - flags = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LINK_FLAGS); - fcntl = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_FCNTL); - lpa = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LP_CAP); - - /* efx->link_state is only modified by efx_mcdi_phy_get_link(), - * which is only run after flushing the event queues. 
Therefore, it - * is safe to modify the link state outside of the mac_lock here. - */ - efx_mcdi_phy_decode_link(efx, &efx->link_state, speed, flags, fcntl); - - efx_mcdi_phy_check_fcntl(efx, lpa); - - efx_link_status_changed(efx); -} - -int efx_mcdi_set_mac(struct efx_nic *efx) -{ - u32 fcntl; - MCDI_DECLARE_BUF(cmdbytes, MC_CMD_SET_MAC_IN_LEN); - - BUILD_BUG_ON(MC_CMD_SET_MAC_OUT_LEN != 0); - - /* This has no effect on EF10 */ - ether_addr_copy(MCDI_PTR(cmdbytes, SET_MAC_IN_ADDR), - efx->net_dev->dev_addr); - - MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_MTU, - EFX_MAX_FRAME_LEN(efx->net_dev->mtu)); - MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_DRAIN, 0); - - /* Set simple MAC filter for Siena */ - MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_REJECT, - SET_MAC_IN_REJECT_UNCST, efx->unicast_filter); - - MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_FLAGS, - SET_MAC_IN_FLAG_INCLUDE_FCS, - !!(efx->net_dev->features & NETIF_F_RXFCS)); - - switch (efx->wanted_fc) { - case EFX_FC_RX | EFX_FC_TX: - fcntl = MC_CMD_FCNTL_BIDIR; - break; - case EFX_FC_RX: - fcntl = MC_CMD_FCNTL_RESPOND; - break; - default: - fcntl = MC_CMD_FCNTL_OFF; - break; - } - if (efx->wanted_fc & EFX_FC_AUTO) - fcntl = MC_CMD_FCNTL_AUTO; - if (efx->fc_disable) - fcntl = MC_CMD_FCNTL_OFF; - - MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_FCNTL, fcntl); - - return efx_mcdi_rpc(efx, MC_CMD_SET_MAC, cmdbytes, sizeof(cmdbytes), - NULL, 0, NULL); -} - bool efx_mcdi_mac_check_fault(struct efx_nic *efx) { MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN); @@ -1348,17 +806,3 @@ void efx_mcdi_port_remove(struct efx_nic *efx) efx->phy_op->remove(efx); efx_nic_free_buffer(efx, &efx->stats_buffer); } - -/* Get physical port number (EF10 only; on Siena it is same as PF number) */ -int efx_mcdi_port_get_number(struct efx_nic *efx) -{ - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PORT_ASSIGNMENT_OUT_LEN); - int rc; - - rc = efx_mcdi_rpc(efx, MC_CMD_GET_PORT_ASSIGNMENT, NULL, 0, - outbuf, sizeof(outbuf), NULL); - if (rc) - return rc; - - return 
MCDI_DWORD(outbuf, GET_PORT_ASSIGNMENT_OUT_PORT); -} diff --git a/drivers/net/ethernet/sfc/mcdi_port_common.c b/drivers/net/ethernet/sfc/mcdi_port_common.c new file mode 100644 index 000000000000..a6a072ba46d3 --- /dev/null +++ b/drivers/net/ethernet/sfc/mcdi_port_common.c @@ -0,0 +1,568 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include "mcdi_port_common.h" +#include "efx_common.h" + +int efx_mcdi_get_phy_cfg(struct efx_nic *efx, struct efx_mcdi_phy_data *cfg) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_CFG_OUT_LEN); + size_t outlen; + int rc; + + BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_IN_LEN != 0); + BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_OUT_NAME_LEN != sizeof(cfg->name)); + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_CFG, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + goto fail; + + if (outlen < MC_CMD_GET_PHY_CFG_OUT_LEN) { + rc = -EIO; + goto fail; + } + + cfg->flags = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_FLAGS); + cfg->type = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_TYPE); + cfg->supported_cap = + MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_SUPPORTED_CAP); + cfg->channel = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_CHANNEL); + cfg->port = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_PRT); + cfg->stats_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_STATS_MASK); + memcpy(cfg->name, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_NAME), + sizeof(cfg->name)); + cfg->media = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MEDIA_TYPE); + cfg->mmd_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MMD_MASK); + memcpy(cfg->revision, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_REVISION), + sizeof(cfg->revision)); + + return 0; + +fail: + 
netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); + return rc; +} + +void efx_link_set_advertising(struct efx_nic *efx, + const unsigned long *advertising) +{ + memcpy(efx->link_advertising, advertising, + sizeof(__ETHTOOL_DECLARE_LINK_MODE_MASK())); + + efx->link_advertising[0] |= ADVERTISED_Autoneg; + if (advertising[0] & ADVERTISED_Pause) + efx->wanted_fc |= (EFX_FC_TX | EFX_FC_RX); + else + efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX); + if (advertising[0] & ADVERTISED_Asym_Pause) + efx->wanted_fc ^= EFX_FC_TX; +} + +int efx_mcdi_set_link(struct efx_nic *efx, u32 capabilities, + u32 flags, u32 loopback_mode, u32 loopback_speed) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_LINK_IN_LEN); + int rc; + + BUILD_BUG_ON(MC_CMD_SET_LINK_OUT_LEN != 0); + + MCDI_SET_DWORD(inbuf, SET_LINK_IN_CAP, capabilities); + MCDI_SET_DWORD(inbuf, SET_LINK_IN_FLAGS, flags); + MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_MODE, loopback_mode); + MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_SPEED, loopback_speed); + + rc = efx_mcdi_rpc(efx, MC_CMD_SET_LINK, inbuf, sizeof(inbuf), + NULL, 0, NULL); + return rc; +} + +int efx_mcdi_loopback_modes(struct efx_nic *efx, u64 *loopback_modes) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LOOPBACK_MODES_OUT_LEN); + size_t outlen; + int rc; + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_LOOPBACK_MODES, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + goto fail; + + if (outlen < (MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_OFST + + MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_LEN)) { + rc = -EIO; + goto fail; + } + + *loopback_modes = MCDI_QWORD(outbuf, GET_LOOPBACK_MODES_OUT_SUGGESTED); + + return 0; + +fail: + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); + return rc; +} + +void mcdi_to_ethtool_linkset(u32 media, u32 cap, unsigned long *linkset) +{ + #define SET_BIT(name) __set_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \ + linkset) + + bitmap_zero(linkset, __ETHTOOL_LINK_MODE_MASK_NBITS); + switch (media) { + case MC_CMD_MEDIA_KX4: 
+ SET_BIT(Backplane); + if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) + SET_BIT(1000baseKX_Full); + if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) + SET_BIT(10000baseKX4_Full); + if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) + SET_BIT(40000baseKR4_Full); + break; + + case MC_CMD_MEDIA_XFP: + case MC_CMD_MEDIA_SFP_PLUS: + case MC_CMD_MEDIA_QSFP_PLUS: + SET_BIT(FIBRE); + if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) + SET_BIT(1000baseT_Full); + if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) + SET_BIT(10000baseT_Full); + if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) + SET_BIT(40000baseCR4_Full); + if (cap & (1 << MC_CMD_PHY_CAP_100000FDX_LBN)) + SET_BIT(100000baseCR4_Full); + if (cap & (1 << MC_CMD_PHY_CAP_25000FDX_LBN)) + SET_BIT(25000baseCR_Full); + if (cap & (1 << MC_CMD_PHY_CAP_50000FDX_LBN)) + SET_BIT(50000baseCR2_Full); + break; + + case MC_CMD_MEDIA_BASE_T: + SET_BIT(TP); + if (cap & (1 << MC_CMD_PHY_CAP_10HDX_LBN)) + SET_BIT(10baseT_Half); + if (cap & (1 << MC_CMD_PHY_CAP_10FDX_LBN)) + SET_BIT(10baseT_Full); + if (cap & (1 << MC_CMD_PHY_CAP_100HDX_LBN)) + SET_BIT(100baseT_Half); + if (cap & (1 << MC_CMD_PHY_CAP_100FDX_LBN)) + SET_BIT(100baseT_Full); + if (cap & (1 << MC_CMD_PHY_CAP_1000HDX_LBN)) + SET_BIT(1000baseT_Half); + if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) + SET_BIT(1000baseT_Full); + if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) + SET_BIT(10000baseT_Full); + break; + } + + if (cap & (1 << MC_CMD_PHY_CAP_PAUSE_LBN)) + SET_BIT(Pause); + if (cap & (1 << MC_CMD_PHY_CAP_ASYM_LBN)) + SET_BIT(Asym_Pause); + if (cap & (1 << MC_CMD_PHY_CAP_AN_LBN)) + SET_BIT(Autoneg); + + #undef SET_BIT +} + +u32 ethtool_linkset_to_mcdi_cap(const unsigned long *linkset) +{ + u32 result = 0; + + #define TEST_BIT(name) test_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \ + linkset) + + if (TEST_BIT(10baseT_Half)) + result |= (1 << MC_CMD_PHY_CAP_10HDX_LBN); + if (TEST_BIT(10baseT_Full)) + result |= (1 << MC_CMD_PHY_CAP_10FDX_LBN); + if (TEST_BIT(100baseT_Half)) + result |= (1 << 
MC_CMD_PHY_CAP_100HDX_LBN); + if (TEST_BIT(100baseT_Full)) + result |= (1 << MC_CMD_PHY_CAP_100FDX_LBN); + if (TEST_BIT(1000baseT_Half)) + result |= (1 << MC_CMD_PHY_CAP_1000HDX_LBN); + if (TEST_BIT(1000baseT_Full) || TEST_BIT(1000baseKX_Full)) + result |= (1 << MC_CMD_PHY_CAP_1000FDX_LBN); + if (TEST_BIT(10000baseT_Full) || TEST_BIT(10000baseKX4_Full)) + result |= (1 << MC_CMD_PHY_CAP_10000FDX_LBN); + if (TEST_BIT(40000baseCR4_Full) || TEST_BIT(40000baseKR4_Full)) + result |= (1 << MC_CMD_PHY_CAP_40000FDX_LBN); + if (TEST_BIT(100000baseCR4_Full)) + result |= (1 << MC_CMD_PHY_CAP_100000FDX_LBN); + if (TEST_BIT(25000baseCR_Full)) + result |= (1 << MC_CMD_PHY_CAP_25000FDX_LBN); + if (TEST_BIT(50000baseCR2_Full)) + result |= (1 << MC_CMD_PHY_CAP_50000FDX_LBN); + if (TEST_BIT(Pause)) + result |= (1 << MC_CMD_PHY_CAP_PAUSE_LBN); + if (TEST_BIT(Asym_Pause)) + result |= (1 << MC_CMD_PHY_CAP_ASYM_LBN); + if (TEST_BIT(Autoneg)) + result |= (1 << MC_CMD_PHY_CAP_AN_LBN); + + #undef TEST_BIT + + return result; +} + +u32 efx_get_mcdi_phy_flags(struct efx_nic *efx) +{ + struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; + enum efx_phy_mode mode, supported; + u32 flags; + + /* TODO: Advertise the capabilities supported by this PHY */ + supported = 0; + if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_TXDIS_LBN)) + supported |= PHY_MODE_TX_DISABLED; + if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_LOWPOWER_LBN)) + supported |= PHY_MODE_LOW_POWER; + if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_POWEROFF_LBN)) + supported |= PHY_MODE_OFF; + + mode = efx->phy_mode & supported; + + flags = 0; + if (mode & PHY_MODE_TX_DISABLED) + flags |= (1 << MC_CMD_SET_LINK_IN_TXDIS_LBN); + if (mode & PHY_MODE_LOW_POWER) + flags |= (1 << MC_CMD_SET_LINK_IN_LOWPOWER_LBN); + if (mode & PHY_MODE_OFF) + flags |= (1 << MC_CMD_SET_LINK_IN_POWEROFF_LBN); + + return flags; +} + +u8 mcdi_to_ethtool_media(u32 media) +{ + switch (media) { + case MC_CMD_MEDIA_XAUI: + case MC_CMD_MEDIA_CX4: + case 
MC_CMD_MEDIA_KX4: + return PORT_OTHER; + + case MC_CMD_MEDIA_XFP: + case MC_CMD_MEDIA_SFP_PLUS: + case MC_CMD_MEDIA_QSFP_PLUS: + return PORT_FIBRE; + + case MC_CMD_MEDIA_BASE_T: + return PORT_TP; + + default: + return PORT_OTHER; + } +} + +void efx_mcdi_phy_decode_link(struct efx_nic *efx, + struct efx_link_state *link_state, + u32 speed, u32 flags, u32 fcntl) +{ + switch (fcntl) { + case MC_CMD_FCNTL_AUTO: + WARN_ON(1); /* This is not a link mode */ + link_state->fc = EFX_FC_AUTO | EFX_FC_TX | EFX_FC_RX; + break; + case MC_CMD_FCNTL_BIDIR: + link_state->fc = EFX_FC_TX | EFX_FC_RX; + break; + case MC_CMD_FCNTL_RESPOND: + link_state->fc = EFX_FC_RX; + break; + default: + WARN_ON(1); + /* Fall through */ + case MC_CMD_FCNTL_OFF: + link_state->fc = 0; + break; + } + + link_state->up = !!(flags & (1 << MC_CMD_GET_LINK_OUT_LINK_UP_LBN)); + link_state->fd = !!(flags & (1 << MC_CMD_GET_LINK_OUT_FULL_DUPLEX_LBN)); + link_state->speed = speed; +} + +/* The semantics of the ethtool FEC mode bitmask are not well defined, + * particularly the meaning of combinations of bits. Which means we get to + * define our own semantics, as follows: + * OFF overrides any other bits, and means "disable all FEC" (with the + * exception of 25G KR4/CR4, where it is not possible to reject it if AN + * partner requests it). + * AUTO on its own means use cable requirements and link partner autoneg with + * fw-default preferences for the cable type. + * AUTO and either RS or BASER means use the specified FEC type if cable and + * link partner support it, otherwise autoneg/fw-default. + * RS or BASER alone means use the specified FEC type if cable and link partner + * support it and either requests it, otherwise no FEC. + * Both RS and BASER (whether AUTO or not) means use FEC if cable and link + * partner support it, preferring RS to BASER. 
+ */ +u32 ethtool_fec_caps_to_mcdi(u32 ethtool_cap) +{ + u32 ret = 0; + + if (ethtool_cap & ETHTOOL_FEC_OFF) + return 0; + + if (ethtool_cap & ETHTOOL_FEC_AUTO) + ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_RS_FEC_LBN); + if (ethtool_cap & ETHTOOL_FEC_RS) + ret |= (1 << MC_CMD_PHY_CAP_RS_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN); + if (ethtool_cap & ETHTOOL_FEC_BASER) + ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN) | + (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN); + return ret; +} + +/* Invert ethtool_fec_caps_to_mcdi. There are two combinations that function + * can never produce, (baser xor rs) and neither req; the implementation below + * maps both of those to AUTO. This should never matter, and it's not clear + * what a better mapping would be anyway. + */ +u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g) +{ + bool rs = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_LBN), + rs_req = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN), + baser = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) + : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN), + baser_req = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN) + : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN); + + if (!baser && !rs) + return ETHTOOL_FEC_OFF; + return (rs_req ? ETHTOOL_FEC_RS : 0) | + (baser_req ? ETHTOOL_FEC_BASER : 0) | + (baser == baser_req && rs == rs_req ? 0 : ETHTOOL_FEC_AUTO); +} + +/* Verify that the forced flow control settings (!EFX_FC_AUTO) are + * supported by the link partner. 
Warn the user if this isn't the case + */ +void efx_mcdi_phy_check_fcntl(struct efx_nic *efx, u32 lpa) +{ + struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; + u32 rmtadv; + + /* The link partner capabilities are only relevant if the + * link supports flow control autonegotiation + */ + if (~phy_cfg->supported_cap & (1 << MC_CMD_PHY_CAP_AN_LBN)) + return; + + /* If flow control autoneg is supported and enabled, then fine */ + if (efx->wanted_fc & EFX_FC_AUTO) + return; + + rmtadv = 0; + if (lpa & (1 << MC_CMD_PHY_CAP_PAUSE_LBN)) + rmtadv |= ADVERTISED_Pause; + if (lpa & (1 << MC_CMD_PHY_CAP_ASYM_LBN)) + rmtadv |= ADVERTISED_Asym_Pause; + + if ((efx->wanted_fc & EFX_FC_TX) && rmtadv == ADVERTISED_Asym_Pause) + netif_err(efx, link, efx->net_dev, + "warning: link partner doesn't support pause frames"); +} + +bool efx_mcdi_phy_poll(struct efx_nic *efx) +{ + struct efx_link_state old_state = efx->link_state; + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN); + int rc; + + WARN_ON(!mutex_is_locked(&efx->mac_lock)); + + BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0); + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0, + outbuf, sizeof(outbuf), NULL); + if (rc) + efx->link_state.up = false; + else + efx_mcdi_phy_decode_link( + efx, &efx->link_state, + MCDI_DWORD(outbuf, GET_LINK_OUT_LINK_SPEED), + MCDI_DWORD(outbuf, GET_LINK_OUT_FLAGS), + MCDI_DWORD(outbuf, GET_LINK_OUT_FCNTL)); + + return !efx_link_state_equal(&efx->link_state, &old_state); +} + +int efx_mcdi_phy_get_fecparam(struct efx_nic *efx, struct ethtool_fecparam *fec) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_V2_LEN); + u32 caps, active, speed; /* MCDI format */ + bool is_25g = false; + size_t outlen; + int rc; + + BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0); + rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < MC_CMD_GET_LINK_OUT_V2_LEN) + return -EOPNOTSUPP; + + /* behaviour for 25G/50G links depends on 25G BASER bit */ + speed = 
MCDI_DWORD(outbuf, GET_LINK_OUT_V2_LINK_SPEED); + is_25g = speed == 25000 || speed == 50000; + + caps = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_CAP); + fec->fec = mcdi_fec_caps_to_ethtool(caps, is_25g); + /* BASER is never supported on 100G */ + if (speed == 100000) + fec->fec &= ~ETHTOOL_FEC_BASER; + + active = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_FEC_TYPE); + switch (active) { + case MC_CMD_FEC_NONE: + fec->active_fec = ETHTOOL_FEC_OFF; + break; + case MC_CMD_FEC_BASER: + fec->active_fec = ETHTOOL_FEC_BASER; + break; + case MC_CMD_FEC_RS: + fec->active_fec = ETHTOOL_FEC_RS; + break; + default: + netif_warn(efx, hw, efx->net_dev, + "Firmware reports unrecognised FEC_TYPE %u\n", + active); + /* We don't know what firmware has picked. AUTO is as good a + * "can't happen" value as any other. + */ + fec->active_fec = ETHTOOL_FEC_AUTO; + break; + } + + return 0; +} + +int efx_mcdi_phy_test_alive(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_STATE_OUT_LEN); + size_t outlen; + int rc; + + BUILD_BUG_ON(MC_CMD_GET_PHY_STATE_IN_LEN != 0); + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_STATE, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + + if (outlen < MC_CMD_GET_PHY_STATE_OUT_LEN) + return -EIO; + if (MCDI_DWORD(outbuf, GET_PHY_STATE_OUT_STATE) != MC_CMD_PHY_STATE_OK) + return -EINVAL; + + return 0; +} + +int efx_mcdi_set_mac(struct efx_nic *efx) +{ + u32 fcntl; + MCDI_DECLARE_BUF(cmdbytes, MC_CMD_SET_MAC_IN_LEN); + + BUILD_BUG_ON(MC_CMD_SET_MAC_OUT_LEN != 0); + + /* This has no effect on EF10 */ + ether_addr_copy(MCDI_PTR(cmdbytes, SET_MAC_IN_ADDR), + efx->net_dev->dev_addr); + + MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_MTU, + EFX_MAX_FRAME_LEN(efx->net_dev->mtu)); + MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_DRAIN, 0); + + /* Set simple MAC filter for Siena */ + MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_REJECT, + SET_MAC_IN_REJECT_UNCST, efx->unicast_filter); + + MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_FLAGS, + SET_MAC_IN_FLAG_INCLUDE_FCS, + 
!!(efx->net_dev->features & NETIF_F_RXFCS)); + + switch (efx->wanted_fc) { + case EFX_FC_RX | EFX_FC_TX: + fcntl = MC_CMD_FCNTL_BIDIR; + break; + case EFX_FC_RX: + fcntl = MC_CMD_FCNTL_RESPOND; + break; + default: + fcntl = MC_CMD_FCNTL_OFF; + break; + } + if (efx->wanted_fc & EFX_FC_AUTO) + fcntl = MC_CMD_FCNTL_AUTO; + if (efx->fc_disable) + fcntl = MC_CMD_FCNTL_OFF; + + MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_FCNTL, fcntl); + + return efx_mcdi_rpc(efx, MC_CMD_SET_MAC, cmdbytes, sizeof(cmdbytes), + NULL, 0, NULL); +} + +/* Get physical port number (EF10 only; on Siena it is same as PF number) */ +int efx_mcdi_port_get_number(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PORT_ASSIGNMENT_OUT_LEN); + int rc; + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_PORT_ASSIGNMENT, NULL, 0, + outbuf, sizeof(outbuf), NULL); + if (rc) + return rc; + + return MCDI_DWORD(outbuf, GET_PORT_ASSIGNMENT_OUT_PORT); +} + +static unsigned int efx_mcdi_event_link_speed[] = { + [MCDI_EVENT_LINKCHANGE_SPEED_100M] = 100, + [MCDI_EVENT_LINKCHANGE_SPEED_1G] = 1000, + [MCDI_EVENT_LINKCHANGE_SPEED_10G] = 10000, + [MCDI_EVENT_LINKCHANGE_SPEED_40G] = 40000, + [MCDI_EVENT_LINKCHANGE_SPEED_25G] = 25000, + [MCDI_EVENT_LINKCHANGE_SPEED_50G] = 50000, + [MCDI_EVENT_LINKCHANGE_SPEED_100G] = 100000, +}; + +void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev) +{ + u32 flags, fcntl, speed, lpa; + + speed = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_SPEED); + EFX_WARN_ON_PARANOID(speed >= ARRAY_SIZE(efx_mcdi_event_link_speed)); + speed = efx_mcdi_event_link_speed[speed]; + + flags = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LINK_FLAGS); + fcntl = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_FCNTL); + lpa = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LP_CAP); + + /* efx->link_state is only modified by efx_mcdi_phy_get_link(), + * which is only run after flushing the event queues. Therefore, it + * is safe to modify the link state outside of the mac_lock here. 
+ */ + efx_mcdi_phy_decode_link(efx, &efx->link_state, speed, flags, fcntl); + + efx_mcdi_phy_check_fcntl(efx, lpa); + + efx_link_status_changed(efx); +} diff --git a/drivers/net/ethernet/sfc/mcdi_port_common.h b/drivers/net/ethernet/sfc/mcdi_port_common.h new file mode 100644 index 000000000000..b16f11265269 --- /dev/null +++ b/drivers/net/ethernet/sfc/mcdi_port_common.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ +#ifndef EFX_MCDI_PORT_COMMON_H +#define EFX_MCDI_PORT_COMMON_H + +#include "net_driver.h" +#include "mcdi.h" +#include "mcdi_pcol.h" + +struct efx_mcdi_phy_data { + u32 flags; + u32 type; + u32 supported_cap; + u32 channel; + u32 port; + u32 stats_mask; + u8 name[20]; + u32 media; + u32 mmd_mask; + u8 revision[20]; + u32 forced_cap; +}; + +#define EFX_MC_STATS_GENERATION_INVALID ((__force __le64)(-1)) + +int efx_mcdi_get_phy_cfg(struct efx_nic *efx, struct efx_mcdi_phy_data *cfg); +void efx_link_set_advertising(struct efx_nic *efx, + const unsigned long *advertising); +int efx_mcdi_set_link(struct efx_nic *efx, u32 capabilities, + u32 flags, u32 loopback_mode, u32 loopback_speed); +int efx_mcdi_loopback_modes(struct efx_nic *efx, u64 *loopback_modes); +void mcdi_to_ethtool_linkset(u32 media, u32 cap, unsigned long *linkset); +u32 ethtool_linkset_to_mcdi_cap(const unsigned long *linkset); +u32 efx_get_mcdi_phy_flags(struct efx_nic *efx); +u8 mcdi_to_ethtool_media(u32 media); +void efx_mcdi_phy_decode_link(struct efx_nic *efx, + struct efx_link_state *link_state, + u32 speed, u32 flags, u32 fcntl); +u32 
ethtool_fec_caps_to_mcdi(u32 ethtool_cap); +u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g); +void efx_mcdi_phy_check_fcntl(struct efx_nic *efx, u32 lpa); +bool efx_mcdi_phy_poll(struct efx_nic *efx); +int efx_mcdi_phy_get_fecparam(struct efx_nic *efx, + struct ethtool_fecparam *fec); +int efx_mcdi_phy_test_alive(struct efx_nic *efx); +int efx_mcdi_set_mac(struct efx_nic *efx); +int efx_mcdi_port_get_number(struct efx_nic *efx); +void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev); + +#endif diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index dfd5182d9e47..9f9886f222c8 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -24,7 +24,6 @@ #include <linux/mutex.h> #include <linux/rwsem.h> #include <linux/vmalloc.h> -#include <linux/i2c.h> #include <linux/mtd/mtd.h> #include <net/busy_poll.h> #include <net/xdp.h> @@ -139,6 +138,8 @@ struct efx_special_buffer { * freed when descriptor completes * @xdpf: When @flags & %EFX_TX_BUF_XDP, the XDP frame information; its @data * member is the associated buffer to drop a page reference on. + * @option: When @flags & %EFX_TX_BUF_OPTION, an EF10-specific option + * descriptor. * @dma_addr: DMA address of the fragment. * @flags: Flags for allocation and DMA mapping type * @len: Length of this fragment. 
@@ -153,7 +154,7 @@ struct efx_tx_buffer { struct xdp_frame *xdpf; }; union { - efx_qword_t option; + efx_qword_t option; /* EF10 */ dma_addr_t dma_addr; }; unsigned short flags; @@ -743,13 +744,13 @@ union efx_multicast_hash { struct vfdi_status; /* The reserved RSS context value */ -#define EFX_EF10_RSS_CONTEXT_INVALID 0xffffffff +#define EFX_MCDI_RSS_CONTEXT_INVALID 0xffffffff /** * struct efx_rss_context - A user-defined RSS context for filtering * @list: node of linked list on which this struct is stored * @context_id: the RSS_CONTEXT_ID returned by MC firmware, or - * %EFX_EF10_RSS_CONTEXT_INVALID if this context is not present on the NIC. - * For Siena, 0 if RSS is active, else %EFX_EF10_RSS_CONTEXT_INVALID. + * %EFX_MCDI_RSS_CONTEXT_INVALID if this context is not present on the NIC. + * For Siena, 0 if RSS is active, else %EFX_MCDI_RSS_CONTEXT_INVALID. * @user_id: the rss_context ID exposed to userspace over ethtool. * @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled * @rx_hash_key: Toeplitz hash key for this RSS context @@ -1611,6 +1612,15 @@ static inline struct efx_rx_buffer *efx_rx_buffer(struct efx_rx_queue *rx_queue, return &rx_queue->buffer[index]; } +static inline struct efx_rx_buffer * +efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf) +{ + if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask))) + return efx_rx_buffer(rx_queue, 0); + else + return rx_buf + 1; +} + /** * EFX_MAX_FRAME_LEN - calculate maximum frame length * diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 1f7c5717de75..6670fda8f35a 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -9,9 +9,9 @@ #define EFX_NIC_H #include <linux/net_tstamp.h> -#include <linux/i2c-algo-bit.h> #include "net_driver.h" #include "efx.h" +#include "efx_common.h" #include "mcdi.h" enum { @@ -506,6 +506,9 @@ static inline void efx_nic_push_buffers(struct efx_tx_queue *tx_queue) 
tx_queue->efx->type->tx_write(tx_queue); } +int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb, + bool *data_mapped); + /* RX data path */ static inline int efx_nic_probe_rx(struct efx_rx_queue *rx_queue) { @@ -554,6 +557,7 @@ static inline void efx_nic_eventq_read_ack(struct efx_channel *channel) { channel->efx->type->ev_read_ack(channel); } + void efx_nic_event_test_start(struct efx_channel *channel); /* Falcon/Siena queue operations */ @@ -671,6 +675,7 @@ struct efx_farch_register_test { unsigned address; efx_oword_t mask; }; + int efx_farch_test_registers(struct efx_nic *efx, const struct efx_farch_register_test *regs, size_t n_regs); diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index c29bf862a94c..a2042f16babc 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -21,6 +21,7 @@ #include <linux/bpf_trace.h> #include "net_driver.h" #include "efx.h" +#include "rx_common.h" #include "filter.h" #include "nic.h" #include "selftest.h" @@ -32,60 +33,13 @@ /* Maximum rx prefix used by any architecture. */ #define EFX_MAX_RX_PREFIX_SIZE 16 -/* Number of RX buffers to recycle pages for. When creating the RX page recycle - * ring, this number is divided by the number of buffers per page to calculate - * the number of pages to store in the RX page recycle ring. - */ -#define EFX_RECYCLE_RING_SIZE_IOMMU 4096 -#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH) - /* Size of buffer allocated for skb header area. */ #define EFX_SKB_HEADERS 128u -/* This is the percentage fill level below which new RX descriptors - * will be added to the RX descriptor ring. - */ -static unsigned int rx_refill_threshold; - /* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */ #define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \ EFX_RX_USR_BUF_SIZE) -/* - * RX maximum head room required. 
- * - * This must be at least 1 to prevent overflow, plus one packet-worth - * to allow pipelined receives. - */ -#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS) - -static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf) -{ - return page_address(buf->page) + buf->page_offset; -} - -static inline u32 efx_rx_buf_hash(struct efx_nic *efx, const u8 *eh) -{ -#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) - return __le32_to_cpup((const __le32 *)(eh + efx->rx_packet_hash_offset)); -#else - const u8 *data = eh + efx->rx_packet_hash_offset; - return (u32)data[0] | - (u32)data[1] << 8 | - (u32)data[2] << 16 | - (u32)data[3] << 24; -#endif -} - -static inline struct efx_rx_buffer * -efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf) -{ - if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask))) - return efx_rx_buffer(rx_queue, 0); - else - return rx_buf + 1; -} - static inline void efx_sync_rx_buffer(struct efx_nic *efx, struct efx_rx_buffer *rx_buf, unsigned int len) @@ -94,301 +48,6 @@ static inline void efx_sync_rx_buffer(struct efx_nic *efx, DMA_FROM_DEVICE); } -void efx_rx_config_page_split(struct efx_nic *efx) -{ - efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align + - XDP_PACKET_HEADROOM, - EFX_RX_BUF_ALIGNMENT); - efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 : - ((PAGE_SIZE - sizeof(struct efx_rx_page_state)) / - efx->rx_page_buf_step); - efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) / - efx->rx_bufs_per_page; - efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH, - efx->rx_bufs_per_page); -} - -/* Check the RX page recycle ring for a page that can be reused. 
*/ -static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue) -{ - struct efx_nic *efx = rx_queue->efx; - struct page *page; - struct efx_rx_page_state *state; - unsigned index; - - index = rx_queue->page_remove & rx_queue->page_ptr_mask; - page = rx_queue->page_ring[index]; - if (page == NULL) - return NULL; - - rx_queue->page_ring[index] = NULL; - /* page_remove cannot exceed page_add. */ - if (rx_queue->page_remove != rx_queue->page_add) - ++rx_queue->page_remove; - - /* If page_count is 1 then we hold the only reference to this page. */ - if (page_count(page) == 1) { - ++rx_queue->page_recycle_count; - return page; - } else { - state = page_address(page); - dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, - PAGE_SIZE << efx->rx_buffer_order, - DMA_FROM_DEVICE); - put_page(page); - ++rx_queue->page_recycle_failed; - } - - return NULL; -} - -/** - * efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers - * - * @rx_queue: Efx RX queue - * - * This allocates a batch of pages, maps them for DMA, and populates - * struct efx_rx_buffers for each one. Return a negative error code or - * 0 on success. If a single page can be used for multiple buffers, - * then the page will either be inserted fully, or not at all. - */ -static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic) -{ - struct efx_nic *efx = rx_queue->efx; - struct efx_rx_buffer *rx_buf; - struct page *page; - unsigned int page_offset; - struct efx_rx_page_state *state; - dma_addr_t dma_addr; - unsigned index, count; - - count = 0; - do { - page = efx_reuse_page(rx_queue); - if (page == NULL) { - page = alloc_pages(__GFP_COMP | - (atomic ? 
GFP_ATOMIC : GFP_KERNEL), - efx->rx_buffer_order); - if (unlikely(page == NULL)) - return -ENOMEM; - dma_addr = - dma_map_page(&efx->pci_dev->dev, page, 0, - PAGE_SIZE << efx->rx_buffer_order, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(&efx->pci_dev->dev, - dma_addr))) { - __free_pages(page, efx->rx_buffer_order); - return -EIO; - } - state = page_address(page); - state->dma_addr = dma_addr; - } else { - state = page_address(page); - dma_addr = state->dma_addr; - } - - dma_addr += sizeof(struct efx_rx_page_state); - page_offset = sizeof(struct efx_rx_page_state); - - do { - index = rx_queue->added_count & rx_queue->ptr_mask; - rx_buf = efx_rx_buffer(rx_queue, index); - rx_buf->dma_addr = dma_addr + efx->rx_ip_align + - XDP_PACKET_HEADROOM; - rx_buf->page = page; - rx_buf->page_offset = page_offset + efx->rx_ip_align + - XDP_PACKET_HEADROOM; - rx_buf->len = efx->rx_dma_len; - rx_buf->flags = 0; - ++rx_queue->added_count; - get_page(page); - dma_addr += efx->rx_page_buf_step; - page_offset += efx->rx_page_buf_step; - } while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE); - - rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE; - } while (++count < efx->rx_pages_per_batch); - - return 0; -} - -/* Unmap a DMA-mapped page. This function is only called for the final RX - * buffer in a page. 
- */ -static void efx_unmap_rx_buffer(struct efx_nic *efx, - struct efx_rx_buffer *rx_buf) -{ - struct page *page = rx_buf->page; - - if (page) { - struct efx_rx_page_state *state = page_address(page); - dma_unmap_page(&efx->pci_dev->dev, - state->dma_addr, - PAGE_SIZE << efx->rx_buffer_order, - DMA_FROM_DEVICE); - } -} - -static void efx_free_rx_buffers(struct efx_rx_queue *rx_queue, - struct efx_rx_buffer *rx_buf, - unsigned int num_bufs) -{ - do { - if (rx_buf->page) { - put_page(rx_buf->page); - rx_buf->page = NULL; - } - rx_buf = efx_rx_buf_next(rx_queue, rx_buf); - } while (--num_bufs); -} - -/* Attempt to recycle the page if there is an RX recycle ring; the page can - * only be added if this is the final RX buffer, to prevent pages being used in - * the descriptor ring and appearing in the recycle ring simultaneously. - */ -static void efx_recycle_rx_page(struct efx_channel *channel, - struct efx_rx_buffer *rx_buf) -{ - struct page *page = rx_buf->page; - struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); - struct efx_nic *efx = rx_queue->efx; - unsigned index; - - /* Only recycle the page after processing the final buffer. */ - if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE)) - return; - - index = rx_queue->page_add & rx_queue->page_ptr_mask; - if (rx_queue->page_ring[index] == NULL) { - unsigned read_index = rx_queue->page_remove & - rx_queue->page_ptr_mask; - - /* The next slot in the recycle ring is available, but - * increment page_remove if the read pointer currently - * points here. - */ - if (read_index == index) - ++rx_queue->page_remove; - rx_queue->page_ring[index] = page; - ++rx_queue->page_add; - return; - } - ++rx_queue->page_recycle_full; - efx_unmap_rx_buffer(efx, rx_buf); - put_page(rx_buf->page); -} - -static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, - struct efx_rx_buffer *rx_buf) -{ - /* Release the page reference we hold for the buffer. 
*/ - if (rx_buf->page) - put_page(rx_buf->page); - - /* If this is the last buffer in a page, unmap and free it. */ - if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) { - efx_unmap_rx_buffer(rx_queue->efx, rx_buf); - efx_free_rx_buffers(rx_queue, rx_buf, 1); - } - rx_buf->page = NULL; -} - -/* Recycle the pages that are used by buffers that have just been received. */ -static void efx_recycle_rx_pages(struct efx_channel *channel, - struct efx_rx_buffer *rx_buf, - unsigned int n_frags) -{ - struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); - - do { - efx_recycle_rx_page(channel, rx_buf); - rx_buf = efx_rx_buf_next(rx_queue, rx_buf); - } while (--n_frags); -} - -static void efx_discard_rx_packet(struct efx_channel *channel, - struct efx_rx_buffer *rx_buf, - unsigned int n_frags) -{ - struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); - - efx_recycle_rx_pages(channel, rx_buf, n_frags); - - efx_free_rx_buffers(rx_queue, rx_buf, n_frags); -} - -/** - * efx_fast_push_rx_descriptors - push new RX descriptors quickly - * @rx_queue: RX descriptor queue - * - * This will aim to fill the RX descriptor queue up to - * @rx_queue->@max_fill. If there is insufficient atomic - * memory to do so, a slow fill will be scheduled. - * - * The caller must provide serialisation (none is used here). In practise, - * this means this function must run from the NAPI handler, or be called - * when NAPI is disabled. 
- */ -void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic) -{ - struct efx_nic *efx = rx_queue->efx; - unsigned int fill_level, batch_size; - int space, rc = 0; - - if (!rx_queue->refill_enabled) - return; - - /* Calculate current fill level, and exit if we don't need to fill */ - fill_level = (rx_queue->added_count - rx_queue->removed_count); - EFX_WARN_ON_ONCE_PARANOID(fill_level > rx_queue->efx->rxq_entries); - if (fill_level >= rx_queue->fast_fill_trigger) - goto out; - - /* Record minimum fill level */ - if (unlikely(fill_level < rx_queue->min_fill)) { - if (fill_level) - rx_queue->min_fill = fill_level; - } - - batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page; - space = rx_queue->max_fill - fill_level; - EFX_WARN_ON_ONCE_PARANOID(space < batch_size); - - netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, - "RX queue %d fast-filling descriptor ring from" - " level %d to level %d\n", - efx_rx_queue_index(rx_queue), fill_level, - rx_queue->max_fill); - - - do { - rc = efx_init_rx_buffers(rx_queue, atomic); - if (unlikely(rc)) { - /* Ensure that we don't leave the rx queue empty */ - efx_schedule_slow_fill(rx_queue); - goto out; - } - } while ((space -= batch_size) >= batch_size); - - netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, - "RX queue %d fast-filled descriptor ring " - "to level %d\n", efx_rx_queue_index(rx_queue), - rx_queue->added_count - rx_queue->removed_count); - - out: - if (rx_queue->notified_count != rx_queue->added_count) - efx_nic_notify_rx_desc(rx_queue); -} - -void efx_rx_slow_fill(struct timer_list *t) -{ - struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill); - - /* Post an event to cause NAPI to run and refill the queue */ - efx_nic_generate_fill_event(rx_queue); - ++rx_queue->slow_fill_count; -} - static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf, int len) @@ -412,53 +71,6 @@ static void efx_rx_packet__check_len(struct 
efx_rx_queue *rx_queue, efx_rx_queue_channel(rx_queue)->n_rx_overlength++; } -/* Pass a received packet up through GRO. GRO can handle pages - * regardless of checksum state and skbs with a good checksum. - */ -static void -efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, - unsigned int n_frags, u8 *eh) -{ - struct napi_struct *napi = &channel->napi_str; - struct efx_nic *efx = channel->efx; - struct sk_buff *skb; - - skb = napi_get_frags(napi); - if (unlikely(!skb)) { - struct efx_rx_queue *rx_queue; - - rx_queue = efx_channel_get_rx_queue(channel); - efx_free_rx_buffers(rx_queue, rx_buf, n_frags); - return; - } - - if (efx->net_dev->features & NETIF_F_RXHASH) - skb_set_hash(skb, efx_rx_buf_hash(efx, eh), - PKT_HASH_TYPE_L3); - skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ? - CHECKSUM_UNNECESSARY : CHECKSUM_NONE); - skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL); - - for (;;) { - skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, - rx_buf->page, rx_buf->page_offset, - rx_buf->len); - rx_buf->page = NULL; - skb->len += rx_buf->len; - if (skb_shinfo(skb)->nr_frags == n_frags) - break; - - rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf); - } - - skb->data_len = skb->len; - skb->truesize += n_frags * efx->rx_buffer_truesize; - - skb_record_rx_queue(skb, channel->rx_queue.core_index); - - napi_gro_frags(napi); -} - /* Allocate and construct an SKB around page fragments */ static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, @@ -805,174 +417,6 @@ out: channel->rx_pkt_n_frags = 0; } -int efx_probe_rx_queue(struct efx_rx_queue *rx_queue) -{ - struct efx_nic *efx = rx_queue->efx; - unsigned int entries; - int rc; - - /* Create the smallest power-of-two aligned ring */ - entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE); - EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE); - rx_queue->ptr_mask = entries - 1; - - netif_dbg(efx, probe, efx->net_dev, - 
"creating RX queue %d size %#x mask %#x\n", - efx_rx_queue_index(rx_queue), efx->rxq_entries, - rx_queue->ptr_mask); - - /* Allocate RX buffers */ - rx_queue->buffer = kcalloc(entries, sizeof(*rx_queue->buffer), - GFP_KERNEL); - if (!rx_queue->buffer) - return -ENOMEM; - - rc = efx_nic_probe_rx(rx_queue); - if (rc) { - kfree(rx_queue->buffer); - rx_queue->buffer = NULL; - } - - return rc; -} - -static void efx_init_rx_recycle_ring(struct efx_nic *efx, - struct efx_rx_queue *rx_queue) -{ - unsigned int bufs_in_recycle_ring, page_ring_size; - - /* Set the RX recycle ring size */ -#ifdef CONFIG_PPC64 - bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; -#else - if (iommu_present(&pci_bus_type)) - bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; - else - bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU; -#endif /* CONFIG_PPC64 */ - - page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring / - efx->rx_bufs_per_page); - rx_queue->page_ring = kcalloc(page_ring_size, - sizeof(*rx_queue->page_ring), GFP_KERNEL); - rx_queue->page_ptr_mask = page_ring_size - 1; -} - -void efx_init_rx_queue(struct efx_rx_queue *rx_queue) -{ - struct efx_nic *efx = rx_queue->efx; - unsigned int max_fill, trigger, max_trigger; - int rc = 0; - - netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, - "initialising RX queue %d\n", efx_rx_queue_index(rx_queue)); - - /* Initialise ptr fields */ - rx_queue->added_count = 0; - rx_queue->notified_count = 0; - rx_queue->removed_count = 0; - rx_queue->min_fill = -1U; - efx_init_rx_recycle_ring(efx, rx_queue); - - rx_queue->page_remove = 0; - rx_queue->page_add = rx_queue->page_ptr_mask + 1; - rx_queue->page_recycle_count = 0; - rx_queue->page_recycle_failed = 0; - rx_queue->page_recycle_full = 0; - - /* Initialise limit fields */ - max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM; - max_trigger = - max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page; - if (rx_refill_threshold != 0) { - trigger = max_fill * min(rx_refill_threshold, 100U) / 
100U; - if (trigger > max_trigger) - trigger = max_trigger; - } else { - trigger = max_trigger; - } - - rx_queue->max_fill = max_fill; - rx_queue->fast_fill_trigger = trigger; - rx_queue->refill_enabled = true; - - /* Initialise XDP queue information */ - rc = xdp_rxq_info_reg(&rx_queue->xdp_rxq_info, efx->net_dev, - rx_queue->core_index); - - if (rc) { - netif_err(efx, rx_err, efx->net_dev, - "Failure to initialise XDP queue information rc=%d\n", - rc); - efx->xdp_rxq_info_failed = true; - } else { - rx_queue->xdp_rxq_info_valid = true; - } - - /* Set up RX descriptor ring */ - efx_nic_init_rx(rx_queue); -} - -void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) -{ - int i; - struct efx_nic *efx = rx_queue->efx; - struct efx_rx_buffer *rx_buf; - - netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, - "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue)); - - del_timer_sync(&rx_queue->slow_fill); - - /* Release RX buffers from the current read ptr to the write ptr */ - if (rx_queue->buffer) { - for (i = rx_queue->removed_count; i < rx_queue->added_count; - i++) { - unsigned index = i & rx_queue->ptr_mask; - rx_buf = efx_rx_buffer(rx_queue, index); - efx_fini_rx_buffer(rx_queue, rx_buf); - } - } - - /* Unmap and release the pages in the recycle ring. Remove the ring. 
*/ - for (i = 0; i <= rx_queue->page_ptr_mask; i++) { - struct page *page = rx_queue->page_ring[i]; - struct efx_rx_page_state *state; - - if (page == NULL) - continue; - - state = page_address(page); - dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, - PAGE_SIZE << efx->rx_buffer_order, - DMA_FROM_DEVICE); - put_page(page); - } - kfree(rx_queue->page_ring); - rx_queue->page_ring = NULL; - - if (rx_queue->xdp_rxq_info_valid) - xdp_rxq_info_unreg(&rx_queue->xdp_rxq_info); - - rx_queue->xdp_rxq_info_valid = false; -} - -void efx_remove_rx_queue(struct efx_rx_queue *rx_queue) -{ - netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, - "destroying RX queue %d\n", efx_rx_queue_index(rx_queue)); - - efx_nic_remove_rx(rx_queue); - - kfree(rx_queue->buffer); - rx_queue->buffer = NULL; -} - - -module_param(rx_refill_threshold, uint, 0444); -MODULE_PARM_DESC(rx_refill_threshold, - "RX descriptor ring refill threshold (%)"); - #ifdef CONFIG_RFS_ACCEL static void efx_filter_rfs_work(struct work_struct *data) @@ -1206,37 +650,3 @@ bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota) } #endif /* CONFIG_RFS_ACCEL */ - -/** - * efx_filter_is_mc_recipient - test whether spec is a multicast recipient - * @spec: Specification to test - * - * Return: %true if the specification is a non-drop RX filter that - * matches a local MAC address I/G bit value of 1 or matches a local - * IPv4 or IPv6 address value in the respective multicast address - * range. Otherwise %false. 
- */ -bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec) -{ - if (!(spec->flags & EFX_FILTER_FLAG_RX) || - spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP) - return false; - - if (spec->match_flags & - (EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG) && - is_multicast_ether_addr(spec->loc_mac)) - return true; - - if ((spec->match_flags & - (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) == - (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) { - if (spec->ether_type == htons(ETH_P_IP) && - ipv4_is_multicast(spec->loc_host[0])) - return true; - if (spec->ether_type == htons(ETH_P_IPV6) && - ((const u8 *)spec->loc_host)[0] == 0xff) - return true; - } - - return false; -} diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c new file mode 100644 index 000000000000..ee8beb87bdc1 --- /dev/null +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -0,0 +1,851 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include "net_driver.h" +#include <linux/module.h> +#include <linux/iommu.h> +#include "efx.h" +#include "nic.h" +#include "rx_common.h" + +/* This is the percentage fill level below which new RX descriptors + * will be added to the RX descriptor ring. + */ +static unsigned int rx_refill_threshold; +module_param(rx_refill_threshold, uint, 0444); +MODULE_PARM_DESC(rx_refill_threshold, + "RX descriptor ring refill threshold (%)"); + +/* Number of RX buffers to recycle pages for. 
When creating the RX page recycle + * ring, this number is divided by the number of buffers per page to calculate + * the number of pages to store in the RX page recycle ring. + */ +#define EFX_RECYCLE_RING_SIZE_IOMMU 4096 +#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH) + +/* RX maximum head room required. + * + * This must be at least 1 to prevent overflow, plus one packet-worth + * to allow pipelined receives. + */ +#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS) + +/* Check the RX page recycle ring for a page that can be reused. */ +static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue) +{ + struct efx_nic *efx = rx_queue->efx; + struct efx_rx_page_state *state; + unsigned int index; + struct page *page; + + index = rx_queue->page_remove & rx_queue->page_ptr_mask; + page = rx_queue->page_ring[index]; + if (page == NULL) + return NULL; + + rx_queue->page_ring[index] = NULL; + /* page_remove cannot exceed page_add. */ + if (rx_queue->page_remove != rx_queue->page_add) + ++rx_queue->page_remove; + + /* If page_count is 1 then we hold the only reference to this page. */ + if (page_count(page) == 1) { + ++rx_queue->page_recycle_count; + return page; + } else { + state = page_address(page); + dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, + PAGE_SIZE << efx->rx_buffer_order, + DMA_FROM_DEVICE); + put_page(page); + ++rx_queue->page_recycle_failed; + } + + return NULL; +} + +/* Attempt to recycle the page if there is an RX recycle ring; the page can + * only be added if this is the final RX buffer, to prevent pages being used in + * the descriptor ring and appearing in the recycle ring simultaneously. + */ +static void efx_recycle_rx_page(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf) +{ + struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); + struct efx_nic *efx = rx_queue->efx; + struct page *page = rx_buf->page; + unsigned int index; + + /* Only recycle the page after processing the final buffer. 
*/ + if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE)) + return; + + index = rx_queue->page_add & rx_queue->page_ptr_mask; + if (rx_queue->page_ring[index] == NULL) { + unsigned int read_index = rx_queue->page_remove & + rx_queue->page_ptr_mask; + + /* The next slot in the recycle ring is available, but + * increment page_remove if the read pointer currently + * points here. + */ + if (read_index == index) + ++rx_queue->page_remove; + rx_queue->page_ring[index] = page; + ++rx_queue->page_add; + return; + } + ++rx_queue->page_recycle_full; + efx_unmap_rx_buffer(efx, rx_buf); + put_page(rx_buf->page); +} + +/* Recycle the pages that are used by buffers that have just been received. */ +void efx_recycle_rx_pages(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags) +{ + struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); + + do { + efx_recycle_rx_page(channel, rx_buf); + rx_buf = efx_rx_buf_next(rx_queue, rx_buf); + } while (--n_frags); +} + +void efx_discard_rx_packet(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags) +{ + struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); + + efx_recycle_rx_pages(channel, rx_buf, n_frags); + + efx_free_rx_buffers(rx_queue, rx_buf, n_frags); +} + +static void efx_init_rx_recycle_ring(struct efx_rx_queue *rx_queue) +{ + unsigned int bufs_in_recycle_ring, page_ring_size; + struct efx_nic *efx = rx_queue->efx; + + /* Set the RX recycle ring size */ +#ifdef CONFIG_PPC64 + bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; +#else + if (iommu_present(&pci_bus_type)) + bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; + else + bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU; +#endif /* CONFIG_PPC64 */ + + page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring / + efx->rx_bufs_per_page); + rx_queue->page_ring = kcalloc(page_ring_size, + sizeof(*rx_queue->page_ring), GFP_KERNEL); + rx_queue->page_ptr_mask = page_ring_size - 1; +} + +static 
void efx_fini_rx_recycle_ring(struct efx_rx_queue *rx_queue) +{ + struct efx_nic *efx = rx_queue->efx; + int i; + + /* Unmap and release the pages in the recycle ring. Remove the ring. */ + for (i = 0; i <= rx_queue->page_ptr_mask; i++) { + struct page *page = rx_queue->page_ring[i]; + struct efx_rx_page_state *state; + + if (page == NULL) + continue; + + state = page_address(page); + dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, + PAGE_SIZE << efx->rx_buffer_order, + DMA_FROM_DEVICE); + put_page(page); + } + kfree(rx_queue->page_ring); + rx_queue->page_ring = NULL; +} + +static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, + struct efx_rx_buffer *rx_buf) +{ + /* Release the page reference we hold for the buffer. */ + if (rx_buf->page) + put_page(rx_buf->page); + + /* If this is the last buffer in a page, unmap and free it. */ + if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) { + efx_unmap_rx_buffer(rx_queue->efx, rx_buf); + efx_free_rx_buffers(rx_queue, rx_buf, 1); + } + rx_buf->page = NULL; +} + +int efx_probe_rx_queue(struct efx_rx_queue *rx_queue) +{ + struct efx_nic *efx = rx_queue->efx; + unsigned int entries; + int rc; + + /* Create the smallest power-of-two aligned ring */ + entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE); + EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE); + rx_queue->ptr_mask = entries - 1; + + netif_dbg(efx, probe, efx->net_dev, + "creating RX queue %d size %#x mask %#x\n", + efx_rx_queue_index(rx_queue), efx->rxq_entries, + rx_queue->ptr_mask); + + /* Allocate RX buffers */ + rx_queue->buffer = kcalloc(entries, sizeof(*rx_queue->buffer), + GFP_KERNEL); + if (!rx_queue->buffer) + return -ENOMEM; + + rc = efx_nic_probe_rx(rx_queue); + if (rc) { + kfree(rx_queue->buffer); + rx_queue->buffer = NULL; + } + + return rc; +} + +void efx_init_rx_queue(struct efx_rx_queue *rx_queue) +{ + unsigned int max_fill, trigger, max_trigger; + struct efx_nic *efx = rx_queue->efx; + int rc = 0; + + 
netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, + "initialising RX queue %d\n", efx_rx_queue_index(rx_queue)); + + /* Initialise ptr fields */ + rx_queue->added_count = 0; + rx_queue->notified_count = 0; + rx_queue->removed_count = 0; + rx_queue->min_fill = -1U; + efx_init_rx_recycle_ring(rx_queue); + + rx_queue->page_remove = 0; + rx_queue->page_add = rx_queue->page_ptr_mask + 1; + rx_queue->page_recycle_count = 0; + rx_queue->page_recycle_failed = 0; + rx_queue->page_recycle_full = 0; + + /* Initialise limit fields */ + max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM; + max_trigger = + max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page; + if (rx_refill_threshold != 0) { + trigger = max_fill * min(rx_refill_threshold, 100U) / 100U; + if (trigger > max_trigger) + trigger = max_trigger; + } else { + trigger = max_trigger; + } + + rx_queue->max_fill = max_fill; + rx_queue->fast_fill_trigger = trigger; + rx_queue->refill_enabled = true; + + /* Initialise XDP queue information */ + rc = xdp_rxq_info_reg(&rx_queue->xdp_rxq_info, efx->net_dev, + rx_queue->core_index); + + if (rc) { + netif_err(efx, rx_err, efx->net_dev, + "Failure to initialise XDP queue information rc=%d\n", + rc); + efx->xdp_rxq_info_failed = true; + } else { + rx_queue->xdp_rxq_info_valid = true; + } + + /* Set up RX descriptor ring */ + efx_nic_init_rx(rx_queue); +} + +void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) +{ + struct efx_rx_buffer *rx_buf; + int i; + + netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, + "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue)); + + del_timer_sync(&rx_queue->slow_fill); + + /* Release RX buffers from the current read ptr to the write ptr */ + if (rx_queue->buffer) { + for (i = rx_queue->removed_count; i < rx_queue->added_count; + i++) { + unsigned int index = i & rx_queue->ptr_mask; + + rx_buf = efx_rx_buffer(rx_queue, index); + efx_fini_rx_buffer(rx_queue, rx_buf); + } + } + + efx_fini_rx_recycle_ring(rx_queue); + + if 
(rx_queue->xdp_rxq_info_valid) + xdp_rxq_info_unreg(&rx_queue->xdp_rxq_info); + + rx_queue->xdp_rxq_info_valid = false; +} + +void efx_remove_rx_queue(struct efx_rx_queue *rx_queue) +{ + netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, + "destroying RX queue %d\n", efx_rx_queue_index(rx_queue)); + + efx_nic_remove_rx(rx_queue); + + kfree(rx_queue->buffer); + rx_queue->buffer = NULL; +} + +/* Unmap a DMA-mapped page. This function is only called for the final RX + * buffer in a page. + */ +void efx_unmap_rx_buffer(struct efx_nic *efx, + struct efx_rx_buffer *rx_buf) +{ + struct page *page = rx_buf->page; + + if (page) { + struct efx_rx_page_state *state = page_address(page); + + dma_unmap_page(&efx->pci_dev->dev, + state->dma_addr, + PAGE_SIZE << efx->rx_buffer_order, + DMA_FROM_DEVICE); + } +} + +void efx_free_rx_buffers(struct efx_rx_queue *rx_queue, + struct efx_rx_buffer *rx_buf, + unsigned int num_bufs) +{ + do { + if (rx_buf->page) { + put_page(rx_buf->page); + rx_buf->page = NULL; + } + rx_buf = efx_rx_buf_next(rx_queue, rx_buf); + } while (--num_bufs); +} + +void efx_rx_slow_fill(struct timer_list *t) +{ + struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill); + + /* Post an event to cause NAPI to run and refill the queue */ + efx_nic_generate_fill_event(rx_queue); + ++rx_queue->slow_fill_count; +} + +void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue) +{ + mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(10)); +} + +/* efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers + * + * @rx_queue: Efx RX queue + * + * This allocates a batch of pages, maps them for DMA, and populates + * struct efx_rx_buffers for each one. Return a negative error code or + * 0 on success. If a single page can be used for multiple buffers, + * then the page will either be inserted fully, or not at all. 
+ */ +static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic) +{ + unsigned int page_offset, index, count; + struct efx_nic *efx = rx_queue->efx; + struct efx_rx_page_state *state; + struct efx_rx_buffer *rx_buf; + dma_addr_t dma_addr; + struct page *page; + + count = 0; + do { + page = efx_reuse_page(rx_queue); + if (page == NULL) { + page = alloc_pages(__GFP_COMP | + (atomic ? GFP_ATOMIC : GFP_KERNEL), + efx->rx_buffer_order); + if (unlikely(page == NULL)) + return -ENOMEM; + dma_addr = + dma_map_page(&efx->pci_dev->dev, page, 0, + PAGE_SIZE << efx->rx_buffer_order, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(&efx->pci_dev->dev, + dma_addr))) { + __free_pages(page, efx->rx_buffer_order); + return -EIO; + } + state = page_address(page); + state->dma_addr = dma_addr; + } else { + state = page_address(page); + dma_addr = state->dma_addr; + } + + dma_addr += sizeof(struct efx_rx_page_state); + page_offset = sizeof(struct efx_rx_page_state); + + do { + index = rx_queue->added_count & rx_queue->ptr_mask; + rx_buf = efx_rx_buffer(rx_queue, index); + rx_buf->dma_addr = dma_addr + efx->rx_ip_align + + XDP_PACKET_HEADROOM; + rx_buf->page = page; + rx_buf->page_offset = page_offset + efx->rx_ip_align + + XDP_PACKET_HEADROOM; + rx_buf->len = efx->rx_dma_len; + rx_buf->flags = 0; + ++rx_queue->added_count; + get_page(page); + dma_addr += efx->rx_page_buf_step; + page_offset += efx->rx_page_buf_step; + } while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE); + + rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE; + } while (++count < efx->rx_pages_per_batch); + + return 0; +} + +void efx_rx_config_page_split(struct efx_nic *efx) +{ + efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align + + XDP_PACKET_HEADROOM, + EFX_RX_BUF_ALIGNMENT); + efx->rx_bufs_per_page = efx->rx_buffer_order ? 
1 : + ((PAGE_SIZE - sizeof(struct efx_rx_page_state)) / + efx->rx_page_buf_step); + efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) / + efx->rx_bufs_per_page; + efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH, + efx->rx_bufs_per_page); +} + +/* efx_fast_push_rx_descriptors - push new RX descriptors quickly + * @rx_queue: RX descriptor queue + * + * This will aim to fill the RX descriptor queue up to + * @rx_queue->@max_fill. If there is insufficient atomic + * memory to do so, a slow fill will be scheduled. + * + * The caller must provide serialisation (none is used here). In practise, + * this means this function must run from the NAPI handler, or be called + * when NAPI is disabled. + */ +void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic) +{ + struct efx_nic *efx = rx_queue->efx; + unsigned int fill_level, batch_size; + int space, rc = 0; + + if (!rx_queue->refill_enabled) + return; + + /* Calculate current fill level, and exit if we don't need to fill */ + fill_level = (rx_queue->added_count - rx_queue->removed_count); + EFX_WARN_ON_ONCE_PARANOID(fill_level > rx_queue->efx->rxq_entries); + if (fill_level >= rx_queue->fast_fill_trigger) + goto out; + + /* Record minimum fill level */ + if (unlikely(fill_level < rx_queue->min_fill)) { + if (fill_level) + rx_queue->min_fill = fill_level; + } + + batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page; + space = rx_queue->max_fill - fill_level; + EFX_WARN_ON_ONCE_PARANOID(space < batch_size); + + netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, + "RX queue %d fast-filling descriptor ring from" + " level %d to level %d\n", + efx_rx_queue_index(rx_queue), fill_level, + rx_queue->max_fill); + + do { + rc = efx_init_rx_buffers(rx_queue, atomic); + if (unlikely(rc)) { + /* Ensure that we don't leave the rx queue empty */ + efx_schedule_slow_fill(rx_queue); + goto out; + } + } while ((space -= batch_size) >= batch_size); + + netif_vdbg(rx_queue->efx, 
rx_status, rx_queue->efx->net_dev, + "RX queue %d fast-filled descriptor ring " + "to level %d\n", efx_rx_queue_index(rx_queue), + rx_queue->added_count - rx_queue->removed_count); + + out: + if (rx_queue->notified_count != rx_queue->added_count) + efx_nic_notify_rx_desc(rx_queue); +} + +/* Pass a received packet up through GRO. GRO can handle pages + * regardless of checksum state and skbs with a good checksum. + */ +void +efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, + unsigned int n_frags, u8 *eh) +{ + struct napi_struct *napi = &channel->napi_str; + struct efx_nic *efx = channel->efx; + struct sk_buff *skb; + + skb = napi_get_frags(napi); + if (unlikely(!skb)) { + struct efx_rx_queue *rx_queue; + + rx_queue = efx_channel_get_rx_queue(channel); + efx_free_rx_buffers(rx_queue, rx_buf, n_frags); + return; + } + + if (efx->net_dev->features & NETIF_F_RXHASH) + skb_set_hash(skb, efx_rx_buf_hash(efx, eh), + PKT_HASH_TYPE_L3); + skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ? + CHECKSUM_UNNECESSARY : CHECKSUM_NONE); + skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL); + + for (;;) { + skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, + rx_buf->page, rx_buf->page_offset, + rx_buf->len); + rx_buf->page = NULL; + skb->len += rx_buf->len; + if (skb_shinfo(skb)->nr_frags == n_frags) + break; + + rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf); + } + + skb->data_len = skb->len; + skb->truesize += n_frags * efx->rx_buffer_truesize; + + skb_record_rx_queue(skb, channel->rx_queue.core_index); + + napi_gro_frags(napi); +} + +/* RSS contexts. 
We're using linked lists and crappy O(n) algorithms, because + * (a) this is an infrequent control-plane operation and (b) n is small (max 64) + */ +struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx) +{ + struct list_head *head = &efx->rss_context.list; + struct efx_rss_context *ctx, *new; + u32 id = 1; /* Don't use zero, that refers to the master RSS context */ + + WARN_ON(!mutex_is_locked(&efx->rss_lock)); + + /* Search for first gap in the numbering */ + list_for_each_entry(ctx, head, list) { + if (ctx->user_id != id) + break; + id++; + /* Check for wrap. If this happens, we have nearly 2^32 + * allocated RSS contexts, which seems unlikely. + */ + if (WARN_ON_ONCE(!id)) + return NULL; + } + + /* Create the new entry */ + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return NULL; + new->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; + new->rx_hash_udp_4tuple = false; + + /* Insert the new entry into the gap */ + new->user_id = id; + list_add_tail(&new->list, &ctx->list); + return new; +} + +struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id) +{ + struct list_head *head = &efx->rss_context.list; + struct efx_rss_context *ctx; + + WARN_ON(!mutex_is_locked(&efx->rss_lock)); + + list_for_each_entry(ctx, head, list) + if (ctx->user_id == id) + return ctx; + return NULL; +} + +void efx_free_rss_context_entry(struct efx_rss_context *ctx) +{ + list_del(&ctx->list); + kfree(ctx); +} + +void efx_set_default_rx_indir_table(struct efx_nic *efx, + struct efx_rss_context *ctx) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++) + ctx->rx_indir_table[i] = + ethtool_rxfh_indir_default(i, efx->rss_spread); +} + +/** + * efx_filter_is_mc_recipient - test whether spec is a multicast recipient + * @spec: Specification to test + * + * Return: %true if the specification is a non-drop RX filter that + * matches a local MAC address I/G bit value of 1 or matches a local + * IPv4 or IPv6 address value in the 
respective multicast address + * range. Otherwise %false. + */ +bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec) +{ + if (!(spec->flags & EFX_FILTER_FLAG_RX) || + spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP) + return false; + + if (spec->match_flags & + (EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG) && + is_multicast_ether_addr(spec->loc_mac)) + return true; + + if ((spec->match_flags & + (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) == + (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) { + if (spec->ether_type == htons(ETH_P_IP) && + ipv4_is_multicast(spec->loc_host[0])) + return true; + if (spec->ether_type == htons(ETH_P_IPV6) && + ((const u8 *)spec->loc_host)[0] == 0xff) + return true; + } + + return false; +} + +bool efx_filter_spec_equal(const struct efx_filter_spec *left, + const struct efx_filter_spec *right) +{ + if ((left->match_flags ^ right->match_flags) | + ((left->flags ^ right->flags) & + (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX))) + return false; + + return memcmp(&left->outer_vid, &right->outer_vid, + sizeof(struct efx_filter_spec) - + offsetof(struct efx_filter_spec, outer_vid)) == 0; +} + +u32 efx_filter_spec_hash(const struct efx_filter_spec *spec) +{ + BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3); + return jhash2((const u32 *)&spec->outer_vid, + (sizeof(struct efx_filter_spec) - + offsetof(struct efx_filter_spec, outer_vid)) / 4, + 0); +} + +#ifdef CONFIG_RFS_ACCEL +bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx, + bool *force) +{ + if (rule->filter_id == EFX_ARFS_FILTER_ID_PENDING) { + /* ARFS is currently updating this entry, leave it */ + return false; + } + if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) { + /* ARFS tried and failed to update this, so it's probably out + * of date. Remove the filter and the ARFS rule entry. 
+ */ + rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING; + *force = true; + return true; + } else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */ + /* ARFS has moved on, so old filter is not needed. Since we did + * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will + * not be removed by efx_rps_hash_del() subsequently. + */ + *force = true; + return true; + } + /* Remove it iff ARFS wants to. */ + return true; +} + +static +struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx, + const struct efx_filter_spec *spec) +{ + u32 hash = efx_filter_spec_hash(spec); + + lockdep_assert_held(&efx->rps_hash_lock); + if (!efx->rps_hash_table) + return NULL; + return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE]; +} + +struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx, + const struct efx_filter_spec *spec) +{ + struct efx_arfs_rule *rule; + struct hlist_head *head; + struct hlist_node *node; + + head = efx_rps_hash_bucket(efx, spec); + if (!head) + return NULL; + hlist_for_each(node, head) { + rule = container_of(node, struct efx_arfs_rule, node); + if (efx_filter_spec_equal(spec, &rule->spec)) + return rule; + } + return NULL; +} + +struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx, + const struct efx_filter_spec *spec, + bool *new) +{ + struct efx_arfs_rule *rule; + struct hlist_head *head; + struct hlist_node *node; + + head = efx_rps_hash_bucket(efx, spec); + if (!head) + return NULL; + hlist_for_each(node, head) { + rule = container_of(node, struct efx_arfs_rule, node); + if (efx_filter_spec_equal(spec, &rule->spec)) { + *new = false; + return rule; + } + } + rule = kmalloc(sizeof(*rule), GFP_ATOMIC); + *new = true; + if (rule) { + memcpy(&rule->spec, spec, sizeof(rule->spec)); + hlist_add_head(&rule->node, head); + } + return rule; +} + +void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec) +{ + struct efx_arfs_rule *rule; + struct hlist_head *head; + struct hlist_node *node; + + head = 
efx_rps_hash_bucket(efx, spec); + if (WARN_ON(!head)) + return; + hlist_for_each(node, head) { + rule = container_of(node, struct efx_arfs_rule, node); + if (efx_filter_spec_equal(spec, &rule->spec)) { + /* Someone already reused the entry. We know that if + * this check doesn't fire (i.e. filter_id == REMOVING) + * then the REMOVING mark was put there by our caller, + * because caller is holding a lock on filter table and + * only holders of that lock set REMOVING. + */ + if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING) + return; + hlist_del(node); + kfree(rule); + return; + } + } + /* We didn't find it. */ + WARN_ON(1); +} +#endif + +int efx_probe_filters(struct efx_nic *efx) +{ + int rc; + + init_rwsem(&efx->filter_sem); + mutex_lock(&efx->mac_lock); + down_write(&efx->filter_sem); + rc = efx->type->filter_table_probe(efx); + if (rc) + goto out_unlock; + +#ifdef CONFIG_RFS_ACCEL + if (efx->type->offload_features & NETIF_F_NTUPLE) { + struct efx_channel *channel; + int i, success = 1; + + efx_for_each_channel(channel, efx) { + channel->rps_flow_id = + kcalloc(efx->type->max_rx_ip_filters, + sizeof(*channel->rps_flow_id), + GFP_KERNEL); + if (!channel->rps_flow_id) + success = 0; + else + for (i = 0; + i < efx->type->max_rx_ip_filters; + ++i) + channel->rps_flow_id[i] = + RPS_FLOW_ID_INVALID; + channel->rfs_expire_index = 0; + channel->rfs_filter_count = 0; + } + + if (!success) { + efx_for_each_channel(channel, efx) + kfree(channel->rps_flow_id); + efx->type->filter_table_remove(efx); + rc = -ENOMEM; + goto out_unlock; + } + } +#endif +out_unlock: + up_write(&efx->filter_sem); + mutex_unlock(&efx->mac_lock); + return rc; +} + +void efx_remove_filters(struct efx_nic *efx) +{ +#ifdef CONFIG_RFS_ACCEL + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) { + cancel_delayed_work_sync(&channel->filter_work); + kfree(channel->rps_flow_id); + } +#endif + down_write(&efx->filter_sem); + efx->type->filter_table_remove(efx); + up_write(&efx->filter_sem); 
+} diff --git a/drivers/net/ethernet/sfc/rx_common.h b/drivers/net/ethernet/sfc/rx_common.h new file mode 100644 index 000000000000..c41f12a89477 --- /dev/null +++ b/drivers/net/ethernet/sfc/rx_common.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_RX_COMMON_H +#define EFX_RX_COMMON_H + +/* Preferred number of descriptors to fill at once */ +#define EFX_RX_PREFERRED_BATCH 8U + +/* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */ +#define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \ + EFX_RX_USR_BUF_SIZE) + +static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf) +{ + return page_address(buf->page) + buf->page_offset; +} + +static inline u32 efx_rx_buf_hash(struct efx_nic *efx, const u8 *eh) +{ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + return __le32_to_cpup((const __le32 *)(eh + efx->rx_packet_hash_offset)); +#else + const u8 *data = eh + efx->rx_packet_hash_offset; + + return (u32)data[0] | + (u32)data[1] << 8 | + (u32)data[2] << 16 | + (u32)data[3] << 24; +#endif +} + +void efx_rx_slow_fill(struct timer_list *t); + +void efx_recycle_rx_pages(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags); +void efx_discard_rx_packet(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags); + +int efx_probe_rx_queue(struct efx_rx_queue *rx_queue); +void efx_init_rx_queue(struct efx_rx_queue *rx_queue); +void efx_fini_rx_queue(struct efx_rx_queue *rx_queue); +void efx_remove_rx_queue(struct efx_rx_queue *rx_queue); 
+void efx_destroy_rx_queue(struct efx_rx_queue *rx_queue); + +void efx_init_rx_buffer(struct efx_rx_queue *rx_queue, + struct page *page, + unsigned int page_offset, + u16 flags); +void efx_unmap_rx_buffer(struct efx_nic *efx, struct efx_rx_buffer *rx_buf); +void efx_free_rx_buffers(struct efx_rx_queue *rx_queue, + struct efx_rx_buffer *rx_buf, + unsigned int num_bufs); + +void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue); +void efx_rx_config_page_split(struct efx_nic *efx); +void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic); + +void +efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, + unsigned int n_frags, u8 *eh); + +struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx); +struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id); +void efx_free_rss_context_entry(struct efx_rss_context *ctx); +void efx_set_default_rx_indir_table(struct efx_nic *efx, + struct efx_rss_context *ctx); + +bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec); +bool efx_filter_spec_equal(const struct efx_filter_spec *left, + const struct efx_filter_spec *right); +u32 efx_filter_spec_hash(const struct efx_filter_spec *spec); + +#ifdef CONFIG_RFS_ACCEL +bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx, + bool *force); +struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx, + const struct efx_filter_spec *spec); +struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx, + const struct efx_filter_spec *spec, + bool *new); +void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec); +#endif + +int efx_probe_filters(struct efx_nic *efx); +void efx_remove_filters(struct efx_nic *efx); + +#endif diff --git a/drivers/net/ethernet/sfc/selftest.c b/drivers/net/ethernet/sfc/selftest.c index 8474cf8ea7d3..1ae369022d7d 100644 --- a/drivers/net/ethernet/sfc/selftest.c +++ b/drivers/net/ethernet/sfc/selftest.c @@ -18,6 +18,8 @@ 
#include <linux/slab.h> #include "net_driver.h" #include "efx.h" +#include "efx_common.h" +#include "efx_channels.h" #include "nic.h" #include "selftest.h" #include "workarounds.h" @@ -783,7 +785,7 @@ void efx_selftest_async_cancel(struct efx_nic *efx) cancel_delayed_work_sync(&efx->selftest_work); } -void efx_selftest_async_work(struct work_struct *data) +static void efx_selftest_async_work(struct work_struct *data) { struct efx_nic *efx = container_of(data, struct efx_nic, selftest_work.work); @@ -802,3 +804,8 @@ void efx_selftest_async_work(struct work_struct *data) channel->channel, cpu); } } + +void efx_selftest_async_init(struct efx_nic *efx) +{ + INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work); +} diff --git a/drivers/net/ethernet/sfc/selftest.h b/drivers/net/ethernet/sfc/selftest.h index a3553816d92c..ca88ebb4f6b1 100644 --- a/drivers/net/ethernet/sfc/selftest.h +++ b/drivers/net/ethernet/sfc/selftest.h @@ -45,8 +45,8 @@ void efx_loopback_rx_packet(struct efx_nic *efx, const char *buf_ptr, int pkt_len); int efx_selftest(struct efx_nic *efx, struct efx_self_tests *tests, unsigned flags); +void efx_selftest_async_init(struct efx_nic *efx); void efx_selftest_async_start(struct efx_nic *efx); void efx_selftest_async_cancel(struct efx_nic *efx); -void efx_selftest_async_work(struct work_struct *data); #endif /* EFX_SELFTEST_H */ diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index 81499244a4b4..baa464161626 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c @@ -14,12 +14,14 @@ #include "net_driver.h" #include "bitfield.h" #include "efx.h" +#include "efx_common.h" #include "nic.h" #include "farch_regs.h" #include "io.h" #include "workarounds.h" #include "mcdi.h" #include "mcdi_pcol.h" +#include "mcdi_port_common.h" #include "selftest.h" #include "siena_sriov.h" diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c index 
dfbdf05dcf79..83dcfcae3d4b 100644 --- a/drivers/net/ethernet/sfc/siena_sriov.c +++ b/drivers/net/ethernet/sfc/siena_sriov.c @@ -7,6 +7,7 @@ #include <linux/module.h> #include "net_driver.h" #include "efx.h" +#include "efx_channels.h" #include "nic.h" #include "io.h" #include "mcdi.h" diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 00c1c4402451..04d7f41d7ed9 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -20,6 +20,7 @@ #include "io.h" #include "nic.h" #include "tx.h" +#include "tx_common.h" #include "workarounds.h" #include "ef10_regs.h" @@ -56,72 +57,6 @@ u8 *efx_tx_get_copy_buffer_limited(struct efx_tx_queue *tx_queue, return efx_tx_get_copy_buffer(tx_queue, buffer); } -static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, - struct efx_tx_buffer *buffer, - unsigned int *pkts_compl, - unsigned int *bytes_compl) -{ - if (buffer->unmap_len) { - struct device *dma_dev = &tx_queue->efx->pci_dev->dev; - dma_addr_t unmap_addr = buffer->dma_addr - buffer->dma_offset; - if (buffer->flags & EFX_TX_BUF_MAP_SINGLE) - dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len, - DMA_TO_DEVICE); - else - dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len, - DMA_TO_DEVICE); - buffer->unmap_len = 0; - } - - if (buffer->flags & EFX_TX_BUF_SKB) { - struct sk_buff *skb = (struct sk_buff *)buffer->skb; - - EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl); - (*pkts_compl)++; - (*bytes_compl) += skb->len; - if (tx_queue->timestamping && - (tx_queue->completed_timestamp_major || - tx_queue->completed_timestamp_minor)) { - struct skb_shared_hwtstamps hwtstamp; - - hwtstamp.hwtstamp = - efx_ptp_nic_to_kernel_time(tx_queue); - skb_tstamp_tx(skb, &hwtstamp); - - tx_queue->completed_timestamp_major = 0; - tx_queue->completed_timestamp_minor = 0; - } - dev_consume_skb_any((struct sk_buff *)buffer->skb); - netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev, - "TX queue %d transmission id %x complete\n", - 
tx_queue->queue, tx_queue->read_count); - } else if (buffer->flags & EFX_TX_BUF_XDP) { - xdp_return_frame_rx_napi(buffer->xdpf); - } - - buffer->len = 0; - buffer->flags = 0; -} - -unsigned int efx_tx_max_skb_descs(struct efx_nic *efx) -{ - /* Header and payload descriptor for each output segment, plus - * one for every input fragment boundary within a segment - */ - unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS; - - /* Possibly one more per segment for option descriptors */ - if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) - max_descs += EFX_TSO_MAX_SEGS; - - /* Possibly more for PCIe page boundaries within input fragments */ - if (PAGE_SIZE > EFX_PAGE_SIZE) - max_descs += max_t(unsigned int, MAX_SKB_FRAGS, - DIV_ROUND_UP(GSO_MAX_SIZE, EFX_PAGE_SIZE)); - - return max_descs; -} - static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1) { /* We need to consider both queues that the net core sees as one */ @@ -333,125 +268,6 @@ static int efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, } #endif /* EFX_USE_PIO */ -static struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue, - dma_addr_t dma_addr, - size_t len) -{ - const struct efx_nic_type *nic_type = tx_queue->efx->type; - struct efx_tx_buffer *buffer; - unsigned int dma_len; - - /* Map the fragment taking account of NIC-dependent DMA limits. */ - do { - buffer = efx_tx_queue_get_insert_buffer(tx_queue); - dma_len = nic_type->tx_limit_len(tx_queue, dma_addr, len); - - buffer->len = dma_len; - buffer->dma_addr = dma_addr; - buffer->flags = EFX_TX_BUF_CONT; - len -= dma_len; - dma_addr += dma_len; - ++tx_queue->insert_count; - } while (len); - - return buffer; -} - -/* Map all data from an SKB for DMA and create descriptors on the queue. 
- */ -static int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb, - unsigned int segment_count) -{ - struct efx_nic *efx = tx_queue->efx; - struct device *dma_dev = &efx->pci_dev->dev; - unsigned int frag_index, nr_frags; - dma_addr_t dma_addr, unmap_addr; - unsigned short dma_flags; - size_t len, unmap_len; - - nr_frags = skb_shinfo(skb)->nr_frags; - frag_index = 0; - - /* Map header data. */ - len = skb_headlen(skb); - dma_addr = dma_map_single(dma_dev, skb->data, len, DMA_TO_DEVICE); - dma_flags = EFX_TX_BUF_MAP_SINGLE; - unmap_len = len; - unmap_addr = dma_addr; - - if (unlikely(dma_mapping_error(dma_dev, dma_addr))) - return -EIO; - - if (segment_count) { - /* For TSO we need to put the header in to a separate - * descriptor. Map this separately if necessary. - */ - size_t header_len = skb_transport_header(skb) - skb->data + - (tcp_hdr(skb)->doff << 2u); - - if (header_len != len) { - tx_queue->tso_long_headers++; - efx_tx_map_chunk(tx_queue, dma_addr, header_len); - len -= header_len; - dma_addr += header_len; - } - } - - /* Add descriptors for each fragment. */ - do { - struct efx_tx_buffer *buffer; - skb_frag_t *fragment; - - buffer = efx_tx_map_chunk(tx_queue, dma_addr, len); - - /* The final descriptor for a fragment is responsible for - * unmapping the whole fragment. - */ - buffer->flags = EFX_TX_BUF_CONT | dma_flags; - buffer->unmap_len = unmap_len; - buffer->dma_offset = buffer->dma_addr - unmap_addr; - - if (frag_index >= nr_frags) { - /* Store SKB details with the final buffer for - * the completion. - */ - buffer->skb = skb; - buffer->flags = EFX_TX_BUF_SKB | dma_flags; - return 0; - } - - /* Move on to the next fragment. 
*/ - fragment = &skb_shinfo(skb)->frags[frag_index++]; - len = skb_frag_size(fragment); - dma_addr = skb_frag_dma_map(dma_dev, fragment, - 0, len, DMA_TO_DEVICE); - dma_flags = 0; - unmap_len = len; - unmap_addr = dma_addr; - - if (unlikely(dma_mapping_error(dma_dev, dma_addr))) - return -EIO; - } while (1); -} - -/* Remove buffers put into a tx_queue for the current packet. - * None of the buffers must have an skb attached. - */ -static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue, - unsigned int insert_count) -{ - struct efx_tx_buffer *buffer; - unsigned int bytes_compl = 0; - unsigned int pkts_compl = 0; - - /* Work backwards until we hit the original insert pointer value */ - while (tx_queue->insert_count != insert_count) { - --tx_queue->insert_count; - buffer = __efx_tx_queue_get_insert_buffer(tx_queue); - efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); - } -} - /* * Fallback to software TSO. * @@ -473,12 +289,9 @@ static int efx_tx_tso_fallback(struct efx_tx_queue *tx_queue, dev_consume_skb_any(skb); skb = segments; - while (skb) { - next = skb->next; - skb->next = NULL; - + skb_list_walk_safe(skb, skb, next) { + skb_mark_not_on_list(skb); efx_enqueue_skb(tx_queue, skb); - skb = next; } return 0; @@ -687,41 +500,6 @@ int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs, return i; } -/* Remove packets from the TX queue - * - * This removes packets from the TX queue, up to and including the - * specified index. 
- */ -static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue, - unsigned int index, - unsigned int *pkts_compl, - unsigned int *bytes_compl) -{ - struct efx_nic *efx = tx_queue->efx; - unsigned int stop_index, read_ptr; - - stop_index = (index + 1) & tx_queue->ptr_mask; - read_ptr = tx_queue->read_count & tx_queue->ptr_mask; - - while (read_ptr != stop_index) { - struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr]; - - if (!(buffer->flags & EFX_TX_BUF_OPTION) && - unlikely(buffer->len == 0)) { - netif_err(efx, tx_err, efx->net_dev, - "TX queue %d spurious TX completion id %x\n", - tx_queue->queue, read_ptr); - efx_schedule_reset(efx, RESET_TYPE_TX_SKIP); - return; - } - - efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl); - - ++tx_queue->read_count; - read_ptr = tx_queue->read_count & tx_queue->ptr_mask; - } -} - /* Initiate a packet transmission. We use one channel per CPU * (sharing when we have more CPUs than channels). On Falcon, the TX * completion events will be directed back to the CPU that transmitted @@ -834,173 +612,3 @@ int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type, net_dev->num_tc = num_tc; return 0; } - -void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) -{ - unsigned fill_level; - struct efx_nic *efx = tx_queue->efx; - struct efx_tx_queue *txq2; - unsigned int pkts_compl = 0, bytes_compl = 0; - - EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask); - - efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl); - tx_queue->pkts_compl += pkts_compl; - tx_queue->bytes_compl += bytes_compl; - - if (pkts_compl > 1) - ++tx_queue->merge_events; - - /* See if we need to restart the netif queue. This memory - * barrier ensures that we write read_count (inside - * efx_dequeue_buffers()) before reading the queue status. 
- */ - smp_mb(); - if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) && - likely(efx->port_enabled) && - likely(netif_device_present(efx->net_dev))) { - txq2 = efx_tx_queue_partner(tx_queue); - fill_level = max(tx_queue->insert_count - tx_queue->read_count, - txq2->insert_count - txq2->read_count); - if (fill_level <= efx->txq_wake_thresh) - netif_tx_wake_queue(tx_queue->core_txq); - } - - /* Check whether the hardware queue is now empty */ - if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) { - tx_queue->old_write_count = READ_ONCE(tx_queue->write_count); - if (tx_queue->read_count == tx_queue->old_write_count) { - smp_mb(); - tx_queue->empty_read_count = - tx_queue->read_count | EFX_EMPTY_COUNT_VALID; - } - } -} - -static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue) -{ - return DIV_ROUND_UP(tx_queue->ptr_mask + 1, PAGE_SIZE >> EFX_TX_CB_ORDER); -} - -int efx_probe_tx_queue(struct efx_tx_queue *tx_queue) -{ - struct efx_nic *efx = tx_queue->efx; - unsigned int entries; - int rc; - - /* Create the smallest power-of-two aligned ring */ - entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE); - EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE); - tx_queue->ptr_mask = entries - 1; - - netif_dbg(efx, probe, efx->net_dev, - "creating TX queue %d size %#x mask %#x\n", - tx_queue->queue, efx->txq_entries, tx_queue->ptr_mask); - - /* Allocate software ring */ - tx_queue->buffer = kcalloc(entries, sizeof(*tx_queue->buffer), - GFP_KERNEL); - if (!tx_queue->buffer) - return -ENOMEM; - - tx_queue->cb_page = kcalloc(efx_tx_cb_page_count(tx_queue), - sizeof(tx_queue->cb_page[0]), GFP_KERNEL); - if (!tx_queue->cb_page) { - rc = -ENOMEM; - goto fail1; - } - - /* Allocate hardware ring */ - rc = efx_nic_probe_tx(tx_queue); - if (rc) - goto fail2; - - return 0; - -fail2: - kfree(tx_queue->cb_page); - tx_queue->cb_page = NULL; -fail1: - kfree(tx_queue->buffer); - tx_queue->buffer = NULL; - return rc; -} - -void 
efx_init_tx_queue(struct efx_tx_queue *tx_queue) -{ - struct efx_nic *efx = tx_queue->efx; - - netif_dbg(efx, drv, efx->net_dev, - "initialising TX queue %d\n", tx_queue->queue); - - tx_queue->insert_count = 0; - tx_queue->write_count = 0; - tx_queue->packet_write_count = 0; - tx_queue->old_write_count = 0; - tx_queue->read_count = 0; - tx_queue->old_read_count = 0; - tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID; - tx_queue->xmit_more_available = false; - tx_queue->timestamping = (efx_ptp_use_mac_tx_timestamps(efx) && - tx_queue->channel == efx_ptp_channel(efx)); - tx_queue->completed_desc_ptr = tx_queue->ptr_mask; - tx_queue->completed_timestamp_major = 0; - tx_queue->completed_timestamp_minor = 0; - - tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel); - - /* Set up default function pointers. These may get replaced by - * efx_nic_init_tx() based off NIC/queue capabilities. - */ - tx_queue->handle_tso = efx_enqueue_skb_tso; - - /* Set up TX descriptor ring */ - efx_nic_init_tx(tx_queue); - - tx_queue->initialised = true; -} - -void efx_fini_tx_queue(struct efx_tx_queue *tx_queue) -{ - struct efx_tx_buffer *buffer; - - netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev, - "shutting down TX queue %d\n", tx_queue->queue); - - if (!tx_queue->buffer) - return; - - /* Free any buffers left in the ring */ - while (tx_queue->read_count != tx_queue->write_count) { - unsigned int pkts_compl = 0, bytes_compl = 0; - buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask]; - efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); - - ++tx_queue->read_count; - } - tx_queue->xmit_more_available = false; - netdev_tx_reset_queue(tx_queue->core_txq); -} - -void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) -{ - int i; - - if (!tx_queue->buffer) - return; - - netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev, - "destroying TX queue %d\n", tx_queue->queue); - efx_nic_remove_tx(tx_queue); - - if (tx_queue->cb_page) { - for (i = 0; i 
< efx_tx_cb_page_count(tx_queue); i++) - efx_nic_free_buffer(tx_queue->efx, - &tx_queue->cb_page[i]); - kfree(tx_queue->cb_page); - tx_queue->cb_page = NULL; - } - - kfree(tx_queue->buffer); - tx_queue->buffer = NULL; -} diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c new file mode 100644 index 000000000000..b1571e9789d0 --- /dev/null +++ b/drivers/net/ethernet/sfc/tx_common.c @@ -0,0 +1,404 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include "net_driver.h" +#include "efx.h" +#include "nic.h" +#include "tx_common.h" + +static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue) +{ + return DIV_ROUND_UP(tx_queue->ptr_mask + 1, + PAGE_SIZE >> EFX_TX_CB_ORDER); +} + +int efx_probe_tx_queue(struct efx_tx_queue *tx_queue) +{ + struct efx_nic *efx = tx_queue->efx; + unsigned int entries; + int rc; + + /* Create the smallest power-of-two aligned ring */ + entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE); + EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE); + tx_queue->ptr_mask = entries - 1; + + netif_dbg(efx, probe, efx->net_dev, + "creating TX queue %d size %#x mask %#x\n", + tx_queue->queue, efx->txq_entries, tx_queue->ptr_mask); + + /* Allocate software ring */ + tx_queue->buffer = kcalloc(entries, sizeof(*tx_queue->buffer), + GFP_KERNEL); + if (!tx_queue->buffer) + return -ENOMEM; + + tx_queue->cb_page = kcalloc(efx_tx_cb_page_count(tx_queue), + sizeof(tx_queue->cb_page[0]), GFP_KERNEL); + if (!tx_queue->cb_page) { + rc = -ENOMEM; + goto fail1; + } + + /* Allocate hardware ring 
*/ + rc = efx_nic_probe_tx(tx_queue); + if (rc) + goto fail2; + + return 0; + +fail2: + kfree(tx_queue->cb_page); + tx_queue->cb_page = NULL; +fail1: + kfree(tx_queue->buffer); + tx_queue->buffer = NULL; + return rc; +} + +void efx_init_tx_queue(struct efx_tx_queue *tx_queue) +{ + struct efx_nic *efx = tx_queue->efx; + + netif_dbg(efx, drv, efx->net_dev, + "initialising TX queue %d\n", tx_queue->queue); + + tx_queue->insert_count = 0; + tx_queue->write_count = 0; + tx_queue->packet_write_count = 0; + tx_queue->old_write_count = 0; + tx_queue->read_count = 0; + tx_queue->old_read_count = 0; + tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID; + tx_queue->xmit_more_available = false; + tx_queue->timestamping = (efx_ptp_use_mac_tx_timestamps(efx) && + tx_queue->channel == efx_ptp_channel(efx)); + tx_queue->completed_desc_ptr = tx_queue->ptr_mask; + tx_queue->completed_timestamp_major = 0; + tx_queue->completed_timestamp_minor = 0; + + tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel); + + /* Set up default function pointers. These may get replaced by + * efx_nic_init_tx() based off NIC/queue capabilities. 
+ */ + tx_queue->handle_tso = efx_enqueue_skb_tso; + + /* Set up TX descriptor ring */ + efx_nic_init_tx(tx_queue); + + tx_queue->initialised = true; +} + +void efx_fini_tx_queue(struct efx_tx_queue *tx_queue) +{ + struct efx_tx_buffer *buffer; + + netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev, + "shutting down TX queue %d\n", tx_queue->queue); + + if (!tx_queue->buffer) + return; + + /* Free any buffers left in the ring */ + while (tx_queue->read_count != tx_queue->write_count) { + unsigned int pkts_compl = 0, bytes_compl = 0; + + buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask]; + efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); + + ++tx_queue->read_count; + } + tx_queue->xmit_more_available = false; + netdev_tx_reset_queue(tx_queue->core_txq); +} + +void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) +{ + int i; + + if (!tx_queue->buffer) + return; + + netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev, + "destroying TX queue %d\n", tx_queue->queue); + efx_nic_remove_tx(tx_queue); + + if (tx_queue->cb_page) { + for (i = 0; i < efx_tx_cb_page_count(tx_queue); i++) + efx_nic_free_buffer(tx_queue->efx, + &tx_queue->cb_page[i]); + kfree(tx_queue->cb_page); + tx_queue->cb_page = NULL; + } + + kfree(tx_queue->buffer); + tx_queue->buffer = NULL; +} + +void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, + struct efx_tx_buffer *buffer, + unsigned int *pkts_compl, + unsigned int *bytes_compl) +{ + if (buffer->unmap_len) { + struct device *dma_dev = &tx_queue->efx->pci_dev->dev; + dma_addr_t unmap_addr = buffer->dma_addr - buffer->dma_offset; + + if (buffer->flags & EFX_TX_BUF_MAP_SINGLE) + dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len, + DMA_TO_DEVICE); + else + dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len, + DMA_TO_DEVICE); + buffer->unmap_len = 0; + } + + if (buffer->flags & EFX_TX_BUF_SKB) { + struct sk_buff *skb = (struct sk_buff *)buffer->skb; + + EFX_WARN_ON_PARANOID(!pkts_compl || 
!bytes_compl); + (*pkts_compl)++; + (*bytes_compl) += skb->len; + if (tx_queue->timestamping && + (tx_queue->completed_timestamp_major || + tx_queue->completed_timestamp_minor)) { + struct skb_shared_hwtstamps hwtstamp; + + hwtstamp.hwtstamp = + efx_ptp_nic_to_kernel_time(tx_queue); + skb_tstamp_tx(skb, &hwtstamp); + + tx_queue->completed_timestamp_major = 0; + tx_queue->completed_timestamp_minor = 0; + } + dev_consume_skb_any((struct sk_buff *)buffer->skb); + netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev, + "TX queue %d transmission id %x complete\n", + tx_queue->queue, tx_queue->read_count); + } else if (buffer->flags & EFX_TX_BUF_XDP) { + xdp_return_frame_rx_napi(buffer->xdpf); + } + + buffer->len = 0; + buffer->flags = 0; +} + +/* Remove packets from the TX queue + * + * This removes packets from the TX queue, up to and including the + * specified index. + */ +static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue, + unsigned int index, + unsigned int *pkts_compl, + unsigned int *bytes_compl) +{ + struct efx_nic *efx = tx_queue->efx; + unsigned int stop_index, read_ptr; + + stop_index = (index + 1) & tx_queue->ptr_mask; + read_ptr = tx_queue->read_count & tx_queue->ptr_mask; + + while (read_ptr != stop_index) { + struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr]; + + if (!(buffer->flags & EFX_TX_BUF_OPTION) && + unlikely(buffer->len == 0)) { + netif_err(efx, tx_err, efx->net_dev, + "TX queue %d spurious TX completion id %x\n", + tx_queue->queue, read_ptr); + efx_schedule_reset(efx, RESET_TYPE_TX_SKIP); + return; + } + + efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl); + + ++tx_queue->read_count; + read_ptr = tx_queue->read_count & tx_queue->ptr_mask; + } +} + +void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) +{ + unsigned int fill_level, pkts_compl = 0, bytes_compl = 0; + struct efx_nic *efx = tx_queue->efx; + struct efx_tx_queue *txq2; + + EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask); + + 
efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl); + tx_queue->pkts_compl += pkts_compl; + tx_queue->bytes_compl += bytes_compl; + + if (pkts_compl > 1) + ++tx_queue->merge_events; + + /* See if we need to restart the netif queue. This memory + * barrier ensures that we write read_count (inside + * efx_dequeue_buffers()) before reading the queue status. + */ + smp_mb(); + if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) && + likely(efx->port_enabled) && + likely(netif_device_present(efx->net_dev))) { + txq2 = efx_tx_queue_partner(tx_queue); + fill_level = max(tx_queue->insert_count - tx_queue->read_count, + txq2->insert_count - txq2->read_count); + if (fill_level <= efx->txq_wake_thresh) + netif_tx_wake_queue(tx_queue->core_txq); + } + + /* Check whether the hardware queue is now empty */ + if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) { + tx_queue->old_write_count = READ_ONCE(tx_queue->write_count); + if (tx_queue->read_count == tx_queue->old_write_count) { + smp_mb(); + tx_queue->empty_read_count = + tx_queue->read_count | EFX_EMPTY_COUNT_VALID; + } + } +} + +/* Remove buffers put into a tx_queue for the current packet. + * None of the buffers must have an skb attached. + */ +void efx_enqueue_unwind(struct efx_tx_queue *tx_queue, + unsigned int insert_count) +{ + struct efx_tx_buffer *buffer; + unsigned int bytes_compl = 0; + unsigned int pkts_compl = 0; + + /* Work backwards until we hit the original insert pointer value */ + while (tx_queue->insert_count != insert_count) { + --tx_queue->insert_count; + buffer = __efx_tx_queue_get_insert_buffer(tx_queue); + efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); + } +} + +struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue, + dma_addr_t dma_addr, size_t len) +{ + const struct efx_nic_type *nic_type = tx_queue->efx->type; + struct efx_tx_buffer *buffer; + unsigned int dma_len; + + /* Map the fragment taking account of NIC-dependent DMA limits. 
*/ + do { + buffer = efx_tx_queue_get_insert_buffer(tx_queue); + dma_len = nic_type->tx_limit_len(tx_queue, dma_addr, len); + + buffer->len = dma_len; + buffer->dma_addr = dma_addr; + buffer->flags = EFX_TX_BUF_CONT; + len -= dma_len; + dma_addr += dma_len; + ++tx_queue->insert_count; + } while (len); + + return buffer; +} + +/* Map all data from an SKB for DMA and create descriptors on the queue. */ +int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb, + unsigned int segment_count) +{ + struct efx_nic *efx = tx_queue->efx; + struct device *dma_dev = &efx->pci_dev->dev; + unsigned int frag_index, nr_frags; + dma_addr_t dma_addr, unmap_addr; + unsigned short dma_flags; + size_t len, unmap_len; + + nr_frags = skb_shinfo(skb)->nr_frags; + frag_index = 0; + + /* Map header data. */ + len = skb_headlen(skb); + dma_addr = dma_map_single(dma_dev, skb->data, len, DMA_TO_DEVICE); + dma_flags = EFX_TX_BUF_MAP_SINGLE; + unmap_len = len; + unmap_addr = dma_addr; + + if (unlikely(dma_mapping_error(dma_dev, dma_addr))) + return -EIO; + + if (segment_count) { + /* For TSO we need to put the header in to a separate + * descriptor. Map this separately if necessary. + */ + size_t header_len = skb_transport_header(skb) - skb->data + + (tcp_hdr(skb)->doff << 2u); + + if (header_len != len) { + tx_queue->tso_long_headers++; + efx_tx_map_chunk(tx_queue, dma_addr, header_len); + len -= header_len; + dma_addr += header_len; + } + } + + /* Add descriptors for each fragment. */ + do { + struct efx_tx_buffer *buffer; + skb_frag_t *fragment; + + buffer = efx_tx_map_chunk(tx_queue, dma_addr, len); + + /* The final descriptor for a fragment is responsible for + * unmapping the whole fragment. + */ + buffer->flags = EFX_TX_BUF_CONT | dma_flags; + buffer->unmap_len = unmap_len; + buffer->dma_offset = buffer->dma_addr - unmap_addr; + + if (frag_index >= nr_frags) { + /* Store SKB details with the final buffer for + * the completion. 
+ */ + buffer->skb = skb; + buffer->flags = EFX_TX_BUF_SKB | dma_flags; + return 0; + } + + /* Move on to the next fragment. */ + fragment = &skb_shinfo(skb)->frags[frag_index++]; + len = skb_frag_size(fragment); + dma_addr = skb_frag_dma_map(dma_dev, fragment, 0, len, + DMA_TO_DEVICE); + dma_flags = 0; + unmap_len = len; + unmap_addr = dma_addr; + + if (unlikely(dma_mapping_error(dma_dev, dma_addr))) + return -EIO; + } while (1); +} + +unsigned int efx_tx_max_skb_descs(struct efx_nic *efx) +{ + /* Header and payload descriptor for each output segment, plus + * one for every input fragment boundary within a segment + */ + unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS; + + /* Possibly one more per segment for option descriptors */ + if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) + max_descs += EFX_TSO_MAX_SEGS; + + /* Possibly more for PCIe page boundaries within input fragments */ + if (PAGE_SIZE > EFX_PAGE_SIZE) + max_descs += max_t(unsigned int, MAX_SKB_FRAGS, + DIV_ROUND_UP(GSO_MAX_SIZE, EFX_PAGE_SIZE)); + + return max_descs; +} diff --git a/drivers/net/ethernet/sfc/tx_common.h b/drivers/net/ethernet/sfc/tx_common.h new file mode 100644 index 000000000000..f92f1fe3a87f --- /dev/null +++ b/drivers/net/ethernet/sfc/tx_common.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. 
+ */ + +#ifndef EFX_TX_COMMON_H +#define EFX_TX_COMMON_H + +int efx_probe_tx_queue(struct efx_tx_queue *tx_queue); +void efx_init_tx_queue(struct efx_tx_queue *tx_queue); +void efx_fini_tx_queue(struct efx_tx_queue *tx_queue); +void efx_remove_tx_queue(struct efx_tx_queue *tx_queue); + +void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, + struct efx_tx_buffer *buffer, + unsigned int *pkts_compl, + unsigned int *bytes_compl); + +void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index); + +void efx_enqueue_unwind(struct efx_tx_queue *tx_queue, + unsigned int insert_count); + +struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue, + dma_addr_t dma_addr, size_t len); +int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb, + unsigned int segment_count); + +unsigned int efx_tx_max_skb_descs(struct efx_nic *efx); + +#endif diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c index d242906ae233..06637b03deed 100644 --- a/drivers/net/ethernet/sgi/ioc3-eth.c +++ b/drivers/net/ethernet/sgi/ioc3-eth.c @@ -114,7 +114,7 @@ struct ioc3_private { static int ioc3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static void ioc3_set_multicast_list(struct net_device *dev); static netdev_tx_t ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev); -static void ioc3_timeout(struct net_device *dev); +static void ioc3_timeout(struct net_device *dev, unsigned int txqueue); static inline unsigned int ioc3_hash(const unsigned char *addr); static void ioc3_start(struct ioc3_private *ip); static inline void ioc3_stop(struct ioc3_private *ip); @@ -1479,7 +1479,7 @@ drop_packet: return NETDEV_TX_OK; } -static void ioc3_timeout(struct net_device *dev) +static void ioc3_timeout(struct net_device *dev, unsigned int txqueue) { struct ioc3_private *ip = netdev_priv(dev); diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c index 539bc5db989c..0c396ecd3389 100644 --- 
a/drivers/net/ethernet/sgi/meth.c +++ b/drivers/net/ethernet/sgi/meth.c @@ -90,7 +90,7 @@ struct meth_private { spinlock_t meth_lock; }; -static void meth_tx_timeout(struct net_device *dev); +static void meth_tx_timeout(struct net_device *dev, unsigned int txqueue); static irqreturn_t meth_interrupt(int irq, void *dev_id); /* global, initialized in ip32-setup.c */ @@ -727,7 +727,7 @@ static netdev_tx_t meth_tx(struct sk_buff *skb, struct net_device *dev) /* * Deal with a transmit timeout. */ -static void meth_tx_timeout(struct net_device *dev) +static void meth_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct meth_private *priv = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/ethernet/silan/sc92031.c b/drivers/net/ethernet/silan/sc92031.c index c7641a236eb8..cb043eb1bdc1 100644 --- a/drivers/net/ethernet/silan/sc92031.c +++ b/drivers/net/ethernet/silan/sc92031.c @@ -1078,7 +1078,7 @@ static void sc92031_set_multicast_list(struct net_device *dev) spin_unlock_bh(&priv->lock); } -static void sc92031_tx_timeout(struct net_device *dev) +static void sc92031_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sc92031_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c index 5b351beb78cb..5a4b6e3ab38f 100644 --- a/drivers/net/ethernet/sis/sis190.c +++ b/drivers/net/ethernet/sis/sis190.c @@ -1538,7 +1538,7 @@ err_out_0: goto out; } -static void sis190_tx_timeout(struct net_device *dev) +static void sis190_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sis190_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp->mmio_addr; diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 85eaccbbbac1..81ed7589e33c 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -222,7 +222,7 @@ static int mdio_read(struct net_device *net_dev, int phy_id, int location); static void 
mdio_write(struct net_device *net_dev, int phy_id, int location, int val); static void sis900_timer(struct timer_list *t); static void sis900_check_mode (struct net_device *net_dev, struct mii_phy *mii_phy); -static void sis900_tx_timeout(struct net_device *net_dev); +static void sis900_tx_timeout(struct net_device *net_dev, unsigned int txqueue); static void sis900_init_tx_ring(struct net_device *net_dev); static void sis900_init_rx_ring(struct net_device *net_dev); static netdev_tx_t sis900_start_xmit(struct sk_buff *skb, @@ -1537,7 +1537,7 @@ static void sis900_read_mode(struct net_device *net_dev, int *speed, int *duplex * disable interrupts and do some tasks */ -static void sis900_tx_timeout(struct net_device *net_dev) +static void sis900_tx_timeout(struct net_device *net_dev, unsigned int txqueue) { struct sis900_private *sis_priv = netdev_priv(net_dev); void __iomem *ioaddr = sis_priv->ioaddr; diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index be47d864f8b9..61ddee0c2a2e 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -280,6 +280,7 @@ struct epic_private { signed char phys[4]; /* MII device addresses. */ u16 advertising; /* NWay media advertisement */ int mii_phy_cnt; + u32 ethtool_ops_nesting; struct mii_if_info mii; unsigned int tx_full:1; /* The Tx queue is full. */ unsigned int default_port:4; /* Last dev->if_port value. 
*/ @@ -291,7 +292,7 @@ static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *dev, int phy_id, int loc, int val); static void epic_restart(struct net_device *dev); static void epic_timer(struct timer_list *t); -static void epic_tx_timeout(struct net_device *dev); +static void epic_tx_timeout(struct net_device *dev, unsigned int txqueue); static void epic_init_ring(struct net_device *dev); static netdev_tx_t epic_start_xmit(struct sk_buff *skb, struct net_device *dev); @@ -861,7 +862,7 @@ static void epic_timer(struct timer_list *t) add_timer(&ep->timer); } -static void epic_tx_timeout(struct net_device *dev) +static void epic_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct epic_private *ep = netdev_priv(dev); void __iomem *ioaddr = ep->ioaddr; @@ -1435,8 +1436,10 @@ static int ethtool_begin(struct net_device *dev) struct epic_private *ep = netdev_priv(dev); void __iomem *ioaddr = ep->ioaddr; + if (ep->ethtool_ops_nesting == U32_MAX) + return -EBUSY; /* power-up, if interface is down */ - if (!netif_running(dev)) { + if (!ep->ethtool_ops_nesting++ && !netif_running(dev)) { ew32(GENCTL, 0x0200); ew32(NVCTL, (er32(NVCTL) & ~0x003c) | 0x4800); } @@ -1449,7 +1452,7 @@ static void ethtool_complete(struct net_device *dev) void __iomem *ioaddr = ep->ioaddr; /* power-down, if interface is down */ - if (!netif_running(dev)) { + if (!--ep->ethtool_ops_nesting && !netif_running(dev)) { ew32(GENCTL, 0x0008); ew32(NVCTL, (er32(NVCTL) & ~0x483c) | 0x0000); } diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 8d88e4083456..186c0bddbe5f 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -936,7 +936,7 @@ static void smc911x_phy_configure(struct work_struct *work) if (lp->ctl_rspeed != 100) my_ad_caps &= ~(ADVERTISE_100BASE4|ADVERTISE_100FULL|ADVERTISE_100HALF); - if (!lp->ctl_rfduplx) + if (!lp->ctl_rfduplx) my_ad_caps &= 
~(ADVERTISE_100FULL|ADVERTISE_10FULL); /* Update our Auto-Neg Advertisement Register */ @@ -1245,7 +1245,7 @@ static void smc911x_poll_controller(struct net_device *dev) #endif /* Our watchdog timed out. Called by the networking layer */ -static void smc911x_timeout(struct net_device *dev) +static void smc911x_timeout(struct net_device *dev, unsigned int txqueue) { struct smc911x_local *lp = netdev_priv(dev); int status, mask; diff --git a/drivers/net/ethernet/smsc/smc9194.c b/drivers/net/ethernet/smsc/smc9194.c index d3bb2ba51f40..4b2330deed47 100644 --- a/drivers/net/ethernet/smsc/smc9194.c +++ b/drivers/net/ethernet/smsc/smc9194.c @@ -216,7 +216,7 @@ static int smc_open(struct net_device *dev); /* . Our watchdog timed out. Called by the networking layer */ -static void smc_timeout(struct net_device *dev); +static void smc_timeout(struct net_device *dev, unsigned int txqueue); /* . This is called by the kernel in response to 'ifconfig ethX down'. It @@ -1094,7 +1094,7 @@ static int smc_open(struct net_device *dev) .-------------------------------------------------------- */ -static void smc_timeout(struct net_device *dev) +static void smc_timeout(struct net_device *dev, unsigned int txqueue) { /* If we get here, some higher level has decided we are broken. There should really be a "kick me" function call instead. 
*/ diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c index a55f430f6a7b..f2a50eb3c1e0 100644 --- a/drivers/net/ethernet/smsc/smc91c92_cs.c +++ b/drivers/net/ethernet/smsc/smc91c92_cs.c @@ -271,7 +271,7 @@ static void smc91c92_release(struct pcmcia_device *link); static int smc_open(struct net_device *dev); static int smc_close(struct net_device *dev); static int smc_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); -static void smc_tx_timeout(struct net_device *dev); +static void smc_tx_timeout(struct net_device *dev, unsigned int txqueue); static netdev_tx_t smc_start_xmit(struct sk_buff *skb, struct net_device *dev); static irqreturn_t smc_interrupt(int irq, void *dev_id); @@ -1178,7 +1178,7 @@ static void smc_hardware_send_packet(struct net_device * dev) /*====================================================================*/ -static void smc_tx_timeout(struct net_device *dev) +static void smc_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct smc_private *smc = netdev_priv(dev); unsigned int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 3a6761131f4c..90410f9d3b1a 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -1321,7 +1321,7 @@ static void smc_poll_controller(struct net_device *dev) #endif /* Our watchdog timed out. 
Called by the networking layer */ -static void smc_timeout(struct net_device *dev) +static void smc_timeout(struct net_device *dev, unsigned int txqueue) { struct smc_local *lp = netdev_priv(dev); void __iomem *ioaddr = lp->base; diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 869a498e3b5e..6870a6ce76a6 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -243,6 +243,7 @@ NET_IP_ALIGN) #define NETSEC_RX_BUF_NON_DATA (NETSEC_RXBUF_HEADROOM + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) +#define NETSEC_RX_BUF_SIZE (PAGE_SIZE - NETSEC_RX_BUF_NON_DATA) #define DESC_SZ sizeof(struct netsec_de) @@ -719,7 +720,6 @@ static void *netsec_alloc_rx_data(struct netsec_priv *priv, { struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX]; - enum dma_data_direction dma_dir; struct page *page; page = page_pool_dev_alloc_pages(dring->page_pool); @@ -734,9 +734,7 @@ static void *netsec_alloc_rx_data(struct netsec_priv *priv, /* Make sure the incoming payload fits in the page for XDP and non-XDP * cases and reserve enough space for headroom + skb_shared_info */ - *desc_len = PAGE_SIZE - NETSEC_RX_BUF_NON_DATA; - dma_dir = page_pool_get_dma_dir(dring->page_pool); - dma_sync_single_for_device(priv->dev, *dma_handle, *desc_len, dma_dir); + *desc_len = NETSEC_RX_BUF_SIZE; return page_address(page); } @@ -883,6 +881,8 @@ static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff *xdp) static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog, struct xdp_buff *xdp) { + struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX]; + unsigned int len = xdp->data_end - xdp->data; u32 ret = NETSEC_XDP_PASS; int err; u32 act; @@ -896,7 +896,9 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog, case XDP_TX: ret = netsec_xdp_xmit_back(priv, xdp); if (ret != NETSEC_XDP_TX) - xdp_return_buff(xdp); + 
__page_pool_put_page(dring->page_pool, + virt_to_head_page(xdp->data), + len, true); break; case XDP_REDIRECT: err = xdp_do_redirect(priv->ndev, xdp, prog); @@ -904,7 +906,9 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog, ret = NETSEC_XDP_REDIR; } else { ret = NETSEC_XDP_CONSUMED; - xdp_return_buff(xdp); + __page_pool_put_page(dring->page_pool, + virt_to_head_page(xdp->data), + len, true); } break; default: @@ -915,7 +919,9 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog, /* fall through -- handle aborts by dropping packet */ case XDP_DROP: ret = NETSEC_XDP_CONSUMED; - xdp_return_buff(xdp); + __page_pool_put_page(dring->page_pool, + virt_to_head_page(xdp->data), + len, true); break; } @@ -1014,7 +1020,8 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) * cache state. Since we paid the allocation cost if * building an skb fails try to put the page into cache */ - page_pool_recycle_direct(dring->page_pool, page); + __page_pool_put_page(dring->page_pool, page, + pkt_len, true); netif_err(priv, drv, priv->ndev, "rx failed to build skb\n"); break; @@ -1272,17 +1279,19 @@ static int netsec_setup_rx_dring(struct netsec_priv *priv) { struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX]; struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog); - struct page_pool_params pp_params = { 0 }; + struct page_pool_params pp_params = { + .order = 0, + /* internal DMA mapping in page_pool */ + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, + .pool_size = DESC_NUM, + .nid = NUMA_NO_NODE, + .dev = priv->dev, + .dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE, + .offset = NETSEC_RXBUF_HEADROOM, + .max_len = NETSEC_RX_BUF_SIZE, + }; int i, err; - pp_params.order = 0; - /* internal DMA mapping in page_pool */ - pp_params.flags = PP_FLAG_DMA_MAP; - pp_params.pool_size = DESC_NUM; - pp_params.nid = cpu_to_node(0); - pp_params.dev = priv->dev; - pp_params.dma_dir = xdp_prog ? 
DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; - dring->page_pool = page_pool_create(&pp_params); if (IS_ERR(dring->page_pool)) { err = PTR_ERR(dring->page_pool); diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 94f94686cf7d..487099092693 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -363,6 +363,12 @@ struct dma_features { unsigned int dvlan; unsigned int l3l4fnum; unsigned int arpoffsel; + /* TSN Features */ + unsigned int estwid; + unsigned int estdep; + unsigned int estsel; + unsigned int fpesel; + unsigned int tbssel; }; /* RX Buffer size must be multiple of 4/8/16 bytes */ diff --git a/drivers/net/ethernet/stmicro/stmmac/descs.h b/drivers/net/ethernet/stmicro/stmmac/descs.h index 9f0b9a9e63b3..49d6a866244f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/descs.h +++ b/drivers/net/ethernet/stmicro/stmmac/descs.h @@ -171,6 +171,15 @@ struct dma_extended_desc { __le32 des7; /* Tx/Rx Timestamp High */ }; +/* Enhanced descriptor for TBS */ +struct dma_edesc { + __le32 des4; + __le32 des5; + __le32 des6; + __le32 des7; + struct dma_desc basic; +}; + /* Transmit checksum insertion control */ #define TX_CIC_FULL 3 /* Include IP header and pseudoheader */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index dd9967aeda22..2342d497348e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -40,7 +40,7 @@ struct tegra_eqos { static int dwc_eth_dwmac_config_dt(struct platform_device *pdev, struct plat_stmmacenet_data *plat_dat) { - struct device_node *np = pdev->dev.of_node; + struct device *dev = &pdev->dev; u32 burst_map = 0; u32 bit_index = 0; u32 a_index = 0; @@ -52,9 +52,10 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev, return -ENOMEM; } - plat_dat->axi->axi_lpi_en = 
of_property_read_bool(np, "snps,en-lpi"); - if (of_property_read_u32(np, "snps,write-requests", - &plat_dat->axi->axi_wr_osr_lmt)) { + plat_dat->axi->axi_lpi_en = device_property_read_bool(dev, + "snps,en-lpi"); + if (device_property_read_u32(dev, "snps,write-requests", + &plat_dat->axi->axi_wr_osr_lmt)) { /** * Since the register has a reset value of 1, if property * is missing, default to 1. @@ -68,8 +69,8 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev, plat_dat->axi->axi_wr_osr_lmt--; } - if (of_property_read_u32(np, "snps,read-requests", - &plat_dat->axi->axi_rd_osr_lmt)) { + if (device_property_read_u32(dev, "snps,read-requests", + &plat_dat->axi->axi_rd_osr_lmt)) { /** * Since the register has a reset value of 1, if property * is missing, default to 1. @@ -82,7 +83,7 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev, */ plat_dat->axi->axi_rd_osr_lmt--; } - of_property_read_u32(np, "snps,burst-map", &burst_map); + device_property_read_u32(dev, "snps,burst-map", &burst_map); /* converts burst-map bitmask to burst array */ for (bit_index = 0; bit_index < 7; bit_index++) { @@ -270,6 +271,7 @@ static void *tegra_eqos_probe(struct platform_device *pdev, struct plat_stmmacenet_data *data, struct stmmac_resources *res) { + struct device *dev = &pdev->dev; struct tegra_eqos *eqos; int err; @@ -282,6 +284,9 @@ static void *tegra_eqos_probe(struct platform_device *pdev, eqos->dev = &pdev->dev; eqos->regs = res->addr; + if (!is_of_node(dev->fwnode)) + goto bypass_clk_reset_gpio; + eqos->clk_master = devm_clk_get(&pdev->dev, "master_bus"); if (IS_ERR(eqos->clk_master)) { err = PTR_ERR(eqos->clk_master); @@ -354,6 +359,7 @@ static void *tegra_eqos_probe(struct platform_device *pdev, usleep_range(2000, 4000); +bypass_clk_reset_gpio: data->fix_mac_speed = tegra_eqos_fix_speed; data->init = tegra_eqos_init; data->bsp_priv = eqos; @@ -421,7 +427,7 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev) void *priv; int ret; - data = 
of_device_get_match_data(&pdev->dev); + data = device_get_match_data(&pdev->dev); memset(&stmmac_res, 0, sizeof(struct stmmac_resources)); @@ -478,7 +484,7 @@ static int dwc_eth_dwmac_remove(struct platform_device *pdev) const struct dwc_eth_dwmac_data *data; int err; - data = of_device_get_match_data(&pdev->dev); + data = device_get_match_data(&pdev->dev); err = stmmac_dvr_remove(&pdev->dev); if (err < 0) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index bdb80421acac..9e4b83832938 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -55,6 +55,8 @@ struct mediatek_dwmac_plat_data { struct regmap *peri_regmap; struct device *dev; phy_interface_t phy_mode; + int num_clks_to_config; + bool rmii_clk_from_mac; bool rmii_rxc; }; @@ -73,21 +75,33 @@ struct mediatek_dwmac_variant { /* list of clocks required for mac */ static const char * const mt2712_dwmac_clk_l[] = { - "axi", "apb", "mac_main", "ptp_ref" + "axi", "apb", "mac_main", "ptp_ref", "rmii_internal" }; static int mt2712_set_interface(struct mediatek_dwmac_plat_data *plat) { + int rmii_clk_from_mac = plat->rmii_clk_from_mac ? RMII_CLK_SRC_INTERNAL : 0; int rmii_rxc = plat->rmii_rxc ? RMII_CLK_SRC_RXC : 0; u32 intf_val = 0; + /* The clock labeled as "rmii_internal" in mt2712_dwmac_clk_l is needed + * only in RMII(when MAC provides the reference clock), and useless for + * RGMII/MII/RMII(when PHY provides the reference clock). + * num_clks_to_config indicates the real number of clocks should be + * configured, equals to (plat->variant->num_clks - 1) in default for all the case, + * then +1 for rmii_clk_from_mac case. 
+ */ + plat->num_clks_to_config = plat->variant->num_clks - 1; + /* select phy interface in top control domain */ switch (plat->phy_mode) { case PHY_INTERFACE_MODE_MII: intf_val |= PHY_INTF_MII; break; case PHY_INTERFACE_MODE_RMII: - intf_val |= (PHY_INTF_RMII | rmii_rxc); + if (plat->rmii_clk_from_mac) + plat->num_clks_to_config++; + intf_val |= (PHY_INTF_RMII | rmii_rxc | rmii_clk_from_mac); break; case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_TXID: @@ -173,35 +187,50 @@ static int mt2712_set_delay(struct mediatek_dwmac_plat_data *plat) delay_val |= FIELD_PREP(ETH_DLY_RXC_INV, mac_delay->rx_inv); break; case PHY_INTERFACE_MODE_RMII: - /* the rmii reference clock is from external phy, - * and the property "rmii_rxc" indicates which pin(TXC/RXC) - * the reference clk is connected to. The reference clock is a - * received signal, so rx_delay/rx_inv are used to indicate - * the reference clock timing adjustment - */ - if (plat->rmii_rxc) { - /* the rmii reference clock from outside is connected - * to RXC pin, the reference clock will be adjusted - * by RXC delay macro circuit. - */ - delay_val |= FIELD_PREP(ETH_DLY_RXC_ENABLE, !!mac_delay->rx_delay); - delay_val |= FIELD_PREP(ETH_DLY_RXC_STAGES, mac_delay->rx_delay); - delay_val |= FIELD_PREP(ETH_DLY_RXC_INV, mac_delay->rx_inv); - } else { - /* the rmii reference clock from outside is connected - * to TXC pin, the reference clock will be adjusted - * by TXC delay macro circuit. + if (plat->rmii_clk_from_mac) { + /* case 1: mac provides the rmii reference clock, + * and the clock output to TXC pin. + * The egress timing can be adjusted by GTXC delay macro circuit. + * The ingress timing can be adjusted by TXC delay macro circuit. 
*/ delay_val |= FIELD_PREP(ETH_DLY_TXC_ENABLE, !!mac_delay->rx_delay); delay_val |= FIELD_PREP(ETH_DLY_TXC_STAGES, mac_delay->rx_delay); delay_val |= FIELD_PREP(ETH_DLY_TXC_INV, mac_delay->rx_inv); + + delay_val |= FIELD_PREP(ETH_DLY_GTXC_ENABLE, !!mac_delay->tx_delay); + delay_val |= FIELD_PREP(ETH_DLY_GTXC_STAGES, mac_delay->tx_delay); + delay_val |= FIELD_PREP(ETH_DLY_GTXC_INV, mac_delay->tx_inv); + } else { + /* case 2: the rmii reference clock is from external phy, + * and the property "rmii_rxc" indicates which pin(TXC/RXC) + * the reference clk is connected to. The reference clock is a + * received signal, so rx_delay/rx_inv are used to indicate + * the reference clock timing adjustment + */ + if (plat->rmii_rxc) { + /* the rmii reference clock from outside is connected + * to RXC pin, the reference clock will be adjusted + * by RXC delay macro circuit. + */ + delay_val |= FIELD_PREP(ETH_DLY_RXC_ENABLE, !!mac_delay->rx_delay); + delay_val |= FIELD_PREP(ETH_DLY_RXC_STAGES, mac_delay->rx_delay); + delay_val |= FIELD_PREP(ETH_DLY_RXC_INV, mac_delay->rx_inv); + } else { + /* the rmii reference clock from outside is connected + * to TXC pin, the reference clock will be adjusted + * by TXC delay macro circuit. 
+ */ + delay_val |= FIELD_PREP(ETH_DLY_TXC_ENABLE, !!mac_delay->rx_delay); + delay_val |= FIELD_PREP(ETH_DLY_TXC_STAGES, mac_delay->rx_delay); + delay_val |= FIELD_PREP(ETH_DLY_TXC_INV, mac_delay->rx_inv); + } + /* tx_inv will inverse the tx clock inside mac relateive to + * reference clock from external phy, + * and this bit is located in the same register with fine-tune + */ + if (mac_delay->tx_inv) + fine_val = ETH_RMII_DLY_TX_INV; } - /* tx_inv will inverse the tx clock inside mac relateive to - * reference clock from external phy, - * and this bit is located in the same register with fine-tune - */ - if (mac_delay->tx_inv) - fine_val = ETH_RMII_DLY_TX_INV; break; case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_TXID: @@ -278,6 +307,7 @@ static int mediatek_dwmac_config_dt(struct mediatek_dwmac_plat_data *plat) mac_delay->tx_inv = of_property_read_bool(plat->np, "mediatek,txc-inverse"); mac_delay->rx_inv = of_property_read_bool(plat->np, "mediatek,rxc-inverse"); plat->rmii_rxc = of_property_read_bool(plat->np, "mediatek,rmii-rxc"); + plat->rmii_clk_from_mac = of_property_read_bool(plat->np, "mediatek,rmii-clk-from-mac"); return 0; } @@ -294,6 +324,8 @@ static int mediatek_dwmac_clk_init(struct mediatek_dwmac_plat_data *plat) for (i = 0; i < num; i++) plat->clks[i].id = variant->clk_list[i]; + plat->num_clks_to_config = variant->num_clks; + return devm_clk_bulk_get(plat->dev, num, plat->clks); } @@ -321,7 +353,7 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv) return ret; } - ret = clk_bulk_prepare_enable(variant->num_clks, plat->clks); + ret = clk_bulk_prepare_enable(plat->num_clks_to_config, plat->clks); if (ret) { dev_err(plat->dev, "failed to enable clks, err = %d\n", ret); return ret; @@ -336,9 +368,8 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv) static void mediatek_dwmac_exit(struct platform_device *pdev, void *priv) { struct mediatek_dwmac_plat_data *plat = priv; - const struct 
mediatek_dwmac_variant *variant = plat->variant; - clk_bulk_disable_unprepare(variant->num_clks, plat->clks); + clk_bulk_disable_unprepare(plat->num_clks_to_config, plat->clks); pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 01b484cb177e..58e0511badba 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -335,14 +335,30 @@ static void sun8i_dwmac_dump_mac_regs(struct mac_device_info *hw, } } -static void sun8i_dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan) +static void sun8i_dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan, + bool rx, bool tx) { - writel(EMAC_RX_INT | EMAC_TX_INT, ioaddr + EMAC_INT_EN); + u32 value = readl(ioaddr + EMAC_INT_EN); + + if (rx) + value |= EMAC_RX_INT; + if (tx) + value |= EMAC_TX_INT; + + writel(value, ioaddr + EMAC_INT_EN); } -static void sun8i_dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan) +static void sun8i_dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan, + bool rx, bool tx) { - writel(0, ioaddr + EMAC_INT_EN); + u32 value = readl(ioaddr + EMAC_INT_EN); + + if (rx) + value &= ~EMAC_RX_INT; + if (tx) + value &= ~EMAC_TX_INT; + + writel(value, ioaddr + EMAC_INT_EN); } static void sun8i_dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index 2dc70d104161..af50af27550b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -64,6 +64,8 @@ #define GMAC_RXQCTRL_MCBCQEN_SHIFT 20 #define GMAC_RXQCTRL_TACPQE BIT(21) #define GMAC_RXQCTRL_TACPQE_SHIFT 21 +#define GMAC_RXQCTRL_FPRQ GENMASK(26, 24) +#define GMAC_RXQCTRL_FPRQ_SHIFT 24 /* MAC Packet Filtering */ #define GMAC_PACKET_FILTER_PR BIT(0) @@ -176,6 +178,8 @@ enum power_event { #define GMAC_CONFIG_SARC 
GENMASK(30, 28) #define GMAC_CONFIG_SARC_SHIFT 28 #define GMAC_CONFIG_IPC BIT(27) +#define GMAC_CONFIG_IPG GENMASK(26, 24) +#define GMAC_CONFIG_IPG_SHIFT 24 #define GMAC_CONFIG_2K BIT(22) #define GMAC_CONFIG_ACS BIT(20) #define GMAC_CONFIG_BE BIT(18) @@ -183,6 +187,7 @@ enum power_event { #define GMAC_CONFIG_JE BIT(16) #define GMAC_CONFIG_PS BIT(15) #define GMAC_CONFIG_FES BIT(14) +#define GMAC_CONFIG_FES_SHIFT 14 #define GMAC_CONFIG_DM BIT(13) #define GMAC_CONFIG_LM BIT(12) #define GMAC_CONFIG_DCRS BIT(9) @@ -190,6 +195,9 @@ enum power_event { #define GMAC_CONFIG_RE BIT(0) /* MAC extended config */ +#define GMAC_CONFIG_EIPG GENMASK(29, 25) +#define GMAC_CONFIG_EIPG_SHIFT 25 +#define GMAC_CONFIG_EIPG_EN BIT(24) #define GMAC_CONFIG_HDSMS GENMASK(22, 20) #define GMAC_CONFIG_HDSMS_SHIFT 20 #define GMAC_CONFIG_HDSMS_256 (0x2 << GMAC_CONFIG_HDSMS_SHIFT) @@ -231,6 +239,11 @@ enum power_event { /* MAC HW features3 bitmap */ #define GMAC_HW_FEAT_ASP GENMASK(29, 28) +#define GMAC_HW_FEAT_TBSSEL BIT(27) +#define GMAC_HW_FEAT_FPESEL BIT(26) +#define GMAC_HW_FEAT_ESTWID GENMASK(21, 20) +#define GMAC_HW_FEAT_ESTDEP GENMASK(19, 17) +#define GMAC_HW_FEAT_ESTSEL BIT(16) #define GMAC_HW_FEAT_FRPES GENMASK(14, 13) #define GMAC_HW_FEAT_FRPBS GENMASK(12, 11) #define GMAC_HW_FEAT_FRPSEL BIT(10) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 40ca00e596dd..f0c0ea616032 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -984,6 +984,8 @@ const struct stmmac_ops dwmac410_ops = { .set_arp_offload = dwmac4_set_arp_offload, .config_l3_filter = dwmac4_config_l3_filter, .config_l4_filter = dwmac4_config_l4_filter, + .est_configure = dwmac5_est_configure, + .fpe_configure = dwmac5_fpe_configure, }; const struct stmmac_ops dwmac510_ops = { @@ -1027,6 +1029,8 @@ const struct stmmac_ops dwmac510_ops = { .set_arp_offload = dwmac4_set_arp_offload, 
.config_l3_filter = dwmac4_config_l3_filter, .config_l4_filter = dwmac4_config_l4_filter, + .est_configure = dwmac5_est_configure, + .fpe_configure = dwmac5_fpe_configure, }; int dwmac4_setup(struct stmmac_priv *priv) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 3e14da69f378..eff82065a501 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -10,6 +10,7 @@ #include <linux/stmmac.h> #include "common.h" +#include "dwmac4.h" #include "dwmac4_descs.h" static int dwmac4_wrback_get_tx_status(void *data, struct stmmac_extra_stats *x, @@ -505,6 +506,14 @@ static void dwmac4_set_sec_addr(struct dma_desc *p, dma_addr_t addr) p->des3 = cpu_to_le32(upper_32_bits(addr) | RDES3_BUFFER2_VALID_ADDR); } +static void dwmac4_set_tbs(struct dma_edesc *p, u32 sec, u32 nsec) +{ + p->des4 = cpu_to_le32((sec & TDES4_LT) | TDES4_LTV); + p->des5 = cpu_to_le32(nsec & TDES5_LT); + p->des6 = 0; + p->des7 = 0; +} + const struct stmmac_desc_ops dwmac4_desc_ops = { .tx_status = dwmac4_wrback_get_tx_status, .rx_status = dwmac4_wrback_get_rx_status, @@ -534,6 +543,7 @@ const struct stmmac_desc_ops dwmac4_desc_ops = { .set_vlan = dwmac4_set_vlan, .get_rx_header_len = dwmac4_get_rx_header_len, .set_sec_addr = dwmac4_set_sec_addr, + .set_tbs = dwmac4_set_tbs, }; const struct stmmac_mode_ops dwmac4_ring_mode_ops = { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h index 6d92109dc9aa..6da070ccd737 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h @@ -73,6 +73,13 @@ #define TDES3_CONTEXT_TYPE BIT(30) #define TDES3_CONTEXT_TYPE_SHIFT 30 +/* TDES4 */ +#define TDES4_LTV BIT(31) +#define TDES4_LT GENMASK(7, 0) + +/* TDES5 */ +#define TDES5_LT GENMASK(31, 8) + /* TDS3 use for both format (read and write back) */ #define 
TDES3_OWN BIT(31) #define TDES3_OWN_SHIFT 31 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index c15409030710..bb29bfcd62c3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -404,6 +404,11 @@ static void dwmac4_get_hw_feature(void __iomem *ioaddr, /* 5.10 Features */ dma_cap->asp = (hw_cap & GMAC_HW_FEAT_ASP) >> 28; + dma_cap->tbssel = (hw_cap & GMAC_HW_FEAT_TBSSEL) >> 27; + dma_cap->fpesel = (hw_cap & GMAC_HW_FEAT_FPESEL) >> 26; + dma_cap->estwid = (hw_cap & GMAC_HW_FEAT_ESTWID) >> 20; + dma_cap->estdep = (hw_cap & GMAC_HW_FEAT_ESTDEP) >> 17; + dma_cap->estsel = (hw_cap & GMAC_HW_FEAT_ESTSEL) >> 16; dma_cap->frpes = (hw_cap & GMAC_HW_FEAT_FRPES) >> 13; dma_cap->frpbs = (hw_cap & GMAC_HW_FEAT_FRPBS) >> 11; dma_cap->frpsel = (hw_cap & GMAC_HW_FEAT_FRPSEL) >> 10; @@ -467,6 +472,25 @@ static void dwmac4_enable_sph(void __iomem *ioaddr, bool en, u32 chan) writel(value, ioaddr + DMA_CHAN_CONTROL(chan)); } +static int dwmac4_enable_tbs(void __iomem *ioaddr, bool en, u32 chan) +{ + u32 value = readl(ioaddr + DMA_CHAN_TX_CONTROL(chan)); + + if (en) + value |= DMA_CONTROL_EDSE; + else + value &= ~DMA_CONTROL_EDSE; + + writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan)); + + value = readl(ioaddr + DMA_CHAN_TX_CONTROL(chan)) & DMA_CONTROL_EDSE; + if (en && !value) + return -EIO; + + writel(DMA_TBS_DEF_FTOS, ioaddr + DMA_TBS_CTRL); + return 0; +} + const struct stmmac_dma_ops dwmac4_dma_ops = { .reset = dwmac4_dma_reset, .init = dwmac4_dma_init, @@ -523,4 +547,5 @@ const struct stmmac_dma_ops dwmac410_dma_ops = { .qmode = dwmac4_qmode, .set_bfsize = dwmac4_set_bfsize, .enable_sph = dwmac4_enable_sph, + .enable_tbs = dwmac4_enable_tbs, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h index 589931795847..8391ca63d943 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h 
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h @@ -22,6 +22,7 @@ #define DMA_DEBUG_STATUS_1 0x00001010 #define DMA_DEBUG_STATUS_2 0x00001014 #define DMA_AXI_BUS_MODE 0x00001028 +#define DMA_TBS_CTRL 0x00001050 /* DMA Bus Mode bitmap */ #define DMA_BUS_MODE_SFT_RESET BIT(0) @@ -82,6 +83,11 @@ #define DMA_AXI_BURST_LEN_MASK 0x000000FE +/* DMA TBS Control */ +#define DMA_TBS_FTOS GENMASK(31, 8) +#define DMA_TBS_FTOV BIT(0) +#define DMA_TBS_DEF_FTOS (DMA_TBS_FTOS | DMA_TBS_FTOV) + /* Following DMA defines are chanels oriented */ #define DMA_CHAN_BASE_ADDR 0x00001100 #define DMA_CHAN_BASE_OFFSET 0x80 @@ -114,6 +120,7 @@ #define DMA_CONTROL_MSS_MASK GENMASK(13, 0) /* DMA Tx Channel X Control register defines */ +#define DMA_CONTROL_EDSE BIT(28) #define DMA_CONTROL_TSE BIT(12) #define DMA_CONTROL_OSP BIT(4) #define DMA_CONTROL_ST BIT(0) @@ -168,6 +175,8 @@ /* DMA default interrupt mask for 4.00 */ #define DMA_CHAN_INTR_DEFAULT_MASK (DMA_CHAN_INTR_NORMAL | \ DMA_CHAN_INTR_ABNORMAL) +#define DMA_CHAN_INTR_DEFAULT_RX (DMA_CHAN_INTR_ENA_RIE) +#define DMA_CHAN_INTR_DEFAULT_TX (DMA_CHAN_INTR_ENA_TIE) #define DMA_CHAN_INTR_NORMAL_4_10 (DMA_CHAN_INTR_ENA_NIE_4_10 | \ DMA_CHAN_INTR_ENA_RIE | \ @@ -178,6 +187,8 @@ /* DMA default interrupt mask for 4.10a */ #define DMA_CHAN_INTR_DEFAULT_MASK_4_10 (DMA_CHAN_INTR_NORMAL_4_10 | \ DMA_CHAN_INTR_ABNORMAL_4_10) +#define DMA_CHAN_INTR_DEFAULT_RX_4_10 (DMA_CHAN_INTR_ENA_RIE) +#define DMA_CHAN_INTR_DEFAULT_TX_4_10 (DMA_CHAN_INTR_ENA_TIE) /* channel 0 specific fields */ #define DMA_CHAN0_DBG_STAT_TPS GENMASK(15, 12) @@ -186,9 +197,10 @@ #define DMA_CHAN0_DBG_STAT_RPS_SHIFT 8 int dwmac4_dma_reset(void __iomem *ioaddr); -void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan); -void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan); -void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan); +void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); +void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 
chan, bool rx, bool tx); +void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); +void dwmac410_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); void dwmac4_dma_start_tx(void __iomem *ioaddr, u32 chan); void dwmac4_dma_stop_tx(void __iomem *ioaddr, u32 chan); void dwmac4_dma_start_rx(void __iomem *ioaddr, u32 chan); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index f2a29a90e085..9becca280074 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -97,21 +97,52 @@ void dwmac4_set_rx_ring_len(void __iomem *ioaddr, u32 len, u32 chan) writel(len, ioaddr + DMA_CHAN_RX_RING_LEN(chan)); } -void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan) +void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) { - writel(DMA_CHAN_INTR_DEFAULT_MASK, ioaddr + - DMA_CHAN_INTR_ENA(chan)); + u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan)); + + if (rx) + value |= DMA_CHAN_INTR_DEFAULT_RX; + if (tx) + value |= DMA_CHAN_INTR_DEFAULT_TX; + + writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan)); } -void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan) +void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) { - writel(DMA_CHAN_INTR_DEFAULT_MASK_4_10, - ioaddr + DMA_CHAN_INTR_ENA(chan)); + u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan)); + + if (rx) + value |= DMA_CHAN_INTR_DEFAULT_RX_4_10; + if (tx) + value |= DMA_CHAN_INTR_DEFAULT_TX_4_10; + + writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan)); } -void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan) +void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) { - writel(0, ioaddr + DMA_CHAN_INTR_ENA(chan)); + u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan)); + + if (rx) + value &= ~DMA_CHAN_INTR_DEFAULT_RX; + if (tx) + value &= ~DMA_CHAN_INTR_DEFAULT_TX; + + writel(value, ioaddr + 
DMA_CHAN_INTR_ENA(chan)); +} + +void dwmac410_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) +{ + u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan)); + + if (rx) + value &= ~DMA_CHAN_INTR_DEFAULT_RX_4_10; + if (tx) + value &= ~DMA_CHAN_INTR_DEFAULT_TX_4_10; + + writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan)); } int dwmac4_dma_interrupt(void __iomem *ioaddr, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c index e436fa160c7d..494c859b4ade 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c @@ -550,3 +550,122 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, writel(val, ioaddr + MAC_PPS_CONTROL); return 0; } + +static int dwmac5_est_write(void __iomem *ioaddr, u32 reg, u32 val, bool gcl) +{ + u32 ctrl; + + writel(val, ioaddr + MTL_EST_GCL_DATA); + + ctrl = (reg << ADDR_SHIFT); + ctrl |= gcl ? 0 : GCRR; + + writel(ctrl, ioaddr + MTL_EST_GCL_CONTROL); + + ctrl |= SRWO; + writel(ctrl, ioaddr + MTL_EST_GCL_CONTROL); + + return readl_poll_timeout(ioaddr + MTL_EST_GCL_CONTROL, + ctrl, !(ctrl & SRWO), 100, 5000); +} + +int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg, + unsigned int ptp_rate) +{ + u32 speed, total_offset, offset, ctrl, ctr_low; + u32 extcfg = readl(ioaddr + GMAC_EXT_CONFIG); + u32 mac_cfg = readl(ioaddr + GMAC_CONFIG); + int i, ret = 0x0; + u64 total_ctr; + + if (extcfg & GMAC_CONFIG_EIPG_EN) { + offset = (extcfg & GMAC_CONFIG_EIPG) >> GMAC_CONFIG_EIPG_SHIFT; + offset = 104 + (offset * 8); + } else { + offset = (mac_cfg & GMAC_CONFIG_IPG) >> GMAC_CONFIG_IPG_SHIFT; + offset = 96 - (offset * 8); + } + + speed = mac_cfg & (GMAC_CONFIG_PS | GMAC_CONFIG_FES); + speed = speed >> GMAC_CONFIG_FES_SHIFT; + + switch (speed) { + case 0x0: + offset = offset * 1000; /* 1G */ + break; + case 0x1: + offset = offset * 400; /* 2.5G */ + break; + case 0x2: + offset = offset * 100000; /* 10M */ + break; + case 
0x3: + offset = offset * 10000; /* 100M */ + break; + default: + return -EINVAL; + } + + offset = offset / 1000; + + ret |= dwmac5_est_write(ioaddr, BTR_LOW, cfg->btr[0], false); + ret |= dwmac5_est_write(ioaddr, BTR_HIGH, cfg->btr[1], false); + ret |= dwmac5_est_write(ioaddr, TER, cfg->ter, false); + ret |= dwmac5_est_write(ioaddr, LLR, cfg->gcl_size, false); + if (ret) + return ret; + + total_offset = 0; + for (i = 0; i < cfg->gcl_size; i++) { + ret = dwmac5_est_write(ioaddr, i, cfg->gcl[i] + offset, true); + if (ret) + return ret; + + total_offset += offset; + } + + total_ctr = cfg->ctr[0] + cfg->ctr[1] * 1000000000; + total_ctr += total_offset; + + ctr_low = do_div(total_ctr, 1000000000); + + ret |= dwmac5_est_write(ioaddr, CTR_LOW, ctr_low, false); + ret |= dwmac5_est_write(ioaddr, CTR_HIGH, total_ctr, false); + if (ret) + return ret; + + ctrl = readl(ioaddr + MTL_EST_CONTROL); + ctrl &= ~PTOV; + ctrl |= ((1000000000 / ptp_rate) * 6) << PTOV_SHIFT; + if (cfg->enable) + ctrl |= EEST | SSWL; + else + ctrl &= ~EEST; + + writel(ctrl, ioaddr + MTL_EST_CONTROL); + return 0; +} + +void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, + bool enable) +{ + u32 value; + + if (!enable) { + value = readl(ioaddr + MAC_FPE_CTRL_STS); + + value &= ~EFPE; + + writel(value, ioaddr + MAC_FPE_CTRL_STS); + return; + } + + value = readl(ioaddr + GMAC_RXQ_CTRL1); + value &= ~GMAC_RXQCTRL_FPRQ; + value |= (num_rxq - 1) << GMAC_RXQCTRL_FPRQ_SHIFT; + writel(value, ioaddr + GMAC_RXQ_CTRL1); + + value = readl(ioaddr + MAC_FPE_CTRL_STS); + value |= EFPE; + writel(value, ioaddr + MAC_FPE_CTRL_STS); +} diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h index 23fecf68f781..3e8faa96b4d4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h @@ -11,6 +11,9 @@ #define PRTYEN BIT(1) #define TMOUTEN BIT(0) +#define MAC_FPE_CTRL_STS 0x00000234 +#define EFPE BIT(0) + 
#define MAC_PPS_CONTROL 0x00000b70 #define PPS_MAXIDX(x) ((((x) + 1) * 8) - 1) #define PPS_MINIDX(x) ((x) * 8) @@ -30,6 +33,23 @@ #define MAC_PPSx_INTERVAL(x) (0x00000b88 + ((x) * 0x10)) #define MAC_PPSx_WIDTH(x) (0x00000b8c + ((x) * 0x10)) +#define MTL_EST_CONTROL 0x00000c50 +#define PTOV GENMASK(31, 24) +#define PTOV_SHIFT 24 +#define SSWL BIT(1) +#define EEST BIT(0) +#define MTL_EST_GCL_CONTROL 0x00000c80 +#define BTR_LOW 0x0 +#define BTR_HIGH 0x1 +#define CTR_LOW 0x2 +#define CTR_HIGH 0x3 +#define TER 0x4 +#define LLR 0x5 +#define ADDR_SHIFT 8 +#define GCRR BIT(2) +#define SRWO BIT(0) +#define MTL_EST_GCL_DATA 0x00000c84 + #define MTL_RXP_CONTROL_STATUS 0x00000ca0 #define RXPI BIT(31) #define NPE GENMASK(23, 16) @@ -83,5 +103,9 @@ int dwmac5_rxp_config(void __iomem *ioaddr, struct stmmac_tc_entry *entries, int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, struct stmmac_pps_cfg *cfg, bool enable, u32 sub_second_inc, u32 systime_flags); +int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg, + unsigned int ptp_rate); +void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, + bool enable); #endif /* __DWMAC5_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h index 292b880f3f9f..e5dbd0bc257e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h @@ -96,6 +96,8 @@ /* DMA default interrupt mask */ #define DMA_INTR_DEFAULT_MASK (DMA_INTR_NORMAL | DMA_INTR_ABNORMAL) +#define DMA_INTR_DEFAULT_RX (DMA_INTR_ENA_RIE) +#define DMA_INTR_DEFAULT_TX (DMA_INTR_ENA_TIE) /* DMA Status register defines */ #define DMA_STATUS_GLPII 0x40000000 /* GMAC LPI interrupt */ @@ -130,8 +132,8 @@ #define NUM_DWMAC1000_DMA_REGS 23 void dwmac_enable_dma_transmission(void __iomem *ioaddr); -void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan); -void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan); +void 
dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); +void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); void dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan); void dwmac_dma_stop_tx(void __iomem *ioaddr, u32 chan); void dwmac_dma_start_rx(void __iomem *ioaddr, u32 chan); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c index 1bc25aa86dbd..688d36095333 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c @@ -37,14 +37,28 @@ void dwmac_enable_dma_transmission(void __iomem *ioaddr) writel(1, ioaddr + DMA_XMT_POLL_DEMAND); } -void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan) +void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) { - writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA); + u32 value = readl(ioaddr + DMA_INTR_ENA); + + if (rx) + value |= DMA_INTR_DEFAULT_RX; + if (tx) + value |= DMA_INTR_DEFAULT_TX; + + writel(value, ioaddr + DMA_INTR_ENA); } -void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan) +void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) { - writel(0, ioaddr + DMA_INTR_ENA); + u32 value = readl(ioaddr + DMA_INTR_ENA); + + if (rx) + value &= ~DMA_INTR_DEFAULT_RX; + if (tx) + value &= ~DMA_INTR_DEFAULT_TX; + + writel(value, ioaddr + DMA_INTR_ENA); } void dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index ef8a07c68ca7..6c3b8a950f58 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -73,6 +73,9 @@ #define XGMAC_RXQ_CTRL0 0x000000a0 #define XGMAC_RXQEN(x) GENMASK((x) * 2 + 1, (x) * 2) #define XGMAC_RXQEN_SHIFT(x) ((x) * 2) +#define XGMAC_RXQ_CTRL1 0x000000a4 +#define XGMAC_RQ GENMASK(7, 4) +#define XGMAC_RQ_SHIFT 4 #define XGMAC_RXQ_CTRL2 
0x000000a8 #define XGMAC_RXQ_CTRL3 0x000000ac #define XGMAC_PSRQ(x) GENMASK((x) * 8 + 7, (x) * 8) @@ -136,6 +139,11 @@ #define XGMAC_HWFEAT_TXQCNT GENMASK(9, 6) #define XGMAC_HWFEAT_RXQCNT GENMASK(3, 0) #define XGMAC_HW_FEATURE3 0x00000128 +#define XGMAC_HWFEAT_TBSSEL BIT(27) +#define XGMAC_HWFEAT_FPESEL BIT(26) +#define XGMAC_HWFEAT_ESTWID GENMASK(24, 23) +#define XGMAC_HWFEAT_ESTDEP GENMASK(22, 20) +#define XGMAC_HWFEAT_ESTSEL BIT(19) #define XGMAC_HWFEAT_ASP GENMASK(15, 14) #define XGMAC_HWFEAT_DVLAN BIT(13) #define XGMAC_HWFEAT_FRPES GENMASK(12, 11) @@ -148,6 +156,8 @@ #define XGMAC_MDIO_ADDR 0x00000200 #define XGMAC_MDIO_DATA 0x00000204 #define XGMAC_MDIO_C22P 0x00000220 +#define XGMAC_FPE_CTRL_STS 0x00000280 +#define XGMAC_EFPE BIT(0) #define XGMAC_ADDRx_HIGH(x) (0x00000300 + (x) * 0x8) #define XGMAC_ADDR_MAX 32 #define XGMAC_AE BIT(31) @@ -237,6 +247,22 @@ #define XGMAC_TC_PRTY_MAP1 0x00001044 #define XGMAC_PSTC(x) GENMASK((x) * 8 + 7, (x) * 8) #define XGMAC_PSTC_SHIFT(x) ((x) * 8) +#define XGMAC_MTL_EST_CONTROL 0x00001050 +#define XGMAC_PTOV GENMASK(31, 23) +#define XGMAC_PTOV_SHIFT 23 +#define XGMAC_SSWL BIT(1) +#define XGMAC_EEST BIT(0) +#define XGMAC_MTL_EST_GCL_CONTROL 0x00001080 +#define XGMAC_BTR_LOW 0x0 +#define XGMAC_BTR_HIGH 0x1 +#define XGMAC_CTR_LOW 0x2 +#define XGMAC_CTR_HIGH 0x3 +#define XGMAC_TER 0x4 +#define XGMAC_LLR 0x5 +#define XGMAC_ADDR_SHIFT 8 +#define XGMAC_GCRR BIT(2) +#define XGMAC_SRWO BIT(0) +#define XGMAC_MTL_EST_GCL_DATA 0x00001084 #define XGMAC_MTL_RXP_CONTROL_STATUS 0x000010a0 #define XGMAC_RXPI BIT(31) #define XGMAC_NPE GENMASK(23, 16) @@ -321,6 +347,13 @@ #define XGMAC_TDPS GENMASK(29, 0) #define XGMAC_RX_EDMA_CTRL 0x00003044 #define XGMAC_RDPS GENMASK(29, 0) +#define XGMAC_DMA_TBS_CTRL0 0x00003054 +#define XGMAC_DMA_TBS_CTRL1 0x00003058 +#define XGMAC_DMA_TBS_CTRL2 0x0000305c +#define XGMAC_DMA_TBS_CTRL3 0x00003060 +#define XGMAC_FTOS GENMASK(31, 8) +#define XGMAC_FTOV BIT(0) +#define XGMAC_DEF_FTOS (XGMAC_FTOS | XGMAC_FTOV) 
#define XGMAC_DMA_SAFETY_INT_STATUS 0x00003064 #define XGMAC_MCSIS BIT(31) #define XGMAC_MSUIS BIT(29) @@ -335,6 +368,7 @@ #define XGMAC_SPH BIT(24) #define XGMAC_PBLx8 BIT(16) #define XGMAC_DMA_CH_TX_CONTROL(x) (0x00003104 + (0x80 * (x))) +#define XGMAC_EDSE BIT(28) #define XGMAC_TxPBL GENMASK(21, 16) #define XGMAC_TxPBL_SHIFT 16 #define XGMAC_TSE BIT(12) @@ -363,6 +397,8 @@ #define XGMAC_TIE BIT(0) #define XGMAC_DMA_INT_DEFAULT_EN (XGMAC_NIE | XGMAC_AIE | XGMAC_RBUE | \ XGMAC_RIE | XGMAC_TIE) +#define XGMAC_DMA_INT_DEFAULT_RX (XGMAC_RBUE | XGMAC_RIE) +#define XGMAC_DMA_INT_DEFAULT_TX (XGMAC_TIE) #define XGMAC_DMA_CH_Rx_WATCHDOG(x) (0x0000313c + (0x80 * (x))) #define XGMAC_RWT GENMASK(7, 0) #define XGMAC_DMA_CH_STATUS(x) (0x00003160 + (0x80 * (x))) @@ -377,6 +413,9 @@ #define XGMAC_REGSIZE ((0x0000317c + (0x80 * 15)) / 4) /* Descriptors */ +#define XGMAC_TDES0_LTV BIT(31) +#define XGMAC_TDES0_LT GENMASK(7, 0) +#define XGMAC_TDES1_LT GENMASK(31, 8) #define XGMAC_TDES2_IVT GENMASK(31, 16) #define XGMAC_TDES2_IVT_SHIFT 16 #define XGMAC_TDES2_IOC BIT(31) @@ -395,6 +434,7 @@ #define XGMAC_TDES3_TCMSSV BIT(26) #define XGMAC_TDES3_SAIC GENMASK(25, 23) #define XGMAC_TDES3_SAIC_SHIFT 23 +#define XGMAC_TDES3_TBSV BIT(24) #define XGMAC_TDES3_THL GENMASK(22, 19) #define XGMAC_TDES3_THL_SHIFT 19 #define XGMAC_TDES3_IVTIR GENMASK(19, 18) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 082f5ee9e525..2af3ac5409b7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -1359,6 +1359,81 @@ static void dwxgmac2_set_arp_offload(struct mac_device_info *hw, bool en, writel(value, ioaddr + XGMAC_RX_CONFIG); } +static int dwxgmac3_est_write(void __iomem *ioaddr, u32 reg, u32 val, bool gcl) +{ + u32 ctrl; + + writel(val, ioaddr + XGMAC_MTL_EST_GCL_DATA); + + ctrl = (reg << XGMAC_ADDR_SHIFT); + ctrl |= gcl ? 
0 : XGMAC_GCRR; + + writel(ctrl, ioaddr + XGMAC_MTL_EST_GCL_CONTROL); + + ctrl |= XGMAC_SRWO; + writel(ctrl, ioaddr + XGMAC_MTL_EST_GCL_CONTROL); + + return readl_poll_timeout_atomic(ioaddr + XGMAC_MTL_EST_GCL_CONTROL, + ctrl, !(ctrl & XGMAC_SRWO), 100, 5000); +} + +static int dwxgmac3_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg, + unsigned int ptp_rate) +{ + int i, ret = 0x0; + u32 ctrl; + + ret |= dwxgmac3_est_write(ioaddr, XGMAC_BTR_LOW, cfg->btr[0], false); + ret |= dwxgmac3_est_write(ioaddr, XGMAC_BTR_HIGH, cfg->btr[1], false); + ret |= dwxgmac3_est_write(ioaddr, XGMAC_TER, cfg->ter, false); + ret |= dwxgmac3_est_write(ioaddr, XGMAC_LLR, cfg->gcl_size, false); + ret |= dwxgmac3_est_write(ioaddr, XGMAC_CTR_LOW, cfg->ctr[0], false); + ret |= dwxgmac3_est_write(ioaddr, XGMAC_CTR_HIGH, cfg->ctr[1], false); + if (ret) + return ret; + + for (i = 0; i < cfg->gcl_size; i++) { + ret = dwxgmac3_est_write(ioaddr, i, cfg->gcl[i], true); + if (ret) + return ret; + } + + ctrl = readl(ioaddr + XGMAC_MTL_EST_CONTROL); + ctrl &= ~XGMAC_PTOV; + ctrl |= ((1000000000 / ptp_rate) * 9) << XGMAC_PTOV_SHIFT; + if (cfg->enable) + ctrl |= XGMAC_EEST | XGMAC_SSWL; + else + ctrl &= ~XGMAC_EEST; + + writel(ctrl, ioaddr + XGMAC_MTL_EST_CONTROL); + return 0; +} + +static void dwxgmac3_fpe_configure(void __iomem *ioaddr, u32 num_txq, + u32 num_rxq, bool enable) +{ + u32 value; + + if (!enable) { + value = readl(ioaddr + XGMAC_FPE_CTRL_STS); + + value &= ~XGMAC_EFPE; + + writel(value, ioaddr + XGMAC_FPE_CTRL_STS); + return; + } + + value = readl(ioaddr + XGMAC_RXQ_CTRL1); + value &= ~XGMAC_RQ; + value |= (num_rxq - 1) << XGMAC_RQ_SHIFT; + writel(value, ioaddr + XGMAC_RXQ_CTRL1); + + value = readl(ioaddr + XGMAC_FPE_CTRL_STS); + value |= XGMAC_EFPE; + writel(value, ioaddr + XGMAC_FPE_CTRL_STS); +} + const struct stmmac_ops dwxgmac210_ops = { .core_init = dwxgmac2_core_init, .set_mac = dwxgmac2_set_mac, @@ -1402,6 +1477,8 @@ const struct stmmac_ops dwxgmac210_ops = { 
.config_l3_filter = dwxgmac2_config_l3_filter, .config_l4_filter = dwxgmac2_config_l4_filter, .set_arp_offload = dwxgmac2_set_arp_offload, + .est_configure = dwxgmac3_est_configure, + .fpe_configure = dwxgmac3_fpe_configure, }; int dwxgmac2_setup(struct stmmac_priv *priv) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c index bd5838ce1e8a..c3d654cfa9ef 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c @@ -339,6 +339,14 @@ static void dwxgmac2_set_vlan(struct dma_desc *p, u32 type) p->des2 |= cpu_to_le32(type & XGMAC_TDES2_VTIR); } +static void dwxgmac2_set_tbs(struct dma_edesc *p, u32 sec, u32 nsec) +{ + p->des4 = cpu_to_le32((sec & XGMAC_TDES0_LT) | XGMAC_TDES0_LTV); + p->des5 = cpu_to_le32(nsec & XGMAC_TDES1_LT); + p->des6 = 0; + p->des7 = 0; +} + const struct stmmac_desc_ops dwxgmac210_desc_ops = { .tx_status = dwxgmac2_get_tx_status, .rx_status = dwxgmac2_get_rx_status, @@ -368,4 +376,5 @@ const struct stmmac_desc_ops dwxgmac210_desc_ops = { .set_sarc = dwxgmac2_set_sarc, .set_vlan_tag = dwxgmac2_set_vlan_tag, .set_vlan = dwxgmac2_set_vlan, + .set_tbs = dwxgmac2_set_tbs, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index f3f08ccc379b..77308c5c5d29 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -248,14 +248,30 @@ static void dwxgmac2_dma_tx_mode(void __iomem *ioaddr, int mode, writel(value, ioaddr + XGMAC_MTL_TXQ_OPMODE(channel)); } -static void dwxgmac2_enable_dma_irq(void __iomem *ioaddr, u32 chan) +static void dwxgmac2_enable_dma_irq(void __iomem *ioaddr, u32 chan, + bool rx, bool tx) { - writel(XGMAC_DMA_INT_DEFAULT_EN, ioaddr + XGMAC_DMA_CH_INT_EN(chan)); + u32 value = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan)); + + if (rx) + value |= XGMAC_DMA_INT_DEFAULT_RX; + if 
(tx) + value |= XGMAC_DMA_INT_DEFAULT_TX; + + writel(value, ioaddr + XGMAC_DMA_CH_INT_EN(chan)); } -static void dwxgmac2_disable_dma_irq(void __iomem *ioaddr, u32 chan) +static void dwxgmac2_disable_dma_irq(void __iomem *ioaddr, u32 chan, + bool rx, bool tx) { - writel(0, ioaddr + XGMAC_DMA_CH_INT_EN(chan)); + u32 value = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan)); + + if (rx) + value &= ~XGMAC_DMA_INT_DEFAULT_RX; + if (tx) + value &= ~XGMAC_DMA_INT_DEFAULT_TX; + + writel(value, ioaddr + XGMAC_DMA_CH_INT_EN(chan)); } static void dwxgmac2_dma_start_tx(void __iomem *ioaddr, u32 chan) @@ -413,6 +429,11 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr, /* MAC HW feature 3 */ hw_cap = readl(ioaddr + XGMAC_HW_FEATURE3); + dma_cap->tbssel = (hw_cap & XGMAC_HWFEAT_TBSSEL) >> 27; + dma_cap->fpesel = (hw_cap & XGMAC_HWFEAT_FPESEL) >> 26; + dma_cap->estwid = (hw_cap & XGMAC_HWFEAT_ESTWID) >> 23; + dma_cap->estdep = (hw_cap & XGMAC_HWFEAT_ESTDEP) >> 20; + dma_cap->estsel = (hw_cap & XGMAC_HWFEAT_ESTSEL) >> 19; dma_cap->asp = (hw_cap & XGMAC_HWFEAT_ASP) >> 14; dma_cap->dvlan = (hw_cap & XGMAC_HWFEAT_DVLAN) >> 13; dma_cap->frpes = (hw_cap & XGMAC_HWFEAT_FRPES) >> 11; @@ -503,6 +524,28 @@ static void dwxgmac2_enable_sph(void __iomem *ioaddr, bool en, u32 chan) writel(value, ioaddr + XGMAC_DMA_CH_CONTROL(chan)); } +static int dwxgmac2_enable_tbs(void __iomem *ioaddr, bool en, u32 chan) +{ + u32 value = readl(ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan)); + + if (en) + value |= XGMAC_EDSE; + else + value &= ~XGMAC_EDSE; + + writel(value, ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan)); + + value = readl(ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan)) & XGMAC_EDSE; + if (en && !value) + return -EIO; + + writel(XGMAC_DEF_FTOS, ioaddr + XGMAC_DMA_TBS_CTRL0); + writel(XGMAC_DEF_FTOS, ioaddr + XGMAC_DMA_TBS_CTRL1); + writel(XGMAC_DEF_FTOS, ioaddr + XGMAC_DMA_TBS_CTRL2); + writel(XGMAC_DEF_FTOS, ioaddr + XGMAC_DMA_TBS_CTRL3); + return 0; +} + const struct stmmac_dma_ops dwxgmac210_dma_ops = { .reset 
= dwxgmac2_dma_reset, .init = dwxgmac2_dma_init, @@ -530,4 +573,5 @@ const struct stmmac_dma_ops dwxgmac210_dma_ops = { .qmode = dwxgmac2_qmode, .set_bfsize = dwxgmac2_set_bfsize, .enable_sph = dwxgmac2_enable_sph, + .enable_tbs = dwxgmac2_enable_tbs, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index aa5b917398fe..df63b0367aff 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -29,6 +29,7 @@ struct stmmac_extra_stats; struct stmmac_safety_stats; struct dma_desc; struct dma_extended_desc; +struct dma_edesc; /* Descriptors helpers */ struct stmmac_desc_ops { @@ -95,6 +96,7 @@ struct stmmac_desc_ops { void (*set_vlan_tag)(struct dma_desc *p, u16 tag, u16 inner_tag, u32 inner_type); void (*set_vlan)(struct dma_desc *p, u32 type); + void (*set_tbs)(struct dma_edesc *p, u32 sec, u32 nsec); }; #define stmmac_init_rx_desc(__priv, __args...) \ @@ -157,6 +159,8 @@ struct stmmac_desc_ops { stmmac_do_void_callback(__priv, desc, set_vlan_tag, __args) #define stmmac_set_desc_vlan(__priv, __args...) \ stmmac_do_void_callback(__priv, desc, set_vlan, __args) +#define stmmac_set_desc_tbs(__priv, __args...) 
\ + stmmac_do_void_callback(__priv, desc, set_tbs, __args) struct stmmac_dma_cfg; struct dma_features; @@ -187,8 +191,10 @@ struct stmmac_dma_ops { void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x, void __iomem *ioaddr); void (*enable_dma_transmission) (void __iomem *ioaddr); - void (*enable_dma_irq)(void __iomem *ioaddr, u32 chan); - void (*disable_dma_irq)(void __iomem *ioaddr, u32 chan); + void (*enable_dma_irq)(void __iomem *ioaddr, u32 chan, + bool rx, bool tx); + void (*disable_dma_irq)(void __iomem *ioaddr, u32 chan, + bool rx, bool tx); void (*start_tx)(void __iomem *ioaddr, u32 chan); void (*stop_tx)(void __iomem *ioaddr, u32 chan); void (*start_rx)(void __iomem *ioaddr, u32 chan); @@ -208,6 +214,7 @@ struct stmmac_dma_ops { void (*qmode)(void __iomem *ioaddr, u32 channel, u8 qmode); void (*set_bfsize)(void __iomem *ioaddr, int bfsize, u32 chan); void (*enable_sph)(void __iomem *ioaddr, bool en, u32 chan); + int (*enable_tbs)(void __iomem *ioaddr, bool en, u32 chan); }; #define stmmac_reset(__priv, __args...) \ @@ -266,6 +273,8 @@ struct stmmac_dma_ops { stmmac_do_void_callback(__priv, dma, set_bfsize, __args) #define stmmac_enable_sph(__priv, __args...) \ stmmac_do_void_callback(__priv, dma, enable_sph, __args) +#define stmmac_enable_tbs(__priv, __args...) \ + stmmac_do_callback(__priv, dma, enable_tbs, __args) struct mac_device_info; struct net_device; @@ -274,6 +283,7 @@ struct stmmac_safety_stats; struct stmmac_tc_entry; struct stmmac_pps_cfg; struct stmmac_rss; +struct stmmac_est; /* Helpers to program the MAC core */ struct stmmac_ops { @@ -371,6 +381,10 @@ struct stmmac_ops { bool en, bool udp, bool sa, bool inv, u32 match); void (*set_arp_offload)(struct mac_device_info *hw, bool en, u32 addr); + int (*est_configure)(void __iomem *ioaddr, struct stmmac_est *cfg, + unsigned int ptp_rate); + void (*fpe_configure)(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, + bool enable); }; #define stmmac_core_init(__priv, __args...) 
\ @@ -457,6 +471,10 @@ struct stmmac_ops { stmmac_do_callback(__priv, mac, config_l4_filter, __args) #define stmmac_set_arp_offload(__priv, __args...) \ stmmac_do_void_callback(__priv, mac, set_arp_offload, __args) +#define stmmac_est_configure(__priv, __args...) \ + stmmac_do_callback(__priv, mac, est_configure, __args) +#define stmmac_fpe_configure(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, fpe_configure, __args) /* PTP and HW Timer helpers */ struct stmmac_hwtimestamp { @@ -514,6 +532,8 @@ struct stmmac_priv; struct tc_cls_u32_offload; struct tc_cbs_qopt_offload; struct flow_cls_offload; +struct tc_taprio_qopt_offload; +struct tc_etf_qopt_offload; struct stmmac_tc_ops { int (*init)(struct stmmac_priv *priv); @@ -523,6 +543,10 @@ struct stmmac_tc_ops { struct tc_cbs_qopt_offload *qopt); int (*setup_cls)(struct stmmac_priv *priv, struct flow_cls_offload *cls); + int (*setup_taprio)(struct stmmac_priv *priv, + struct tc_taprio_qopt_offload *qopt); + int (*setup_etf)(struct stmmac_priv *priv, + struct tc_etf_qopt_offload *qopt); }; #define stmmac_tc_init(__priv, __args...) \ @@ -533,6 +557,10 @@ struct stmmac_tc_ops { stmmac_do_callback(__priv, tc, setup_cbs, __args) #define stmmac_tc_setup_cls(__priv, __args...) \ stmmac_do_callback(__priv, tc, setup_cls, __args) +#define stmmac_tc_setup_taprio(__priv, __args...) \ + stmmac_do_callback(__priv, tc, setup_taprio, __args) +#define stmmac_tc_setup_etf(__priv, __args...) 
\ + stmmac_do_callback(__priv, tc, setup_etf, __args) struct stmmac_counters; diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c index 252cf48c5816..a57b0fa815ab 100644 --- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c @@ -119,6 +119,13 @@ #define MMC_RX_ICMP_GD_OCTETS 0x180 #define MMC_RX_ICMP_ERR_OCTETS 0x184 +#define MMC_TX_FPE_FRAG 0x1a8 +#define MMC_TX_HOLD_REQ 0x1ac +#define MMC_RX_PKT_ASSEMBLY_ERR 0x1c8 +#define MMC_RX_PKT_SMD_ERR 0x1cc +#define MMC_RX_PKT_ASSEMBLY_OK 0x1d0 +#define MMC_RX_FPE_FRAG 0x1d4 + /* XGMAC MMC Registers */ #define MMC_XGMAC_TX_OCTET_GB 0x14 #define MMC_XGMAC_TX_PKT_GB 0x1c @@ -315,6 +322,15 @@ static void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc) mmc->mmc_rx_tcp_err_octets += readl(mmcaddr + MMC_RX_TCP_ERR_OCTETS); mmc->mmc_rx_icmp_gd_octets += readl(mmcaddr + MMC_RX_ICMP_GD_OCTETS); mmc->mmc_rx_icmp_err_octets += readl(mmcaddr + MMC_RX_ICMP_ERR_OCTETS); + + mmc->mmc_tx_fpe_fragment_cntr += readl(mmcaddr + MMC_TX_FPE_FRAG); + mmc->mmc_tx_hold_req_cntr += readl(mmcaddr + MMC_TX_HOLD_REQ); + mmc->mmc_rx_packet_assembly_err_cntr += + readl(mmcaddr + MMC_RX_PKT_ASSEMBLY_ERR); + mmc->mmc_rx_packet_smd_err_cntr += readl(mmcaddr + MMC_RX_PKT_SMD_ERR); + mmc->mmc_rx_packet_assembly_ok_cntr += + readl(mmcaddr + MMC_RX_PKT_ASSEMBLY_OK); + mmc->mmc_rx_fpe_fragment_cntr += readl(mmcaddr + MMC_RX_FPE_FRAG); } const struct stmmac_mmc_ops dwmac_mmc_ops = { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index d993fc7e82c3..9c02fc754bf1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -39,13 +39,18 @@ struct stmmac_tx_info { bool is_jumbo; }; +#define STMMAC_TBS_AVAIL BIT(0) +#define STMMAC_TBS_EN BIT(1) + /* Frequently used values are kept adjacent for cache effect */ struct stmmac_tx_queue { 
u32 tx_count_frames; + int tbs; struct timer_list txtimer; u32 queue_index; struct stmmac_priv *priv_data; struct dma_extended_desc *dma_etx ____cacheline_aligned_in_smp; + struct dma_edesc *dma_entx; struct dma_desc *dma_tx; struct sk_buff **tx_skbuff; struct stmmac_tx_info *tx_skbuff_dma; @@ -88,6 +93,7 @@ struct stmmac_channel { struct napi_struct rx_napi ____cacheline_aligned_in_smp; struct napi_struct tx_napi ____cacheline_aligned_in_smp; struct stmmac_priv *priv_data; + spinlock_t lock; u32 index; }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 80d59b775907..2738d97495e6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1090,6 +1090,8 @@ static void stmmac_display_tx_rings(struct stmmac_priv *priv) if (priv->extend_desc) head_tx = (void *)tx_q->dma_etx; + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + head_tx = (void *)tx_q->dma_entx; else head_tx = (void *)tx_q->dma_tx; @@ -1163,13 +1165,19 @@ static void stmmac_clear_tx_descriptors(struct stmmac_priv *priv, u32 queue) int i; /* Clear the TX descriptors */ - for (i = 0; i < DMA_TX_SIZE; i++) + for (i = 0; i < DMA_TX_SIZE; i++) { + int last = (i == (DMA_TX_SIZE - 1)); + struct dma_desc *p; + if (priv->extend_desc) - stmmac_init_tx_desc(priv, &tx_q->dma_etx[i].basic, - priv->mode, (i == DMA_TX_SIZE - 1)); + p = &tx_q->dma_etx[i].basic; + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + p = &tx_q->dma_entx[i].basic; else - stmmac_init_tx_desc(priv, &tx_q->dma_tx[i], - priv->mode, (i == DMA_TX_SIZE - 1)); + p = &tx_q->dma_tx[i]; + + stmmac_init_tx_desc(priv, p, priv->mode, last); + } } /** @@ -1383,7 +1391,7 @@ static int init_dma_tx_desc_rings(struct net_device *dev) if (priv->extend_desc) stmmac_mode_init(priv, tx_q->dma_etx, tx_q->dma_tx_phy, DMA_TX_SIZE, 1); - else + else if (!(tx_q->tbs & STMMAC_TBS_AVAIL)) stmmac_mode_init(priv, tx_q->dma_tx, tx_q->dma_tx_phy, DMA_TX_SIZE, 
0); } @@ -1392,6 +1400,8 @@ static int init_dma_tx_desc_rings(struct net_device *dev) struct dma_desc *p; if (priv->extend_desc) p = &((tx_q->dma_etx + i)->basic); + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + p = &((tx_q->dma_entx + i)->basic); else p = tx_q->dma_tx + i; @@ -1511,19 +1521,26 @@ static void free_dma_tx_desc_resources(struct stmmac_priv *priv) /* Free TX queue resources */ for (queue = 0; queue < tx_count; queue++) { struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; + size_t size; + void *addr; /* Release the DMA TX socket buffers */ dma_free_tx_skbufs(priv, queue); - /* Free DMA regions of consistent memory previously allocated */ - if (!priv->extend_desc) - dma_free_coherent(priv->device, - DMA_TX_SIZE * sizeof(struct dma_desc), - tx_q->dma_tx, tx_q->dma_tx_phy); - else - dma_free_coherent(priv->device, DMA_TX_SIZE * - sizeof(struct dma_extended_desc), - tx_q->dma_etx, tx_q->dma_tx_phy); + if (priv->extend_desc) { + size = sizeof(struct dma_extended_desc); + addr = tx_q->dma_etx; + } else if (tx_q->tbs & STMMAC_TBS_AVAIL) { + size = sizeof(struct dma_edesc); + addr = tx_q->dma_entx; + } else { + size = sizeof(struct dma_desc); + addr = tx_q->dma_tx; + } + + size *= DMA_TX_SIZE; + + dma_free_coherent(priv->device, size, addr, tx_q->dma_tx_phy); kfree(tx_q->tx_skbuff_dma); kfree(tx_q->tx_skbuff); @@ -1616,6 +1633,8 @@ static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv) /* TX queues buffers and DMA */ for (queue = 0; queue < tx_count; queue++) { struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; + size_t size; + void *addr; tx_q->queue_index = queue; tx_q->priv_data = priv; @@ -1632,28 +1651,32 @@ static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv) if (!tx_q->tx_skbuff) goto err_dma; - if (priv->extend_desc) { - tx_q->dma_etx = dma_alloc_coherent(priv->device, - DMA_TX_SIZE * sizeof(struct dma_extended_desc), - &tx_q->dma_tx_phy, - GFP_KERNEL); - if (!tx_q->dma_etx) - goto err_dma; - } else { - tx_q->dma_tx = 
dma_alloc_coherent(priv->device, - DMA_TX_SIZE * sizeof(struct dma_desc), - &tx_q->dma_tx_phy, - GFP_KERNEL); - if (!tx_q->dma_tx) - goto err_dma; - } + if (priv->extend_desc) + size = sizeof(struct dma_extended_desc); + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + size = sizeof(struct dma_edesc); + else + size = sizeof(struct dma_desc); + + size *= DMA_TX_SIZE; + + addr = dma_alloc_coherent(priv->device, size, + &tx_q->dma_tx_phy, GFP_KERNEL); + if (!addr) + goto err_dma; + + if (priv->extend_desc) + tx_q->dma_etx = addr; + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + tx_q->dma_entx = addr; + else + tx_q->dma_tx = addr; } return 0; err_dma: free_dma_tx_desc_resources(priv); - return ret; } @@ -1885,6 +1908,8 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) if (priv->extend_desc) p = (struct dma_desc *)(tx_q->dma_etx + entry); + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + p = &tx_q->dma_entx[entry].basic; else p = tx_q->dma_tx + entry; @@ -1966,7 +1991,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) /* We still have pending packets, let's call for a new scheduling */ if (tx_q->dirty_tx != tx_q->cur_tx) - mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(10)); + mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(priv->tx_coal_timer)); __netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue)); @@ -1983,19 +2008,12 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) static void stmmac_tx_err(struct stmmac_priv *priv, u32 chan) { struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan]; - int i; netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, chan)); stmmac_stop_tx_dma(priv, chan); dma_free_tx_skbufs(priv, chan); - for (i = 0; i < DMA_TX_SIZE; i++) - if (priv->extend_desc) - stmmac_init_tx_desc(priv, &tx_q->dma_etx[i].basic, - priv->mode, (i == DMA_TX_SIZE - 1)); - else - stmmac_init_tx_desc(priv, &tx_q->dma_tx[i], - priv->mode, (i == DMA_TX_SIZE - 1)); + stmmac_clear_tx_descriptors(priv, chan); 
tx_q->dirty_tx = 0; tx_q->cur_tx = 0; tx_q->mss = 0; @@ -2060,17 +2078,25 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan) int status = stmmac_dma_interrupt_status(priv, priv->ioaddr, &priv->xstats, chan); struct stmmac_channel *ch = &priv->channel[chan]; + unsigned long flags; if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) { if (napi_schedule_prep(&ch->rx_napi)) { - stmmac_disable_dma_irq(priv, priv->ioaddr, chan); + spin_lock_irqsave(&ch->lock, flags); + stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 1, 0); + spin_unlock_irqrestore(&ch->lock, flags); __napi_schedule_irqoff(&ch->rx_napi); - status |= handle_tx; } } - if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) - napi_schedule_irqoff(&ch->tx_napi); + if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) { + if (napi_schedule_prep(&ch->tx_napi)) { + spin_lock_irqsave(&ch->lock, flags); + stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 0, 1); + spin_unlock_irqrestore(&ch->lock, flags); + __napi_schedule_irqoff(&ch->tx_napi); + } + } return status; } @@ -2265,14 +2291,14 @@ static void stmmac_tx_timer(struct timer_list *t) ch = &priv->channel[tx_q->queue_index]; - /* - * If NAPI is already running we can miss some events. Let's rearm - * the timer and try again. 
- */ - if (likely(napi_schedule_prep(&ch->tx_napi))) + if (likely(napi_schedule_prep(&ch->tx_napi))) { + unsigned long flags; + + spin_lock_irqsave(&ch->lock, flags); + stmmac_disable_dma_irq(priv, priv->ioaddr, ch->index, 0, 1); + spin_unlock_irqrestore(&ch->lock, flags); __napi_schedule(&ch->tx_napi); - else - mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(10)); + } } /** @@ -2624,6 +2650,14 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) if (priv->dma_cap.vlins) stmmac_enable_vlan(priv, priv->hw, STMMAC_VLAN_INSERT); + /* TBS */ + for (chan = 0; chan < tx_cnt; chan++) { + struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan]; + int enable = tx_q->tbs & STMMAC_TBS_AVAIL; + + stmmac_enable_tbs(priv, priv->ioaddr, enable, chan); + } + /* Start the ball rolling... */ stmmac_start_all_dma(priv); @@ -2681,6 +2715,16 @@ static int stmmac_open(struct net_device *dev) priv->rx_copybreak = STMMAC_RX_COPYBREAK; + /* Earlier check for TBS */ + for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++) { + struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan]; + int tbs_en = priv->plat->tx_queues_cfg[chan].tbs_en; + + tx_q->tbs |= tbs_en ? 
STMMAC_TBS_AVAIL : 0; + if (stmmac_enable_tbs(priv, priv->ioaddr, tbs_en, chan)) + tx_q->tbs &= ~STMMAC_TBS_AVAIL; + } + ret = alloc_dma_desc_resources(priv); if (ret < 0) { netdev_err(priv->dev, "%s: DMA descriptors allocation failed\n", @@ -2829,7 +2873,11 @@ static bool stmmac_vlan_insert(struct stmmac_priv *priv, struct sk_buff *skb, tag = skb_vlan_tag_get(skb); - p = tx_q->dma_tx + tx_q->cur_tx; + if (tx_q->tbs & STMMAC_TBS_AVAIL) + p = &tx_q->dma_entx[tx_q->cur_tx].basic; + else + p = &tx_q->dma_tx[tx_q->cur_tx]; + if (stmmac_set_desc_vlan_tag(priv, p, tag, inner_tag, inner_type)) return false; @@ -2864,7 +2912,11 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, dma_addr_t des, tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE); WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]); - desc = tx_q->dma_tx + tx_q->cur_tx; + + if (tx_q->tbs & STMMAC_TBS_AVAIL) + desc = &tx_q->dma_entx[tx_q->cur_tx].basic; + else + desc = &tx_q->dma_tx[tx_q->cur_tx]; curr_addr = des + (total_len - tmp_len); if (priv->dma_cap.addr64 <= 32) @@ -2915,13 +2967,13 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) { struct dma_desc *desc, *first, *mss_desc = NULL; struct stmmac_priv *priv = netdev_priv(dev); + int desc_size, tmp_pay_len = 0, first_tx; int nfrags = skb_shinfo(skb)->nr_frags; u32 queue = skb_get_queue_mapping(skb); unsigned int first_entry, tx_packets; - int tmp_pay_len = 0, first_tx; struct stmmac_tx_queue *tx_q; - u8 proto_hdr_len, hdr; bool has_vlan, set_ic; + u8 proto_hdr_len, hdr; u32 pay_len, mss; dma_addr_t des; int i; @@ -2958,7 +3010,11 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) /* set new MSS value if needed */ if (mss != tx_q->mss) { - mss_desc = tx_q->dma_tx + tx_q->cur_tx; + if (tx_q->tbs & STMMAC_TBS_AVAIL) + mss_desc = &tx_q->dma_entx[tx_q->cur_tx].basic; + else + mss_desc = &tx_q->dma_tx[tx_q->cur_tx]; + stmmac_set_mss(priv, mss_desc, mss); tx_q->mss = mss; tx_q->cur_tx = 
STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE); @@ -2978,7 +3034,10 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) first_entry = tx_q->cur_tx; WARN_ON(tx_q->tx_skbuff[first_entry]); - desc = tx_q->dma_tx + first_entry; + if (tx_q->tbs & STMMAC_TBS_AVAIL) + desc = &tx_q->dma_entx[first_entry].basic; + else + desc = &tx_q->dma_tx[first_entry]; first = desc; if (has_vlan) @@ -3050,7 +3109,11 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) set_ic = false; if (set_ic) { - desc = &tx_q->dma_tx[tx_q->cur_tx]; + if (tx_q->tbs & STMMAC_TBS_AVAIL) + desc = &tx_q->dma_entx[tx_q->cur_tx].basic; + else + desc = &tx_q->dma_tx[tx_q->cur_tx]; + tx_q->tx_count_frames = 0; stmmac_set_tx_ic(priv, desc); priv->xstats.tx_set_ic_bit++; @@ -3113,16 +3176,18 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) pr_info("%s: curr=%d dirty=%d f=%d, e=%d, f_p=%p, nfrags %d\n", __func__, tx_q->cur_tx, tx_q->dirty_tx, first_entry, tx_q->cur_tx, first, nfrags); - - stmmac_display_ring(priv, (void *)tx_q->dma_tx, DMA_TX_SIZE, 0); - pr_info(">>> frame to be transmitted: "); print_pkt(skb->data, skb_headlen(skb)); } netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len); - tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * sizeof(*desc)); + if (tx_q->tbs & STMMAC_TBS_AVAIL) + desc_size = sizeof(struct dma_edesc); + else + desc_size = sizeof(struct dma_desc); + + tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * desc_size); stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue); stmmac_tx_timer_arm(priv, queue); @@ -3152,10 +3217,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) u32 queue = skb_get_queue_mapping(skb); int nfrags = skb_shinfo(skb)->nr_frags; int gso = skb_shinfo(skb)->gso_type; + struct dma_edesc *tbs_desc = NULL; + int entry, desc_size, first_tx; struct dma_desc *desc, *first; struct stmmac_tx_queue *tx_q; bool has_vlan, 
set_ic; - int entry, first_tx; dma_addr_t des; tx_q = &priv->tx_queue[queue]; @@ -3195,6 +3261,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) if (likely(priv->extend_desc)) desc = (struct dma_desc *)(tx_q->dma_etx + entry); + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + desc = &tx_q->dma_entx[entry].basic; else desc = tx_q->dma_tx + entry; @@ -3224,6 +3292,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) if (likely(priv->extend_desc)) desc = (struct dma_desc *)(tx_q->dma_etx + entry); + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + desc = &tx_q->dma_entx[entry].basic; else desc = tx_q->dma_tx + entry; @@ -3270,6 +3340,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) if (set_ic) { if (likely(priv->extend_desc)) desc = &tx_q->dma_etx[entry].basic; + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + desc = &tx_q->dma_entx[entry].basic; else desc = &tx_q->dma_tx[entry]; @@ -3287,20 +3359,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) tx_q->cur_tx = entry; if (netif_msg_pktdata(priv)) { - void *tx_head; - netdev_dbg(priv->dev, "%s: curr=%d dirty=%d f=%d, e=%d, first=%p, nfrags=%d", __func__, tx_q->cur_tx, tx_q->dirty_tx, first_entry, entry, first, nfrags); - if (priv->extend_desc) - tx_head = (void *)tx_q->dma_etx; - else - tx_head = (void *)tx_q->dma_tx; - - stmmac_display_ring(priv, tx_head, DMA_TX_SIZE, false); - netdev_dbg(priv->dev, ">>> frame to be transmitted: "); print_pkt(skb->data, skb->len); } @@ -3346,12 +3409,19 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) /* Prepare the first descriptor setting the OWN bit too */ stmmac_prepare_tx_desc(priv, first, 1, nopaged_len, - csum_insertion, priv->mode, 1, last_segment, + csum_insertion, priv->mode, 0, last_segment, skb->len); - } else { - stmmac_set_tx_owner(priv, first); } + if (tx_q->tbs & STMMAC_TBS_EN) { + struct timespec64 ts = ns_to_timespec64(skb->tstamp); + + 
tbs_desc = &tx_q->dma_entx[first_entry]; + stmmac_set_desc_tbs(priv, tbs_desc, ts.tv_sec, ts.tv_nsec); + } + + stmmac_set_tx_owner(priv, first); + /* The own bit must be the latest setting done when prepare the * descriptor and then barrier is needed to make sure that * all is coherent before granting the DMA engine. @@ -3362,7 +3432,14 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) stmmac_enable_dma_transmission(priv, priv->ioaddr); - tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * sizeof(*desc)); + if (likely(priv->extend_desc)) + desc_size = sizeof(struct dma_extended_desc); + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + desc_size = sizeof(struct dma_edesc); + else + desc_size = sizeof(struct dma_desc); + + tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * desc_size); stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue); stmmac_tx_timer_arm(priv, queue); @@ -3751,8 +3828,14 @@ static int stmmac_napi_poll_rx(struct napi_struct *napi, int budget) priv->xstats.napi_poll++; work_done = stmmac_rx(priv, budget, chan); - if (work_done < budget && napi_complete_done(napi, work_done)) - stmmac_enable_dma_irq(priv, priv->ioaddr, chan); + if (work_done < budget && napi_complete_done(napi, work_done)) { + unsigned long flags; + + spin_lock_irqsave(&ch->lock, flags); + stmmac_enable_dma_irq(priv, priv->ioaddr, chan, 1, 0); + spin_unlock_irqrestore(&ch->lock, flags); + } + return work_done; } @@ -3761,7 +3844,6 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) struct stmmac_channel *ch = container_of(napi, struct stmmac_channel, tx_napi); struct stmmac_priv *priv = ch->priv_data; - struct stmmac_tx_queue *tx_q; u32 chan = ch->index; int work_done; @@ -3770,15 +3852,12 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) work_done = stmmac_tx_clean(priv, DMA_TX_SIZE, chan); work_done = min(work_done, budget); - if (work_done < budget) - napi_complete_done(napi, work_done); + if 
(work_done < budget && napi_complete_done(napi, work_done)) { + unsigned long flags; - /* Force transmission restart */ - tx_q = &priv->tx_queue[chan]; - if (tx_q->cur_tx != tx_q->dirty_tx) { - stmmac_enable_dma_transmission(priv, priv->ioaddr); - stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, - chan); + spin_lock_irqsave(&ch->lock, flags); + stmmac_enable_dma_irq(priv, priv->ioaddr, chan, 0, 1); + spin_unlock_irqrestore(&ch->lock, flags); } return work_done; @@ -3792,7 +3871,7 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) * netdev structure and arrange for the device to be reset to a sane state * in order to transmit a new packet. */ -static void stmmac_tx_timeout(struct net_device *dev) +static void stmmac_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct stmmac_priv *priv = netdev_priv(dev); @@ -4078,6 +4157,10 @@ static int stmmac_setup_tc(struct net_device *ndev, enum tc_setup_type type, priv, priv, true); case TC_SETUP_QDISC_CBS: return stmmac_tc_setup_cbs(priv, priv, type_data); + case TC_SETUP_QDISC_TAPRIO: + return stmmac_tc_setup_taprio(priv, priv, type_data); + case TC_SETUP_QDISC_ETF: + return stmmac_tc_setup_etf(priv, priv, type_data); default: return -EOPNOTSUPP; } @@ -4181,7 +4264,7 @@ static int stmmac_rings_status_show(struct seq_file *seq, void *v) seq_printf(seq, "Extended descriptor ring:\n"); sysfs_display_ring((void *)tx_q->dma_etx, DMA_TX_SIZE, 1, seq); - } else { + } else if (!(tx_q->tbs & STMMAC_TBS_AVAIL)) { seq_printf(seq, "Descriptor ring:\n"); sysfs_display_ring((void *)tx_q->dma_tx, DMA_TX_SIZE, 0, seq); @@ -4250,9 +4333,44 @@ static int stmmac_dma_cap_show(struct seq_file *seq, void *v) priv->dma_cap.number_rx_channel); seq_printf(seq, "\tNumber of Additional TX channel: %d\n", priv->dma_cap.number_tx_channel); + seq_printf(seq, "\tNumber of Additional RX queues: %d\n", + priv->dma_cap.number_rx_queues); + seq_printf(seq, "\tNumber of Additional TX queues: %d\n", + 
priv->dma_cap.number_tx_queues); seq_printf(seq, "\tEnhanced descriptors: %s\n", (priv->dma_cap.enh_desc) ? "Y" : "N"); - + seq_printf(seq, "\tTX Fifo Size: %d\n", priv->dma_cap.tx_fifo_size); + seq_printf(seq, "\tRX Fifo Size: %d\n", priv->dma_cap.rx_fifo_size); + seq_printf(seq, "\tHash Table Size: %d\n", priv->dma_cap.hash_tb_sz); + seq_printf(seq, "\tTSO: %s\n", priv->dma_cap.tsoen ? "Y" : "N"); + seq_printf(seq, "\tNumber of PPS Outputs: %d\n", + priv->dma_cap.pps_out_num); + seq_printf(seq, "\tSafety Features: %s\n", + priv->dma_cap.asp ? "Y" : "N"); + seq_printf(seq, "\tFlexible RX Parser: %s\n", + priv->dma_cap.frpsel ? "Y" : "N"); + seq_printf(seq, "\tEnhanced Addressing: %d\n", + priv->dma_cap.addr64); + seq_printf(seq, "\tReceive Side Scaling: %s\n", + priv->dma_cap.rssen ? "Y" : "N"); + seq_printf(seq, "\tVLAN Hash Filtering: %s\n", + priv->dma_cap.vlhash ? "Y" : "N"); + seq_printf(seq, "\tSplit Header: %s\n", + priv->dma_cap.sphen ? "Y" : "N"); + seq_printf(seq, "\tVLAN TX Insertion: %s\n", + priv->dma_cap.vlins ? "Y" : "N"); + seq_printf(seq, "\tDouble VLAN: %s\n", + priv->dma_cap.dvlan ? "Y" : "N"); + seq_printf(seq, "\tNumber of L3/L4 Filters: %d\n", + priv->dma_cap.l3l4fnum); + seq_printf(seq, "\tARP Offloading: %s\n", + priv->dma_cap.arpoffsel ? "Y" : "N"); + seq_printf(seq, "\tEnhancements to Scheduled Traffic (EST): %s\n", + priv->dma_cap.estsel ? "Y" : "N"); + seq_printf(seq, "\tFrame Preemption (FPE): %s\n", + priv->dma_cap.fpesel ? "Y" : "N"); + seq_printf(seq, "\tTime-Based Scheduling (TBS): %s\n", + priv->dma_cap.tbssel ? 
"Y" : "N"); return 0; } DEFINE_SHOW_ATTRIBUTE(stmmac_dma_cap); @@ -4728,6 +4846,7 @@ int stmmac_dvr_probe(struct device *device, for (queue = 0; queue < maxq; queue++) { struct stmmac_channel *ch = &priv->channel[queue]; + spin_lock_init(&ch->lock); ch->priv_data = priv; ch->index = queue; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 8237dbc3e991..623521052152 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -65,7 +65,6 @@ static void common_default_data(struct plat_stmmacenet_data *plat) plat->force_sf_dma_mode = 1; plat->mdio_bus_data->needs_reset = true; - plat->mdio_bus_data->phy_mask = 0; /* Set default value for multicast hash bins */ plat->multicast_filter_bins = HASH_TABLE_SIZE; @@ -154,8 +153,6 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, plat->tx_queues_cfg[6].weight = 0x0F; plat->tx_queues_cfg[7].weight = 0x10; - plat->mdio_bus_data->phy_mask = 0; - plat->dma_cfg->pbl = 32; plat->dma_cfg->pblx8 = true; plat->dma_cfg->fixed_burst = 0; @@ -386,8 +383,6 @@ static int snps_gmac5_default_data(struct pci_dev *pdev, plat->tso_en = 1; plat->pmt = 1; - plat->mdio_bus_data->phy_mask = 0; - /* Set default value for multicast hash bins */ plat->multicast_filter_bins = HASH_TABLE_SIZE; @@ -406,6 +401,8 @@ static int snps_gmac5_default_data(struct pci_dev *pdev, plat->tx_queues_cfg[i].use_prio = false; plat->tx_queues_cfg[i].mode_to_use = MTL_QUEUE_DCB; plat->tx_queues_cfg[i].weight = 25; + if (i > 0) + plat->tx_queues_cfg[i].tbs_en = 1; } plat->rx_sched_algorithm = MTL_RX_ALGORITHM_SP; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c index 450d7dac3ea6..2aba2673d6c3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c @@ -14,6 +14,7 @@ #include <linux/phy.h> 
#include <linux/udp.h> #include <net/pkt_cls.h> +#include <net/pkt_sched.h> #include <net/tcp.h> #include <net/udp.h> #include <net/tc_act/tc_gact.h> @@ -50,6 +51,7 @@ struct stmmac_packet_attrs { u8 id; int sarc; u16 queue_mapping; + u64 timestamp; }; static u8 stmmac_test_next_id; @@ -208,6 +210,9 @@ static struct sk_buff *stmmac_test_get_udp_skb(struct stmmac_priv *priv, skb->pkt_type = PACKET_HOST; skb->dev = priv->dev; + if (attr->timestamp) + skb->tstamp = ns_to_ktime(attr->timestamp); + return skb; } @@ -341,8 +346,7 @@ static int __stmmac_test_loopback(struct stmmac_priv *priv, goto cleanup; } - skb_set_queue_mapping(skb, attr->queue_mapping); - ret = dev_queue_xmit(skb); + ret = dev_direct_xmit(skb, attr->queue_mapping); if (ret) goto cleanup; @@ -932,8 +936,7 @@ static int __stmmac_test_vlanfilt(struct stmmac_priv *priv) goto vlan_del; } - skb_set_queue_mapping(skb, 0); - ret = dev_queue_xmit(skb); + ret = dev_direct_xmit(skb, 0); if (ret) goto vlan_del; @@ -1027,8 +1030,7 @@ static int __stmmac_test_dvlanfilt(struct stmmac_priv *priv) goto vlan_del; } - skb_set_queue_mapping(skb, 0); - ret = dev_queue_xmit(skb); + ret = dev_direct_xmit(skb, 0); if (ret) goto vlan_del; @@ -1298,8 +1300,7 @@ static int stmmac_test_vlanoff_common(struct stmmac_priv *priv, bool svlan) __vlan_hwaccel_put_tag(skb, htons(proto), tpriv->vlan_id); skb->protocol = htons(proto); - skb_set_queue_mapping(skb, 0); - ret = dev_queue_xmit(skb); + ret = dev_direct_xmit(skb, 0); if (ret) goto vlan_del; @@ -1659,8 +1660,7 @@ static int stmmac_test_arpoffload(struct stmmac_priv *priv) if (ret) goto cleanup; - skb_set_queue_mapping(skb, 0); - ret = dev_queue_xmit(skb); + ret = dev_direct_xmit(skb, 0); if (ret) goto cleanup_promisc; @@ -1748,6 +1748,68 @@ static int stmmac_test_sph(struct stmmac_priv *priv) return 0; } +static int stmmac_test_tbs(struct stmmac_priv *priv) +{ +#define STMMAC_TBS_LT_OFFSET (500 * 1000 * 1000) /* 500 ms*/ + struct stmmac_packet_attrs attr = { }; + struct 
tc_etf_qopt_offload qopt; + u64 start_time, curr_time = 0; + unsigned long flags; + int ret, i; + + if (!priv->hwts_tx_en) + return -EOPNOTSUPP; + + /* Find first TBS enabled Queue, if any */ + for (i = 0; i < priv->plat->tx_queues_to_use; i++) + if (priv->tx_queue[i].tbs & STMMAC_TBS_AVAIL) + break; + + if (i >= priv->plat->tx_queues_to_use) + return -EOPNOTSUPP; + + qopt.enable = true; + qopt.queue = i; + + ret = stmmac_tc_setup_etf(priv, priv, &qopt); + if (ret) + return ret; + + spin_lock_irqsave(&priv->ptp_lock, flags); + stmmac_get_systime(priv, priv->ptpaddr, &curr_time); + spin_unlock_irqrestore(&priv->ptp_lock, flags); + + if (!curr_time) { + ret = -EOPNOTSUPP; + goto fail_disable; + } + + start_time = curr_time; + curr_time += STMMAC_TBS_LT_OFFSET; + + attr.dst = priv->dev->dev_addr; + attr.timestamp = curr_time; + attr.timeout = nsecs_to_jiffies(2 * STMMAC_TBS_LT_OFFSET); + attr.queue_mapping = i; + + ret = __stmmac_test_loopback(priv, &attr); + if (ret) + goto fail_disable; + + /* Check if expected time has elapsed */ + spin_lock_irqsave(&priv->ptp_lock, flags); + stmmac_get_systime(priv, priv->ptpaddr, &curr_time); + spin_unlock_irqrestore(&priv->ptp_lock, flags); + + if ((curr_time - start_time) < STMMAC_TBS_LT_OFFSET) + ret = -EINVAL; + +fail_disable: + qopt.enable = false; + stmmac_tc_setup_etf(priv, priv, &qopt); + return ret; +} + #define STMMAC_LOOPBACK_NONE 0 #define STMMAC_LOOPBACK_MAC 1 #define STMMAC_LOOPBACK_PHY 2 @@ -1881,6 +1943,10 @@ static const struct stmmac_test { .name = "Split Header ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_sph, + }, { + .name = "TBS (ETF Scheduler) ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_tbs, }, }; @@ -1889,7 +1955,6 @@ void stmmac_selftest_run(struct net_device *dev, { struct stmmac_priv *priv = netdev_priv(dev); int count = stmmac_selftest_get_count(priv); - int carrier = netif_carrier_ok(dev); int i, ret; memset(buf, 0, sizeof(*buf) * count); @@ -1899,15 +1964,12 @@ void 
stmmac_selftest_run(struct net_device *dev, netdev_err(priv->dev, "Only offline tests are supported\n"); etest->flags |= ETH_TEST_FL_FAILED; return; - } else if (!carrier) { + } else if (!netif_carrier_ok(dev)) { netdev_err(priv->dev, "You need valid Link to execute tests\n"); etest->flags |= ETH_TEST_FL_FAILED; return; } - /* We don't want extra traffic */ - netif_carrier_off(dev); - /* Wait for queues drain */ msleep(200); @@ -1962,10 +2024,6 @@ void stmmac_selftest_run(struct net_device *dev, break; } } - - /* Restart everything */ - if (carrier) - netif_carrier_on(dev); } void stmmac_selftest_get_strings(struct stmmac_priv *priv, u8 *data) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 9ffae12a2122..7a01dee2f9a8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -595,9 +595,167 @@ static int tc_setup_cls(struct stmmac_priv *priv, return ret; } +static int tc_setup_taprio(struct stmmac_priv *priv, + struct tc_taprio_qopt_offload *qopt) +{ + u32 size, wid = priv->dma_cap.estwid, dep = priv->dma_cap.estdep; + struct plat_stmmacenet_data *plat = priv->plat; + struct timespec64 time; + bool fpe = false; + int i, ret = 0; + u64 ctr; + + if (!priv->dma_cap.estsel) + return -EOPNOTSUPP; + + switch (wid) { + case 0x1: + wid = 16; + break; + case 0x2: + wid = 20; + break; + case 0x3: + wid = 24; + break; + default: + return -EOPNOTSUPP; + } + + switch (dep) { + case 0x1: + dep = 64; + break; + case 0x2: + dep = 128; + break; + case 0x3: + dep = 256; + break; + case 0x4: + dep = 512; + break; + case 0x5: + dep = 1024; + break; + default: + return -EOPNOTSUPP; + } + + if (!qopt->enable) + goto disable; + if (qopt->num_entries >= dep) + return -EINVAL; + if (!qopt->base_time) + return -ERANGE; + if (!qopt->cycle_time) + return -ERANGE; + + if (!plat->est) { + plat->est = devm_kzalloc(priv->device, sizeof(*plat->est), + GFP_KERNEL); + if 
(!plat->est) + return -ENOMEM; + } else { + memset(plat->est, 0, sizeof(*plat->est)); + } + + size = qopt->num_entries; + + priv->plat->est->gcl_size = size; + priv->plat->est->enable = qopt->enable; + + for (i = 0; i < size; i++) { + s64 delta_ns = qopt->entries[i].interval; + u32 gates = qopt->entries[i].gate_mask; + + if (delta_ns > GENMASK(wid, 0)) + return -ERANGE; + if (gates > GENMASK(31 - wid, 0)) + return -ERANGE; + + switch (qopt->entries[i].command) { + case TC_TAPRIO_CMD_SET_GATES: + if (fpe) + return -EINVAL; + break; + case TC_TAPRIO_CMD_SET_AND_HOLD: + gates |= BIT(0); + fpe = true; + break; + case TC_TAPRIO_CMD_SET_AND_RELEASE: + gates &= ~BIT(0); + fpe = true; + break; + default: + return -EOPNOTSUPP; + } + + priv->plat->est->gcl[i] = delta_ns | (gates << wid); + } + + /* Adjust for real system time */ + time = ktime_to_timespec64(qopt->base_time); + priv->plat->est->btr[0] = (u32)time.tv_nsec; + priv->plat->est->btr[1] = (u32)time.tv_sec; + + ctr = qopt->cycle_time; + priv->plat->est->ctr[0] = do_div(ctr, NSEC_PER_SEC); + priv->plat->est->ctr[1] = (u32)ctr; + + if (fpe && !priv->dma_cap.fpesel) + return -EOPNOTSUPP; + + ret = stmmac_fpe_configure(priv, priv->ioaddr, + priv->plat->tx_queues_to_use, + priv->plat->rx_queues_to_use, fpe); + if (ret && fpe) { + netdev_err(priv->dev, "failed to enable Frame Preemption\n"); + return ret; + } + + ret = stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, + priv->plat->clk_ptp_rate); + if (ret) { + netdev_err(priv->dev, "failed to configure EST\n"); + goto disable; + } + + netdev_info(priv->dev, "configured EST\n"); + return 0; + +disable: + priv->plat->est->enable = false; + stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, + priv->plat->clk_ptp_rate); + return ret; +} + +static int tc_setup_etf(struct stmmac_priv *priv, + struct tc_etf_qopt_offload *qopt) +{ + if (!priv->dma_cap.tbssel) + return -EOPNOTSUPP; + if (qopt->queue >= priv->plat->tx_queues_to_use) + return -EINVAL; + if 
(!(priv->tx_queue[qopt->queue].tbs & STMMAC_TBS_AVAIL)) + return -EINVAL; + + if (qopt->enable) + priv->tx_queue[qopt->queue].tbs |= STMMAC_TBS_EN; + else + priv->tx_queue[qopt->queue].tbs &= ~STMMAC_TBS_EN; + + netdev_info(priv->dev, "%s ETF for Queue %d\n", + qopt->enable ? "enabled" : "disabled", qopt->queue); + return 0; +} + const struct stmmac_tc_ops dwmac510_tc_ops = { .init = tc_init, .setup_cls_u32 = tc_setup_cls_u32, .setup_cbs = tc_setup_cbs, .setup_cls = tc_setup_cls, + .setup_taprio = tc_setup_taprio, + .setup_etf = tc_setup_etf, }; diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index c91876f8c536..6ec9163e232c 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -2666,7 +2666,7 @@ static void cas_netpoll(struct net_device *dev) } #endif -static void cas_tx_timeout(struct net_device *dev) +static void cas_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct cas *cp = netdev_priv(dev); diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index f5fd1f3c07cc..9a5004f674c7 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6517,7 +6517,7 @@ static void niu_reset_task(struct work_struct *work) spin_unlock_irqrestore(&np->lock, flags); } -static void niu_tx_timeout(struct net_device *dev) +static void niu_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct niu *np = netdev_priv(dev); diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c index e9b757b03b56..c5add0b45eed 100644 --- a/drivers/net/ethernet/sun/sunbmac.c +++ b/drivers/net/ethernet/sun/sunbmac.c @@ -941,7 +941,7 @@ static int bigmac_close(struct net_device *dev) return 0; } -static void bigmac_tx_timeout(struct net_device *dev) +static void bigmac_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct bigmac *bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/sun/sungem.c 
b/drivers/net/ethernet/sun/sungem.c index 3e7631160384..8358064fbd48 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -970,7 +970,7 @@ static void gem_poll_controller(struct net_device *dev) } #endif -static void gem_tx_timeout(struct net_device *dev) +static void gem_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct gem *gp = netdev_priv(dev); diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index d007dfeba5c3..f0fe7bb2a750 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -2246,7 +2246,7 @@ static int happy_meal_close(struct net_device *dev) #define SXD(x) #endif -static void happy_meal_tx_timeout(struct net_device *dev) +static void happy_meal_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct happy_meal *hp = netdev_priv(dev); diff --git a/drivers/net/ethernet/sun/sunqe.c b/drivers/net/ethernet/sun/sunqe.c index 1468fa0a54e9..2102b95ec347 100644 --- a/drivers/net/ethernet/sun/sunqe.c +++ b/drivers/net/ethernet/sun/sunqe.c @@ -544,7 +544,7 @@ static void qe_tx_reclaim(struct sunqe *qep) qep->tx_old = elem; } -static void qe_tx_timeout(struct net_device *dev) +static void qe_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sunqe *qep = netdev_priv(dev); int tx_full; diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index 8b94d9ad9e2b..c23ce838ff63 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -1223,7 +1223,7 @@ vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb, { struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port); struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; - struct sk_buff *segs; + struct sk_buff *segs, *curr, *next; int maclen, datalen; int status; int gso_size, gso_type, gso_segs; @@ -1282,11 +1282,8 @@ vnet_handle_offloads(struct vnet_port *port, 
struct sk_buff *skb, skb_reset_mac_header(skb); status = 0; - while (segs) { - struct sk_buff *curr = segs; - - segs = segs->next; - curr->next = NULL; + skb_list_walk_safe(segs, curr, next) { + skb_mark_not_on_list(curr); if (port->tso && curr->len > dev->mtu) { skb_shinfo(curr)->gso_size = gso_size; skb_shinfo(curr)->gso_type = gso_type; @@ -1539,7 +1536,7 @@ out_dropped: } EXPORT_SYMBOL_GPL(sunvnet_start_xmit_common); -void sunvnet_tx_timeout_common(struct net_device *dev) +void sunvnet_tx_timeout_common(struct net_device *dev, unsigned int txqueue) { /* XXX Implement me XXX */ } diff --git a/drivers/net/ethernet/sun/sunvnet_common.h b/drivers/net/ethernet/sun/sunvnet_common.h index 2b808d2482d6..5416a3cb9e7d 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.h +++ b/drivers/net/ethernet/sun/sunvnet_common.h @@ -135,7 +135,7 @@ int sunvnet_open_common(struct net_device *dev); int sunvnet_close_common(struct net_device *dev); void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp); int sunvnet_set_mac_addr_common(struct net_device *dev, void *p); -void sunvnet_tx_timeout_common(struct net_device *dev); +void sunvnet_tx_timeout_common(struct net_device *dev, unsigned int txqueue); netdev_tx_t sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev, struct vnet_port *(*vnet_tx_port) diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c index a1f5a1e61040..07046a2370b3 100644 --- a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c +++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c @@ -689,7 +689,7 @@ static int xlgmac_close(struct net_device *netdev) return 0; } -static void xlgmac_tx_timeout(struct net_device *netdev) +static void xlgmac_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct xlgmac_pdata *pdata = netdev_priv(netdev); diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c index 3a655a4dc10e..5e1b8292cd3f 100644 --- 
a/drivers/net/ethernet/ti/cpmac.c +++ b/drivers/net/ethernet/ti/cpmac.c @@ -797,7 +797,7 @@ static irqreturn_t cpmac_irq(int irq, void *dev_id) return IRQ_HANDLED; } -static void cpmac_tx_timeout(struct net_device *dev) +static void cpmac_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct cpmac_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c index 707d5eb480ce..97a058ca60ac 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.c +++ b/drivers/net/ethernet/ti/cpsw_priv.c @@ -272,7 +272,7 @@ void soft_reset(const char *module, void __iomem *reg) WARN(readl_relaxed(reg) & 1, "failed to soft-reset %s\n", module); } -void cpsw_ndo_tx_timeout(struct net_device *ndev) +void cpsw_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct cpsw_priv *priv = netdev_priv(ndev); struct cpsw_common *cpsw = priv->cpsw; diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h index bc726356a72c..b8d7b924ee3d 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.h +++ b/drivers/net/ethernet/ti/cpsw_priv.h @@ -449,7 +449,7 @@ int cpsw_rx_poll(struct napi_struct *napi_rx, int budget); void cpsw_rx_vlan_encap(struct sk_buff *skb); void soft_reset(const char *module, void __iomem *reg); void cpsw_set_slave_mac(struct cpsw_slave *slave, struct cpsw_priv *priv); -void cpsw_ndo_tx_timeout(struct net_device *ndev); +void cpsw_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue); int cpsw_need_resplit(struct cpsw_common *cpsw); int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd); int cpsw_ndo_set_tx_maxrate(struct net_device *ndev, int queue, u32 rate); diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index ae27be85e363..75d4e16c692b 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -983,7 +983,7 @@ fail_tx: * error and re-initialize the TX channel for 
hardware operation * */ -static void emac_dev_tx_timeout(struct net_device *ndev) +static void emac_dev_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct emac_priv *priv = netdev_priv(ndev); struct device *emac_dev = &ndev->dev; diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 1b2702f74455..432645e86495 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1811,7 +1811,7 @@ out: return (ret == 0) ? 0 : err; } -static void netcp_ndo_tx_timeout(struct net_device *ndev) +static void netcp_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct netcp_intf *netcp = netdev_priv(ndev); unsigned int descs = knav_pool_count(netcp->tx_pool); diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index d6a192c1f337..fb36115e9c51 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -2533,8 +2533,6 @@ static int gbe_del_vid(void *intf_priv, int vid) } #if IS_ENABLED(CONFIG_TI_CPTS) -#define HAS_PHY_TXTSTAMP(p) ((p)->drv && (p)->drv->txtstamp) -#define HAS_PHY_RXTSTAMP(p) ((p)->drv && (p)->drv->rxtstamp) static void gbe_txtstamp(void *context, struct sk_buff *skb) { @@ -2566,7 +2564,7 @@ static int gbe_txtstamp_mark_pkt(struct gbe_intf *gbe_intf, * We mark it here because skb_tx_timestamp() is called * after all the txhooks are called. 
*/ - if (phydev && HAS_PHY_TXTSTAMP(phydev)) { + if (phy_has_txtstamp(phydev)) { skb_shinfo(p_info->skb)->tx_flags |= SKBTX_IN_PROGRESS; return 0; } @@ -2588,7 +2586,7 @@ static int gbe_rxtstamp(struct gbe_intf *gbe_intf, struct netcp_packet *p_info) if (p_info->rxtstamp_complete) return 0; - if (phydev && HAS_PHY_RXTSTAMP(phydev)) { + if (phy_has_rxtstamp(phydev)) { p_info->rxtstamp_complete = true; return 0; } @@ -2830,7 +2828,7 @@ static int gbe_ioctl(void *intf_priv, struct ifreq *req, int cmd) struct gbe_intf *gbe_intf = intf_priv; struct phy_device *phy = gbe_intf->slave->phy; - if (!phy || !phy->drv->hwtstamp) { + if (!phy_has_hwtstamp(phy)) { switch (cmd) { case SIOCGHWTSTAMP: return gbe_hwtstamp_get(gbe_intf, req); diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c index 78f0f2d59e22..ad465202980a 100644 --- a/drivers/net/ethernet/ti/tlan.c +++ b/drivers/net/ethernet/ti/tlan.c @@ -161,7 +161,7 @@ static void tlan_set_multicast_list(struct net_device *); static int tlan_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static int tlan_probe1(struct pci_dev *pdev, long ioaddr, int irq, int rev, const struct pci_device_id *ent); -static void tlan_tx_timeout(struct net_device *dev); +static void tlan_tx_timeout(struct net_device *dev, unsigned int txqueue); static void tlan_tx_timeout_work(struct work_struct *work); static int tlan_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); @@ -997,7 +997,7 @@ static int tlan_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) * **************************************************************/ -static void tlan_tx_timeout(struct net_device *dev) +static void tlan_tx_timeout(struct net_device *dev, unsigned int txqueue) { TLAN_DBG(TLAN_DEBUG_GNRL, "%s: Transmit timed out.\n", dev->name); @@ -1028,7 +1028,7 @@ static void tlan_tx_timeout_work(struct work_struct *work) struct tlan_priv *priv = container_of(work, struct tlan_priv, tlan_tqueue); - 
tlan_tx_timeout(priv->dev); + tlan_tx_timeout(priv->dev, UINT_MAX); } diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index 9d9f8acb7ee3..070dd6fa9401 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -1405,7 +1405,7 @@ out: * * called, if tx hangs. Schedules a task that resets the interface */ -void gelic_net_tx_timeout(struct net_device *netdev) +void gelic_net_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct gelic_card *card; diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.h b/drivers/net/ethernet/toshiba/ps3_gelic_net.h index 051033580f0a..805903dbddcc 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.h +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.h @@ -359,7 +359,7 @@ int gelic_net_open(struct net_device *netdev); int gelic_net_stop(struct net_device *netdev); netdev_tx_t gelic_net_xmit(struct sk_buff *skb, struct net_device *netdev); void gelic_net_set_multi(struct net_device *netdev); -void gelic_net_tx_timeout(struct net_device *netdev); +void gelic_net_tx_timeout(struct net_device *netdev, unsigned int txqueue); int gelic_net_setup_netdev(struct net_device *netdev, struct gelic_card *card); /* shared ethtool ops */ diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c index 538e70810d3d..6576271642c1 100644 --- a/drivers/net/ethernet/toshiba/spider_net.c +++ b/drivers/net/ethernet/toshiba/spider_net.c @@ -2180,7 +2180,7 @@ out: * called, if tx hangs. 
Schedules a task that resets the interface */ static void -spider_net_tx_timeout(struct net_device *netdev) +spider_net_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct spider_net_card *card; diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index 12466a72cefc..708de826200e 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -483,7 +483,7 @@ static void tc35815_txdone(struct net_device *dev); static int tc35815_close(struct net_device *dev); static struct net_device_stats *tc35815_get_stats(struct net_device *dev); static void tc35815_set_multicast_list(struct net_device *dev); -static void tc35815_tx_timeout(struct net_device *dev); +static void tc35815_tx_timeout(struct net_device *dev, unsigned int txqueue); static int tc35815_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); #ifdef CONFIG_NET_POLL_CONTROLLER static void tc35815_poll_controller(struct net_device *dev); @@ -1189,7 +1189,7 @@ static void tc35815_schedule_restart(struct net_device *dev) spin_unlock_irqrestore(&lp->lock, flags); } -static void tc35815_tx_timeout(struct net_device *dev) +static void tc35815_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct tc35815_regs __iomem *tr = (struct tc35815_regs __iomem *)dev->base_addr; diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index ed12dbd156f0..803247d51fe9 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -506,7 +506,7 @@ static void mdio_write(struct net_device *dev, int phy_id, int location, int val static int rhine_open(struct net_device *dev); static void rhine_reset_task(struct work_struct *work); static void rhine_slow_event_task(struct work_struct *work); -static void rhine_tx_timeout(struct net_device *dev); +static void rhine_tx_timeout(struct net_device *dev, unsigned int txqueue); static netdev_tx_t 
rhine_start_tx(struct sk_buff *skb, struct net_device *dev); static irqreturn_t rhine_interrupt(int irq, void *dev_instance); @@ -1761,7 +1761,7 @@ out_unlock: mutex_unlock(&rp->task_lock); } -static void rhine_tx_timeout(struct net_device *dev) +static void rhine_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct rhine_private *rp = netdev_priv(dev); void __iomem *ioaddr = rp->base; diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index 346e44115c4e..4b556b74541a 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -3257,12 +3257,16 @@ static struct platform_driver velocity_platform_driver = { * @dev: network device * * Called before an ethtool operation. We need to make sure the - * chip is out of D3 state before we poke at it. + * chip is out of D3 state before we poke at it. In case of ethtool + * ops nesting, only wake the device up in the outermost block. */ static int velocity_ethtool_up(struct net_device *dev) { struct velocity_info *vptr = netdev_priv(dev); - if (!netif_running(dev)) + + if (vptr->ethtool_ops_nesting == U32_MAX) + return -EBUSY; + if (!vptr->ethtool_ops_nesting++ && !netif_running(dev)) velocity_set_power_state(vptr, PCI_D0); return 0; } @@ -3272,12 +3276,14 @@ static int velocity_ethtool_up(struct net_device *dev) * @dev: network device * * Called after an ethtool operation. Restore the chip back to D3 - * state if it isn't running. + * state if it isn't running. In case of ethtool ops nesting, only + * put the device to sleep in the outermost block. 
*/ static void velocity_ethtool_down(struct net_device *dev) { struct velocity_info *vptr = netdev_priv(dev); - if (!netif_running(dev)) + + if (!--vptr->ethtool_ops_nesting && !netif_running(dev)) velocity_set_power_state(vptr, PCI_D3hot); } diff --git a/drivers/net/ethernet/via/via-velocity.h b/drivers/net/ethernet/via/via-velocity.h index cdfe7809e3c1..f196e71d2c04 100644 --- a/drivers/net/ethernet/via/via-velocity.h +++ b/drivers/net/ethernet/via/via-velocity.h @@ -1483,6 +1483,7 @@ struct velocity_info { struct velocity_context context; u32 ticks; + u32 ethtool_ops_nesting; u8 rev_id; diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c index bede1ff289c5..c0d181a7f83a 100644 --- a/drivers/net/ethernet/wiznet/w5100.c +++ b/drivers/net/ethernet/wiznet/w5100.c @@ -790,7 +790,7 @@ static void w5100_restart_work(struct work_struct *work) w5100_restart(priv->ndev); } -static void w5100_tx_timeout(struct net_device *ndev) +static void w5100_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct w5100_priv *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c index 6ba2747779ce..46aae30c4636 100644 --- a/drivers/net/ethernet/wiznet/w5300.c +++ b/drivers/net/ethernet/wiznet/w5300.c @@ -341,7 +341,7 @@ static void w5300_get_regs(struct net_device *ndev, } } -static void w5300_tx_timeout(struct net_device *ndev) +static void w5300_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct w5300_priv *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 0de52e70abcc..0c26f5bcc523 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -521,7 +521,7 @@ static int xemaclite_set_mac_address(struct net_device *dev, void *address) * * This function is called when Tx time out occurs for Emaclite device. 
*/ -static void xemaclite_tx_timeout(struct net_device *dev) +static void xemaclite_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct net_local *lp = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c index fd5288ff53b5..480ab7251515 100644 --- a/drivers/net/ethernet/xircom/xirc2ps_cs.c +++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c @@ -288,7 +288,7 @@ struct local_info { */ static netdev_tx_t do_start_xmit(struct sk_buff *skb, struct net_device *dev); -static void xirc_tx_timeout(struct net_device *dev); +static void xirc_tx_timeout(struct net_device *dev, unsigned int txqueue); static void xirc2ps_tx_timeout_task(struct work_struct *work); static void set_addresses(struct net_device *dev); static void set_multicast_list(struct net_device *dev); @@ -1203,7 +1203,7 @@ xirc2ps_tx_timeout_task(struct work_struct *work) } static void -xirc_tx_timeout(struct net_device *dev) +xirc_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct local_info *lp = netdev_priv(dev); dev->stats.tx_errors++; diff --git a/drivers/net/ethernet/xscale/Kconfig b/drivers/net/ethernet/xscale/Kconfig index cd0a8f46e7c6..98aa7b8ddb06 100644 --- a/drivers/net/ethernet/xscale/Kconfig +++ b/drivers/net/ethernet/xscale/Kconfig @@ -27,4 +27,18 @@ config IXP4XX_ETH Say Y here if you want to use built-in Ethernet ports on IXP4xx processor. +config PTP_1588_CLOCK_IXP46X + tristate "Intel IXP46x as PTP clock" + depends on IXP4XX_ETH + depends on PTP_1588_CLOCK + default y + help + This driver adds support for using the IXP46X as a PTP + clock. This clock is only useful if your PTP programs are + getting hardware time stamps on the PTP Ethernet packets + using the SO_TIMESTAMPING API. + + To compile this driver as a module, choose M here: the module + will be called ptp_ixp46x. 
+ endif # NET_VENDOR_XSCALE diff --git a/drivers/net/ethernet/xscale/Makefile b/drivers/net/ethernet/xscale/Makefile index 794a519d07b3..607f91b1e878 100644 --- a/drivers/net/ethernet/xscale/Makefile +++ b/drivers/net/ethernet/xscale/Makefile @@ -3,4 +3,5 @@ # Makefile for the Intel XScale IXP device drivers. # -obj-$(CONFIG_IXP4XX_ETH) += ixp4xx_eth.o +obj-$(CONFIG_IXP4XX_ETH) += ixp4xx_eth.o +obj-$(CONFIG_PTP_1588_CLOCK_IXP46X) += ptp_ixp46x.o diff --git a/drivers/net/ethernet/xscale/ixp46x_ts.h b/drivers/net/ethernet/xscale/ixp46x_ts.h new file mode 100644 index 000000000000..d792130e27b0 --- /dev/null +++ b/drivers/net/ethernet/xscale/ixp46x_ts.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * PTP 1588 clock using the IXP46X + * + * Copyright (C) 2010 OMICRON electronics GmbH + */ + +#ifndef _IXP46X_TS_H_ +#define _IXP46X_TS_H_ + +#define DEFAULT_ADDEND 0xF0000029 +#define TICKS_NS_SHIFT 4 + +struct ixp46x_channel_ctl { + u32 ch_control; /* 0x40 Time Synchronization Channel Control */ + u32 ch_event; /* 0x44 Time Synchronization Channel Event */ + u32 tx_snap_lo; /* 0x48 Transmit Snapshot Low Register */ + u32 tx_snap_hi; /* 0x4C Transmit Snapshot High Register */ + u32 rx_snap_lo; /* 0x50 Receive Snapshot Low Register */ + u32 rx_snap_hi; /* 0x54 Receive Snapshot High Register */ + u32 src_uuid_lo; /* 0x58 Source UUID0 Low Register */ + u32 src_uuid_hi; /* 0x5C Sequence Identifier/Source UUID0 High */ +}; + +struct ixp46x_ts_regs { + u32 control; /* 0x00 Time Sync Control Register */ + u32 event; /* 0x04 Time Sync Event Register */ + u32 addend; /* 0x08 Time Sync Addend Register */ + u32 accum; /* 0x0C Time Sync Accumulator Register */ + u32 test; /* 0x10 Time Sync Test Register */ + u32 unused; /* 0x14 */ + u32 rsystime_lo; /* 0x18 RawSystemTime_Low Register */ + u32 rsystime_hi; /* 0x1C RawSystemTime_High Register */ + u32 systime_lo; /* 0x20 SystemTime_Low Register */ + u32 systime_hi; /* 0x24 SystemTime_High Register */ + u32 
trgt_lo; /* 0x28 TargetTime_Low Register */ + u32 trgt_hi; /* 0x2C TargetTime_High Register */ + u32 asms_lo; /* 0x30 Auxiliary Slave Mode Snapshot Low */ + u32 asms_hi; /* 0x34 Auxiliary Slave Mode Snapshot High */ + u32 amms_lo; /* 0x38 Auxiliary Master Mode Snapshot Low */ + u32 amms_hi; /* 0x3C Auxiliary Master Mode Snapshot High */ + + struct ixp46x_channel_ctl channel[3]; +}; + +/* 0x00 Time Sync Control Register Bits */ +#define TSCR_AMM (1<<3) +#define TSCR_ASM (1<<2) +#define TSCR_TTM (1<<1) +#define TSCR_RST (1<<0) + +/* 0x04 Time Sync Event Register Bits */ +#define TSER_SNM (1<<3) +#define TSER_SNS (1<<2) +#define TTIPEND (1<<1) + +/* 0x40 Time Synchronization Channel Control Register Bits */ +#define MASTER_MODE (1<<0) +#define TIMESTAMP_ALL (1<<1) + +/* 0x44 Time Synchronization Channel Event Register Bits */ +#define TX_SNAPSHOT_LOCKED (1<<0) +#define RX_SNAPSHOT_LOCKED (1<<1) + +/* The ptp_ixp46x module will set this variable */ +extern int ixp46x_phc_index; + +#endif diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index 6fc04ffb22c2..269596c15133 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -29,14 +29,16 @@ #include <linux/net_tstamp.h> #include <linux/of.h> #include <linux/phy.h> +#include <linux/platform_data/eth_ixp4xx.h> #include <linux/platform_device.h> #include <linux/ptp_classify.h> #include <linux/slab.h> #include <linux/module.h> -#include <mach/ixp46x_ts.h> #include <linux/soc/ixp4xx/npe.h> #include <linux/soc/ixp4xx/qmgr.h> +#include "ixp46x_ts.h" + #define DEBUG_DESC 0 #define DEBUG_RX 0 #define DEBUG_TX 0 @@ -517,25 +519,14 @@ static int ixp4xx_mdio_write(struct mii_bus *bus, int phy_id, int location, return ret; } -static int ixp4xx_mdio_register(void) +static int ixp4xx_mdio_register(struct eth_regs __iomem *regs) { int err; if (!(mdio_bus = mdiobus_alloc())) return -ENOMEM; - if (cpu_is_ixp43x()) { - /* IXP43x lacks NPE-B and 
uses NPE-C for MII PHY access */ - if (!(ixp4xx_read_feature_bits() & IXP4XX_FEATURE_NPEC_ETH)) - return -ENODEV; - mdio_regs = (struct eth_regs __iomem *)IXP4XX_EthC_BASE_VIRT; - } else { - /* All MII PHY accesses use NPE-B Ethernet registers */ - if (!(ixp4xx_read_feature_bits() & IXP4XX_FEATURE_NPEB_ETH0)) - return -ENODEV; - mdio_regs = (struct eth_regs __iomem *)IXP4XX_EthB_BASE_VIRT; - } - + mdio_regs = regs; __raw_writel(DEFAULT_CORE_CNTRL, &mdio_regs->core_control); spin_lock_init(&mdio_lock); mdio_bus->name = "IXP4xx MII Bus"; @@ -581,8 +572,8 @@ static void ixp4xx_adjust_link(struct net_device *dev) __raw_writel(DEFAULT_TX_CNTRL0 | TX_CNTRL0_HALFDUPLEX, &port->regs->tx_control[0]); - printk(KERN_INFO "%s: link up, speed %u Mb/s, %s duplex\n", - dev->name, port->speed, port->duplex ? "full" : "half"); + netdev_info(dev, "%s: link up, speed %u Mb/s, %s duplex\n", + dev->name, port->speed, port->duplex ? "full" : "half"); } @@ -592,7 +583,7 @@ static inline void debug_pkt(struct net_device *dev, const char *func, #if DEBUG_PKT_BYTES int i; - printk(KERN_DEBUG "%s: %s(%i) ", dev->name, func, len); + netdev_debug(dev, "%s(%i) ", func, len); for (i = 0; i < len; i++) { if (i >= DEBUG_PKT_BYTES) break; @@ -683,7 +674,7 @@ static int eth_poll(struct napi_struct *napi, int budget) int received = 0; #if DEBUG_RX - printk(KERN_DEBUG "%s: eth_poll\n", dev->name); + netdev_debug(dev, "eth_poll\n"); #endif while (received < budget) { @@ -697,23 +688,20 @@ static int eth_poll(struct napi_struct *napi, int budget) if ((n = queue_get_desc(rxq, port, 0)) < 0) { #if DEBUG_RX - printk(KERN_DEBUG "%s: eth_poll napi_complete\n", - dev->name); + netdev_debug(dev, "eth_poll napi_complete\n"); #endif napi_complete(napi); qmgr_enable_irq(rxq); if (!qmgr_stat_below_low_watermark(rxq) && napi_reschedule(napi)) { /* not empty again */ #if DEBUG_RX - printk(KERN_DEBUG "%s: eth_poll napi_reschedule succeeded\n", - dev->name); + netdev_debug(dev, "eth_poll napi_reschedule succeeded\n"); 
#endif qmgr_disable_irq(rxq); continue; } #if DEBUG_RX - printk(KERN_DEBUG "%s: eth_poll all done\n", - dev->name); + netdev_debug(dev, "eth_poll all done\n"); #endif return received; /* all work done */ } @@ -778,7 +766,7 @@ static int eth_poll(struct napi_struct *napi, int budget) } #if DEBUG_RX - printk(KERN_DEBUG "eth_poll(): end, not all work done\n"); + netdev_debug(dev, "eth_poll(): end, not all work done\n"); #endif return received; /* not all work done */ } @@ -842,7 +830,7 @@ static int eth_xmit(struct sk_buff *skb, struct net_device *dev) struct desc *desc; #if DEBUG_TX - printk(KERN_DEBUG "%s: eth_xmit\n", dev->name); + netdev_debug(dev, "eth_xmit\n"); #endif if (unlikely(skb->len > MAX_MRU)) { @@ -897,22 +885,21 @@ static int eth_xmit(struct sk_buff *skb, struct net_device *dev) if (qmgr_stat_below_low_watermark(txreadyq)) { /* empty */ #if DEBUG_TX - printk(KERN_DEBUG "%s: eth_xmit queue full\n", dev->name); + netdev_debug(dev, "eth_xmit queue full\n"); #endif netif_stop_queue(dev); /* we could miss TX ready interrupt */ /* really empty in fact */ if (!qmgr_stat_below_low_watermark(txreadyq)) { #if DEBUG_TX - printk(KERN_DEBUG "%s: eth_xmit ready again\n", - dev->name); + netdev_debug(dev, "eth_xmit ready again\n"); #endif netif_wake_queue(dev); } } #if DEBUG_TX - printk(KERN_DEBUG "%s: eth_xmit end\n", dev->name); + netdev_debug(dev, "eth_xmit end\n"); #endif ixp_tx_timestamp(port, skb); @@ -1099,7 +1086,7 @@ static int init_queues(struct port *port) int i; if (!ports_open) { - dma_pool = dma_pool_create(DRV_NAME, &port->netdev->dev, + dma_pool = dma_pool_create(DRV_NAME, port->netdev->dev.parent, POOL_ALLOC_SIZE, 32, 0); if (!dma_pool) return -ENOMEM; @@ -1186,8 +1173,7 @@ static int eth_open(struct net_device *dev) return err; if (npe_recv_message(npe, &msg, "ETH_GET_STATUS")) { - printk(KERN_ERR "%s: %s not responding\n", dev->name, - npe_name(npe)); + netdev_err(dev, "%s not responding\n", npe_name(npe)); return -EIO; } port->firmware[0] = 
msg.byte4; @@ -1299,7 +1285,7 @@ static int eth_close(struct net_device *dev) msg.eth_id = port->id; msg.byte3 = 1; if (npe_send_recv_message(port->npe, &msg, "ETH_ENABLE_LOOPBACK")) - printk(KERN_CRIT "%s: unable to enable loopback\n", dev->name); + netdev_crit(dev, "unable to enable loopback\n"); i = 0; do { /* drain RX buffers */ @@ -1323,11 +1309,11 @@ static int eth_close(struct net_device *dev) } while (++i < MAX_CLOSE_WAIT); if (buffs) - printk(KERN_CRIT "%s: unable to drain RX queue, %i buffer(s)" - " left in NPE\n", dev->name, buffs); + netdev_crit(dev, "unable to drain RX queue, %i buffer(s)" + " left in NPE\n", buffs); #if DEBUG_CLOSE if (!buffs) - printk(KERN_DEBUG "Draining RX queue took %i cycles\n", i); + netdev_debug(dev, "draining RX queue took %i cycles\n", i); #endif buffs = TX_DESCS; @@ -1343,17 +1329,16 @@ static int eth_close(struct net_device *dev) } while (++i < MAX_CLOSE_WAIT); if (buffs) - printk(KERN_CRIT "%s: unable to drain TX queue, %i buffer(s) " - "left in NPE\n", dev->name, buffs); + netdev_crit(dev, "unable to drain TX queue, %i buffer(s) " + "left in NPE\n", buffs); #if DEBUG_CLOSE if (!buffs) - printk(KERN_DEBUG "Draining TX queues took %i cycles\n", i); + netdev_debug(dev, "draining TX queues took %i cycles\n", i); #endif msg.byte3 = 0; if (npe_send_recv_message(port->npe, &msg, "ETH_DISABLE_LOOPBACK")) - printk(KERN_CRIT "%s: unable to disable loopback\n", - dev->name); + netdev_crit(dev, "unable to disable loopback\n"); phy_stop(dev->phydev); @@ -1374,54 +1359,88 @@ static const struct net_device_ops ixp4xx_netdev_ops = { .ndo_validate_addr = eth_validate_addr, }; -static int eth_init_one(struct platform_device *pdev) +static int ixp4xx_eth_probe(struct platform_device *pdev) { - struct port *port; - struct net_device *dev; - struct eth_plat_info *plat = dev_get_platdata(&pdev->dev); - struct phy_device *phydev = NULL; - u32 regs_phys; char phy_id[MII_BUS_ID_SIZE + 3]; + struct phy_device *phydev = NULL; + struct device *dev = 
&pdev->dev; + struct eth_plat_info *plat; + resource_size_t regs_phys; + struct net_device *ndev; + struct resource *res; + struct port *port; int err; - if (!(dev = alloc_etherdev(sizeof(struct port)))) + plat = dev_get_platdata(dev); + + if (!(ndev = devm_alloc_etherdev(dev, sizeof(struct port)))) return -ENOMEM; - SET_NETDEV_DEV(dev, &pdev->dev); - port = netdev_priv(dev); - port->netdev = dev; + SET_NETDEV_DEV(ndev, dev); + port = netdev_priv(ndev); + port->netdev = ndev; port->id = pdev->id; + /* Get the port resource and remap */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + regs_phys = res->start; + port->regs = devm_ioremap_resource(dev, res); + switch (port->id) { case IXP4XX_ETH_NPEA: - port->regs = (struct eth_regs __iomem *)IXP4XX_EthA_BASE_VIRT; - regs_phys = IXP4XX_EthA_BASE_PHYS; + /* If the MDIO bus is not up yet, defer probe */ + if (!mdio_bus) + return -EPROBE_DEFER; break; case IXP4XX_ETH_NPEB: - port->regs = (struct eth_regs __iomem *)IXP4XX_EthB_BASE_VIRT; - regs_phys = IXP4XX_EthB_BASE_PHYS; + /* + * On all except IXP43x, NPE-B is used for the MDIO bus. + * If there is no NPE-B in the feature set, bail out, else + * register the MDIO bus. + */ + if (!cpu_is_ixp43x()) { + if (!(ixp4xx_read_feature_bits() & + IXP4XX_FEATURE_NPEB_ETH0)) + return -ENODEV; + /* Else register the MDIO bus on NPE-B */ + if ((err = ixp4xx_mdio_register(port->regs))) + return err; + } + if (!mdio_bus) + return -EPROBE_DEFER; break; case IXP4XX_ETH_NPEC: - port->regs = (struct eth_regs __iomem *)IXP4XX_EthC_BASE_VIRT; - regs_phys = IXP4XX_EthC_BASE_PHYS; + /* + * IXP43x lacks NPE-B and uses NPE-C for the MDIO bus access, + * of there is no NPE-C, no bus, nothing works, so bail out. 
+ */ + if (cpu_is_ixp43x()) { + if (!(ixp4xx_read_feature_bits() & + IXP4XX_FEATURE_NPEC_ETH)) + return -ENODEV; + /* Else register the MDIO bus on NPE-C */ + if ((err = ixp4xx_mdio_register(port->regs))) + return err; + } + if (!mdio_bus) + return -EPROBE_DEFER; break; default: - err = -ENODEV; - goto err_free; + return -ENODEV; } - dev->netdev_ops = &ixp4xx_netdev_ops; - dev->ethtool_ops = &ixp4xx_ethtool_ops; - dev->tx_queue_len = 100; + ndev->netdev_ops = &ixp4xx_netdev_ops; + ndev->ethtool_ops = &ixp4xx_ethtool_ops; + ndev->tx_queue_len = 100; - netif_napi_add(dev, &port->napi, eth_poll, NAPI_WEIGHT); + netif_napi_add(ndev, &port->napi, eth_poll, NAPI_WEIGHT); - if (!(port->npe = npe_request(NPE_ID(port->id)))) { - err = -EIO; - goto err_free; - } + if (!(port->npe = npe_request(NPE_ID(port->id)))) + return -EIO; - port->mem_res = request_mem_region(regs_phys, REGS_SIZE, dev->name); + port->mem_res = request_mem_region(regs_phys, REGS_SIZE, ndev->name); if (!port->mem_res) { err = -EBUSY; goto err_npe_rel; @@ -1429,9 +1448,9 @@ static int eth_init_one(struct platform_device *pdev) port->plat = plat; npe_port_tab[NPE_ID(port->id)] = port; - memcpy(dev->dev_addr, plat->hwaddr, ETH_ALEN); + memcpy(ndev->dev_addr, plat->hwaddr, ETH_ALEN); - platform_set_drvdata(pdev, dev); + platform_set_drvdata(pdev, ndev); __raw_writel(DEFAULT_CORE_CNTRL | CORE_RESET, &port->regs->core_control); @@ -1441,7 +1460,7 @@ static int eth_init_one(struct platform_device *pdev) snprintf(phy_id, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, mdio_bus->id, plat->phy); - phydev = phy_connect(dev, phy_id, &ixp4xx_adjust_link, + phydev = phy_connect(ndev, phy_id, &ixp4xx_adjust_link, PHY_INTERFACE_MODE_MII); if (IS_ERR(phydev)) { err = PTR_ERR(phydev); @@ -1450,11 +1469,11 @@ static int eth_init_one(struct platform_device *pdev) phydev->irq = PHY_POLL; - if ((err = register_netdev(dev))) + if ((err = register_netdev(ndev))) goto err_phy_dis; - printk(KERN_INFO "%s: MII PHY %i on %s\n", dev->name, 
plat->phy, - npe_name(port->npe)); + netdev_info(ndev, "%s: MII PHY %i on %s\n", ndev->name, plat->phy, + npe_name(port->npe)); return 0; @@ -1465,58 +1484,32 @@ err_free_mem: release_resource(port->mem_res); err_npe_rel: npe_release(port->npe); -err_free: - free_netdev(dev); return err; } -static int eth_remove_one(struct platform_device *pdev) +static int ixp4xx_eth_remove(struct platform_device *pdev) { - struct net_device *dev = platform_get_drvdata(pdev); - struct phy_device *phydev = dev->phydev; - struct port *port = netdev_priv(dev); + struct net_device *ndev = platform_get_drvdata(pdev); + struct phy_device *phydev = ndev->phydev; + struct port *port = netdev_priv(ndev); - unregister_netdev(dev); + unregister_netdev(ndev); phy_disconnect(phydev); + ixp4xx_mdio_remove(); npe_port_tab[NPE_ID(port->id)] = NULL; npe_release(port->npe); release_resource(port->mem_res); - free_netdev(dev); return 0; } static struct platform_driver ixp4xx_eth_driver = { .driver.name = DRV_NAME, - .probe = eth_init_one, - .remove = eth_remove_one, + .probe = ixp4xx_eth_probe, + .remove = ixp4xx_eth_remove, }; - -static int __init eth_init_module(void) -{ - int err; - - /* - * FIXME: we bail out on device tree boot but this really needs - * to be fixed in a nicer way: this registers the MDIO bus before - * even matching the driver infrastructure, we should only probe - * detected hardware. 
- */ - if (of_have_populated_dt()) - return -ENODEV; - if ((err = ixp4xx_mdio_register())) - return err; - return platform_driver_register(&ixp4xx_eth_driver); -} - -static void __exit eth_cleanup_module(void) -{ - platform_driver_unregister(&ixp4xx_eth_driver); - ixp4xx_mdio_remove(); -} +module_platform_driver(ixp4xx_eth_driver); MODULE_AUTHOR("Krzysztof Halasa"); MODULE_DESCRIPTION("Intel IXP4xx Ethernet driver"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("platform:ixp4xx_eth"); -module_init(eth_init_module); -module_exit(eth_cleanup_module); diff --git a/drivers/net/ethernet/xscale/ptp_ixp46x.c b/drivers/net/ethernet/xscale/ptp_ixp46x.c new file mode 100644 index 000000000000..9ecc395239e9 --- /dev/null +++ b/drivers/net/ethernet/xscale/ptp_ixp46x.c @@ -0,0 +1,329 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PTP 1588 clock using the IXP46X + * + * Copyright (C) 2010 OMICRON electronics GmbH + */ +#include <linux/device.h> +#include <linux/err.h> +#include <linux/gpio.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include <linux/ptp_clock_kernel.h> + +#include "ixp46x_ts.h" + +#define DRIVER "ptp_ixp46x" +#define N_EXT_TS 2 +#define MASTER_GPIO 8 +#define MASTER_IRQ 25 +#define SLAVE_GPIO 7 +#define SLAVE_IRQ 24 + +struct ixp_clock { + struct ixp46x_ts_regs *regs; + struct ptp_clock *ptp_clock; + struct ptp_clock_info caps; + int exts0_enabled; + int exts1_enabled; +}; + +DEFINE_SPINLOCK(register_lock); + +/* + * Register access functions + */ + +static u64 ixp_systime_read(struct ixp46x_ts_regs *regs) +{ + u64 ns; + u32 lo, hi; + + lo = __raw_readl(&regs->systime_lo); + hi = __raw_readl(&regs->systime_hi); + + ns = ((u64) hi) << 32; + ns |= lo; + ns <<= TICKS_NS_SHIFT; + + return ns; +} + +static void ixp_systime_write(struct ixp46x_ts_regs *regs, u64 ns) +{ + u32 hi, lo; + + ns >>= TICKS_NS_SHIFT; + hi = ns >> 32; + lo = ns & 0xffffffff; + 
__raw_writel(lo, &regs->systime_lo); + __raw_writel(hi, &regs->systime_hi); +} + +/* + * Interrupt service routine + */ + +static irqreturn_t isr(int irq, void *priv) +{ + struct ixp_clock *ixp_clock = priv; + struct ixp46x_ts_regs *regs = ixp_clock->regs; + struct ptp_clock_event event; + u32 ack = 0, lo, hi, val; + + val = __raw_readl(&regs->event); + + if (val & TSER_SNS) { + ack |= TSER_SNS; + if (ixp_clock->exts0_enabled) { + hi = __raw_readl(&regs->asms_hi); + lo = __raw_readl(&regs->asms_lo); + event.type = PTP_CLOCK_EXTTS; + event.index = 0; + event.timestamp = ((u64) hi) << 32; + event.timestamp |= lo; + event.timestamp <<= TICKS_NS_SHIFT; + ptp_clock_event(ixp_clock->ptp_clock, &event); + } + } + + if (val & TSER_SNM) { + ack |= TSER_SNM; + if (ixp_clock->exts1_enabled) { + hi = __raw_readl(&regs->amms_hi); + lo = __raw_readl(&regs->amms_lo); + event.type = PTP_CLOCK_EXTTS; + event.index = 1; + event.timestamp = ((u64) hi) << 32; + event.timestamp |= lo; + event.timestamp <<= TICKS_NS_SHIFT; + ptp_clock_event(ixp_clock->ptp_clock, &event); + } + } + + if (val & TTIPEND) + ack |= TTIPEND; /* this bit seems to be always set */ + + if (ack) { + __raw_writel(ack, &regs->event); + return IRQ_HANDLED; + } else + return IRQ_NONE; +} + +/* + * PTP clock operations + */ + +static int ptp_ixp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) +{ + u64 adj; + u32 diff, addend; + int neg_adj = 0; + struct ixp_clock *ixp_clock = container_of(ptp, struct ixp_clock, caps); + struct ixp46x_ts_regs *regs = ixp_clock->regs; + + if (ppb < 0) { + neg_adj = 1; + ppb = -ppb; + } + addend = DEFAULT_ADDEND; + adj = addend; + adj *= ppb; + diff = div_u64(adj, 1000000000ULL); + + addend = neg_adj ? 
addend - diff : addend + diff; + + __raw_writel(addend, &regs->addend); + + return 0; +} + +static int ptp_ixp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + s64 now; + unsigned long flags; + struct ixp_clock *ixp_clock = container_of(ptp, struct ixp_clock, caps); + struct ixp46x_ts_regs *regs = ixp_clock->regs; + + spin_lock_irqsave(&register_lock, flags); + + now = ixp_systime_read(regs); + now += delta; + ixp_systime_write(regs, now); + + spin_unlock_irqrestore(&register_lock, flags); + + return 0; +} + +static int ptp_ixp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) +{ + u64 ns; + unsigned long flags; + struct ixp_clock *ixp_clock = container_of(ptp, struct ixp_clock, caps); + struct ixp46x_ts_regs *regs = ixp_clock->regs; + + spin_lock_irqsave(&register_lock, flags); + + ns = ixp_systime_read(regs); + + spin_unlock_irqrestore(&register_lock, flags); + + *ts = ns_to_timespec64(ns); + return 0; +} + +static int ptp_ixp_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + u64 ns; + unsigned long flags; + struct ixp_clock *ixp_clock = container_of(ptp, struct ixp_clock, caps); + struct ixp46x_ts_regs *regs = ixp_clock->regs; + + ns = timespec64_to_ns(ts); + + spin_lock_irqsave(&register_lock, flags); + + ixp_systime_write(regs, ns); + + spin_unlock_irqrestore(&register_lock, flags); + + return 0; +} + +static int ptp_ixp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) +{ + struct ixp_clock *ixp_clock = container_of(ptp, struct ixp_clock, caps); + + switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + switch (rq->extts.index) { + case 0: + ixp_clock->exts0_enabled = on ? 1 : 0; + break; + case 1: + ixp_clock->exts1_enabled = on ? 
1 : 0; + break; + default: + return -EINVAL; + } + return 0; + default: + break; + } + + return -EOPNOTSUPP; +} + +static const struct ptp_clock_info ptp_ixp_caps = { + .owner = THIS_MODULE, + .name = "IXP46X timer", + .max_adj = 66666655, + .n_ext_ts = N_EXT_TS, + .n_pins = 0, + .pps = 0, + .adjfreq = ptp_ixp_adjfreq, + .adjtime = ptp_ixp_adjtime, + .gettime64 = ptp_ixp_gettime, + .settime64 = ptp_ixp_settime, + .enable = ptp_ixp_enable, +}; + +/* module operations */ + +static struct ixp_clock ixp_clock; + +static int setup_interrupt(int gpio) +{ + int irq; + int err; + + err = gpio_request(gpio, "ixp4-ptp"); + if (err) + return err; + + err = gpio_direction_input(gpio); + if (err) + return err; + + irq = gpio_to_irq(gpio); + if (irq < 0) + return irq; + + err = irq_set_irq_type(irq, IRQF_TRIGGER_FALLING); + if (err) { + pr_err("cannot set trigger type for irq %d\n", irq); + return err; + } + + err = request_irq(irq, isr, 0, DRIVER, &ixp_clock); + if (err) { + pr_err("request_irq failed for irq %d\n", irq); + return err; + } + + return irq; +} + +static void __exit ptp_ixp_exit(void) +{ + free_irq(MASTER_IRQ, &ixp_clock); + free_irq(SLAVE_IRQ, &ixp_clock); + ixp46x_phc_index = -1; + ptp_clock_unregister(ixp_clock.ptp_clock); +} + +static int __init ptp_ixp_init(void) +{ + if (!cpu_is_ixp46x()) + return -ENODEV; + + ixp_clock.regs = + (struct ixp46x_ts_regs __iomem *) IXP4XX_TIMESYNC_BASE_VIRT; + + ixp_clock.caps = ptp_ixp_caps; + + ixp_clock.ptp_clock = ptp_clock_register(&ixp_clock.caps, NULL); + + if (IS_ERR(ixp_clock.ptp_clock)) + return PTR_ERR(ixp_clock.ptp_clock); + + ixp46x_phc_index = ptp_clock_index(ixp_clock.ptp_clock); + + __raw_writel(DEFAULT_ADDEND, &ixp_clock.regs->addend); + __raw_writel(1, &ixp_clock.regs->trgt_lo); + __raw_writel(0, &ixp_clock.regs->trgt_hi); + __raw_writel(TTIPEND, &ixp_clock.regs->event); + + if (MASTER_IRQ != setup_interrupt(MASTER_GPIO)) { + pr_err("failed to setup gpio %d as irq\n", MASTER_GPIO); + goto no_master; + } + if 
(SLAVE_IRQ != setup_interrupt(SLAVE_GPIO)) { + pr_err("failed to setup gpio %d as irq\n", SLAVE_GPIO); + goto no_slave; + } + + return 0; +no_slave: + free_irq(MASTER_IRQ, &ixp_clock); +no_master: + ptp_clock_unregister(ixp_clock.ptp_clock); + return -ENODEV; +} + +module_init(ptp_ixp_init); +module_exit(ptp_ixp_exit); + +MODULE_AUTHOR("Richard Cochran <richardcochran@gmail.com>"); +MODULE_DESCRIPTION("PTP clock using the IXP46X timer"); +MODULE_LICENSE("GPL"); |