diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-01-16 18:09:44 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-01-16 18:09:44 +0100 |
commit | ce69b4019001407f9cd738dd2ba217b3a8ab831b (patch) | |
tree | 02f78838ce7298b7b1321a4dfefcf9d529a75fff | |
parent | Merge tag 'pm-6.13-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/rafa... (diff) | |
parent | netdev: avoid CFI problems with sock priv helpers (diff) | |
download | linux-ce69b4019001407f9cd738dd2ba217b3a8ab831b.tar.xz linux-ce69b4019001407f9cd738dd2ba217b3a8ab831b.zip |
Merge tag 'net-6.13-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Paolo Abeni:
"Notably this includes fixes for a few regressions spotted very
recently. No known outstanding ones.
Current release - regressions:
- core: avoid CFI problems with sock priv helpers
- xsk: bring back busy polling support
- netpoll: ensure skb_pool list is always initialized
Current release - new code bugs:
- core: make page_pool_ref_netmem work with net iovs
- ipv4: route: fix drop reason being overridden in
ip_route_input_slow
- udp: make rehash4 independent in udp_lib_rehash()
Previous releases - regressions:
- bpf: fix bpf_sk_select_reuseport() memory leak
- openvswitch: fix lockup on tx to unregistering netdev with carrier
- mptcp: be sure to send ack when mptcp-level window re-opens
- eth:
- bnxt: always recalculate features after XDP clearing, fix
null-deref
- mlx5: fix sub-function add port error handling
- fec: handle page_pool_dev_alloc_pages error
Previous releases - always broken:
- vsock: some fixes due to transport de-assignment
- eth:
- ice: fix E825 initialization
- mlx5e: fix inversion dependency warning while enabling IPsec
tunnel
- gtp: destroy device along with udp socket's netns dismantle.
- xilinx: axienet: Fix IRQ coalescing packet count overflow"
* tag 'net-6.13-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (44 commits)
netdev: avoid CFI problems with sock priv helpers
net/mlx5e: Always start IPsec sequence number from 1
net/mlx5e: Rely on reqid in IPsec tunnel mode
net/mlx5e: Fix inversion dependency warning while enabling IPsec tunnel
net/mlx5: Clear port select structure when fail to create
net/mlx5: SF, Fix add port error handling
net/mlx5: Fix a lockdep warning as part of the write combining test
net/mlx5: Fix RDMA TX steering prio
net: make page_pool_ref_netmem work with net iovs
net: ethernet: xgbe: re-add aneg to supported features in PHY quirks
net: pcs: xpcs: actively unset DW_VR_MII_DIG_CTRL1_2G5_EN for 1G SGMII
net: pcs: xpcs: fix DW_VR_MII_DIG_CTRL1_2G5_EN bit being set for 1G SGMII w/o inband
selftests: net: Adapt ethtool mq tests to fix in qdisc graft
net: fec: handle page_pool_dev_alloc_pages error
net: netpoll: ensure skb_pool list is always initialized
net: xilinx: axienet: Fix IRQ coalescing packet count overflow
nfp: bpf: prevent integer overflow in nfp_bpf_event_output()
selftests: mptcp: avoid spurious errors on disconnect
mptcp: fix spurious wake-up on under memory pressure
mptcp: be sure to send ack when mptcp-level window re-opens
...
54 files changed, 552 insertions, 399 deletions
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 6a716337f48b..268399dfcf22 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -923,7 +923,6 @@ static void xgbe_phy_free_phy_device(struct xgbe_prv_data *pdata) static bool xgbe_phy_finisar_phy_quirks(struct xgbe_prv_data *pdata) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, }; struct xgbe_phy_data *phy_data = pdata->phy_data; unsigned int phy_id = phy_data->phydev->phy_id; @@ -945,14 +944,7 @@ static bool xgbe_phy_finisar_phy_quirks(struct xgbe_prv_data *pdata) phy_write(phy_data->phydev, 0x04, 0x0d01); phy_write(phy_data->phydev, 0x00, 0x9140); - linkmode_set_bit_array(phy_10_100_features_array, - ARRAY_SIZE(phy_10_100_features_array), - supported); - linkmode_set_bit_array(phy_gbit_features_array, - ARRAY_SIZE(phy_gbit_features_array), - supported); - - linkmode_copy(phy_data->phydev->supported, supported); + linkmode_copy(phy_data->phydev->supported, PHY_GBIT_FEATURES); phy_support_asym_pause(phy_data->phydev); @@ -964,7 +956,6 @@ static bool xgbe_phy_finisar_phy_quirks(struct xgbe_prv_data *pdata) static bool xgbe_phy_belfuse_phy_quirks(struct xgbe_prv_data *pdata) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, }; struct xgbe_phy_data *phy_data = pdata->phy_data; struct xgbe_sfp_eeprom *sfp_eeprom = &phy_data->sfp_eeprom; unsigned int phy_id = phy_data->phydev->phy_id; @@ -1028,13 +1019,7 @@ static bool xgbe_phy_belfuse_phy_quirks(struct xgbe_prv_data *pdata) reg = phy_read(phy_data->phydev, 0x00); phy_write(phy_data->phydev, 0x00, reg & ~0x00800); - linkmode_set_bit_array(phy_10_100_features_array, - ARRAY_SIZE(phy_10_100_features_array), - supported); - linkmode_set_bit_array(phy_gbit_features_array, - ARRAY_SIZE(phy_gbit_features_array), - supported); - linkmode_copy(phy_data->phydev->supported, supported); + linkmode_copy(phy_data->phydev->supported, PHY_GBIT_FEATURES); phy_support_asym_pause(phy_data->phydev); netif_dbg(pdata, drv, pdata->netdev, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index aeaa74f03046..b6f844cac80e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4708,7 +4708,7 @@ void bnxt_set_ring_params(struct bnxt *bp) /* Changing allocation mode of RX rings. * TODO: Update when extending xdp_rxq_info to support allocation modes. */ -int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode) +static void __bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode) { struct net_device *dev = bp->dev; @@ -4729,15 +4729,30 @@ int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode) bp->rx_skb_func = bnxt_rx_page_skb; } bp->rx_dir = DMA_BIDIRECTIONAL; - /* Disable LRO or GRO_HW */ - netdev_update_features(dev); } else { dev->max_mtu = bp->max_mtu; bp->flags &= ~BNXT_FLAG_RX_PAGE_MODE; bp->rx_dir = DMA_FROM_DEVICE; bp->rx_skb_func = bnxt_rx_skb; } - return 0; +} + +void bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode) +{ + __bnxt_set_rx_skb_mode(bp, page_mode); + + if (!page_mode) { + int rx, tx; + + bnxt_get_max_rings(bp, &rx, &tx, true); + if (rx > 1) { + bp->flags &= ~BNXT_FLAG_NO_AGG_RINGS; + bp->dev->hw_features |= NETIF_F_LRO; + } + } + + /* Update LRO and GRO_HW availability */ + netdev_update_features(bp->dev); } static void bnxt_free_vnic_attributes(struct bnxt *bp) @@ -16214,7 +16229,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (bp->max_fltr < BNXT_MAX_FLTR) bp->max_fltr = BNXT_MAX_FLTR; bnxt_init_l2_fltr_tbl(bp); - bnxt_set_rx_skb_mode(bp, false); + __bnxt_set_rx_skb_mode(bp, false); bnxt_set_tpa_flags(bp); bnxt_set_ring_params(bp); bnxt_rdma_aux_device_init(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 7df7a2233307..f11ed59203d9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2846,7 +2846,7 @@ u32 bnxt_fw_health_readl(struct bnxt *bp, int reg_idx); bool bnxt_bs_trace_avail(struct bnxt *bp, u16 type); void bnxt_set_tpa_flags(struct bnxt *bp); void bnxt_set_ring_params(struct bnxt *); -int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode); +void bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode); void bnxt_insert_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr); void bnxt_del_one_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr); int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index f88b641533fc..dc51dce209d5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -422,15 +422,8 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) bnxt_set_rx_skb_mode(bp, true); xdp_features_set_redirect_target(dev, true); } else { - int rx, tx; - xdp_features_clear_redirect_target(dev); bnxt_set_rx_skb_mode(bp, false); - bnxt_get_max_rings(bp, &rx, &tx, true); - if (rx > 1) { - bp->flags &= ~BNXT_FLAG_NO_AGG_RINGS; - bp->dev->hw_features |= NETIF_F_LRO; - } } bp->tx_nr_rings_xdp = tx_xdp; bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tc + tx_xdp; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 1b55047c0237..4566848e1d7c 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1591,19 +1591,22 @@ static void fec_enet_tx(struct net_device *ndev, int budget) fec_enet_tx_queue(ndev, i, budget); } -static void fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq, +static int fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq, struct bufdesc *bdp, int index) { struct page *new_page; dma_addr_t phys_addr; new_page = page_pool_dev_alloc_pages(rxq->page_pool); - WARN_ON(!new_page); - rxq->rx_skb_info[index].page = new_page; + if (unlikely(!new_page)) + return -ENOMEM; + rxq->rx_skb_info[index].page = new_page; rxq->rx_skb_info[index].offset = FEC_ENET_XDP_HEADROOM; phys_addr = page_pool_get_dma_addr(new_page) + FEC_ENET_XDP_HEADROOM; bdp->cbd_bufaddr = cpu_to_fec32(phys_addr); + + return 0; } static u32 @@ -1698,6 +1701,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) int cpu = smp_processor_id(); struct xdp_buff xdp; struct page *page; + __fec32 cbd_bufaddr; u32 sub_len = 4; #if !defined(CONFIG_M5272) @@ -1766,12 +1770,17 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) index = fec_enet_get_bd_index(bdp, &rxq->bd); page = rxq->rx_skb_info[index].page; + cbd_bufaddr = bdp->cbd_bufaddr; + if (fec_enet_update_cbd(rxq, bdp, index)) { + ndev->stats.rx_dropped++; + goto rx_processing_done; + } + dma_sync_single_for_cpu(&fep->pdev->dev, - fec32_to_cpu(bdp->cbd_bufaddr), + fec32_to_cpu(cbd_bufaddr), pkt_len, DMA_FROM_DEVICE); prefetch(page_address(page)); - fec_enet_update_cbd(rxq, bdp, index); if (xdp_prog) { xdp_buff_clear_frags_flag(&xdp); diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index ef14cff9a333..46f9726d9a8a 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -1665,6 +1665,7 @@ struct ice_aqc_get_port_options_elem { #define ICE_AQC_PORT_OPT_MAX_LANE_25G 5 #define ICE_AQC_PORT_OPT_MAX_LANE_50G 6 #define ICE_AQC_PORT_OPT_MAX_LANE_100G 7 +#define ICE_AQC_PORT_OPT_MAX_LANE_200G 8 u8 global_scid[2]; u8 phy_scid[2]; diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 496d86cbd13f..532024f34ce4 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -4096,6 +4096,57 @@ ice_aq_set_port_option(struct ice_hw *hw, u8 lport, u8 lport_valid, } /** + * ice_get_phy_lane_number - Get PHY lane number for current adapter + * @hw: pointer to the hw struct + * + * Return: PHY lane number on success, negative error code otherwise. + */ +int ice_get_phy_lane_number(struct ice_hw *hw) +{ + struct ice_aqc_get_port_options_elem *options; + unsigned int lport = 0; + unsigned int lane; + int err; + + options = kcalloc(ICE_AQC_PORT_OPT_MAX, sizeof(*options), GFP_KERNEL); + if (!options) + return -ENOMEM; + + for (lane = 0; lane < ICE_MAX_PORT_PER_PCI_DEV; lane++) { + u8 options_count = ICE_AQC_PORT_OPT_MAX; + u8 speed, active_idx, pending_idx; + bool active_valid, pending_valid; + + err = ice_aq_get_port_options(hw, options, &options_count, lane, + true, &active_idx, &active_valid, + &pending_idx, &pending_valid); + if (err) + goto err; + + if (!active_valid) + continue; + + speed = options[active_idx].max_lane_speed; + /* If we don't get speed for this lane, it's unoccupied */ + if (speed > ICE_AQC_PORT_OPT_MAX_LANE_200G) + continue; + + if (hw->pf_id == lport) { + kfree(options); + return lane; + } + + lport++; + } + + /* PHY lane not found */ + err = -ENXIO; +err: + kfree(options); + return err; +} + +/** * ice_aq_sff_eeprom * @hw: pointer to the HW struct * @lport: bits [7:0] = logical port, bit [8] = logical port valid diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 27208a60cece..fe6f88cfd948 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -193,6 +193,7 @@ ice_aq_get_port_options(struct ice_hw *hw, int ice_aq_set_port_option(struct ice_hw *hw, u8 lport, u8 lport_valid, u8 new_option); +int ice_get_phy_lane_number(struct ice_hw *hw); int ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr, u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length, diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 0ab35607e5d5..89fa3d53d317 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1144,7 +1144,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, if (link_up == old_link && link_speed == old_link_speed) return 0; - ice_ptp_link_change(pf, pf->hw.pf_id, link_up); + ice_ptp_link_change(pf, link_up); if (ice_is_dcb_active(pf)) { if (test_bit(ICE_FLAG_DCB_ENA, pf->flags)) @@ -6790,7 +6790,7 @@ static int ice_up_complete(struct ice_vsi *vsi) ice_print_link_msg(vsi, true); netif_tx_start_all_queues(vsi->netdev); netif_carrier_on(vsi->netdev); - ice_ptp_link_change(pf, pf->hw.pf_id, true); + ice_ptp_link_change(pf, true); } /* Perform an initial read of the statistics registers now to @@ -7260,7 +7260,7 @@ int ice_down(struct ice_vsi *vsi) if (vsi->netdev) { vlan_err = ice_vsi_del_vlan_zero(vsi); - ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false); + ice_ptp_link_change(vsi->back, false); netif_carrier_off(vsi->netdev); netif_tx_disable(vsi->netdev); } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index a999fface272..efd770dfec44 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1388,10 +1388,9 @@ ice_ptp_port_phy_restart(struct ice_ptp_port *ptp_port) /** * ice_ptp_link_change - Reconfigure PTP after link status change * @pf: Board private structure - * @port: Port for which the PHY start is set * @linkup: Link is up or down */ -void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) +void ice_ptp_link_change(struct ice_pf *pf, bool linkup) { struct ice_ptp_port *ptp_port; struct ice_hw *hw = &pf->hw; @@ -1399,14 +1398,7 @@ void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) if (pf->ptp.state != ICE_PTP_READY) return; - if (WARN_ON_ONCE(port >= hw->ptp.num_lports)) - return; - ptp_port = &pf->ptp.port; - if (ice_is_e825c(hw) && hw->ptp.is_2x50g_muxed_topo) - port *= 2; - if (WARN_ON_ONCE(ptp_port->port_num != port)) - return; /* Update cached link status for this port immediately */ ptp_port->link_up = linkup; @@ -3164,10 +3156,17 @@ void ice_ptp_init(struct ice_pf *pf) { struct ice_ptp *ptp = &pf->ptp; struct ice_hw *hw = &pf->hw; - int err; + int lane_num, err; ptp->state = ICE_PTP_INITIALIZING; + lane_num = ice_get_phy_lane_number(hw); + if (lane_num < 0) { + err = lane_num; + goto err_exit; + } + + ptp->port.port_num = (u8)lane_num; ice_ptp_init_hw(hw); ice_ptp_init_tx_interrupt_mode(pf); @@ -3188,10 +3187,6 @@ void ice_ptp_init(struct ice_pf *pf) if (err) goto err_exit; - ptp->port.port_num = hw->pf_id; - if (ice_is_e825c(hw) && hw->ptp.is_2x50g_muxed_topo) - ptp->port.port_num = hw->pf_id * 2; - err = ice_ptp_init_port(pf, &ptp->port); if (err) goto err_exit; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index 824e73b677a4..c490d98fd9c6 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -310,7 +310,7 @@ void ice_ptp_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type); void ice_ptp_init(struct ice_pf *pf); void ice_ptp_release(struct ice_pf *pf); -void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup); +void ice_ptp_link_change(struct ice_pf *pf, bool linkup); #else /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ static inline int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr) { @@ -358,7 +358,7 @@ static inline void ice_ptp_prepare_for_reset(struct ice_pf *pf, } static inline void ice_ptp_init(struct ice_pf *pf) { } static inline void ice_ptp_release(struct ice_pf *pf) { } -static inline void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) +static inline void ice_ptp_link_change(struct ice_pf *pf, bool linkup) { } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_consts.h b/drivers/net/ethernet/intel/ice/ice_ptp_consts.h index d75f0eddd631..a8e57cf05a9c 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_consts.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_consts.h @@ -131,7 +131,7 @@ struct ice_eth56g_mac_reg_cfg eth56g_mac_cfg[NUM_ICE_ETH56G_LNK_SPD] = { .rx_offset = { .serdes = 0xffffeb27, /* -10.42424 */ .no_fec = 0xffffcccd, /* -25.6 */ - .fc = 0xfffe0014, /* -255.96 */ + .fc = 0xfffc557b, /* -469.26 */ .sfd = 0x4a4, /* 2.32 */ .bs_ds = 0x32 /* 0.0969697 */ } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index 518893f23372..02e84f5b1d45 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -901,30 +901,45 @@ static void ice_ptp_exec_tmr_cmd(struct ice_hw *hw) */ /** + * ice_ptp_get_dest_dev_e825 - get destination PHY for given port number + * @hw: pointer to the HW struct + * @port: destination port + * + * Return: destination sideband queue PHY device. + */ +static enum ice_sbq_msg_dev ice_ptp_get_dest_dev_e825(struct ice_hw *hw, + u8 port) +{ + /* On a single complex E825, PHY 0 is always destination device phy_0 + * and PHY 1 is phy_0_peer. + */ + if (port >= hw->ptp.ports_per_phy) + return eth56g_phy_1; + else + return eth56g_phy_0; +} + +/** * ice_write_phy_eth56g - Write a PHY port register * @hw: pointer to the HW struct - * @phy_idx: PHY index + * @port: destination port * @addr: PHY register address * @val: Value to write * * Return: 0 on success, other error codes when failed to write to PHY */ -static int ice_write_phy_eth56g(struct ice_hw *hw, u8 phy_idx, u32 addr, - u32 val) +static int ice_write_phy_eth56g(struct ice_hw *hw, u8 port, u32 addr, u32 val) { - struct ice_sbq_msg_input phy_msg; + struct ice_sbq_msg_input msg = { + .dest_dev = ice_ptp_get_dest_dev_e825(hw, port), + .opcode = ice_sbq_msg_wr, + .msg_addr_low = lower_16_bits(addr), + .msg_addr_high = upper_16_bits(addr), + .data = val + }; int err; - phy_msg.opcode = ice_sbq_msg_wr; - - phy_msg.msg_addr_low = lower_16_bits(addr); - phy_msg.msg_addr_high = upper_16_bits(addr); - - phy_msg.data = val; - phy_msg.dest_dev = hw->ptp.phy.eth56g.phy_addr[phy_idx]; - - err = ice_sbq_rw_reg(hw, &phy_msg, ICE_AQ_FLAG_RD); - + err = ice_sbq_rw_reg(hw, &msg, ICE_AQ_FLAG_RD); if (err) ice_debug(hw, ICE_DBG_PTP, "PTP failed to send msg to phy %d\n", err); @@ -935,41 +950,36 @@ static int ice_write_phy_eth56g(struct ice_hw *hw, u8 phy_idx, u32 addr, /** * ice_read_phy_eth56g - Read a PHY port register * @hw: pointer to the HW struct - * @phy_idx: PHY index + * @port: destination port * @addr: PHY register address * @val: Value to write * * Return: 0 on success, other error codes when failed to read from PHY */ -static int ice_read_phy_eth56g(struct ice_hw *hw, u8 phy_idx, u32 addr, - u32 *val) +static int ice_read_phy_eth56g(struct ice_hw *hw, u8 port, u32 addr, u32 *val) { - struct ice_sbq_msg_input phy_msg; + struct ice_sbq_msg_input msg = { + .dest_dev = ice_ptp_get_dest_dev_e825(hw, port), + .opcode = ice_sbq_msg_rd, + .msg_addr_low = lower_16_bits(addr), + .msg_addr_high = upper_16_bits(addr) + }; int err; - phy_msg.opcode = ice_sbq_msg_rd; - - phy_msg.msg_addr_low = lower_16_bits(addr); - phy_msg.msg_addr_high = upper_16_bits(addr); - - phy_msg.data = 0; - phy_msg.dest_dev = hw->ptp.phy.eth56g.phy_addr[phy_idx]; - - err = ice_sbq_rw_reg(hw, &phy_msg, ICE_AQ_FLAG_RD); - if (err) { + err = ice_sbq_rw_reg(hw, &msg, ICE_AQ_FLAG_RD); + if (err) ice_debug(hw, ICE_DBG_PTP, "PTP failed to send msg to phy %d\n", err); - return err; - } - - *val = phy_msg.data; + else + *val = msg.data; - return 0; + return err; } /** * ice_phy_res_address_eth56g - Calculate a PHY port register address - * @port: Port number to be written + * @hw: pointer to the HW struct + * @lane: Lane number to be written * @res_type: resource type (register/memory) * @offset: Offset from PHY port register base * @addr: The result address @@ -978,17 +988,19 @@ static int ice_read_phy_eth56g(struct ice_hw *hw, u8 phy_idx, u32 addr, * * %0 - success * * %EINVAL - invalid port number or resource type */ -static int ice_phy_res_address_eth56g(u8 port, enum eth56g_res_type res_type, - u32 offset, u32 *addr) +static int ice_phy_res_address_eth56g(struct ice_hw *hw, u8 lane, + enum eth56g_res_type res_type, + u32 offset, + u32 *addr) { - u8 lane = port % ICE_PORTS_PER_QUAD; - u8 phy = ICE_GET_QUAD_NUM(port); - if (res_type >= NUM_ETH56G_PHY_RES) return -EINVAL; - *addr = eth56g_phy_res[res_type].base[phy] + + /* Lanes 4..7 are in fact 0..3 on a second PHY */ + lane %= hw->ptp.ports_per_phy; + *addr = eth56g_phy_res[res_type].base[0] + lane * eth56g_phy_res[res_type].step + offset; + return 0; } @@ -1008,19 +1020,17 @@ static int ice_phy_res_address_eth56g(u8 port, enum eth56g_res_type res_type, static int ice_write_port_eth56g(struct ice_hw *hw, u8 port, u32 offset, u32 val, enum eth56g_res_type res_type) { - u8 phy_port = port % hw->ptp.ports_per_phy; - u8 phy_idx = port / hw->ptp.ports_per_phy; u32 addr; int err; if (port >= hw->ptp.num_lports) return -EINVAL; - err = ice_phy_res_address_eth56g(phy_port, res_type, offset, &addr); + err = ice_phy_res_address_eth56g(hw, port, res_type, offset, &addr); if (err) return err; - return ice_write_phy_eth56g(hw, phy_idx, addr, val); + return ice_write_phy_eth56g(hw, port, addr, val); } /** @@ -1039,19 +1049,17 @@ static int ice_write_port_eth56g(struct ice_hw *hw, u8 port, u32 offset, static int ice_read_port_eth56g(struct ice_hw *hw, u8 port, u32 offset, u32 *val, enum eth56g_res_type res_type) { - u8 phy_port = port % hw->ptp.ports_per_phy; - u8 phy_idx = port / hw->ptp.ports_per_phy; u32 addr; int err; if (port >= hw->ptp.num_lports) return -EINVAL; - err = ice_phy_res_address_eth56g(phy_port, res_type, offset, &addr); + err = ice_phy_res_address_eth56g(hw, port, res_type, offset, &addr); if (err) return err; - return ice_read_phy_eth56g(hw, phy_idx, addr, val); + return ice_read_phy_eth56g(hw, port, addr, val); } /** @@ -1201,6 +1209,56 @@ static int ice_write_port_mem_eth56g(struct ice_hw *hw, u8 port, u16 offset, } /** + * ice_write_quad_ptp_reg_eth56g - Write a PHY quad register + * @hw: pointer to the HW struct + * @offset: PHY register offset + * @port: Port number + * @val: Value to write + * + * Return: + * * %0 - success + * * %EIO - invalid port number or resource type + * * %other - failed to write to PHY + */ +static int ice_write_quad_ptp_reg_eth56g(struct ice_hw *hw, u8 port, + u32 offset, u32 val) +{ + u32 addr; + + if (port >= hw->ptp.num_lports) + return -EIO; + + addr = eth56g_phy_res[ETH56G_PHY_REG_PTP].base[0] + offset; + + return ice_write_phy_eth56g(hw, port, addr, val); +} + +/** + * ice_read_quad_ptp_reg_eth56g - Read a PHY quad register + * @hw: pointer to the HW struct + * @offset: PHY register offset + * @port: Port number + * @val: Value to read + * + * Return: + * * %0 - success + * * %EIO - invalid port number or resource type + * * %other - failed to read from PHY + */ +static int ice_read_quad_ptp_reg_eth56g(struct ice_hw *hw, u8 port, + u32 offset, u32 *val) +{ + u32 addr; + + if (port >= hw->ptp.num_lports) + return -EIO; + + addr = eth56g_phy_res[ETH56G_PHY_REG_PTP].base[0] + offset; + + return ice_read_phy_eth56g(hw, port, addr, val); +} + +/** * ice_is_64b_phy_reg_eth56g - Check if this is a 64bit PHY register * @low_addr: the low address to check * @high_addr: on return, contains the high address of the 64bit register @@ -1919,7 +1977,6 @@ ice_phy_get_speed_eth56g(struct ice_link_status *li) */ static int ice_phy_cfg_parpcs_eth56g(struct ice_hw *hw, u8 port) { - u8 port_blk = port & ~(ICE_PORTS_PER_QUAD - 1); u32 val; int err; @@ -1934,8 +1991,8 @@ static int ice_phy_cfg_parpcs_eth56g(struct ice_hw *hw, u8 port) switch (ice_phy_get_speed_eth56g(&hw->port_info->phy.link_info)) { case ICE_ETH56G_LNK_SPD_1G: case ICE_ETH56G_LNK_SPD_2_5G: - err = ice_read_ptp_reg_eth56g(hw, port_blk, - PHY_GPCS_CONFIG_REG0, &val); + err = ice_read_quad_ptp_reg_eth56g(hw, port, + PHY_GPCS_CONFIG_REG0, &val); if (err) { ice_debug(hw, ICE_DBG_PTP, "Failed to read PHY_GPCS_CONFIG_REG0, status: %d", err); @@ -1946,8 +2003,8 @@ static int ice_phy_cfg_parpcs_eth56g(struct ice_hw *hw, u8 port) val |= FIELD_PREP(PHY_GPCS_CONFIG_REG0_TX_THR_M, ICE_ETH56G_NOMINAL_TX_THRESH); - err = ice_write_ptp_reg_eth56g(hw, port_blk, - PHY_GPCS_CONFIG_REG0, val); + err = ice_write_quad_ptp_reg_eth56g(hw, port, + PHY_GPCS_CONFIG_REG0, val); if (err) { ice_debug(hw, ICE_DBG_PTP, "Failed to write PHY_GPCS_CONFIG_REG0, status: %d", err); @@ -1988,50 +2045,47 @@ static int ice_phy_cfg_parpcs_eth56g(struct ice_hw *hw, u8 port) */ int ice_phy_cfg_ptp_1step_eth56g(struct ice_hw *hw, u8 port) { - u8 port_blk = port & ~(ICE_PORTS_PER_QUAD - 1); - u8 blk_port = port & (ICE_PORTS_PER_QUAD - 1); + u8 quad_lane = port % ICE_PORTS_PER_QUAD; + u32 addr, val, peer_delay; bool enable, sfd_ena; - u32 val, peer_delay; int err; enable = hw->ptp.phy.eth56g.onestep_ena; peer_delay = hw->ptp.phy.eth56g.peer_delay; sfd_ena = hw->ptp.phy.eth56g.sfd_ena; - /* PHY_PTP_1STEP_CONFIG */ - err = ice_read_ptp_reg_eth56g(hw, port_blk, PHY_PTP_1STEP_CONFIG, &val); + addr = PHY_PTP_1STEP_CONFIG; + err = ice_read_quad_ptp_reg_eth56g(hw, port, addr, &val); if (err) return err; if (enable) - val |= blk_port; + val |= BIT(quad_lane); else - val &= ~blk_port; + val &= ~BIT(quad_lane); val &= ~(PHY_PTP_1STEP_T1S_UP64_M | PHY_PTP_1STEP_T1S_DELTA_M); - err = ice_write_ptp_reg_eth56g(hw, port_blk, PHY_PTP_1STEP_CONFIG, val); + err = ice_write_quad_ptp_reg_eth56g(hw, port, addr, val); if (err) return err; - /* PHY_PTP_1STEP_PEER_DELAY */ + addr = PHY_PTP_1STEP_PEER_DELAY(quad_lane); val = FIELD_PREP(PHY_PTP_1STEP_PD_DELAY_M, peer_delay); if (peer_delay) val |= PHY_PTP_1STEP_PD_ADD_PD_M; val |= PHY_PTP_1STEP_PD_DLY_V_M; - err = ice_write_ptp_reg_eth56g(hw, port_blk, - PHY_PTP_1STEP_PEER_DELAY(blk_port), val); + err = ice_write_quad_ptp_reg_eth56g(hw, port, addr, val); if (err) return err; val &= ~PHY_PTP_1STEP_PD_DLY_V_M; - err = ice_write_ptp_reg_eth56g(hw, port_blk, - PHY_PTP_1STEP_PEER_DELAY(blk_port), val); + err = ice_write_quad_ptp_reg_eth56g(hw, port, addr, val); if (err) return err; - /* PHY_MAC_XIF_MODE */ - err = ice_read_mac_reg_eth56g(hw, port, PHY_MAC_XIF_MODE, &val); + addr = PHY_MAC_XIF_MODE; + err = ice_read_mac_reg_eth56g(hw, port, addr, &val); if (err) return err; @@ -2051,7 +2105,7 @@ int ice_phy_cfg_ptp_1step_eth56g(struct ice_hw *hw, u8 port) FIELD_PREP(PHY_MAC_XIF_TS_BIN_MODE_M, enable) | FIELD_PREP(PHY_MAC_XIF_TS_SFD_ENA_M, sfd_ena); - return ice_write_mac_reg_eth56g(hw, port, PHY_MAC_XIF_MODE, val); + return ice_write_mac_reg_eth56g(hw, port, addr, val); } /** @@ -2093,21 +2147,22 @@ static u32 ice_ptp_calc_bitslip_eth56g(struct ice_hw *hw, u8 port, u32 bs, bool fc, bool rs, enum ice_eth56g_link_spd spd) { - u8 port_offset = port & (ICE_PORTS_PER_QUAD - 1); - u8 port_blk = port & ~(ICE_PORTS_PER_QUAD - 1); u32 bitslip; int err; if (!bs || rs) return 0; - if (spd == ICE_ETH56G_LNK_SPD_1G || spd == ICE_ETH56G_LNK_SPD_2_5G) + if (spd == ICE_ETH56G_LNK_SPD_1G || spd == ICE_ETH56G_LNK_SPD_2_5G) { err = ice_read_gpcs_reg_eth56g(hw, port, PHY_GPCS_BITSLIP, &bitslip); - else - err = ice_read_ptp_reg_eth56g(hw, port_blk, - PHY_REG_SD_BIT_SLIP(port_offset), - &bitslip); + } else { + u8 quad_lane = port % ICE_PORTS_PER_QUAD; + u32 addr; + + addr = PHY_REG_SD_BIT_SLIP(quad_lane); + err = ice_read_quad_ptp_reg_eth56g(hw, port, addr, &bitslip); + } if (err) return 0; @@ -2667,59 +2722,29 @@ static int ice_get_phy_tx_tstamp_ready_eth56g(struct ice_hw *hw, u8 port, } /** - * ice_is_muxed_topo - detect breakout 2x50G topology for E825C - * @hw: pointer to the HW struct - * - * Return: true if it's 2x50 breakout topology, false otherwise - */ -static bool ice_is_muxed_topo(struct ice_hw *hw) -{ - u8 link_topo; - bool mux; - u32 val; - - val = rd32(hw, GLGEN_SWITCH_MODE_CONFIG); - mux = FIELD_GET(GLGEN_SWITCH_MODE_CONFIG_25X4_QUAD_M, val); - val = rd32(hw, GLGEN_MAC_LINK_TOPO); - link_topo = FIELD_GET(GLGEN_MAC_LINK_TOPO_LINK_TOPO_M, val); - - return (mux && link_topo == ICE_LINK_TOPO_UP_TO_2_LINKS); -} - -/** - * ice_ptp_init_phy_e825c - initialize PHY parameters + * ice_ptp_init_phy_e825 - initialize PHY parameters * @hw: pointer to the HW struct */ -static void ice_ptp_init_phy_e825c(struct ice_hw *hw) +static void ice_ptp_init_phy_e825(struct ice_hw *hw) { struct ice_ptp_hw *ptp = &hw->ptp; struct ice_eth56g_params *params; - u8 phy; + u32 phy_rev; + int err; ptp->phy_model = ICE_PHY_ETH56G; params = &ptp->phy.eth56g; params->onestep_ena = false; params->peer_delay = 0; params->sfd_ena = false; - params->phy_addr[0] = eth56g_phy_0; - params->phy_addr[1] = eth56g_phy_1; params->num_phys = 2; ptp->ports_per_phy = 4; ptp->num_lports = params->num_phys * ptp->ports_per_phy; ice_sb_access_ena_eth56g(hw, true); - for (phy = 0; phy < params->num_phys; phy++) { - u32 phy_rev; - int err; - - err = ice_read_phy_eth56g(hw, phy, PHY_REG_REVISION, &phy_rev); - if (err || phy_rev != PHY_REVISION_ETH56G) { - ptp->phy_model = ICE_PHY_UNSUP; - return; - } - } - - ptp->is_2x50g_muxed_topo = ice_is_muxed_topo(hw); + err = ice_read_phy_eth56g(hw, hw->pf_id, PHY_REG_REVISION, &phy_rev); + if (err || phy_rev != PHY_REVISION_ETH56G) + ptp->phy_model = ICE_PHY_UNSUP; } /* E822 family functions @@ -2738,10 +2763,9 @@ static void ice_fill_phy_msg_e82x(struct ice_hw *hw, struct ice_sbq_msg_input *msg, u8 port, u16 offset) { - int phy_port, phy, quadtype; + int phy_port, quadtype; phy_port = port % hw->ptp.ports_per_phy; - phy = port / hw->ptp.ports_per_phy; quadtype = ICE_GET_QUAD_NUM(port) % ICE_GET_QUAD_NUM(hw->ptp.ports_per_phy); @@ -2753,12 +2777,7 @@ static void ice_fill_phy_msg_e82x(struct ice_hw *hw, msg->msg_addr_high = P_Q1_H(P_4_BASE + offset, phy_port); } - if (phy == 0) - msg->dest_dev = rmn_0; - else if (phy == 1) - msg->dest_dev = rmn_1; - else - msg->dest_dev = rmn_2; + msg->dest_dev = rmn_0; } /** @@ -5478,7 +5497,7 @@ void ice_ptp_init_hw(struct ice_hw *hw) else if (ice_is_e810(hw)) ice_ptp_init_phy_e810(ptp); else if (ice_is_e825c(hw)) - ice_ptp_init_phy_e825c(hw); + ice_ptp_init_phy_e825(hw); else ptp->phy_model = ICE_PHY_UNSUP; } diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index adb168860711..4a9ef722635f 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -850,7 +850,6 @@ struct ice_mbx_data { struct ice_eth56g_params { u8 num_phys; - u8 phy_addr[2]; bool onestep_ena; bool sfd_ena; u32 peer_delay; @@ -881,7 +880,6 @@ struct ice_ptp_hw { union ice_phy_params phy; u8 num_lports; u8 ports_per_phy; - bool is_2x50g_muxed_topo; }; /* Port hardware description */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index ca92e518be76..1baf8933a07c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -724,6 +724,12 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, /* check esn */ if (x->props.flags & XFRM_STATE_ESN) mlx5e_ipsec_update_esn_state(sa_entry); + else + /* According to RFC4303, section "3.3.3. Sequence Number Generation", + * the first packet sent using a given SA will contain a sequence + * number of 1. + */ + sa_entry->esn_state.esn = 1; mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs); @@ -768,9 +774,12 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, MLX5_IPSEC_RESCHED); if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET && - x->props.mode == XFRM_MODE_TUNNEL) - xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id, - MLX5E_IPSEC_TUNNEL_SA); + x->props.mode == XFRM_MODE_TUNNEL) { + xa_lock_bh(&ipsec->sadb); + __xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id, + MLX5E_IPSEC_TUNNEL_SA); + xa_unlock_bh(&ipsec->sadb); + } out: x->xso.offload_handle = (unsigned long)sa_entry; @@ -797,7 +806,6 @@ err_xfrm: static void mlx5e_xfrm_del_state(struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); - struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; struct mlx5e_ipsec *ipsec = sa_entry->ipsec; struct mlx5e_ipsec_sa_entry *old; @@ -806,12 +814,6 @@ static void mlx5e_xfrm_del_state(struct xfrm_state *x) old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id); WARN_ON(old != sa_entry); - - if (attrs->mode == XFRM_MODE_TUNNEL && - attrs->type == XFRM_DEV_OFFLOAD_PACKET) - /* Make sure that no ARP requests are running in parallel */ - flush_workqueue(ipsec->wq); - } static void mlx5e_xfrm_free_state(struct xfrm_state *x) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index e51b03d4c717..57861d34d46f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -1718,23 +1718,21 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) goto err_alloc; } - if (attrs->family == AF_INET) - setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4); - else - setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); - setup_fte_no_frags(spec); setup_fte_upper_proto_match(spec, &attrs->upspec); switch (attrs->type) { case XFRM_DEV_OFFLOAD_CRYPTO: + if (attrs->family == AF_INET) + setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4); + else + setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); setup_fte_spi(spec, attrs->spi, false); setup_fte_esp(spec); setup_fte_reg_a(spec); break; case XFRM_DEV_OFFLOAD_PACKET: - if (attrs->reqid) - setup_fte_reg_c4(spec, attrs->reqid); + setup_fte_reg_c4(spec, attrs->reqid); err = setup_pkt_reformat(ipsec, attrs, &flow_act); if (err) goto err_pkt_reformat; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 53cfa39188cb..820debf3fbbf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -91,8 +91,9 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) EXPORT_SYMBOL_GPL(mlx5_ipsec_device_caps); static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn, - struct mlx5_accel_esp_xfrm_attrs *attrs) + struct mlx5e_ipsec_sa_entry *sa_entry) { + struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; void *aso_ctx; aso_ctx = MLX5_ADDR_OF(ipsec_obj, obj, ipsec_aso); @@ -120,8 +121,12 @@ static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn, * active. */ MLX5_SET(ipsec_obj, obj, aso_return_reg, MLX5_IPSEC_ASO_REG_C_4_5); - if (attrs->dir == XFRM_DEV_OFFLOAD_OUT) + if (attrs->dir == XFRM_DEV_OFFLOAD_OUT) { MLX5_SET(ipsec_aso, aso_ctx, mode, MLX5_IPSEC_ASO_INC_SN); + if (!attrs->replay_esn.trigger) + MLX5_SET(ipsec_aso, aso_ctx, mode_parameter, + sa_entry->esn_state.esn); + } if (attrs->lft.hard_packet_limit != XFRM_INF) { MLX5_SET(ipsec_aso, aso_ctx, remove_flow_pkt_cnt, @@ -175,7 +180,7 @@ static int mlx5_create_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry) res = &mdev->mlx5e_res.hw_objs; if (attrs->type == XFRM_DEV_OFFLOAD_PACKET) - mlx5e_ipsec_packet_setup(obj, res->pdn, attrs); + mlx5e_ipsec_packet_setup(obj, res->pdn, sa_entry); err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); if (!err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 2eabfcc247c6..0ce999706d41 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -2709,6 +2709,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, break; case MLX5_FLOW_NAMESPACE_RDMA_TX: root_ns = steering->rdma_tx_root_ns; + prio = RDMA_TX_BYPASS_PRIO; break; case MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS: root_ns = steering->rdma_rx_root_ns; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index ab2717012b79..39e80704b1c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -530,7 +530,7 @@ int mlx5_lag_port_sel_create(struct mlx5_lag *ldev, set_tt_map(port_sel, hash_type); err = mlx5_lag_create_definers(ldev, hash_type, ports); if (err) - return err; + goto clear_port_sel; if (port_sel->tunnel) { err = mlx5_lag_create_inner_ttc_table(ldev); @@ -549,6 +549,8 @@ destroy_inner: mlx5_destroy_ttc_table(port_sel->inner.ttc); destroy_definers: mlx5_lag_destroy_definers(ldev); +clear_port_sel: + memset(port_sel, 0, sizeof(*port_sel)); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index a96be98be032..b96909fbeb12 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -257,6 +257,7 @@ static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, return 0; esw_err: + mlx5_sf_function_id_erase(table, sf); mlx5_sf_free(table, sf); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wc.c b/drivers/net/ethernet/mellanox/mlx5/core/wc.c index 1bed75eca97d..740b719e7072 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wc.c @@ -382,6 +382,7 @@ err_alloc_bfreg: bool mlx5_wc_support_get(struct mlx5_core_dev *mdev) { + struct mutex *wc_state_lock = &mdev->wc_state_lock; struct mlx5_core_dev *parent = NULL; if (!MLX5_CAP_GEN(mdev, bf)) { @@ -400,32 +401,31 @@ bool mlx5_wc_support_get(struct mlx5_core_dev *mdev) */ goto out; - mutex_lock(&mdev->wc_state_lock); - - if (mdev->wc_state != MLX5_WC_STATE_UNINITIALIZED) - goto unlock; - #ifdef CONFIG_MLX5_SF - if (mlx5_core_is_sf(mdev)) + if (mlx5_core_is_sf(mdev)) { parent = mdev->priv.parent_mdev; + wc_state_lock = &parent->wc_state_lock; + } #endif - if (parent) { - mutex_lock(&parent->wc_state_lock); + mutex_lock(wc_state_lock); + if (mdev->wc_state != MLX5_WC_STATE_UNINITIALIZED) + goto unlock; + + if (parent) { mlx5_core_test_wc(parent); mlx5_core_dbg(mdev, "parent set wc_state=%d\n", parent->wc_state); mdev->wc_state = parent->wc_state; - mutex_unlock(&parent->wc_state_lock); + } else { + mlx5_core_test_wc(mdev); } - mlx5_core_test_wc(mdev); - unlock: - mutex_unlock(&mdev->wc_state_lock); + mutex_unlock(wc_state_lock); out: mlx5_core_dbg(mdev, "wc_state=%d\n", mdev->wc_state); diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 2dc0c6ad54be..be95336ce089 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -1656,9 +1656,9 @@ static int __init mana_driver_init(void) static void __exit mana_driver_exit(void) { - debugfs_remove(mana_debugfs_root); - pci_unregister_driver(&mana_driver); + + debugfs_remove(mana_debugfs_root); } module_init(mana_driver_init); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 9d97cd281f18..c03558adda91 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -458,7 +458,8 @@ int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data, map_id_full = be64_to_cpu(cbe->map_ptr); map_id = map_id_full; - if (len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size) + if (size_add(pkt_size, data_size) > INT_MAX || + len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size) return -EINVAL; if (cbe->hdr.ver != NFP_CCM_ABI_VERSION) return -EINVAL; diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 739707a7b40f..8a3959bb2360 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -16,7 +16,6 @@ #include <linux/clk.h> #include <linux/delay.h> #include <linux/ethtool.h> -#include <linux/hwmon.h> #include <linux/phy.h> #include <linux/if_vlan.h> #include <linux/in.h> @@ -5347,43 +5346,6 @@ static bool rtl_aspm_is_safe(struct rtl8169_private *tp) return false; } -static umode_t r8169_hwmon_is_visible(const void *drvdata, - enum hwmon_sensor_types type, - u32 attr, int channel) -{ - return 0444; -} - -static int r8169_hwmon_read(struct device *dev, enum hwmon_sensor_types type, - u32 attr, int channel, long *val) -{ - struct rtl8169_private *tp = dev_get_drvdata(dev); - int val_raw; - - val_raw = phy_read_paged(tp->phydev, 0xbd8, 0x12) & 0x3ff; - if (val_raw >= 512) - val_raw -= 1024; - - *val = 1000 * val_raw / 2; - - return 0; -} - -static const struct hwmon_ops r8169_hwmon_ops = { - .is_visible = r8169_hwmon_is_visible, - .read = r8169_hwmon_read, -}; - -static const struct hwmon_channel_info * const r8169_hwmon_info[] = { - HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT), - NULL -}; - -static const struct hwmon_chip_info r8169_hwmon_chip_info = { - .ops = &r8169_hwmon_ops, - .info = r8169_hwmon_info, -}; - static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { struct rtl8169_private *tp; @@ -5563,12 +5525,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) return rc; - /* The temperature sensor is available from RTl8125B */ - if (IS_REACHABLE(CONFIG_HWMON) && tp->mac_version >= RTL_GIGA_MAC_VER_63) - /* ignore errors */ - devm_hwmon_device_register_with_info(&pdev->dev, "nic_temp", tp, - &r8169_hwmon_chip_info, - NULL); rc = register_netdev(dev); if (rc) return rc; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index ac0f093f647a..bc395294a32d 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2763,6 +2763,7 @@ static const struct ravb_hw_info ravb_rzv2m_hw_info = { .net_features = NETIF_F_RXCSUM, .stats_len = ARRAY_SIZE(ravb_gstrings_stats), .tccr_mask = TCCR_TSRQ0 | TCCR_TSRQ1 | TCCR_TSRQ2 | TCCR_TSRQ3, + .tx_max_frame_size = SZ_2K, .rx_max_frame_size = SZ_2K, .rx_buffer_size = SZ_2K + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index 52e4e350b734..5cc72a91f220 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -127,15 +127,15 @@ struct cpsw_ale_dev_id { static inline int cpsw_ale_get_field(u32 *ale_entry, u32 start, u32 bits) { - int idx, idx2; + int idx, idx2, index; u32 hi_val = 0; idx = start / 32; idx2 = (start + bits - 1) / 32; /* Check if bits to be fetched exceed a word */ if (idx != idx2) { - idx2 = 2 - idx2; /* flip */ - hi_val = ale_entry[idx2] << ((idx2 * 32) - start); + index = 2 - idx2; /* flip */ + hi_val = ale_entry[index] << ((idx2 * 32) - start); } start -= idx * 32; idx = 2 - idx; /* flip */ @@ -145,16 +145,16 @@ static inline int cpsw_ale_get_field(u32 *ale_entry, u32 start, u32 bits) static inline void cpsw_ale_set_field(u32 *ale_entry, u32 start, u32 bits, u32 value) { - int idx, idx2; + int idx, idx2, index; value &= BITMASK(bits); idx = start / 32; idx2 = (start + bits - 1) / 32; /* Check if bits to be set exceed a word */ if (idx != idx2) { - idx2 = 2 - idx2; /* flip */ - ale_entry[idx2] &= ~(BITMASK(bits + start - (idx2 * 32))); - ale_entry[idx2] |= (value >> ((idx2 * 32) - start)); + index = 2 - idx2; /* flip */ + ale_entry[index] &= ~(BITMASK(bits + start - (idx2 * 32))); + ale_entry[index] |= (value >> ((idx2 * 32) - start)); } start -= idx * 32; idx = 2 - idx; /* flip */ diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 0f4b02fe6f85..ae743991117c 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -2056,6 +2056,12 @@ axienet_ethtools_set_coalesce(struct net_device *ndev, return -EBUSY; } + if (ecoalesce->rx_max_coalesced_frames > 255 || + ecoalesce->tx_max_coalesced_frames > 255) { + NL_SET_ERR_MSG(extack, "frames must be less than 256"); + return -EINVAL; + } + if (ecoalesce->rx_max_coalesced_frames) lp->coalesce_count_rx = ecoalesce->rx_max_coalesced_frames; if (ecoalesce->rx_coalesce_usecs) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 89a996ad8cd0..fbabada7d3ba 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1524,8 +1524,8 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, goto out_encap; } - gn = net_generic(dev_net(dev), gtp_net_id); - list_add_rcu(>p->list, &gn->gtp_dev_list); + gn = net_generic(src_net, gtp_net_id); + list_add(>p->list, &gn->gtp_dev_list); dev->priv_destructor = gtp_destructor; netdev_dbg(dev, "registered new GTP interface\n"); @@ -1551,7 +1551,7 @@ static void gtp_dellink(struct net_device *dev, struct list_head *head) hlist_for_each_entry_safe(pctx, next, >p->tid_hash[i], hlist_tid) pdp_context_delete(pctx); - list_del_rcu(>p->list); + list_del(>p->list); unregister_netdevice_queue(dev, head); } @@ -2271,16 +2271,19 @@ static int gtp_genl_dump_pdp(struct sk_buff *skb, struct gtp_dev *last_gtp = (struct gtp_dev *)cb->args[2], *gtp; int i, j, bucket = cb->args[0], skip = cb->args[1]; struct net *net = sock_net(skb->sk); + struct net_device *dev; struct pdp_ctx *pctx; - struct gtp_net *gn; - - gn = net_generic(net, gtp_net_id); if (cb->args[4]) return 0; rcu_read_lock(); - list_for_each_entry_rcu(gtp, &gn->gtp_dev_list, list) { + for_each_netdev_rcu(net, dev) { + if (dev->rtnl_link_ops != >p_link_ops) + continue; + + gtp = netdev_priv(dev); + if (last_gtp && last_gtp != gtp) continue; else @@ -2475,9 +2478,14 @@ static void __net_exit gtp_net_exit_batch_rtnl(struct list_head *net_list, list_for_each_entry(net, net_list, exit_list) { struct gtp_net *gn = net_generic(net, gtp_net_id); - struct gtp_dev *gtp; + struct gtp_dev *gtp, *gtp_next; + struct net_device *dev; + + for_each_netdev(net, dev) + if (dev->rtnl_link_ops == >p_link_ops) + gtp_dellink(dev, dev_to_kill); - list_for_each_entry(gtp, &gn->gtp_dev_list, list) + list_for_each_entry_safe(gtp, gtp_next, &gn->gtp_dev_list, list) gtp_dellink(gtp->dev, dev_to_kill); } } diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 7246a910728d..3059435af596 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -684,7 +684,9 @@ static int xpcs_config_aneg_c37_sgmii(struct dw_xpcs *xpcs, if (ret < 0) return ret; - mask = DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW; + val = 0; + mask = DW_VR_MII_DIG_CTRL1_2G5_EN | DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW; + if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED) val = DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW; diff --git a/drivers/net/pfcp.c b/drivers/net/pfcp.c index 69434fd13f96..68d0d9e92a22 100644 --- a/drivers/net/pfcp.c +++ b/drivers/net/pfcp.c @@ -206,8 +206,8 @@ static int pfcp_newlink(struct net *net, struct net_device *dev, goto exit_del_pfcp_sock; } - pn = net_generic(dev_net(dev), pfcp_net_id); - list_add_rcu(&pfcp->list, &pn->pfcp_dev_list); + pn = net_generic(net, pfcp_net_id); + list_add(&pfcp->list, &pn->pfcp_dev_list); netdev_dbg(dev, "registered new PFCP interface\n"); @@ -224,7 +224,7 @@ static void pfcp_dellink(struct net_device *dev, struct list_head *head) { struct pfcp_dev *pfcp = netdev_priv(dev); - list_del_rcu(&pfcp->list); + list_del(&pfcp->list); unregister_netdevice_queue(dev, head); } @@ -247,11 +247,16 @@ static int __net_init pfcp_net_init(struct net *net) static void __net_exit pfcp_net_exit(struct net *net) { struct pfcp_net *pn = net_generic(net, pfcp_net_id); - struct pfcp_dev *pfcp; + struct pfcp_dev *pfcp, *pfcp_next; + struct net_device *dev; LIST_HEAD(list); rtnl_lock(); - list_for_each_entry(pfcp, &pn->pfcp_dev_list, list) + for_each_netdev(net, dev) + if (dev->rtnl_link_ops == &pfcp_link_ops) + pfcp_dellink(dev, &list); + + list_for_each_entry_safe(pfcp, pfcp_next, &pn->pfcp_dev_list, list) pfcp_dellink(pfcp->dev, &list); unregister_netdevice_many(&list); diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index c858270141bc..c39a426ebf52 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -174,12 +174,4 @@ static inline void sk_mark_napi_id_once(struct sock *sk, #endif } -static inline void sk_mark_napi_id_once_xdp(struct sock *sk, - const struct xdp_buff *xdp) -{ -#ifdef CONFIG_NET_RX_BUSY_POLL - __sk_mark_napi_id_once(sk, xdp->rxq->napi_id); -#endif -} - #endif /* _LINUX_NET_BUSY_POLL_H */ diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 793e6fd78bc5..60a5347922be 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -294,7 +294,7 @@ static inline long page_pool_unref_page(struct page *page, long nr) static inline void page_pool_ref_netmem(netmem_ref netmem) { - atomic_long_inc(&netmem_to_page(netmem)->pp_ref_count); + atomic_long_inc(netmem_get_pp_ref_count_ref(netmem)); } static inline void page_pool_ref_page(struct page *page) diff --git a/include/net/xdp.h b/include/net/xdp.h index e6770dd40c91..b5b10f2b88e5 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -62,7 +62,6 @@ struct xdp_rxq_info { u32 queue_index; u32 reg_state; struct xdp_mem_info mem; - unsigned int napi_id; u32 frag_size; } ____cacheline_aligned; /* perf critical, avoid false-sharing */ diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 40085afd9160..7a7316d9c0da 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -59,15 +59,6 @@ static inline void xsk_pool_fill_cb(struct xsk_buff_pool *pool, xp_fill_cb(pool, desc); } -static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool) -{ -#ifdef CONFIG_NET_RX_BUSY_POLL - return pool->heads[0].xdp.rxq->napi_id; -#else - return 0; -#endif -} - static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs) { @@ -306,11 +297,6 @@ static inline void xsk_pool_fill_cb(struct xsk_buff_pool *pool, { } -static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool) -{ - return 0; -} - static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs) { diff --git a/net/core/filter.c b/net/core/filter.c index 834614071727..2fb45a86f3dd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11251,6 +11251,7 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, bool is_sockarray = map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY; struct sock_reuseport *reuse; struct sock *selected_sk; + int err; selected_sk = map->ops->map_lookup_elem(map, key); if (!selected_sk) @@ -11258,10 +11259,6 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, reuse = rcu_dereference(selected_sk->sk_reuseport_cb); if (!reuse) { - /* Lookup in sock_map can return TCP ESTABLISHED sockets. */ - if (sk_is_refcounted(selected_sk)) - sock_put(selected_sk); - /* reuseport_array has only sk with non NULL sk_reuseport_cb. * The only (!reuse) case here is - the sk has already been * unhashed (e.g. by close()), so treat it as -ENOENT. @@ -11269,24 +11266,33 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, * Other maps (e.g. sock_map) do not provide this guarantee and * the sk may never be in the reuseport group to begin with. */ - return is_sockarray ? -ENOENT : -EINVAL; + err = is_sockarray ? -ENOENT : -EINVAL; + goto error; } if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) { struct sock *sk = reuse_kern->sk; - if (sk->sk_protocol != selected_sk->sk_protocol) - return -EPROTOTYPE; - else if (sk->sk_family != selected_sk->sk_family) - return -EAFNOSUPPORT; - - /* Catch all. Likely bound to a different sockaddr. */ - return -EBADFD; + if (sk->sk_protocol != selected_sk->sk_protocol) { + err = -EPROTOTYPE; + } else if (sk->sk_family != selected_sk->sk_family) { + err = -EAFNOSUPPORT; + } else { + /* Catch all. Likely bound to a different sockaddr. */ + err = -EBADFD; + } + goto error; } reuse_kern->selected_sk = selected_sk; return 0; +error: + /* Lookup in sock_map can return TCP ESTABLISHED sockets. */ + if (sk_is_refcounted(selected_sk)) + sock_put(selected_sk); + + return err; } static const struct bpf_func_proto sk_select_reuseport_proto = { diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index a89cbd8d87c3..996ac6a449eb 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -197,6 +197,16 @@ static const struct genl_multicast_group netdev_nl_mcgrps[] = { [NETDEV_NLGRP_PAGE_POOL] = { "page-pool", }, }; +static void __netdev_nl_sock_priv_init(void *priv) +{ + netdev_nl_sock_priv_init(priv); +} + +static void __netdev_nl_sock_priv_destroy(void *priv) +{ + netdev_nl_sock_priv_destroy(priv); +} + struct genl_family netdev_nl_family __ro_after_init = { .name = NETDEV_FAMILY_NAME, .version = NETDEV_FAMILY_VERSION, @@ -208,6 +218,6 @@ struct genl_family netdev_nl_family __ro_after_init = { .mcgrps = netdev_nl_mcgrps, .n_mcgrps = ARRAY_SIZE(netdev_nl_mcgrps), .sock_priv_size = sizeof(struct list_head), - .sock_priv_init = (void *)netdev_nl_sock_priv_init, - .sock_priv_destroy = (void *)netdev_nl_sock_priv_destroy, + .sock_priv_init = __netdev_nl_sock_priv_init, + .sock_priv_destroy = __netdev_nl_sock_priv_destroy, }; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 2e459b9d88eb..96a6ed37d4cc 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -627,6 +627,8 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) const struct net_device_ops *ops; int err; + skb_queue_head_init(&np->skb_pool); + if (ndev->priv_flags & IFF_DISABLE_NETPOLL) { np_err(np, "%s doesn't support polling, aborting\n", ndev->name); @@ -662,6 +664,9 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) strscpy(np->dev_name, ndev->name, IFNAMSIZ); npinfo->netpoll = np; + /* fill up the skb queue */ + refill_skbs(np); + /* last thing to do is link it to the net device structure */ rcu_assign_pointer(ndev->npinfo, npinfo); @@ -681,8 +686,6 @@ int netpoll_setup(struct netpoll *np) struct in_device *in_dev; int err; - skb_queue_head_init(&np->skb_pool); - rtnl_lock(); if (np->dev_name[0]) { struct net *net = current->nsproxy->net_ns; @@ -782,9 +785,6 @@ put_noaddr: } } - /* fill up the skb queue */ - refill_skbs(np); - err = __netpoll_setup(np, ndev); if (err) goto flush; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 7e23cacbe66e..4cb547fae91f 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -851,6 +851,9 @@ static ssize_t get_imix_entries(const char __user *buffer, unsigned long weight; unsigned long size; + if (pkt_dev->n_imix_entries >= MAX_IMIX_ENTRIES) + return -E2BIG; + len = num_arg(&buffer[i], max_digits, &size); if (len < 0) return len; @@ -880,9 +883,6 @@ static ssize_t get_imix_entries(const char __user *buffer, i++; pkt_dev->n_imix_entries++; - - if (pkt_dev->n_imix_entries > MAX_IMIX_ENTRIES) - return -E2BIG; } while (c == ' '); return i; diff --git a/net/core/xdp.c b/net/core/xdp.c index bcc5551c6424..2315feed94ef 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -186,7 +186,6 @@ int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, xdp_rxq_info_init(xdp_rxq); xdp_rxq->dev = dev; xdp_rxq->queue_index = queue_index; - xdp_rxq->napi_id = napi_id; xdp_rxq->frag_size = frag_size; xdp_rxq->reg_state = REG_STATE_REGISTERED; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0fbec3509618..e1564b95fab0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2445,6 +2445,7 @@ martian_destination: net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n", &daddr, &saddr, dev->name); #endif + goto out; e_nobufs: reason = SKB_DROP_REASON_NOMEM; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e8953e88efef..86d282618515 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -533,7 +533,7 @@ begin: return NULL; } -/* In hash4, rehash can happen in connect(), where hash4_cnt keeps unchanged. */ +/* udp_rehash4() only checks hslot4, and hash4_cnt is not processed. */ static void udp_rehash4(struct udp_table *udptable, struct sock *sk, u16 newhash4) { @@ -582,15 +582,13 @@ void udp_lib_hash4(struct sock *sk, u16 hash) struct net *net = sock_net(sk); struct udp_table *udptable; - /* Connected udp socket can re-connect to another remote address, - * so rehash4 is needed. + /* Connected udp socket can re-connect to another remote address, which + * will be handled by rehash. Thus no need to redo hash4 here. */ - udptable = net->ipv4.udp_table; - if (udp_hashed4(sk)) { - udp_rehash4(udptable, sk, hash); + if (udp_hashed4(sk)) return; - } + udptable = net->ipv4.udp_table; hslot = udp_hashslot(udptable, net, udp_sk(sk)->udp_port_hash); hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); hslot4 = udp_hashslot4(udptable, hash); @@ -2173,14 +2171,14 @@ void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4) struct udp_table *udptable = udp_get_table_prot(sk); struct udp_hslot *hslot, *hslot2, *nhslot2; + hslot = udp_hashslot(udptable, sock_net(sk), + udp_sk(sk)->udp_port_hash); hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); nhslot2 = udp_hashslot2(udptable, newhash); udp_sk(sk)->udp_portaddr_hash = newhash; if (hslot2 != nhslot2 || rcu_access_pointer(sk->sk_reuseport_cb)) { - hslot = udp_hashslot(udptable, sock_net(sk), - udp_sk(sk)->udp_port_hash); /* we must lock primary chain too */ spin_lock_bh(&hslot->lock); if (rcu_access_pointer(sk->sk_reuseport_cb)) @@ -2199,19 +2197,29 @@ void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4) spin_unlock(&nhslot2->lock); } - if (udp_hashed4(sk)) { - udp_rehash4(udptable, sk, newhash4); + spin_unlock_bh(&hslot->lock); + } + + /* Now process hash4 if necessary: + * (1) update hslot4; + * (2) update hslot2->hash4_cnt. + * Note that hslot2/hslot4 should be checked separately, as + * either of them may change with the other unchanged. + */ + if (udp_hashed4(sk)) { + spin_lock_bh(&hslot->lock); - if (hslot2 != nhslot2) { - spin_lock(&hslot2->lock); - udp_hash4_dec(hslot2); - spin_unlock(&hslot2->lock); + udp_rehash4(udptable, sk, newhash4); + if (hslot2 != nhslot2) { + spin_lock(&hslot2->lock); + udp_hash4_dec(hslot2); + spin_unlock(&hslot2->lock); - spin_lock(&nhslot2->lock); - udp_hash4_inc(nhslot2); - spin_unlock(&nhslot2->lock); - } + spin_lock(&nhslot2->lock); + udp_hash4_inc(nhslot2); + spin_unlock(&nhslot2->lock); } + spin_unlock_bh(&hslot->lock); } } diff --git a/net/mptcp/options.c b/net/mptcp/options.c index a62bc874bf1e..123f3f297284 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -607,7 +607,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, } opts->ext_copy.use_ack = 1; opts->suboptions = OPTION_MPTCP_DSS; - WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk)); /* Add kind/length/subtype/flag overhead if mapping is not populated */ if (dss_size == 0) @@ -1288,7 +1287,7 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th) } MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT); } - return; + goto update_wspace; } if (rcv_wnd_new != rcv_wnd_old) { @@ -1313,6 +1312,9 @@ raise_win: th->window = htons(new_win); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDSHARED); } + +update_wspace: + WRITE_ONCE(msk->old_wspace, tp->rcv_wnd); } __sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index a93e661ef5c4..73526f1d768f 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -760,10 +760,15 @@ static inline u64 mptcp_data_avail(const struct mptcp_sock *msk) static inline bool mptcp_epollin_ready(const struct sock *sk) { + u64 data_avail = mptcp_data_avail(mptcp_sk(sk)); + + if (!data_avail) + return false; + /* mptcp doesn't have to deal with small skbs in the receive queue, - * at it can always coalesce them + * as it can always coalesce them */ - return (mptcp_data_avail(mptcp_sk(sk)) >= sk->sk_rcvlowat) || + return (data_avail >= sk->sk_rcvlowat) || (mem_cgroup_sockets_enabled && sk->sk_memcg && mem_cgroup_under_socket_pressure(sk->sk_memcg)) || READ_ONCE(tcp_memory_pressure); diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index ef0f8f73826f..4e0842df5234 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -289,6 +289,7 @@ enum { ncsi_dev_state_config_sp = 0x0301, ncsi_dev_state_config_cis, ncsi_dev_state_config_oem_gma, + ncsi_dev_state_config_apply_mac, ncsi_dev_state_config_clear_vids, ncsi_dev_state_config_svf, ncsi_dev_state_config_ev, @@ -322,6 +323,7 @@ struct ncsi_dev_priv { #define NCSI_DEV_RESHUFFLE 4 #define NCSI_DEV_RESET 8 /* Reset state of NC */ unsigned int gma_flag; /* OEM GMA flag */ + struct sockaddr pending_mac; /* MAC address received from GMA */ spinlock_t lock; /* Protect the NCSI device */ unsigned int package_probe_id;/* Current ID during probe */ unsigned int package_num; /* Number of packages */ diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 5cf55bde366d..bf276eaf9330 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -1038,7 +1038,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) : ncsi_dev_state_config_clear_vids; break; case ncsi_dev_state_config_oem_gma: - nd->state = ncsi_dev_state_config_clear_vids; + nd->state = ncsi_dev_state_config_apply_mac; nca.package = np->id; nca.channel = nc->id; @@ -1050,10 +1050,22 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) nca.type = NCSI_PKT_CMD_OEM; ret = ncsi_gma_handler(&nca, nc->version.mf_id); } - if (ret < 0) + if (ret < 0) { + nd->state = ncsi_dev_state_config_clear_vids; schedule_work(&ndp->work); + } break; + case ncsi_dev_state_config_apply_mac: + rtnl_lock(); + ret = dev_set_mac_address(dev, &ndp->pending_mac, NULL); + rtnl_unlock(); + if (ret < 0) + netdev_warn(dev, "NCSI: 'Writing MAC address to device failed\n"); + + nd->state = ncsi_dev_state_config_clear_vids; + + fallthrough; case ncsi_dev_state_config_clear_vids: case ncsi_dev_state_config_svf: case ncsi_dev_state_config_ev: diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index e28be33bdf2c..14bd66909ca4 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -628,16 +628,14 @@ static int ncsi_rsp_handler_snfc(struct ncsi_request *nr) static int ncsi_rsp_handler_oem_gma(struct ncsi_request *nr, int mfr_id) { struct ncsi_dev_priv *ndp = nr->ndp; + struct sockaddr *saddr = &ndp->pending_mac; struct net_device *ndev = ndp->ndev.dev; struct ncsi_rsp_oem_pkt *rsp; - struct sockaddr saddr; u32 mac_addr_off = 0; - int ret = 0; /* Get the response header */ rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp); - saddr.sa_family = ndev->type; ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; if (mfr_id == NCSI_OEM_MFR_BCM_ID) mac_addr_off = BCM_MAC_ADDR_OFFSET; @@ -646,22 +644,17 @@ static int ncsi_rsp_handler_oem_gma(struct ncsi_request *nr, int mfr_id) else if (mfr_id == NCSI_OEM_MFR_INTEL_ID) mac_addr_off = INTEL_MAC_ADDR_OFFSET; - memcpy(saddr.sa_data, &rsp->data[mac_addr_off], ETH_ALEN); + saddr->sa_family = ndev->type; + memcpy(saddr->sa_data, &rsp->data[mac_addr_off], ETH_ALEN); if (mfr_id == NCSI_OEM_MFR_BCM_ID || mfr_id == NCSI_OEM_MFR_INTEL_ID) - eth_addr_inc((u8 *)saddr.sa_data); - if (!is_valid_ether_addr((const u8 *)saddr.sa_data)) + eth_addr_inc((u8 *)saddr->sa_data); + if (!is_valid_ether_addr((const u8 *)saddr->sa_data)) return -ENXIO; /* Set the flag for GMA command which should only be called once */ ndp->gma_flag = 1; - rtnl_lock(); - ret = dev_set_mac_address(ndev, &saddr, NULL); - rtnl_unlock(); - if (ret < 0) - netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n"); - - return ret; + return 0; } /* Response handler for Mellanox card */ diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 16e260014684..704c858cf209 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -934,7 +934,9 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, { struct vport *vport = ovs_vport_rcu(dp, out_port); - if (likely(vport && netif_carrier_ok(vport->dev))) { + if (likely(vport && + netif_running(vport->dev) && + netif_carrier_ok(vport->dev))) { u16 mru = OVS_CB(skb)->mru; u32 cutlen = OVS_CB(skb)->cutlen; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 5cf8109f672a..fa9d1b49599b 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -491,6 +491,15 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) */ vsk->transport->release(vsk); vsock_deassign_transport(vsk); + + /* transport's release() and destruct() can touch some socket + * state, since we are reassigning the socket to a new transport + * during vsock_connect(), let's reset these fields to have a + * clean state. + */ + sock_reset_flag(sk, SOCK_DONE); + sk->sk_state = TCP_CLOSE; + vsk->peer_shutdown = 0; } /* We increase the module refcnt to prevent the transport unloading @@ -870,6 +879,9 @@ EXPORT_SYMBOL_GPL(vsock_create_connected); s64 vsock_stream_has_data(struct vsock_sock *vsk) { + if (WARN_ON(!vsk->transport)) + return 0; + return vsk->transport->stream_has_data(vsk); } EXPORT_SYMBOL_GPL(vsock_stream_has_data); @@ -878,6 +890,9 @@ s64 vsock_connectible_has_data(struct vsock_sock *vsk) { struct sock *sk = sk_vsock(vsk); + if (WARN_ON(!vsk->transport)) + return 0; + if (sk->sk_type == SOCK_SEQPACKET) return vsk->transport->seqpacket_has_data(vsk); else @@ -887,6 +902,9 @@ EXPORT_SYMBOL_GPL(vsock_connectible_has_data); s64 vsock_stream_has_space(struct vsock_sock *vsk) { + if (WARN_ON(!vsk->transport)) + return 0; + return vsk->transport->stream_has_space(vsk); } EXPORT_SYMBOL_GPL(vsock_stream_has_space); diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 9acc13ab3f82..7f7de6d88096 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -26,6 +26,9 @@ /* Threshold for detecting small packets to copy */ #define GOOD_COPY_LEN 128 +static void virtio_transport_cancel_close_work(struct vsock_sock *vsk, + bool cancel_timeout); + static const struct virtio_transport * virtio_transport_get_ops(struct vsock_sock *vsk) { @@ -1109,6 +1112,8 @@ void virtio_transport_destruct(struct vsock_sock *vsk) { struct virtio_vsock_sock *vvs = vsk->trans; + virtio_transport_cancel_close_work(vsk, true); + kfree(vvs); vsk->trans = NULL; } @@ -1204,17 +1209,11 @@ static void virtio_transport_wait_close(struct sock *sk, long timeout) } } -static void virtio_transport_do_close(struct vsock_sock *vsk, - bool cancel_timeout) +static void virtio_transport_cancel_close_work(struct vsock_sock *vsk, + bool cancel_timeout) { struct sock *sk = sk_vsock(vsk); - sock_set_flag(sk, SOCK_DONE); - vsk->peer_shutdown = SHUTDOWN_MASK; - if (vsock_stream_has_data(vsk) <= 0) - sk->sk_state = TCP_CLOSING; - sk->sk_state_change(sk); - if (vsk->close_work_scheduled && (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) { vsk->close_work_scheduled = false; @@ -1226,6 +1225,20 @@ static void virtio_transport_do_close(struct vsock_sock *vsk, } } +static void virtio_transport_do_close(struct vsock_sock *vsk, + bool cancel_timeout) +{ + struct sock *sk = sk_vsock(vsk); + + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown = SHUTDOWN_MASK; + if (vsock_stream_has_data(vsk) <= 0) + sk->sk_state = TCP_CLOSING; + sk->sk_state_change(sk); + + virtio_transport_cancel_close_work(vsk, cancel_timeout); +} + static void virtio_transport_close_timeout(struct work_struct *work) { struct vsock_sock *vsk = @@ -1628,8 +1641,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, lock_sock(sk); - /* Check if sk has been closed before lock_sock */ - if (sock_flag(sk, SOCK_DONE)) { + /* Check if sk has been closed or assigned to another transport before + * lock_sock (note: listener sockets are not assigned to any transport) + */ + if (sock_flag(sk, SOCK_DONE) || + (sk->sk_state != TCP_LISTEN && vsk->transport != &t->transport)) { (void)virtio_transport_reset_no_sock(t, skb); release_sock(sk); sock_put(sk); diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c index 4aa6e74ec295..f201d9eca1df 100644 --- a/net/vmw_vsock/vsock_bpf.c +++ b/net/vmw_vsock/vsock_bpf.c @@ -77,6 +77,7 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { struct sk_psock *psock; + struct vsock_sock *vsk; int copied; psock = sk_psock_get(sk); @@ -84,6 +85,13 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, return __vsock_recvmsg(sk, msg, len, flags); lock_sock(sk); + vsk = vsock_sk(sk); + + if (!vsk->transport) { + copied = -ENODEV; + goto out; + } + if (vsock_has_data(sk, psock) && sk_psock_queue_empty(psock)) { release_sock(sk); sk_psock_put(sk, psock); @@ -108,6 +116,7 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, copied = sk_msg_recvmsg(sk, psock, msg, len, flags); } +out: release_sock(sk); sk_psock_put(sk, psock); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 3fa70286c846..89d2bef96469 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -322,7 +322,6 @@ static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) return -ENOSPC; } - sk_mark_napi_id_once_xdp(&xs->sk, xdp); return 0; } @@ -908,11 +907,8 @@ static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len if (unlikely(!xs->tx)) return -ENOBUFS; - if (sk_can_busy_loop(sk)) { - if (xs->zc) - __sk_mark_napi_id_once(sk, xsk_pool_get_napi_id(xs->pool)); + if (sk_can_busy_loop(sk)) sk_busy_loop(sk, 1); /* only support non-blocking sockets */ - } if (xs->zc && xsk_no_wakeup(sk)) return 0; @@ -1298,6 +1294,14 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) xs->queue_id = qid; xp_add_xsk(xs->pool, xs); + if (xs->zc && qid < dev->real_num_rx_queues) { + struct netdev_rx_queue *rxq; + + rxq = __netif_get_rx_queue(dev, qid); + if (rxq->napi) + __sk_mark_napi_id_once(sk, rxq->napi->napi_id); + } + out_unlock: if (err) { dev_put(dev); diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index d8201c4b1520..6750fdb42564 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -2384,6 +2384,17 @@ def print_kernel_family_struct_src(family, cw): if not kernel_can_gen_family_struct(family): return + if 'sock-priv' in family.kernel_family: + # Generate "trampolines" to make CFI happy + cw.write_func("static void", f"__{family.c_name}_nl_sock_priv_init", + [f"{family.c_name}_nl_sock_priv_init(priv);"], + ["void *priv"]) + cw.nl() + cw.write_func("static void", f"__{family.c_name}_nl_sock_priv_destroy", + [f"{family.c_name}_nl_sock_priv_destroy(priv);"], + ["void *priv"]) + cw.nl() + cw.block_start(f"struct genl_family {family.ident_name}_nl_family __ro_after_init =") cw.p('.name\t\t= ' + family.fam_key + ',') cw.p('.version\t= ' + family.ver_key + ',') @@ -2401,9 +2412,8 @@ def print_kernel_family_struct_src(family, cw): cw.p(f'.n_mcgrps\t= ARRAY_SIZE({family.c_name}_nl_mcgrps),') if 'sock-priv' in family.kernel_family: cw.p(f'.sock_priv_size\t= sizeof({family.kernel_family["sock-priv"]}),') - # Force cast here, actual helpers take pointer to the real type. - cw.p(f'.sock_priv_init\t= (void *){family.c_name}_nl_sock_priv_init,') - cw.p(f'.sock_priv_destroy = (void *){family.c_name}_nl_sock_priv_destroy,') + cw.p(f'.sock_priv_init\t= __{family.c_name}_nl_sock_priv_init,') + cw.p(f'.sock_priv_destroy = __{family.c_name}_nl_sock_priv_destroy,') cw.block_end(';') diff --git a/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh b/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh index fd13c8cfb7a8..b411fe66510f 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh @@ -58,9 +58,12 @@ for root in mq mqprio; do ethtool -L $NDEV combined 4 n_child_assert 4 "One real queue, rest default" - # Graft some - tcq replace parent 100:1 handle 204: - n_child_assert 3 "Grafted" + # Remove real one + tcq del parent 100:4 handle 204: + + # Replace default with pfifo + tcq replace parent 100:1 handle 205: pfifo limit 1000 + n_child_assert 3 "Deleting real one, replacing default one with pfifo" ethtool -L $NDEV combined 1 n_child_assert 1 "Grafted, one" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 4209b9569039..414addef9a45 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -25,6 +25,8 @@ #include <sys/types.h> #include <sys/mman.h> +#include <arpa/inet.h> + #include <netdb.h> #include <netinet/in.h> @@ -1211,23 +1213,42 @@ static void parse_setsock_options(const char *name) exit(1); } -void xdisconnect(int fd, int addrlen) +void xdisconnect(int fd) { - struct sockaddr_storage empty; + socklen_t addrlen = sizeof(struct sockaddr_storage); + struct sockaddr_storage addr, empty; int msec_sleep = 10; - int queued = 1; - int i; + void *raw_addr; + int i, cmdlen; + char cmd[128]; + + /* get the local address and convert it to string */ + if (getsockname(fd, (struct sockaddr *)&addr, &addrlen) < 0) + xerror("getsockname"); + + if (addr.ss_family == AF_INET) + raw_addr = &(((struct sockaddr_in *)&addr)->sin_addr); + else if (addr.ss_family == AF_INET6) + raw_addr = &(((struct sockaddr_in6 *)&addr)->sin6_addr); + else + xerror("bad family"); + + strcpy(cmd, "ss -M | grep -q "); + cmdlen = strlen(cmd); + if (!inet_ntop(addr.ss_family, raw_addr, &cmd[cmdlen], + sizeof(cmd) - cmdlen)) + xerror("inet_ntop"); shutdown(fd, SHUT_WR); - /* while until the pending data is completely flushed, the later + /* + * wait until the pending data is completely flushed and all + * the MPTCP sockets reached the closed status. * disconnect will bypass/ignore/drop any pending data. */ for (i = 0; ; i += msec_sleep) { - if (ioctl(fd, SIOCOUTQ, &queued) < 0) - xerror("can't query out socket queue: %d", errno); - - if (!queued) + /* closed socket are not listed by 'ss' */ + if (system(cmd) != 0) break; if (i > poll_timeout) @@ -1281,9 +1302,9 @@ again: return ret; if (cfg_truncate > 0) { - xdisconnect(fd, peer->ai_addrlen); + xdisconnect(fd); } else if (--cfg_repeat > 0) { - xdisconnect(fd, peer->ai_addrlen); + xdisconnect(fd); /* the socket could be unblocking at this point, we need the * connect to be blocking |