summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-01-16 18:09:44 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2025-01-16 18:09:44 +0100
commitce69b4019001407f9cd738dd2ba217b3a8ab831b (patch)
tree02f78838ce7298b7b1321a4dfefcf9d529a75fff
parentMerge tag 'pm-6.13-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/rafa... (diff)
parentnetdev: avoid CFI problems with sock priv helpers (diff)
downloadlinux-ce69b4019001407f9cd738dd2ba217b3a8ab831b.tar.xz
linux-ce69b4019001407f9cd738dd2ba217b3a8ab831b.zip
Merge tag 'net-6.13-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Paolo Abeni: "Notably this includes fixes for a few regressions spotted very recently. No known outstanding ones. Current release - regressions: - core: avoid CFI problems with sock priv helpers - xsk: bring back busy polling support - netpoll: ensure skb_pool list is always initialized Current release - new code bugs: - core: make page_pool_ref_netmem work with net iovs - ipv4: route: fix drop reason being overridden in ip_route_input_slow - udp: make rehash4 independent in udp_lib_rehash() Previous releases - regressions: - bpf: fix bpf_sk_select_reuseport() memory leak - openvswitch: fix lockup on tx to unregistering netdev with carrier - mptcp: be sure to send ack when mptcp-level window re-opens - eth: - bnxt: always recalculate features after XDP clearing, fix null-deref - mlx5: fix sub-function add port error handling - fec: handle page_pool_dev_alloc_pages error Previous releases - always broken: - vsock: some fixes due to transport de-assignment - eth: - ice: fix E825 initialization - mlx5e: fix inversion dependency warning while enabling IPsec tunnel - gtp: destroy device along with udp socket's netns dismantle. - xilinx: axienet: Fix IRQ coalescing packet count overflow" * tag 'net-6.13-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (44 commits) netdev: avoid CFI problems with sock priv helpers net/mlx5e: Always start IPsec sequence number from 1 net/mlx5e: Rely on reqid in IPsec tunnel mode net/mlx5e: Fix inversion dependency warning while enabling IPsec tunnel net/mlx5: Clear port select structure when fail to create net/mlx5: SF, Fix add port error handling net/mlx5: Fix a lockdep warning as part of the write combining test net/mlx5: Fix RDMA TX steering prio net: make page_pool_ref_netmem work with net iovs net: ethernet: xgbe: re-add aneg to supported features in PHY quirks net: pcs: xpcs: actively unset DW_VR_MII_DIG_CTRL1_2G5_EN for 1G SGMII net: pcs: xpcs: fix DW_VR_MII_DIG_CTRL1_2G5_EN bit being set for 1G SGMII w/o inband selftests: net: Adapt ethtool mq tests to fix in qdisc graft net: fec: handle page_pool_dev_alloc_pages error net: netpoll: ensure skb_pool list is always initialized net: xilinx: axienet: Fix IRQ coalescing packet count overflow nfp: bpf: prevent integer overflow in nfp_bpf_event_output() selftests: mptcp: avoid spurious errors on disconnect mptcp: fix spurious wake-up on under memory pressure mptcp: be sure to send ack when mptcp-level window re-opens ...
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c19
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c25
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c7
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c19
-rw-r--r--drivers/net/ethernet/intel/ice/ice_adminq_cmd.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c51
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c6
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.c23
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.h4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp_consts.h2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp_hw.c263
-rw-r--r--drivers/net/ethernet/intel/ice/ice_type.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wc.c24
-rw-r--r--drivers/net/ethernet/microsoft/mana/gdma_main.c4
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/offload.c3
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c44
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c1
-rw-r--r--drivers/net/ethernet/ti/cpsw_ale.c14
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_axienet_main.c6
-rw-r--r--drivers/net/gtp.c26
-rw-r--r--drivers/net/pcs/pcs-xpcs.c4
-rw-r--r--drivers/net/pfcp.c15
-rw-r--r--include/net/busy_poll.h8
-rw-r--r--include/net/page_pool/helpers.h2
-rw-r--r--include/net/xdp.h1
-rw-r--r--include/net/xdp_sock_drv.h14
-rw-r--r--net/core/filter.c30
-rw-r--r--net/core/netdev-genl-gen.c14
-rw-r--r--net/core/netpoll.c10
-rw-r--r--net/core/pktgen.c6
-rw-r--r--net/core/xdp.c1
-rw-r--r--net/ipv4/route.c1
-rw-r--r--net/ipv4/udp.c46
-rw-r--r--net/mptcp/options.c6
-rw-r--r--net/mptcp/protocol.h9
-rw-r--r--net/ncsi/internal.h2
-rw-r--r--net/ncsi/ncsi-manage.c16
-rw-r--r--net/ncsi/ncsi-rsp.c19
-rw-r--r--net/openvswitch/actions.c4
-rw-r--r--net/vmw_vsock/af_vsock.c18
-rw-r--r--net/vmw_vsock/virtio_transport_common.c36
-rw-r--r--net/vmw_vsock/vsock_bpf.c9
-rw-r--r--net/xdp/xsk.c14
-rwxr-xr-xtools/net/ynl/ynl-gen-c.py16
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh9
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c43
54 files changed, 552 insertions, 399 deletions
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index 6a716337f48b..268399dfcf22 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -923,7 +923,6 @@ static void xgbe_phy_free_phy_device(struct xgbe_prv_data *pdata)
static bool xgbe_phy_finisar_phy_quirks(struct xgbe_prv_data *pdata)
{
- __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, };
struct xgbe_phy_data *phy_data = pdata->phy_data;
unsigned int phy_id = phy_data->phydev->phy_id;
@@ -945,14 +944,7 @@ static bool xgbe_phy_finisar_phy_quirks(struct xgbe_prv_data *pdata)
phy_write(phy_data->phydev, 0x04, 0x0d01);
phy_write(phy_data->phydev, 0x00, 0x9140);
- linkmode_set_bit_array(phy_10_100_features_array,
- ARRAY_SIZE(phy_10_100_features_array),
- supported);
- linkmode_set_bit_array(phy_gbit_features_array,
- ARRAY_SIZE(phy_gbit_features_array),
- supported);
-
- linkmode_copy(phy_data->phydev->supported, supported);
+ linkmode_copy(phy_data->phydev->supported, PHY_GBIT_FEATURES);
phy_support_asym_pause(phy_data->phydev);
@@ -964,7 +956,6 @@ static bool xgbe_phy_finisar_phy_quirks(struct xgbe_prv_data *pdata)
static bool xgbe_phy_belfuse_phy_quirks(struct xgbe_prv_data *pdata)
{
- __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, };
struct xgbe_phy_data *phy_data = pdata->phy_data;
struct xgbe_sfp_eeprom *sfp_eeprom = &phy_data->sfp_eeprom;
unsigned int phy_id = phy_data->phydev->phy_id;
@@ -1028,13 +1019,7 @@ static bool xgbe_phy_belfuse_phy_quirks(struct xgbe_prv_data *pdata)
reg = phy_read(phy_data->phydev, 0x00);
phy_write(phy_data->phydev, 0x00, reg & ~0x00800);
- linkmode_set_bit_array(phy_10_100_features_array,
- ARRAY_SIZE(phy_10_100_features_array),
- supported);
- linkmode_set_bit_array(phy_gbit_features_array,
- ARRAY_SIZE(phy_gbit_features_array),
- supported);
- linkmode_copy(phy_data->phydev->supported, supported);
+ linkmode_copy(phy_data->phydev->supported, PHY_GBIT_FEATURES);
phy_support_asym_pause(phy_data->phydev);
netif_dbg(pdata, drv, pdata->netdev,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index aeaa74f03046..b6f844cac80e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4708,7 +4708,7 @@ void bnxt_set_ring_params(struct bnxt *bp)
/* Changing allocation mode of RX rings.
* TODO: Update when extending xdp_rxq_info to support allocation modes.
*/
-int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
+static void __bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
{
struct net_device *dev = bp->dev;
@@ -4729,15 +4729,30 @@ int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
bp->rx_skb_func = bnxt_rx_page_skb;
}
bp->rx_dir = DMA_BIDIRECTIONAL;
- /* Disable LRO or GRO_HW */
- netdev_update_features(dev);
} else {
dev->max_mtu = bp->max_mtu;
bp->flags &= ~BNXT_FLAG_RX_PAGE_MODE;
bp->rx_dir = DMA_FROM_DEVICE;
bp->rx_skb_func = bnxt_rx_skb;
}
- return 0;
+}
+
+void bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
+{
+ __bnxt_set_rx_skb_mode(bp, page_mode);
+
+ if (!page_mode) {
+ int rx, tx;
+
+ bnxt_get_max_rings(bp, &rx, &tx, true);
+ if (rx > 1) {
+ bp->flags &= ~BNXT_FLAG_NO_AGG_RINGS;
+ bp->dev->hw_features |= NETIF_F_LRO;
+ }
+ }
+
+ /* Update LRO and GRO_HW availability */
+ netdev_update_features(bp->dev);
}
static void bnxt_free_vnic_attributes(struct bnxt *bp)
@@ -16214,7 +16229,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (bp->max_fltr < BNXT_MAX_FLTR)
bp->max_fltr = BNXT_MAX_FLTR;
bnxt_init_l2_fltr_tbl(bp);
- bnxt_set_rx_skb_mode(bp, false);
+ __bnxt_set_rx_skb_mode(bp, false);
bnxt_set_tpa_flags(bp);
bnxt_set_ring_params(bp);
bnxt_rdma_aux_device_init(bp);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 7df7a2233307..f11ed59203d9 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -2846,7 +2846,7 @@ u32 bnxt_fw_health_readl(struct bnxt *bp, int reg_idx);
bool bnxt_bs_trace_avail(struct bnxt *bp, u16 type);
void bnxt_set_tpa_flags(struct bnxt *bp);
void bnxt_set_ring_params(struct bnxt *);
-int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode);
+void bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode);
void bnxt_insert_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr);
void bnxt_del_one_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr);
int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index f88b641533fc..dc51dce209d5 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -422,15 +422,8 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog)
bnxt_set_rx_skb_mode(bp, true);
xdp_features_set_redirect_target(dev, true);
} else {
- int rx, tx;
-
xdp_features_clear_redirect_target(dev);
bnxt_set_rx_skb_mode(bp, false);
- bnxt_get_max_rings(bp, &rx, &tx, true);
- if (rx > 1) {
- bp->flags &= ~BNXT_FLAG_NO_AGG_RINGS;
- bp->dev->hw_features |= NETIF_F_LRO;
- }
}
bp->tx_nr_rings_xdp = tx_xdp;
bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tc + tx_xdp;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 1b55047c0237..4566848e1d7c 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1591,19 +1591,22 @@ static void fec_enet_tx(struct net_device *ndev, int budget)
fec_enet_tx_queue(ndev, i, budget);
}
-static void fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
+static int fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
struct bufdesc *bdp, int index)
{
struct page *new_page;
dma_addr_t phys_addr;
new_page = page_pool_dev_alloc_pages(rxq->page_pool);
- WARN_ON(!new_page);
- rxq->rx_skb_info[index].page = new_page;
+ if (unlikely(!new_page))
+ return -ENOMEM;
+ rxq->rx_skb_info[index].page = new_page;
rxq->rx_skb_info[index].offset = FEC_ENET_XDP_HEADROOM;
phys_addr = page_pool_get_dma_addr(new_page) + FEC_ENET_XDP_HEADROOM;
bdp->cbd_bufaddr = cpu_to_fec32(phys_addr);
+
+ return 0;
}
static u32
@@ -1698,6 +1701,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
int cpu = smp_processor_id();
struct xdp_buff xdp;
struct page *page;
+ __fec32 cbd_bufaddr;
u32 sub_len = 4;
#if !defined(CONFIG_M5272)
@@ -1766,12 +1770,17 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
index = fec_enet_get_bd_index(bdp, &rxq->bd);
page = rxq->rx_skb_info[index].page;
+ cbd_bufaddr = bdp->cbd_bufaddr;
+ if (fec_enet_update_cbd(rxq, bdp, index)) {
+ ndev->stats.rx_dropped++;
+ goto rx_processing_done;
+ }
+
dma_sync_single_for_cpu(&fep->pdev->dev,
- fec32_to_cpu(bdp->cbd_bufaddr),
+ fec32_to_cpu(cbd_bufaddr),
pkt_len,
DMA_FROM_DEVICE);
prefetch(page_address(page));
- fec_enet_update_cbd(rxq, bdp, index);
if (xdp_prog) {
xdp_buff_clear_frags_flag(&xdp);
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index ef14cff9a333..46f9726d9a8a 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -1665,6 +1665,7 @@ struct ice_aqc_get_port_options_elem {
#define ICE_AQC_PORT_OPT_MAX_LANE_25G 5
#define ICE_AQC_PORT_OPT_MAX_LANE_50G 6
#define ICE_AQC_PORT_OPT_MAX_LANE_100G 7
+#define ICE_AQC_PORT_OPT_MAX_LANE_200G 8
u8 global_scid[2];
u8 phy_scid[2];
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 496d86cbd13f..532024f34ce4 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -4096,6 +4096,57 @@ ice_aq_set_port_option(struct ice_hw *hw, u8 lport, u8 lport_valid,
}
/**
+ * ice_get_phy_lane_number - Get PHY lane number for current adapter
+ * @hw: pointer to the hw struct
+ *
+ * Return: PHY lane number on success, negative error code otherwise.
+ */
+int ice_get_phy_lane_number(struct ice_hw *hw)
+{
+ struct ice_aqc_get_port_options_elem *options;
+ unsigned int lport = 0;
+ unsigned int lane;
+ int err;
+
+ options = kcalloc(ICE_AQC_PORT_OPT_MAX, sizeof(*options), GFP_KERNEL);
+ if (!options)
+ return -ENOMEM;
+
+ for (lane = 0; lane < ICE_MAX_PORT_PER_PCI_DEV; lane++) {
+ u8 options_count = ICE_AQC_PORT_OPT_MAX;
+ u8 speed, active_idx, pending_idx;
+ bool active_valid, pending_valid;
+
+ err = ice_aq_get_port_options(hw, options, &options_count, lane,
+ true, &active_idx, &active_valid,
+ &pending_idx, &pending_valid);
+ if (err)
+ goto err;
+
+ if (!active_valid)
+ continue;
+
+ speed = options[active_idx].max_lane_speed;
+ /* If we don't get speed for this lane, it's unoccupied */
+ if (speed > ICE_AQC_PORT_OPT_MAX_LANE_200G)
+ continue;
+
+ if (hw->pf_id == lport) {
+ kfree(options);
+ return lane;
+ }
+
+ lport++;
+ }
+
+ /* PHY lane not found */
+ err = -ENXIO;
+err:
+ kfree(options);
+ return err;
+}
+
+/**
* ice_aq_sff_eeprom
* @hw: pointer to the HW struct
* @lport: bits [7:0] = logical port, bit [8] = logical port valid
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 27208a60cece..fe6f88cfd948 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -193,6 +193,7 @@ ice_aq_get_port_options(struct ice_hw *hw,
int
ice_aq_set_port_option(struct ice_hw *hw, u8 lport, u8 lport_valid,
u8 new_option);
+int ice_get_phy_lane_number(struct ice_hw *hw);
int
ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr,
u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length,
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 0ab35607e5d5..89fa3d53d317 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1144,7 +1144,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
if (link_up == old_link && link_speed == old_link_speed)
return 0;
- ice_ptp_link_change(pf, pf->hw.pf_id, link_up);
+ ice_ptp_link_change(pf, link_up);
if (ice_is_dcb_active(pf)) {
if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
@@ -6790,7 +6790,7 @@ static int ice_up_complete(struct ice_vsi *vsi)
ice_print_link_msg(vsi, true);
netif_tx_start_all_queues(vsi->netdev);
netif_carrier_on(vsi->netdev);
- ice_ptp_link_change(pf, pf->hw.pf_id, true);
+ ice_ptp_link_change(pf, true);
}
/* Perform an initial read of the statistics registers now to
@@ -7260,7 +7260,7 @@ int ice_down(struct ice_vsi *vsi)
if (vsi->netdev) {
vlan_err = ice_vsi_del_vlan_zero(vsi);
- ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false);
+ ice_ptp_link_change(vsi->back, false);
netif_carrier_off(vsi->netdev);
netif_tx_disable(vsi->netdev);
}
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index a999fface272..efd770dfec44 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -1388,10 +1388,9 @@ ice_ptp_port_phy_restart(struct ice_ptp_port *ptp_port)
/**
* ice_ptp_link_change - Reconfigure PTP after link status change
* @pf: Board private structure
- * @port: Port for which the PHY start is set
* @linkup: Link is up or down
*/
-void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup)
+void ice_ptp_link_change(struct ice_pf *pf, bool linkup)
{
struct ice_ptp_port *ptp_port;
struct ice_hw *hw = &pf->hw;
@@ -1399,14 +1398,7 @@ void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup)
if (pf->ptp.state != ICE_PTP_READY)
return;
- if (WARN_ON_ONCE(port >= hw->ptp.num_lports))
- return;
-
ptp_port = &pf->ptp.port;
- if (ice_is_e825c(hw) && hw->ptp.is_2x50g_muxed_topo)
- port *= 2;
- if (WARN_ON_ONCE(ptp_port->port_num != port))
- return;
/* Update cached link status for this port immediately */
ptp_port->link_up = linkup;
@@ -3164,10 +3156,17 @@ void ice_ptp_init(struct ice_pf *pf)
{
struct ice_ptp *ptp = &pf->ptp;
struct ice_hw *hw = &pf->hw;
- int err;
+ int lane_num, err;
ptp->state = ICE_PTP_INITIALIZING;
+ lane_num = ice_get_phy_lane_number(hw);
+ if (lane_num < 0) {
+ err = lane_num;
+ goto err_exit;
+ }
+
+ ptp->port.port_num = (u8)lane_num;
ice_ptp_init_hw(hw);
ice_ptp_init_tx_interrupt_mode(pf);
@@ -3188,10 +3187,6 @@ void ice_ptp_init(struct ice_pf *pf)
if (err)
goto err_exit;
- ptp->port.port_num = hw->pf_id;
- if (ice_is_e825c(hw) && hw->ptp.is_2x50g_muxed_topo)
- ptp->port.port_num = hw->pf_id * 2;
-
err = ice_ptp_init_port(pf, &ptp->port);
if (err)
goto err_exit;
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h
index 824e73b677a4..c490d98fd9c6 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.h
@@ -310,7 +310,7 @@ void ice_ptp_prepare_for_reset(struct ice_pf *pf,
enum ice_reset_req reset_type);
void ice_ptp_init(struct ice_pf *pf);
void ice_ptp_release(struct ice_pf *pf);
-void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup);
+void ice_ptp_link_change(struct ice_pf *pf, bool linkup);
#else /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
static inline int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr)
{
@@ -358,7 +358,7 @@ static inline void ice_ptp_prepare_for_reset(struct ice_pf *pf,
}
static inline void ice_ptp_init(struct ice_pf *pf) { }
static inline void ice_ptp_release(struct ice_pf *pf) { }
-static inline void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup)
+static inline void ice_ptp_link_change(struct ice_pf *pf, bool linkup)
{
}
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_consts.h b/drivers/net/ethernet/intel/ice/ice_ptp_consts.h
index d75f0eddd631..a8e57cf05a9c 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp_consts.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_consts.h
@@ -131,7 +131,7 @@ struct ice_eth56g_mac_reg_cfg eth56g_mac_cfg[NUM_ICE_ETH56G_LNK_SPD] = {
.rx_offset = {
.serdes = 0xffffeb27, /* -10.42424 */
.no_fec = 0xffffcccd, /* -25.6 */
- .fc = 0xfffe0014, /* -255.96 */
+ .fc = 0xfffc557b, /* -469.26 */
.sfd = 0x4a4, /* 2.32 */
.bs_ds = 0x32 /* 0.0969697 */
}
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
index 518893f23372..02e84f5b1d45 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
@@ -901,30 +901,45 @@ static void ice_ptp_exec_tmr_cmd(struct ice_hw *hw)
*/
/**
+ * ice_ptp_get_dest_dev_e825 - get destination PHY for given port number
+ * @hw: pointer to the HW struct
+ * @port: destination port
+ *
+ * Return: destination sideband queue PHY device.
+ */
+static enum ice_sbq_msg_dev ice_ptp_get_dest_dev_e825(struct ice_hw *hw,
+ u8 port)
+{
+ /* On a single complex E825, PHY 0 is always destination device phy_0
+ * and PHY 1 is phy_0_peer.
+ */
+ if (port >= hw->ptp.ports_per_phy)
+ return eth56g_phy_1;
+ else
+ return eth56g_phy_0;
+}
+
+/**
* ice_write_phy_eth56g - Write a PHY port register
* @hw: pointer to the HW struct
- * @phy_idx: PHY index
+ * @port: destination port
* @addr: PHY register address
* @val: Value to write
*
* Return: 0 on success, other error codes when failed to write to PHY
*/
-static int ice_write_phy_eth56g(struct ice_hw *hw, u8 phy_idx, u32 addr,
- u32 val)
+static int ice_write_phy_eth56g(struct ice_hw *hw, u8 port, u32 addr, u32 val)
{
- struct ice_sbq_msg_input phy_msg;
+ struct ice_sbq_msg_input msg = {
+ .dest_dev = ice_ptp_get_dest_dev_e825(hw, port),
+ .opcode = ice_sbq_msg_wr,
+ .msg_addr_low = lower_16_bits(addr),
+ .msg_addr_high = upper_16_bits(addr),
+ .data = val
+ };
int err;
- phy_msg.opcode = ice_sbq_msg_wr;
-
- phy_msg.msg_addr_low = lower_16_bits(addr);
- phy_msg.msg_addr_high = upper_16_bits(addr);
-
- phy_msg.data = val;
- phy_msg.dest_dev = hw->ptp.phy.eth56g.phy_addr[phy_idx];
-
- err = ice_sbq_rw_reg(hw, &phy_msg, ICE_AQ_FLAG_RD);
-
+ err = ice_sbq_rw_reg(hw, &msg, ICE_AQ_FLAG_RD);
if (err)
ice_debug(hw, ICE_DBG_PTP, "PTP failed to send msg to phy %d\n",
err);
@@ -935,41 +950,36 @@ static int ice_write_phy_eth56g(struct ice_hw *hw, u8 phy_idx, u32 addr,
/**
* ice_read_phy_eth56g - Read a PHY port register
* @hw: pointer to the HW struct
- * @phy_idx: PHY index
+ * @port: destination port
* @addr: PHY register address
* @val: Value to write
*
* Return: 0 on success, other error codes when failed to read from PHY
*/
-static int ice_read_phy_eth56g(struct ice_hw *hw, u8 phy_idx, u32 addr,
- u32 *val)
+static int ice_read_phy_eth56g(struct ice_hw *hw, u8 port, u32 addr, u32 *val)
{
- struct ice_sbq_msg_input phy_msg;
+ struct ice_sbq_msg_input msg = {
+ .dest_dev = ice_ptp_get_dest_dev_e825(hw, port),
+ .opcode = ice_sbq_msg_rd,
+ .msg_addr_low = lower_16_bits(addr),
+ .msg_addr_high = upper_16_bits(addr)
+ };
int err;
- phy_msg.opcode = ice_sbq_msg_rd;
-
- phy_msg.msg_addr_low = lower_16_bits(addr);
- phy_msg.msg_addr_high = upper_16_bits(addr);
-
- phy_msg.data = 0;
- phy_msg.dest_dev = hw->ptp.phy.eth56g.phy_addr[phy_idx];
-
- err = ice_sbq_rw_reg(hw, &phy_msg, ICE_AQ_FLAG_RD);
- if (err) {
+ err = ice_sbq_rw_reg(hw, &msg, ICE_AQ_FLAG_RD);
+ if (err)
ice_debug(hw, ICE_DBG_PTP, "PTP failed to send msg to phy %d\n",
err);
- return err;
- }
-
- *val = phy_msg.data;
+ else
+ *val = msg.data;
- return 0;
+ return err;
}
/**
* ice_phy_res_address_eth56g - Calculate a PHY port register address
- * @port: Port number to be written
+ * @hw: pointer to the HW struct
+ * @lane: Lane number to be written
* @res_type: resource type (register/memory)
* @offset: Offset from PHY port register base
* @addr: The result address
@@ -978,17 +988,19 @@ static int ice_read_phy_eth56g(struct ice_hw *hw, u8 phy_idx, u32 addr,
* * %0 - success
* * %EINVAL - invalid port number or resource type
*/
-static int ice_phy_res_address_eth56g(u8 port, enum eth56g_res_type res_type,
- u32 offset, u32 *addr)
+static int ice_phy_res_address_eth56g(struct ice_hw *hw, u8 lane,
+ enum eth56g_res_type res_type,
+ u32 offset,
+ u32 *addr)
{
- u8 lane = port % ICE_PORTS_PER_QUAD;
- u8 phy = ICE_GET_QUAD_NUM(port);
-
if (res_type >= NUM_ETH56G_PHY_RES)
return -EINVAL;
- *addr = eth56g_phy_res[res_type].base[phy] +
+ /* Lanes 4..7 are in fact 0..3 on a second PHY */
+ lane %= hw->ptp.ports_per_phy;
+ *addr = eth56g_phy_res[res_type].base[0] +
lane * eth56g_phy_res[res_type].step + offset;
+
return 0;
}
@@ -1008,19 +1020,17 @@ static int ice_phy_res_address_eth56g(u8 port, enum eth56g_res_type res_type,
static int ice_write_port_eth56g(struct ice_hw *hw, u8 port, u32 offset,
u32 val, enum eth56g_res_type res_type)
{
- u8 phy_port = port % hw->ptp.ports_per_phy;
- u8 phy_idx = port / hw->ptp.ports_per_phy;
u32 addr;
int err;
if (port >= hw->ptp.num_lports)
return -EINVAL;
- err = ice_phy_res_address_eth56g(phy_port, res_type, offset, &addr);
+ err = ice_phy_res_address_eth56g(hw, port, res_type, offset, &addr);
if (err)
return err;
- return ice_write_phy_eth56g(hw, phy_idx, addr, val);
+ return ice_write_phy_eth56g(hw, port, addr, val);
}
/**
@@ -1039,19 +1049,17 @@ static int ice_write_port_eth56g(struct ice_hw *hw, u8 port, u32 offset,
static int ice_read_port_eth56g(struct ice_hw *hw, u8 port, u32 offset,
u32 *val, enum eth56g_res_type res_type)
{
- u8 phy_port = port % hw->ptp.ports_per_phy;
- u8 phy_idx = port / hw->ptp.ports_per_phy;
u32 addr;
int err;
if (port >= hw->ptp.num_lports)
return -EINVAL;
- err = ice_phy_res_address_eth56g(phy_port, res_type, offset, &addr);
+ err = ice_phy_res_address_eth56g(hw, port, res_type, offset, &addr);
if (err)
return err;
- return ice_read_phy_eth56g(hw, phy_idx, addr, val);
+ return ice_read_phy_eth56g(hw, port, addr, val);
}
/**
@@ -1201,6 +1209,56 @@ static int ice_write_port_mem_eth56g(struct ice_hw *hw, u8 port, u16 offset,
}
/**
+ * ice_write_quad_ptp_reg_eth56g - Write a PHY quad register
+ * @hw: pointer to the HW struct
+ * @offset: PHY register offset
+ * @port: Port number
+ * @val: Value to write
+ *
+ * Return:
+ * * %0 - success
+ * * %EIO - invalid port number or resource type
+ * * %other - failed to write to PHY
+ */
+static int ice_write_quad_ptp_reg_eth56g(struct ice_hw *hw, u8 port,
+ u32 offset, u32 val)
+{
+ u32 addr;
+
+ if (port >= hw->ptp.num_lports)
+ return -EIO;
+
+ addr = eth56g_phy_res[ETH56G_PHY_REG_PTP].base[0] + offset;
+
+ return ice_write_phy_eth56g(hw, port, addr, val);
+}
+
+/**
+ * ice_read_quad_ptp_reg_eth56g - Read a PHY quad register
+ * @hw: pointer to the HW struct
+ * @offset: PHY register offset
+ * @port: Port number
+ * @val: Value to read
+ *
+ * Return:
+ * * %0 - success
+ * * %EIO - invalid port number or resource type
+ * * %other - failed to read from PHY
+ */
+static int ice_read_quad_ptp_reg_eth56g(struct ice_hw *hw, u8 port,
+ u32 offset, u32 *val)
+{
+ u32 addr;
+
+ if (port >= hw->ptp.num_lports)
+ return -EIO;
+
+ addr = eth56g_phy_res[ETH56G_PHY_REG_PTP].base[0] + offset;
+
+ return ice_read_phy_eth56g(hw, port, addr, val);
+}
+
+/**
* ice_is_64b_phy_reg_eth56g - Check if this is a 64bit PHY register
* @low_addr: the low address to check
* @high_addr: on return, contains the high address of the 64bit register
@@ -1919,7 +1977,6 @@ ice_phy_get_speed_eth56g(struct ice_link_status *li)
*/
static int ice_phy_cfg_parpcs_eth56g(struct ice_hw *hw, u8 port)
{
- u8 port_blk = port & ~(ICE_PORTS_PER_QUAD - 1);
u32 val;
int err;
@@ -1934,8 +1991,8 @@ static int ice_phy_cfg_parpcs_eth56g(struct ice_hw *hw, u8 port)
switch (ice_phy_get_speed_eth56g(&hw->port_info->phy.link_info)) {
case ICE_ETH56G_LNK_SPD_1G:
case ICE_ETH56G_LNK_SPD_2_5G:
- err = ice_read_ptp_reg_eth56g(hw, port_blk,
- PHY_GPCS_CONFIG_REG0, &val);
+ err = ice_read_quad_ptp_reg_eth56g(hw, port,
+ PHY_GPCS_CONFIG_REG0, &val);
if (err) {
ice_debug(hw, ICE_DBG_PTP, "Failed to read PHY_GPCS_CONFIG_REG0, status: %d",
err);
@@ -1946,8 +2003,8 @@ static int ice_phy_cfg_parpcs_eth56g(struct ice_hw *hw, u8 port)
val |= FIELD_PREP(PHY_GPCS_CONFIG_REG0_TX_THR_M,
ICE_ETH56G_NOMINAL_TX_THRESH);
- err = ice_write_ptp_reg_eth56g(hw, port_blk,
- PHY_GPCS_CONFIG_REG0, val);
+ err = ice_write_quad_ptp_reg_eth56g(hw, port,
+ PHY_GPCS_CONFIG_REG0, val);
if (err) {
ice_debug(hw, ICE_DBG_PTP, "Failed to write PHY_GPCS_CONFIG_REG0, status: %d",
err);
@@ -1988,50 +2045,47 @@ static int ice_phy_cfg_parpcs_eth56g(struct ice_hw *hw, u8 port)
*/
int ice_phy_cfg_ptp_1step_eth56g(struct ice_hw *hw, u8 port)
{
- u8 port_blk = port & ~(ICE_PORTS_PER_QUAD - 1);
- u8 blk_port = port & (ICE_PORTS_PER_QUAD - 1);
+ u8 quad_lane = port % ICE_PORTS_PER_QUAD;
+ u32 addr, val, peer_delay;
bool enable, sfd_ena;
- u32 val, peer_delay;
int err;
enable = hw->ptp.phy.eth56g.onestep_ena;
peer_delay = hw->ptp.phy.eth56g.peer_delay;
sfd_ena = hw->ptp.phy.eth56g.sfd_ena;
- /* PHY_PTP_1STEP_CONFIG */
- err = ice_read_ptp_reg_eth56g(hw, port_blk, PHY_PTP_1STEP_CONFIG, &val);
+ addr = PHY_PTP_1STEP_CONFIG;
+ err = ice_read_quad_ptp_reg_eth56g(hw, port, addr, &val);
if (err)
return err;
if (enable)
- val |= blk_port;
+ val |= BIT(quad_lane);
else
- val &= ~blk_port;
+ val &= ~BIT(quad_lane);
val &= ~(PHY_PTP_1STEP_T1S_UP64_M | PHY_PTP_1STEP_T1S_DELTA_M);
- err = ice_write_ptp_reg_eth56g(hw, port_blk, PHY_PTP_1STEP_CONFIG, val);
+ err = ice_write_quad_ptp_reg_eth56g(hw, port, addr, val);
if (err)
return err;
- /* PHY_PTP_1STEP_PEER_DELAY */
+ addr = PHY_PTP_1STEP_PEER_DELAY(quad_lane);
val = FIELD_PREP(PHY_PTP_1STEP_PD_DELAY_M, peer_delay);
if (peer_delay)
val |= PHY_PTP_1STEP_PD_ADD_PD_M;
val |= PHY_PTP_1STEP_PD_DLY_V_M;
- err = ice_write_ptp_reg_eth56g(hw, port_blk,
- PHY_PTP_1STEP_PEER_DELAY(blk_port), val);
+ err = ice_write_quad_ptp_reg_eth56g(hw, port, addr, val);
if (err)
return err;
val &= ~PHY_PTP_1STEP_PD_DLY_V_M;
- err = ice_write_ptp_reg_eth56g(hw, port_blk,
- PHY_PTP_1STEP_PEER_DELAY(blk_port), val);
+ err = ice_write_quad_ptp_reg_eth56g(hw, port, addr, val);
if (err)
return err;
- /* PHY_MAC_XIF_MODE */
- err = ice_read_mac_reg_eth56g(hw, port, PHY_MAC_XIF_MODE, &val);
+ addr = PHY_MAC_XIF_MODE;
+ err = ice_read_mac_reg_eth56g(hw, port, addr, &val);
if (err)
return err;
@@ -2051,7 +2105,7 @@ int ice_phy_cfg_ptp_1step_eth56g(struct ice_hw *hw, u8 port)
FIELD_PREP(PHY_MAC_XIF_TS_BIN_MODE_M, enable) |
FIELD_PREP(PHY_MAC_XIF_TS_SFD_ENA_M, sfd_ena);
- return ice_write_mac_reg_eth56g(hw, port, PHY_MAC_XIF_MODE, val);
+ return ice_write_mac_reg_eth56g(hw, port, addr, val);
}
/**
@@ -2093,21 +2147,22 @@ static u32 ice_ptp_calc_bitslip_eth56g(struct ice_hw *hw, u8 port, u32 bs,
bool fc, bool rs,
enum ice_eth56g_link_spd spd)
{
- u8 port_offset = port & (ICE_PORTS_PER_QUAD - 1);
- u8 port_blk = port & ~(ICE_PORTS_PER_QUAD - 1);
u32 bitslip;
int err;
if (!bs || rs)
return 0;
- if (spd == ICE_ETH56G_LNK_SPD_1G || spd == ICE_ETH56G_LNK_SPD_2_5G)
+ if (spd == ICE_ETH56G_LNK_SPD_1G || spd == ICE_ETH56G_LNK_SPD_2_5G) {
err = ice_read_gpcs_reg_eth56g(hw, port, PHY_GPCS_BITSLIP,
&bitslip);
- else
- err = ice_read_ptp_reg_eth56g(hw, port_blk,
- PHY_REG_SD_BIT_SLIP(port_offset),
- &bitslip);
+ } else {
+ u8 quad_lane = port % ICE_PORTS_PER_QUAD;
+ u32 addr;
+
+ addr = PHY_REG_SD_BIT_SLIP(quad_lane);
+ err = ice_read_quad_ptp_reg_eth56g(hw, port, addr, &bitslip);
+ }
if (err)
return 0;
@@ -2667,59 +2722,29 @@ static int ice_get_phy_tx_tstamp_ready_eth56g(struct ice_hw *hw, u8 port,
}
/**
- * ice_is_muxed_topo - detect breakout 2x50G topology for E825C
- * @hw: pointer to the HW struct
- *
- * Return: true if it's 2x50 breakout topology, false otherwise
- */
-static bool ice_is_muxed_topo(struct ice_hw *hw)
-{
- u8 link_topo;
- bool mux;
- u32 val;
-
- val = rd32(hw, GLGEN_SWITCH_MODE_CONFIG);
- mux = FIELD_GET(GLGEN_SWITCH_MODE_CONFIG_25X4_QUAD_M, val);
- val = rd32(hw, GLGEN_MAC_LINK_TOPO);
- link_topo = FIELD_GET(GLGEN_MAC_LINK_TOPO_LINK_TOPO_M, val);
-
- return (mux && link_topo == ICE_LINK_TOPO_UP_TO_2_LINKS);
-}
-
-/**
- * ice_ptp_init_phy_e825c - initialize PHY parameters
+ * ice_ptp_init_phy_e825 - initialize PHY parameters
* @hw: pointer to the HW struct
*/
-static void ice_ptp_init_phy_e825c(struct ice_hw *hw)
+static void ice_ptp_init_phy_e825(struct ice_hw *hw)
{
struct ice_ptp_hw *ptp = &hw->ptp;
struct ice_eth56g_params *params;
- u8 phy;
+ u32 phy_rev;
+ int err;
ptp->phy_model = ICE_PHY_ETH56G;
params = &ptp->phy.eth56g;
params->onestep_ena = false;
params->peer_delay = 0;
params->sfd_ena = false;
- params->phy_addr[0] = eth56g_phy_0;
- params->phy_addr[1] = eth56g_phy_1;
params->num_phys = 2;
ptp->ports_per_phy = 4;
ptp->num_lports = params->num_phys * ptp->ports_per_phy;
ice_sb_access_ena_eth56g(hw, true);
- for (phy = 0; phy < params->num_phys; phy++) {
- u32 phy_rev;
- int err;
-
- err = ice_read_phy_eth56g(hw, phy, PHY_REG_REVISION, &phy_rev);
- if (err || phy_rev != PHY_REVISION_ETH56G) {
- ptp->phy_model = ICE_PHY_UNSUP;
- return;
- }
- }
-
- ptp->is_2x50g_muxed_topo = ice_is_muxed_topo(hw);
+ err = ice_read_phy_eth56g(hw, hw->pf_id, PHY_REG_REVISION, &phy_rev);
+ if (err || phy_rev != PHY_REVISION_ETH56G)
+ ptp->phy_model = ICE_PHY_UNSUP;
}
/* E822 family functions
@@ -2738,10 +2763,9 @@ static void ice_fill_phy_msg_e82x(struct ice_hw *hw,
struct ice_sbq_msg_input *msg, u8 port,
u16 offset)
{
- int phy_port, phy, quadtype;
+ int phy_port, quadtype;
phy_port = port % hw->ptp.ports_per_phy;
- phy = port / hw->ptp.ports_per_phy;
quadtype = ICE_GET_QUAD_NUM(port) %
ICE_GET_QUAD_NUM(hw->ptp.ports_per_phy);
@@ -2753,12 +2777,7 @@ static void ice_fill_phy_msg_e82x(struct ice_hw *hw,
msg->msg_addr_high = P_Q1_H(P_4_BASE + offset, phy_port);
}
- if (phy == 0)
- msg->dest_dev = rmn_0;
- else if (phy == 1)
- msg->dest_dev = rmn_1;
- else
- msg->dest_dev = rmn_2;
+ msg->dest_dev = rmn_0;
}
/**
@@ -5478,7 +5497,7 @@ void ice_ptp_init_hw(struct ice_hw *hw)
else if (ice_is_e810(hw))
ice_ptp_init_phy_e810(ptp);
else if (ice_is_e825c(hw))
- ice_ptp_init_phy_e825c(hw);
+ ice_ptp_init_phy_e825(hw);
else
ptp->phy_model = ICE_PHY_UNSUP;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index adb168860711..4a9ef722635f 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -850,7 +850,6 @@ struct ice_mbx_data {
struct ice_eth56g_params {
u8 num_phys;
- u8 phy_addr[2];
bool onestep_ena;
bool sfd_ena;
u32 peer_delay;
@@ -881,7 +880,6 @@ struct ice_ptp_hw {
union ice_phy_params phy;
u8 num_lports;
u8 ports_per_phy;
- bool is_2x50g_muxed_topo;
};
/* Port hardware description */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index ca92e518be76..1baf8933a07c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -724,6 +724,12 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
/* check esn */
if (x->props.flags & XFRM_STATE_ESN)
mlx5e_ipsec_update_esn_state(sa_entry);
+ else
+ /* According to RFC4303, section "3.3.3. Sequence Number Generation",
+ * the first packet sent using a given SA will contain a sequence
+ * number of 1.
+ */
+ sa_entry->esn_state.esn = 1;
mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs);
@@ -768,9 +774,12 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
MLX5_IPSEC_RESCHED);
if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET &&
- x->props.mode == XFRM_MODE_TUNNEL)
- xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id,
- MLX5E_IPSEC_TUNNEL_SA);
+ x->props.mode == XFRM_MODE_TUNNEL) {
+ xa_lock_bh(&ipsec->sadb);
+ __xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id,
+ MLX5E_IPSEC_TUNNEL_SA);
+ xa_unlock_bh(&ipsec->sadb);
+ }
out:
x->xso.offload_handle = (unsigned long)sa_entry;
@@ -797,7 +806,6 @@ err_xfrm:
static void mlx5e_xfrm_del_state(struct xfrm_state *x)
{
struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
- struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
struct mlx5e_ipsec_sa_entry *old;
@@ -806,12 +814,6 @@ static void mlx5e_xfrm_del_state(struct xfrm_state *x)
old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id);
WARN_ON(old != sa_entry);
-
- if (attrs->mode == XFRM_MODE_TUNNEL &&
- attrs->type == XFRM_DEV_OFFLOAD_PACKET)
- /* Make sure that no ARP requests are running in parallel */
- flush_workqueue(ipsec->wq);
-
}
static void mlx5e_xfrm_free_state(struct xfrm_state *x)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index e51b03d4c717..57861d34d46f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -1718,23 +1718,21 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
goto err_alloc;
}
- if (attrs->family == AF_INET)
- setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4);
- else
- setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6);
-
setup_fte_no_frags(spec);
setup_fte_upper_proto_match(spec, &attrs->upspec);
switch (attrs->type) {
case XFRM_DEV_OFFLOAD_CRYPTO:
+ if (attrs->family == AF_INET)
+ setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4);
+ else
+ setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6);
setup_fte_spi(spec, attrs->spi, false);
setup_fte_esp(spec);
setup_fte_reg_a(spec);
break;
case XFRM_DEV_OFFLOAD_PACKET:
- if (attrs->reqid)
- setup_fte_reg_c4(spec, attrs->reqid);
+ setup_fte_reg_c4(spec, attrs->reqid);
err = setup_pkt_reformat(ipsec, attrs, &flow_act);
if (err)
goto err_pkt_reformat;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
index 53cfa39188cb..820debf3fbbf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
@@ -91,8 +91,9 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
EXPORT_SYMBOL_GPL(mlx5_ipsec_device_caps);
static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn,
- struct mlx5_accel_esp_xfrm_attrs *attrs)
+ struct mlx5e_ipsec_sa_entry *sa_entry)
{
+ struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
void *aso_ctx;
aso_ctx = MLX5_ADDR_OF(ipsec_obj, obj, ipsec_aso);
@@ -120,8 +121,12 @@ static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn,
* active.
*/
MLX5_SET(ipsec_obj, obj, aso_return_reg, MLX5_IPSEC_ASO_REG_C_4_5);
- if (attrs->dir == XFRM_DEV_OFFLOAD_OUT)
+ if (attrs->dir == XFRM_DEV_OFFLOAD_OUT) {
MLX5_SET(ipsec_aso, aso_ctx, mode, MLX5_IPSEC_ASO_INC_SN);
+ if (!attrs->replay_esn.trigger)
+ MLX5_SET(ipsec_aso, aso_ctx, mode_parameter,
+ sa_entry->esn_state.esn);
+ }
if (attrs->lft.hard_packet_limit != XFRM_INF) {
MLX5_SET(ipsec_aso, aso_ctx, remove_flow_pkt_cnt,
@@ -175,7 +180,7 @@ static int mlx5_create_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry)
res = &mdev->mlx5e_res.hw_objs;
if (attrs->type == XFRM_DEV_OFFLOAD_PACKET)
- mlx5e_ipsec_packet_setup(obj, res->pdn, attrs);
+ mlx5e_ipsec_packet_setup(obj, res->pdn, sa_entry);
err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
if (!err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 2eabfcc247c6..0ce999706d41 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -2709,6 +2709,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
break;
case MLX5_FLOW_NAMESPACE_RDMA_TX:
root_ns = steering->rdma_tx_root_ns;
+ prio = RDMA_TX_BYPASS_PRIO;
break;
case MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS:
root_ns = steering->rdma_rx_root_ns;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
index ab2717012b79..39e80704b1c4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
@@ -530,7 +530,7 @@ int mlx5_lag_port_sel_create(struct mlx5_lag *ldev,
set_tt_map(port_sel, hash_type);
err = mlx5_lag_create_definers(ldev, hash_type, ports);
if (err)
- return err;
+ goto clear_port_sel;
if (port_sel->tunnel) {
err = mlx5_lag_create_inner_ttc_table(ldev);
@@ -549,6 +549,8 @@ destroy_inner:
mlx5_destroy_ttc_table(port_sel->inner.ttc);
destroy_definers:
mlx5_lag_destroy_definers(ldev);
+clear_port_sel:
+ memset(port_sel, 0, sizeof(*port_sel));
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
index a96be98be032..b96909fbeb12 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
@@ -257,6 +257,7 @@ static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table,
return 0;
esw_err:
+ mlx5_sf_function_id_erase(table, sf);
mlx5_sf_free(table, sf);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wc.c b/drivers/net/ethernet/mellanox/mlx5/core/wc.c
index 1bed75eca97d..740b719e7072 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wc.c
@@ -382,6 +382,7 @@ err_alloc_bfreg:
bool mlx5_wc_support_get(struct mlx5_core_dev *mdev)
{
+ struct mutex *wc_state_lock = &mdev->wc_state_lock;
struct mlx5_core_dev *parent = NULL;
if (!MLX5_CAP_GEN(mdev, bf)) {
@@ -400,32 +401,31 @@ bool mlx5_wc_support_get(struct mlx5_core_dev *mdev)
*/
goto out;
- mutex_lock(&mdev->wc_state_lock);
-
- if (mdev->wc_state != MLX5_WC_STATE_UNINITIALIZED)
- goto unlock;
-
#ifdef CONFIG_MLX5_SF
- if (mlx5_core_is_sf(mdev))
+ if (mlx5_core_is_sf(mdev)) {
parent = mdev->priv.parent_mdev;
+ wc_state_lock = &parent->wc_state_lock;
+ }
#endif
- if (parent) {
- mutex_lock(&parent->wc_state_lock);
+ mutex_lock(wc_state_lock);
+ if (mdev->wc_state != MLX5_WC_STATE_UNINITIALIZED)
+ goto unlock;
+
+ if (parent) {
mlx5_core_test_wc(parent);
mlx5_core_dbg(mdev, "parent set wc_state=%d\n",
parent->wc_state);
mdev->wc_state = parent->wc_state;
- mutex_unlock(&parent->wc_state_lock);
+ } else {
+ mlx5_core_test_wc(mdev);
}
- mlx5_core_test_wc(mdev);
-
unlock:
- mutex_unlock(&mdev->wc_state_lock);
+ mutex_unlock(wc_state_lock);
out:
mlx5_core_dbg(mdev, "wc_state=%d\n", mdev->wc_state);
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 2dc0c6ad54be..be95336ce089 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -1656,9 +1656,9 @@ static int __init mana_driver_init(void)
static void __exit mana_driver_exit(void)
{
- debugfs_remove(mana_debugfs_root);
-
pci_unregister_driver(&mana_driver);
+
+ debugfs_remove(mana_debugfs_root);
}
module_init(mana_driver_init);
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 9d97cd281f18..c03558adda91 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -458,7 +458,8 @@ int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data,
map_id_full = be64_to_cpu(cbe->map_ptr);
map_id = map_id_full;
- if (len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size)
+ if (size_add(pkt_size, data_size) > INT_MAX ||
+ len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size)
return -EINVAL;
if (cbe->hdr.ver != NFP_CCM_ABI_VERSION)
return -EINVAL;
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 739707a7b40f..8a3959bb2360 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -16,7 +16,6 @@
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/ethtool.h>
-#include <linux/hwmon.h>
#include <linux/phy.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
@@ -5347,43 +5346,6 @@ static bool rtl_aspm_is_safe(struct rtl8169_private *tp)
return false;
}
-static umode_t r8169_hwmon_is_visible(const void *drvdata,
- enum hwmon_sensor_types type,
- u32 attr, int channel)
-{
- return 0444;
-}
-
-static int r8169_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
- u32 attr, int channel, long *val)
-{
- struct rtl8169_private *tp = dev_get_drvdata(dev);
- int val_raw;
-
- val_raw = phy_read_paged(tp->phydev, 0xbd8, 0x12) & 0x3ff;
- if (val_raw >= 512)
- val_raw -= 1024;
-
- *val = 1000 * val_raw / 2;
-
- return 0;
-}
-
-static const struct hwmon_ops r8169_hwmon_ops = {
- .is_visible = r8169_hwmon_is_visible,
- .read = r8169_hwmon_read,
-};
-
-static const struct hwmon_channel_info * const r8169_hwmon_info[] = {
- HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT),
- NULL
-};
-
-static const struct hwmon_chip_info r8169_hwmon_chip_info = {
- .ops = &r8169_hwmon_ops,
- .info = r8169_hwmon_info,
-};
-
static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
struct rtl8169_private *tp;
@@ -5563,12 +5525,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (rc)
return rc;
- /* The temperature sensor is available from RTl8125B */
- if (IS_REACHABLE(CONFIG_HWMON) && tp->mac_version >= RTL_GIGA_MAC_VER_63)
- /* ignore errors */
- devm_hwmon_device_register_with_info(&pdev->dev, "nic_temp", tp,
- &r8169_hwmon_chip_info,
- NULL);
rc = register_netdev(dev);
if (rc)
return rc;
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index ac0f093f647a..bc395294a32d 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -2763,6 +2763,7 @@ static const struct ravb_hw_info ravb_rzv2m_hw_info = {
.net_features = NETIF_F_RXCSUM,
.stats_len = ARRAY_SIZE(ravb_gstrings_stats),
.tccr_mask = TCCR_TSRQ0 | TCCR_TSRQ1 | TCCR_TSRQ2 | TCCR_TSRQ3,
+ .tx_max_frame_size = SZ_2K,
.rx_max_frame_size = SZ_2K,
.rx_buffer_size = SZ_2K +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c
index 52e4e350b734..5cc72a91f220 100644
--- a/drivers/net/ethernet/ti/cpsw_ale.c
+++ b/drivers/net/ethernet/ti/cpsw_ale.c
@@ -127,15 +127,15 @@ struct cpsw_ale_dev_id {
static inline int cpsw_ale_get_field(u32 *ale_entry, u32 start, u32 bits)
{
- int idx, idx2;
+ int idx, idx2, index;
u32 hi_val = 0;
idx = start / 32;
idx2 = (start + bits - 1) / 32;
/* Check if bits to be fetched exceed a word */
if (idx != idx2) {
- idx2 = 2 - idx2; /* flip */
- hi_val = ale_entry[idx2] << ((idx2 * 32) - start);
+ index = 2 - idx2; /* flip */
+ hi_val = ale_entry[index] << ((idx2 * 32) - start);
}
start -= idx * 32;
idx = 2 - idx; /* flip */
@@ -145,16 +145,16 @@ static inline int cpsw_ale_get_field(u32 *ale_entry, u32 start, u32 bits)
static inline void cpsw_ale_set_field(u32 *ale_entry, u32 start, u32 bits,
u32 value)
{
- int idx, idx2;
+ int idx, idx2, index;
value &= BITMASK(bits);
idx = start / 32;
idx2 = (start + bits - 1) / 32;
/* Check if bits to be set exceed a word */
if (idx != idx2) {
- idx2 = 2 - idx2; /* flip */
- ale_entry[idx2] &= ~(BITMASK(bits + start - (idx2 * 32)));
- ale_entry[idx2] |= (value >> ((idx2 * 32) - start));
+ index = 2 - idx2; /* flip */
+ ale_entry[index] &= ~(BITMASK(bits + start - (idx2 * 32)));
+ ale_entry[index] |= (value >> ((idx2 * 32) - start));
}
start -= idx * 32;
idx = 2 - idx; /* flip */
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 0f4b02fe6f85..ae743991117c 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -2056,6 +2056,12 @@ axienet_ethtools_set_coalesce(struct net_device *ndev,
return -EBUSY;
}
+ if (ecoalesce->rx_max_coalesced_frames > 255 ||
+ ecoalesce->tx_max_coalesced_frames > 255) {
+ NL_SET_ERR_MSG(extack, "frames must be less than 256");
+ return -EINVAL;
+ }
+
if (ecoalesce->rx_max_coalesced_frames)
lp->coalesce_count_rx = ecoalesce->rx_max_coalesced_frames;
if (ecoalesce->rx_coalesce_usecs)
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 89a996ad8cd0..fbabada7d3ba 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -1524,8 +1524,8 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
goto out_encap;
}
- gn = net_generic(dev_net(dev), gtp_net_id);
- list_add_rcu(&gtp->list, &gn->gtp_dev_list);
+ gn = net_generic(src_net, gtp_net_id);
+ list_add(&gtp->list, &gn->gtp_dev_list);
dev->priv_destructor = gtp_destructor;
netdev_dbg(dev, "registered new GTP interface\n");
@@ -1551,7 +1551,7 @@ static void gtp_dellink(struct net_device *dev, struct list_head *head)
hlist_for_each_entry_safe(pctx, next, &gtp->tid_hash[i], hlist_tid)
pdp_context_delete(pctx);
- list_del_rcu(&gtp->list);
+ list_del(&gtp->list);
unregister_netdevice_queue(dev, head);
}
@@ -2271,16 +2271,19 @@ static int gtp_genl_dump_pdp(struct sk_buff *skb,
struct gtp_dev *last_gtp = (struct gtp_dev *)cb->args[2], *gtp;
int i, j, bucket = cb->args[0], skip = cb->args[1];
struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
struct pdp_ctx *pctx;
- struct gtp_net *gn;
-
- gn = net_generic(net, gtp_net_id);
if (cb->args[4])
return 0;
rcu_read_lock();
- list_for_each_entry_rcu(gtp, &gn->gtp_dev_list, list) {
+ for_each_netdev_rcu(net, dev) {
+ if (dev->rtnl_link_ops != &gtp_link_ops)
+ continue;
+
+ gtp = netdev_priv(dev);
+
if (last_gtp && last_gtp != gtp)
continue;
else
@@ -2475,9 +2478,14 @@ static void __net_exit gtp_net_exit_batch_rtnl(struct list_head *net_list,
list_for_each_entry(net, net_list, exit_list) {
struct gtp_net *gn = net_generic(net, gtp_net_id);
- struct gtp_dev *gtp;
+ struct gtp_dev *gtp, *gtp_next;
+ struct net_device *dev;
+
+ for_each_netdev(net, dev)
+ if (dev->rtnl_link_ops == &gtp_link_ops)
+ gtp_dellink(dev, dev_to_kill);
- list_for_each_entry(gtp, &gn->gtp_dev_list, list)
+ list_for_each_entry_safe(gtp, gtp_next, &gn->gtp_dev_list, list)
gtp_dellink(gtp->dev, dev_to_kill);
}
}
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 7246a910728d..3059435af596 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -684,7 +684,9 @@ static int xpcs_config_aneg_c37_sgmii(struct dw_xpcs *xpcs,
if (ret < 0)
return ret;
- mask = DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW;
+ val = 0;
+ mask = DW_VR_MII_DIG_CTRL1_2G5_EN | DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW;
+
if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED)
val = DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW;
diff --git a/drivers/net/pfcp.c b/drivers/net/pfcp.c
index 69434fd13f96..68d0d9e92a22 100644
--- a/drivers/net/pfcp.c
+++ b/drivers/net/pfcp.c
@@ -206,8 +206,8 @@ static int pfcp_newlink(struct net *net, struct net_device *dev,
goto exit_del_pfcp_sock;
}
- pn = net_generic(dev_net(dev), pfcp_net_id);
- list_add_rcu(&pfcp->list, &pn->pfcp_dev_list);
+ pn = net_generic(net, pfcp_net_id);
+ list_add(&pfcp->list, &pn->pfcp_dev_list);
netdev_dbg(dev, "registered new PFCP interface\n");
@@ -224,7 +224,7 @@ static void pfcp_dellink(struct net_device *dev, struct list_head *head)
{
struct pfcp_dev *pfcp = netdev_priv(dev);
- list_del_rcu(&pfcp->list);
+ list_del(&pfcp->list);
unregister_netdevice_queue(dev, head);
}
@@ -247,11 +247,16 @@ static int __net_init pfcp_net_init(struct net *net)
static void __net_exit pfcp_net_exit(struct net *net)
{
struct pfcp_net *pn = net_generic(net, pfcp_net_id);
- struct pfcp_dev *pfcp;
+ struct pfcp_dev *pfcp, *pfcp_next;
+ struct net_device *dev;
LIST_HEAD(list);
rtnl_lock();
- list_for_each_entry(pfcp, &pn->pfcp_dev_list, list)
+ for_each_netdev(net, dev)
+ if (dev->rtnl_link_ops == &pfcp_link_ops)
+ pfcp_dellink(dev, &list);
+
+ list_for_each_entry_safe(pfcp, pfcp_next, &pn->pfcp_dev_list, list)
pfcp_dellink(pfcp->dev, &list);
unregister_netdevice_many(&list);
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index c858270141bc..c39a426ebf52 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -174,12 +174,4 @@ static inline void sk_mark_napi_id_once(struct sock *sk,
#endif
}
-static inline void sk_mark_napi_id_once_xdp(struct sock *sk,
- const struct xdp_buff *xdp)
-{
-#ifdef CONFIG_NET_RX_BUSY_POLL
- __sk_mark_napi_id_once(sk, xdp->rxq->napi_id);
-#endif
-}
-
#endif /* _LINUX_NET_BUSY_POLL_H */
diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
index 793e6fd78bc5..60a5347922be 100644
--- a/include/net/page_pool/helpers.h
+++ b/include/net/page_pool/helpers.h
@@ -294,7 +294,7 @@ static inline long page_pool_unref_page(struct page *page, long nr)
static inline void page_pool_ref_netmem(netmem_ref netmem)
{
- atomic_long_inc(&netmem_to_page(netmem)->pp_ref_count);
+ atomic_long_inc(netmem_get_pp_ref_count_ref(netmem));
}
static inline void page_pool_ref_page(struct page *page)
diff --git a/include/net/xdp.h b/include/net/xdp.h
index e6770dd40c91..b5b10f2b88e5 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -62,7 +62,6 @@ struct xdp_rxq_info {
u32 queue_index;
u32 reg_state;
struct xdp_mem_info mem;
- unsigned int napi_id;
u32 frag_size;
} ____cacheline_aligned; /* perf critical, avoid false-sharing */
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 40085afd9160..7a7316d9c0da 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -59,15 +59,6 @@ static inline void xsk_pool_fill_cb(struct xsk_buff_pool *pool,
xp_fill_cb(pool, desc);
}
-static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool)
-{
-#ifdef CONFIG_NET_RX_BUSY_POLL
- return pool->heads[0].xdp.rxq->napi_id;
-#else
- return 0;
-#endif
-}
-
static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool,
unsigned long attrs)
{
@@ -306,11 +297,6 @@ static inline void xsk_pool_fill_cb(struct xsk_buff_pool *pool,
{
}
-static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool)
-{
- return 0;
-}
-
static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool,
unsigned long attrs)
{
diff --git a/net/core/filter.c b/net/core/filter.c
index 834614071727..2fb45a86f3dd 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -11251,6 +11251,7 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
bool is_sockarray = map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY;
struct sock_reuseport *reuse;
struct sock *selected_sk;
+ int err;
selected_sk = map->ops->map_lookup_elem(map, key);
if (!selected_sk)
@@ -11258,10 +11259,6 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
if (!reuse) {
- /* Lookup in sock_map can return TCP ESTABLISHED sockets. */
- if (sk_is_refcounted(selected_sk))
- sock_put(selected_sk);
-
/* reuseport_array has only sk with non NULL sk_reuseport_cb.
* The only (!reuse) case here is - the sk has already been
* unhashed (e.g. by close()), so treat it as -ENOENT.
@@ -11269,24 +11266,33 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
* Other maps (e.g. sock_map) do not provide this guarantee and
* the sk may never be in the reuseport group to begin with.
*/
- return is_sockarray ? -ENOENT : -EINVAL;
+ err = is_sockarray ? -ENOENT : -EINVAL;
+ goto error;
}
if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) {
struct sock *sk = reuse_kern->sk;
- if (sk->sk_protocol != selected_sk->sk_protocol)
- return -EPROTOTYPE;
- else if (sk->sk_family != selected_sk->sk_family)
- return -EAFNOSUPPORT;
-
- /* Catch all. Likely bound to a different sockaddr. */
- return -EBADFD;
+ if (sk->sk_protocol != selected_sk->sk_protocol) {
+ err = -EPROTOTYPE;
+ } else if (sk->sk_family != selected_sk->sk_family) {
+ err = -EAFNOSUPPORT;
+ } else {
+ /* Catch all. Likely bound to a different sockaddr. */
+ err = -EBADFD;
+ }
+ goto error;
}
reuse_kern->selected_sk = selected_sk;
return 0;
+error:
+ /* Lookup in sock_map can return TCP ESTABLISHED sockets. */
+ if (sk_is_refcounted(selected_sk))
+ sock_put(selected_sk);
+
+ return err;
}
static const struct bpf_func_proto sk_select_reuseport_proto = {
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
index a89cbd8d87c3..996ac6a449eb 100644
--- a/net/core/netdev-genl-gen.c
+++ b/net/core/netdev-genl-gen.c
@@ -197,6 +197,16 @@ static const struct genl_multicast_group netdev_nl_mcgrps[] = {
[NETDEV_NLGRP_PAGE_POOL] = { "page-pool", },
};
+static void __netdev_nl_sock_priv_init(void *priv)
+{
+ netdev_nl_sock_priv_init(priv);
+}
+
+static void __netdev_nl_sock_priv_destroy(void *priv)
+{
+ netdev_nl_sock_priv_destroy(priv);
+}
+
struct genl_family netdev_nl_family __ro_after_init = {
.name = NETDEV_FAMILY_NAME,
.version = NETDEV_FAMILY_VERSION,
@@ -208,6 +218,6 @@ struct genl_family netdev_nl_family __ro_after_init = {
.mcgrps = netdev_nl_mcgrps,
.n_mcgrps = ARRAY_SIZE(netdev_nl_mcgrps),
.sock_priv_size = sizeof(struct list_head),
- .sock_priv_init = (void *)netdev_nl_sock_priv_init,
- .sock_priv_destroy = (void *)netdev_nl_sock_priv_destroy,
+ .sock_priv_init = __netdev_nl_sock_priv_init,
+ .sock_priv_destroy = __netdev_nl_sock_priv_destroy,
};
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 2e459b9d88eb..96a6ed37d4cc 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -627,6 +627,8 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
const struct net_device_ops *ops;
int err;
+ skb_queue_head_init(&np->skb_pool);
+
if (ndev->priv_flags & IFF_DISABLE_NETPOLL) {
np_err(np, "%s doesn't support polling, aborting\n",
ndev->name);
@@ -662,6 +664,9 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
strscpy(np->dev_name, ndev->name, IFNAMSIZ);
npinfo->netpoll = np;
+ /* fill up the skb queue */
+ refill_skbs(np);
+
/* last thing to do is link it to the net device structure */
rcu_assign_pointer(ndev->npinfo, npinfo);
@@ -681,8 +686,6 @@ int netpoll_setup(struct netpoll *np)
struct in_device *in_dev;
int err;
- skb_queue_head_init(&np->skb_pool);
-
rtnl_lock();
if (np->dev_name[0]) {
struct net *net = current->nsproxy->net_ns;
@@ -782,9 +785,6 @@ put_noaddr:
}
}
- /* fill up the skb queue */
- refill_skbs(np);
-
err = __netpoll_setup(np, ndev);
if (err)
goto flush;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 7e23cacbe66e..4cb547fae91f 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -851,6 +851,9 @@ static ssize_t get_imix_entries(const char __user *buffer,
unsigned long weight;
unsigned long size;
+ if (pkt_dev->n_imix_entries >= MAX_IMIX_ENTRIES)
+ return -E2BIG;
+
len = num_arg(&buffer[i], max_digits, &size);
if (len < 0)
return len;
@@ -880,9 +883,6 @@ static ssize_t get_imix_entries(const char __user *buffer,
i++;
pkt_dev->n_imix_entries++;
-
- if (pkt_dev->n_imix_entries > MAX_IMIX_ENTRIES)
- return -E2BIG;
} while (c == ' ');
return i;
diff --git a/net/core/xdp.c b/net/core/xdp.c
index bcc5551c6424..2315feed94ef 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -186,7 +186,6 @@ int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
xdp_rxq_info_init(xdp_rxq);
xdp_rxq->dev = dev;
xdp_rxq->queue_index = queue_index;
- xdp_rxq->napi_id = napi_id;
xdp_rxq->frag_size = frag_size;
xdp_rxq->reg_state = REG_STATE_REGISTERED;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 0fbec3509618..e1564b95fab0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2445,6 +2445,7 @@ martian_destination:
net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
&daddr, &saddr, dev->name);
#endif
+ goto out;
e_nobufs:
reason = SKB_DROP_REASON_NOMEM;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e8953e88efef..86d282618515 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -533,7 +533,7 @@ begin:
return NULL;
}
-/* In hash4, rehash can happen in connect(), where hash4_cnt keeps unchanged. */
+/* udp_rehash4() only checks hslot4, and hash4_cnt is not processed. */
static void udp_rehash4(struct udp_table *udptable, struct sock *sk,
u16 newhash4)
{
@@ -582,15 +582,13 @@ void udp_lib_hash4(struct sock *sk, u16 hash)
struct net *net = sock_net(sk);
struct udp_table *udptable;
- /* Connected udp socket can re-connect to another remote address,
- * so rehash4 is needed.
+ /* Connected udp socket can re-connect to another remote address, which
+ * will be handled by rehash. Thus no need to redo hash4 here.
*/
- udptable = net->ipv4.udp_table;
- if (udp_hashed4(sk)) {
- udp_rehash4(udptable, sk, hash);
+ if (udp_hashed4(sk))
return;
- }
+ udptable = net->ipv4.udp_table;
hslot = udp_hashslot(udptable, net, udp_sk(sk)->udp_port_hash);
hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
hslot4 = udp_hashslot4(udptable, hash);
@@ -2173,14 +2171,14 @@ void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4)
struct udp_table *udptable = udp_get_table_prot(sk);
struct udp_hslot *hslot, *hslot2, *nhslot2;
+ hslot = udp_hashslot(udptable, sock_net(sk),
+ udp_sk(sk)->udp_port_hash);
hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
nhslot2 = udp_hashslot2(udptable, newhash);
udp_sk(sk)->udp_portaddr_hash = newhash;
if (hslot2 != nhslot2 ||
rcu_access_pointer(sk->sk_reuseport_cb)) {
- hslot = udp_hashslot(udptable, sock_net(sk),
- udp_sk(sk)->udp_port_hash);
/* we must lock primary chain too */
spin_lock_bh(&hslot->lock);
if (rcu_access_pointer(sk->sk_reuseport_cb))
@@ -2199,19 +2197,29 @@ void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4)
spin_unlock(&nhslot2->lock);
}
- if (udp_hashed4(sk)) {
- udp_rehash4(udptable, sk, newhash4);
+ spin_unlock_bh(&hslot->lock);
+ }
+
+ /* Now process hash4 if necessary:
+ * (1) update hslot4;
+ * (2) update hslot2->hash4_cnt.
+ * Note that hslot2/hslot4 should be checked separately, as
+ * either of them may change with the other unchanged.
+ */
+ if (udp_hashed4(sk)) {
+ spin_lock_bh(&hslot->lock);
- if (hslot2 != nhslot2) {
- spin_lock(&hslot2->lock);
- udp_hash4_dec(hslot2);
- spin_unlock(&hslot2->lock);
+ udp_rehash4(udptable, sk, newhash4);
+ if (hslot2 != nhslot2) {
+ spin_lock(&hslot2->lock);
+ udp_hash4_dec(hslot2);
+ spin_unlock(&hslot2->lock);
- spin_lock(&nhslot2->lock);
- udp_hash4_inc(nhslot2);
- spin_unlock(&nhslot2->lock);
- }
+ spin_lock(&nhslot2->lock);
+ udp_hash4_inc(nhslot2);
+ spin_unlock(&nhslot2->lock);
}
+
spin_unlock_bh(&hslot->lock);
}
}
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index a62bc874bf1e..123f3f297284 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -607,7 +607,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
}
opts->ext_copy.use_ack = 1;
opts->suboptions = OPTION_MPTCP_DSS;
- WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk));
/* Add kind/length/subtype/flag overhead if mapping is not populated */
if (dss_size == 0)
@@ -1288,7 +1287,7 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
}
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT);
}
- return;
+ goto update_wspace;
}
if (rcv_wnd_new != rcv_wnd_old) {
@@ -1313,6 +1312,9 @@ raise_win:
th->window = htons(new_win);
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDSHARED);
}
+
+update_wspace:
+ WRITE_ONCE(msk->old_wspace, tp->rcv_wnd);
}
__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index a93e661ef5c4..73526f1d768f 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -760,10 +760,15 @@ static inline u64 mptcp_data_avail(const struct mptcp_sock *msk)
static inline bool mptcp_epollin_ready(const struct sock *sk)
{
+ u64 data_avail = mptcp_data_avail(mptcp_sk(sk));
+
+ if (!data_avail)
+ return false;
+
/* mptcp doesn't have to deal with small skbs in the receive queue,
- * at it can always coalesce them
+ * as it can always coalesce them
*/
- return (mptcp_data_avail(mptcp_sk(sk)) >= sk->sk_rcvlowat) ||
+ return (data_avail >= sk->sk_rcvlowat) ||
(mem_cgroup_sockets_enabled && sk->sk_memcg &&
mem_cgroup_under_socket_pressure(sk->sk_memcg)) ||
READ_ONCE(tcp_memory_pressure);
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index ef0f8f73826f..4e0842df5234 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -289,6 +289,7 @@ enum {
ncsi_dev_state_config_sp = 0x0301,
ncsi_dev_state_config_cis,
ncsi_dev_state_config_oem_gma,
+ ncsi_dev_state_config_apply_mac,
ncsi_dev_state_config_clear_vids,
ncsi_dev_state_config_svf,
ncsi_dev_state_config_ev,
@@ -322,6 +323,7 @@ struct ncsi_dev_priv {
#define NCSI_DEV_RESHUFFLE 4
#define NCSI_DEV_RESET 8 /* Reset state of NC */
unsigned int gma_flag; /* OEM GMA flag */
+ struct sockaddr pending_mac; /* MAC address received from GMA */
spinlock_t lock; /* Protect the NCSI device */
unsigned int package_probe_id;/* Current ID during probe */
unsigned int package_num; /* Number of packages */
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 5cf55bde366d..bf276eaf9330 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -1038,7 +1038,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
: ncsi_dev_state_config_clear_vids;
break;
case ncsi_dev_state_config_oem_gma:
- nd->state = ncsi_dev_state_config_clear_vids;
+ nd->state = ncsi_dev_state_config_apply_mac;
nca.package = np->id;
nca.channel = nc->id;
@@ -1050,10 +1050,22 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
nca.type = NCSI_PKT_CMD_OEM;
ret = ncsi_gma_handler(&nca, nc->version.mf_id);
}
- if (ret < 0)
+ if (ret < 0) {
+ nd->state = ncsi_dev_state_config_clear_vids;
schedule_work(&ndp->work);
+ }
break;
+ case ncsi_dev_state_config_apply_mac:
+ rtnl_lock();
+ ret = dev_set_mac_address(dev, &ndp->pending_mac, NULL);
+ rtnl_unlock();
+ if (ret < 0)
+ netdev_warn(dev, "NCSI: 'Writing MAC address to device failed\n");
+
+ nd->state = ncsi_dev_state_config_clear_vids;
+
+ fallthrough;
case ncsi_dev_state_config_clear_vids:
case ncsi_dev_state_config_svf:
case ncsi_dev_state_config_ev:
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index e28be33bdf2c..14bd66909ca4 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -628,16 +628,14 @@ static int ncsi_rsp_handler_snfc(struct ncsi_request *nr)
static int ncsi_rsp_handler_oem_gma(struct ncsi_request *nr, int mfr_id)
{
struct ncsi_dev_priv *ndp = nr->ndp;
+ struct sockaddr *saddr = &ndp->pending_mac;
struct net_device *ndev = ndp->ndev.dev;
struct ncsi_rsp_oem_pkt *rsp;
- struct sockaddr saddr;
u32 mac_addr_off = 0;
- int ret = 0;
/* Get the response header */
rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp);
- saddr.sa_family = ndev->type;
ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
if (mfr_id == NCSI_OEM_MFR_BCM_ID)
mac_addr_off = BCM_MAC_ADDR_OFFSET;
@@ -646,22 +644,17 @@ static int ncsi_rsp_handler_oem_gma(struct ncsi_request *nr, int mfr_id)
else if (mfr_id == NCSI_OEM_MFR_INTEL_ID)
mac_addr_off = INTEL_MAC_ADDR_OFFSET;
- memcpy(saddr.sa_data, &rsp->data[mac_addr_off], ETH_ALEN);
+ saddr->sa_family = ndev->type;
+ memcpy(saddr->sa_data, &rsp->data[mac_addr_off], ETH_ALEN);
if (mfr_id == NCSI_OEM_MFR_BCM_ID || mfr_id == NCSI_OEM_MFR_INTEL_ID)
- eth_addr_inc((u8 *)saddr.sa_data);
- if (!is_valid_ether_addr((const u8 *)saddr.sa_data))
+ eth_addr_inc((u8 *)saddr->sa_data);
+ if (!is_valid_ether_addr((const u8 *)saddr->sa_data))
return -ENXIO;
/* Set the flag for GMA command which should only be called once */
ndp->gma_flag = 1;
- rtnl_lock();
- ret = dev_set_mac_address(ndev, &saddr, NULL);
- rtnl_unlock();
- if (ret < 0)
- netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n");
-
- return ret;
+ return 0;
}
/* Response handler for Mellanox card */
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 16e260014684..704c858cf209 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -934,7 +934,9 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
{
struct vport *vport = ovs_vport_rcu(dp, out_port);
- if (likely(vport && netif_carrier_ok(vport->dev))) {
+ if (likely(vport &&
+ netif_running(vport->dev) &&
+ netif_carrier_ok(vport->dev))) {
u16 mru = OVS_CB(skb)->mru;
u32 cutlen = OVS_CB(skb)->cutlen;
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 5cf8109f672a..fa9d1b49599b 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -491,6 +491,15 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
*/
vsk->transport->release(vsk);
vsock_deassign_transport(vsk);
+
+ /* transport's release() and destruct() can touch some socket
+ * state, since we are reassigning the socket to a new transport
+ * during vsock_connect(), let's reset these fields to have a
+ * clean state.
+ */
+ sock_reset_flag(sk, SOCK_DONE);
+ sk->sk_state = TCP_CLOSE;
+ vsk->peer_shutdown = 0;
}
/* We increase the module refcnt to prevent the transport unloading
@@ -870,6 +879,9 @@ EXPORT_SYMBOL_GPL(vsock_create_connected);
s64 vsock_stream_has_data(struct vsock_sock *vsk)
{
+ if (WARN_ON(!vsk->transport))
+ return 0;
+
return vsk->transport->stream_has_data(vsk);
}
EXPORT_SYMBOL_GPL(vsock_stream_has_data);
@@ -878,6 +890,9 @@ s64 vsock_connectible_has_data(struct vsock_sock *vsk)
{
struct sock *sk = sk_vsock(vsk);
+ if (WARN_ON(!vsk->transport))
+ return 0;
+
if (sk->sk_type == SOCK_SEQPACKET)
return vsk->transport->seqpacket_has_data(vsk);
else
@@ -887,6 +902,9 @@ EXPORT_SYMBOL_GPL(vsock_connectible_has_data);
s64 vsock_stream_has_space(struct vsock_sock *vsk)
{
+ if (WARN_ON(!vsk->transport))
+ return 0;
+
return vsk->transport->stream_has_space(vsk);
}
EXPORT_SYMBOL_GPL(vsock_stream_has_space);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 9acc13ab3f82..7f7de6d88096 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -26,6 +26,9 @@
/* Threshold for detecting small packets to copy */
#define GOOD_COPY_LEN 128
+static void virtio_transport_cancel_close_work(struct vsock_sock *vsk,
+ bool cancel_timeout);
+
static const struct virtio_transport *
virtio_transport_get_ops(struct vsock_sock *vsk)
{
@@ -1109,6 +1112,8 @@ void virtio_transport_destruct(struct vsock_sock *vsk)
{
struct virtio_vsock_sock *vvs = vsk->trans;
+ virtio_transport_cancel_close_work(vsk, true);
+
kfree(vvs);
vsk->trans = NULL;
}
@@ -1204,17 +1209,11 @@ static void virtio_transport_wait_close(struct sock *sk, long timeout)
}
}
-static void virtio_transport_do_close(struct vsock_sock *vsk,
- bool cancel_timeout)
+static void virtio_transport_cancel_close_work(struct vsock_sock *vsk,
+ bool cancel_timeout)
{
struct sock *sk = sk_vsock(vsk);
- sock_set_flag(sk, SOCK_DONE);
- vsk->peer_shutdown = SHUTDOWN_MASK;
- if (vsock_stream_has_data(vsk) <= 0)
- sk->sk_state = TCP_CLOSING;
- sk->sk_state_change(sk);
-
if (vsk->close_work_scheduled &&
(!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
vsk->close_work_scheduled = false;
@@ -1226,6 +1225,20 @@ static void virtio_transport_do_close(struct vsock_sock *vsk,
}
}
+static void virtio_transport_do_close(struct vsock_sock *vsk,
+ bool cancel_timeout)
+{
+ struct sock *sk = sk_vsock(vsk);
+
+ sock_set_flag(sk, SOCK_DONE);
+ vsk->peer_shutdown = SHUTDOWN_MASK;
+ if (vsock_stream_has_data(vsk) <= 0)
+ sk->sk_state = TCP_CLOSING;
+ sk->sk_state_change(sk);
+
+ virtio_transport_cancel_close_work(vsk, cancel_timeout);
+}
+
static void virtio_transport_close_timeout(struct work_struct *work)
{
struct vsock_sock *vsk =
@@ -1628,8 +1641,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
lock_sock(sk);
- /* Check if sk has been closed before lock_sock */
- if (sock_flag(sk, SOCK_DONE)) {
+ /* Check if sk has been closed or assigned to another transport before
+ * lock_sock (note: listener sockets are not assigned to any transport)
+ */
+ if (sock_flag(sk, SOCK_DONE) ||
+ (sk->sk_state != TCP_LISTEN && vsk->transport != &t->transport)) {
(void)virtio_transport_reset_no_sock(t, skb);
release_sock(sk);
sock_put(sk);
diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c
index 4aa6e74ec295..f201d9eca1df 100644
--- a/net/vmw_vsock/vsock_bpf.c
+++ b/net/vmw_vsock/vsock_bpf.c
@@ -77,6 +77,7 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
size_t len, int flags, int *addr_len)
{
struct sk_psock *psock;
+ struct vsock_sock *vsk;
int copied;
psock = sk_psock_get(sk);
@@ -84,6 +85,13 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
return __vsock_recvmsg(sk, msg, len, flags);
lock_sock(sk);
+ vsk = vsock_sk(sk);
+
+ if (!vsk->transport) {
+ copied = -ENODEV;
+ goto out;
+ }
+
if (vsock_has_data(sk, psock) && sk_psock_queue_empty(psock)) {
release_sock(sk);
sk_psock_put(sk, psock);
@@ -108,6 +116,7 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
}
+out:
release_sock(sk);
sk_psock_put(sk, psock);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 3fa70286c846..89d2bef96469 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -322,7 +322,6 @@ static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
return -ENOSPC;
}
- sk_mark_napi_id_once_xdp(&xs->sk, xdp);
return 0;
}
@@ -908,11 +907,8 @@ static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len
if (unlikely(!xs->tx))
return -ENOBUFS;
- if (sk_can_busy_loop(sk)) {
- if (xs->zc)
- __sk_mark_napi_id_once(sk, xsk_pool_get_napi_id(xs->pool));
+ if (sk_can_busy_loop(sk))
sk_busy_loop(sk, 1); /* only support non-blocking sockets */
- }
if (xs->zc && xsk_no_wakeup(sk))
return 0;
@@ -1298,6 +1294,14 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xs->queue_id = qid;
xp_add_xsk(xs->pool, xs);
+ if (xs->zc && qid < dev->real_num_rx_queues) {
+ struct netdev_rx_queue *rxq;
+
+ rxq = __netif_get_rx_queue(dev, qid);
+ if (rxq->napi)
+ __sk_mark_napi_id_once(sk, rxq->napi->napi_id);
+ }
+
out_unlock:
if (err) {
dev_put(dev);
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
index d8201c4b1520..6750fdb42564 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/ynl-gen-c.py
@@ -2384,6 +2384,17 @@ def print_kernel_family_struct_src(family, cw):
if not kernel_can_gen_family_struct(family):
return
+ if 'sock-priv' in family.kernel_family:
+ # Generate "trampolines" to make CFI happy
+ cw.write_func("static void", f"__{family.c_name}_nl_sock_priv_init",
+ [f"{family.c_name}_nl_sock_priv_init(priv);"],
+ ["void *priv"])
+ cw.nl()
+ cw.write_func("static void", f"__{family.c_name}_nl_sock_priv_destroy",
+ [f"{family.c_name}_nl_sock_priv_destroy(priv);"],
+ ["void *priv"])
+ cw.nl()
+
cw.block_start(f"struct genl_family {family.ident_name}_nl_family __ro_after_init =")
cw.p('.name\t\t= ' + family.fam_key + ',')
cw.p('.version\t= ' + family.ver_key + ',')
@@ -2401,9 +2412,8 @@ def print_kernel_family_struct_src(family, cw):
cw.p(f'.n_mcgrps\t= ARRAY_SIZE({family.c_name}_nl_mcgrps),')
if 'sock-priv' in family.kernel_family:
cw.p(f'.sock_priv_size\t= sizeof({family.kernel_family["sock-priv"]}),')
- # Force cast here, actual helpers take pointer to the real type.
- cw.p(f'.sock_priv_init\t= (void *){family.c_name}_nl_sock_priv_init,')
- cw.p(f'.sock_priv_destroy = (void *){family.c_name}_nl_sock_priv_destroy,')
+ cw.p(f'.sock_priv_init\t= __{family.c_name}_nl_sock_priv_init,')
+ cw.p(f'.sock_priv_destroy = __{family.c_name}_nl_sock_priv_destroy,')
cw.block_end(';')
diff --git a/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh b/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh
index fd13c8cfb7a8..b411fe66510f 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh
@@ -58,9 +58,12 @@ for root in mq mqprio; do
ethtool -L $NDEV combined 4
n_child_assert 4 "One real queue, rest default"
- # Graft some
- tcq replace parent 100:1 handle 204:
- n_child_assert 3 "Grafted"
+ # Remove real one
+ tcq del parent 100:4 handle 204:
+
+ # Replace default with pfifo
+ tcq replace parent 100:1 handle 205: pfifo limit 1000
+ n_child_assert 3 "Deleting real one, replacing default one with pfifo"
ethtool -L $NDEV combined 1
n_child_assert 1 "Grafted, one"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 4209b9569039..414addef9a45 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -25,6 +25,8 @@
#include <sys/types.h>
#include <sys/mman.h>
+#include <arpa/inet.h>
+
#include <netdb.h>
#include <netinet/in.h>
@@ -1211,23 +1213,42 @@ static void parse_setsock_options(const char *name)
exit(1);
}
-void xdisconnect(int fd, int addrlen)
+void xdisconnect(int fd)
{
- struct sockaddr_storage empty;
+ socklen_t addrlen = sizeof(struct sockaddr_storage);
+ struct sockaddr_storage addr, empty;
int msec_sleep = 10;
- int queued = 1;
- int i;
+ void *raw_addr;
+ int i, cmdlen;
+ char cmd[128];
+
+ /* get the local address and convert it to string */
+ if (getsockname(fd, (struct sockaddr *)&addr, &addrlen) < 0)
+ xerror("getsockname");
+
+ if (addr.ss_family == AF_INET)
+ raw_addr = &(((struct sockaddr_in *)&addr)->sin_addr);
+ else if (addr.ss_family == AF_INET6)
+ raw_addr = &(((struct sockaddr_in6 *)&addr)->sin6_addr);
+ else
+ xerror("bad family");
+
+ strcpy(cmd, "ss -M | grep -q ");
+ cmdlen = strlen(cmd);
+ if (!inet_ntop(addr.ss_family, raw_addr, &cmd[cmdlen],
+ sizeof(cmd) - cmdlen))
+ xerror("inet_ntop");
shutdown(fd, SHUT_WR);
- /* while until the pending data is completely flushed, the later
+ /*
+ * wait until the pending data is completely flushed and all
+ * the MPTCP sockets reached the closed status.
* disconnect will bypass/ignore/drop any pending data.
*/
for (i = 0; ; i += msec_sleep) {
- if (ioctl(fd, SIOCOUTQ, &queued) < 0)
- xerror("can't query out socket queue: %d", errno);
-
- if (!queued)
+ /* closed socket are not listed by 'ss' */
+ if (system(cmd) != 0)
break;
if (i > poll_timeout)
@@ -1281,9 +1302,9 @@ again:
return ret;
if (cfg_truncate > 0) {
- xdisconnect(fd, peer->ai_addrlen);
+ xdisconnect(fd);
} else if (--cfg_repeat > 0) {
- xdisconnect(fd, peer->ai_addrlen);
+ xdisconnect(fd);
/* the socket could be unblocking at this point, we need the
* connect to be blocking