diff options
432 files changed, 13338 insertions, 4861 deletions
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index 4150f74c521a..f86b5e1623c6 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -365,6 +365,15 @@ new netns has been created. Default : 0 (for compatibility reasons) +txrehash +-------- + +Controls default hash rethink behaviour on listening socket when SO_TXREHASH +option is set to SOCK_TXREHASH_DEFAULT (i. e. not overridden by setsockopt). + +If set to 1 (default), hash rethink is performed on listening socket. +If set to 0, hash rethink is not performed. + 2. /proc/sys/net/unix - Parameters for Unix domain sockets ---------------------------------------------------------- diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst index 1ebf4c5c7ddc..ab08852e53ae 100644 --- a/Documentation/bpf/btf.rst +++ b/Documentation/bpf/btf.rst @@ -565,18 +565,15 @@ A map can be created with ``btf_fd`` and specified key/value type id.:: In libbpf, the map can be defined with extra annotation like below: :: - struct bpf_map_def SEC("maps") btf_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(struct ipv_counts), - .max_entries = 4, - }; - BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts); + struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct ipv_counts); + __uint(max_entries, 4); + } btf_map SEC(".maps"); -Here, the parameters for macro BPF_ANNOTATE_KV_PAIR are map name, key and -value types for the map. During ELF parsing, libbpf is able to extract -key/value type_id's and assign them to BPF_MAP_CREATE attributes -automatically. +During ELF parsing, libbpf is able to extract key/value type_id's and assign +them to BPF_MAP_CREATE attributes automatically. .. _BPF_Prog_Load: @@ -824,13 +821,12 @@ structure has bitfields. For example, for the following map,:: ___A b1:4; enum A b2:4; }; - struct bpf_map_def SEC("maps") tmpmap = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(__u32), - .value_size = sizeof(struct tmp_t), - .max_entries = 1, - }; - BPF_ANNOTATE_KV_PAIR(tmpmap, int, struct tmp_t); + struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct tmp_t); + __uint(max_entries, 1); + } tmpmap SEC(".maps"); bpftool is able to pretty print like below: :: diff --git a/Documentation/devicetree/bindings/net/cdns,macb.yaml b/Documentation/devicetree/bindings/net/cdns,macb.yaml index 8dd06db34169..6cd3d853dcba 100644 --- a/Documentation/devicetree/bindings/net/cdns,macb.yaml +++ b/Documentation/devicetree/bindings/net/cdns,macb.yaml @@ -81,6 +81,25 @@ properties: phy-handle: true + phys: + maxItems: 1 + + phy-names: + const: sgmii-phy + description: + Required with ZynqMP SoC when in SGMII mode. + Should reference PS-GTR generic PHY device for this controller + instance. See ZynqMP example. + + resets: + maxItems: 1 + description: + Recommended with ZynqMP, specify reset control for this + controller instance with zynqmp-reset driver. + + reset-names: + maxItems: 1 + fixed-link: true iommus: @@ -157,3 +176,40 @@ examples: reset-gpios = <&pioE 6 1>; }; }; + + - | + #include <dt-bindings/clock/xlnx-zynqmp-clk.h> + #include <dt-bindings/power/xlnx-zynqmp-power.h> + #include <dt-bindings/reset/xlnx-zynqmp-resets.h> + #include <dt-bindings/phy/phy.h> + + bus { + #address-cells = <2>; + #size-cells = <2>; + gem1: ethernet@ff0c0000 { + compatible = "cdns,zynqmp-gem", "cdns,gem"; + interrupt-parent = <&gic>; + interrupts = <0 59 4>, <0 59 4>; + reg = <0x0 0xff0c0000 0x0 0x1000>; + clocks = <&zynqmp_clk LPD_LSBUS>, <&zynqmp_clk GEM1_REF>, + <&zynqmp_clk GEM1_TX>, <&zynqmp_clk GEM1_RX>, + <&zynqmp_clk GEM_TSU>; + clock-names = "pclk", "hclk", "tx_clk", "rx_clk", "tsu_clk"; + #address-cells = <1>; + #size-cells = <0>; + #stream-id-cells = <1>; + iommus = <&smmu 0x875>; + power-domains = <&zynqmp_firmware PD_ETH_1>; + resets = <&zynqmp_reset ZYNQMP_RESET_GEM1>; + reset-names = "gem1_rst"; + status = "okay"; + phy-mode = "sgmii"; + phy-names = "sgmii-phy"; + phys = <&psgtr 1 PHY_TYPE_SGMII 1 1>; + fixed-link { + speed = <1000>; + full-duplex; + pause; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml b/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml index 84985f53bffd..184152087b60 100644 --- a/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml +++ b/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml @@ -42,6 +42,12 @@ properties: description: Set if the output SYNCLKO frequency should be set to 125MHz instead of 25MHz. + microchip,synclko-disable: + $ref: /schemas/types.yaml#/definitions/flag + description: + Set if the output SYNCLKO clock should be disabled. Do not mix with + microchip,synclko-125. + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/net/fsl-fman.txt b/Documentation/devicetree/bindings/net/fsl-fman.txt index 020337f3c05f..801efc7d6818 100644 --- a/Documentation/devicetree/bindings/net/fsl-fman.txt +++ b/Documentation/devicetree/bindings/net/fsl-fman.txt @@ -388,14 +388,24 @@ PROPERTIES Value type: <prop-encoded-array> Definition: A standard property. -- bus-frequency +- clocks + Usage: optional + Value type: <phandle> + Definition: A reference to the input clock of the controller + from which the MDC frequency is derived. + +- clock-frequency Usage: optional Value type: <u32> - Definition: Specifies the external MDIO bus clock speed to - be used, if different from the standard 2.5 MHz. - This may be due to the standard speed being unsupported (e.g. - due to a hardware problem), or to advertise that all relevant - components in the system support a faster speed. + Definition: Specifies the external MDC frequency, in Hertz, to + be used. Requires that the input clock is specified in the + "clocks" property. See also: mdio.yaml. + +- suppress-preamble + Usage: optional + Value type: <boolean> + Definition: Disable generation of preamble bits. See also: + mdio.yaml. - interrupts Usage: required for external MDIO diff --git a/Documentation/devicetree/bindings/net/microchip,lan966x-switch.yaml b/Documentation/devicetree/bindings/net/microchip,lan966x-switch.yaml index e79e4e166ad8..13812768b923 100644 --- a/Documentation/devicetree/bindings/net/microchip,lan966x-switch.yaml +++ b/Documentation/devicetree/bindings/net/microchip,lan966x-switch.yaml @@ -38,6 +38,7 @@ properties: - description: register based extraction - description: frame dma based extraction - description: analyzer interrupt + - description: ptp interrupt interrupt-names: minItems: 1 @@ -45,6 +46,7 @@ properties: - const: xtr - const: fdma - const: ana + - const: ptp resets: items: diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 9d98e0511249..cae28af7a476 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -860,8 +860,16 @@ Kernel response contents: ``ETHTOOL_A_RINGS_RX_JUMBO`` u32 size of RX jumbo ring ``ETHTOOL_A_RINGS_TX`` u32 size of TX ring ``ETHTOOL_A_RINGS_RX_BUF_LEN`` u32 size of buffers on the ring + ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` u8 TCP header / data split ==================================== ====== =========================== +``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` indicates whether the device is usable with +page-flipping TCP zero-copy receive (``getsockopt(TCP_ZEROCOPY_RECEIVE)``). +If enabled the device is configured to place frame headers and data into +separate buffers. The device configuration must make it possible to receive +full memory pages of data, for example because MTU is high enough or through +HW-GRO. + RINGS_SET ========= diff --git a/MAINTAINERS b/MAINTAINERS index bb83dcbcb619..c7169da15ab4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16351,8 +16351,7 @@ REALTEK RTL83xx SMI DSA ROUTER CHIPS M: Linus Walleij <linus.walleij@linaro.org> S: Maintained F: Documentation/devicetree/bindings/net/dsa/realtek-smi.txt -F: drivers/net/dsa/realtek-smi* -F: drivers/net/dsa/rtl83* +F: drivers/net/dsa/realtek/* REALTEK WIRELESS DRIVER (rtlwifi family) M: Ping-Ke Shih <pkshih@realtek.com> diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 284d28755b8d..7d81535893af 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -133,6 +133,8 @@ #define SO_RESERVE_MEM 73 +#define SO_TXREHASH 74 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi index 74e66443e4ce..9bec3ba20c69 100644 --- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi +++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi @@ -512,6 +512,8 @@ #stream-id-cells = <1>; iommus = <&smmu 0x874>; power-domains = <&zynqmp_firmware PD_ETH_0>; + resets = <&zynqmp_reset ZYNQMP_RESET_GEM0>; + reset-names = "gem0_rst"; }; gem1: ethernet@ff0c0000 { @@ -526,6 +528,8 @@ #stream-id-cells = <1>; iommus = <&smmu 0x875>; power-domains = <&zynqmp_firmware PD_ETH_1>; + resets = <&zynqmp_reset ZYNQMP_RESET_GEM1>; + reset-names = "gem1_rst"; }; gem2: ethernet@ff0d0000 { @@ -540,6 +544,8 @@ #stream-id-cells = <1>; iommus = <&smmu 0x876>; power-domains = <&zynqmp_firmware PD_ETH_2>; + resets = <&zynqmp_reset ZYNQMP_RESET_GEM2>; + reset-names = "gem2_rst"; }; gem3: ethernet@ff0e0000 { @@ -554,6 +560,8 @@ #stream-id-cells = <1>; iommus = <&smmu 0x877>; power-domains = <&zynqmp_firmware PD_ETH_3>; + resets = <&zynqmp_reset ZYNQMP_RESET_GEM3>; + reset-names = "gem3_rst"; }; gpio: gpio@ff0a0000 { diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 24e0efb360f6..1d55e57b8466 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -144,6 +144,8 @@ #define SO_RESERVE_MEM 73 +#define SO_TXREHASH 74 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 845ddc63c882..654061e0964e 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -125,6 +125,8 @@ #define SO_RESERVE_MEM 0x4047 +#define SO_TXREHASH 0x4048 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 2672dd03faf3..666f81e617ea 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -126,6 +126,8 @@ #define SO_RESERVE_MEM 0x0052 +#define SO_TXREHASH 0x0053 + #if !defined(__KERNEL__) diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 1a4f8b227eac..06514ed66022 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -2428,10 +2428,15 @@ static int btintel_setup_combined(struct hci_dev *hdev) /* Apply the device specific HCI quirks * - * WBS for SdP - SdP and Stp have a same hw_varaint but - * different fw_variant + * WBS for SdP - For the Legacy ROM products, only SdP + * supports the WBS. But the version information is not + * enough to use here because the StP2 and SdP have same + * hw_variant and fw_variant. So, this flag is set by + * the transport driver (btusb) based on the HW info + * (idProduct) */ - if (ver.hw_variant == 0x08 && ver.fw_variant == 0x22) + if (!btintel_test_flag(hdev, + INTEL_ROM_LEGACY_NO_WBS_SUPPORT)) set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); diff --git a/drivers/bluetooth/btintel.h b/drivers/bluetooth/btintel.h index c9b24e9299e2..e0060e58573c 100644 --- a/drivers/bluetooth/btintel.h +++ b/drivers/bluetooth/btintel.h @@ -152,6 +152,7 @@ enum { INTEL_BROKEN_INITIAL_NCMD, INTEL_BROKEN_SHUTDOWN_LED, INTEL_ROM_LEGACY, + INTEL_ROM_LEGACY_NO_WBS_SUPPORT, __INTEL_NUM_FLAGS, }; diff --git a/drivers/bluetooth/btmrvl_debugfs.c b/drivers/bluetooth/btmrvl_debugfs.c index c4867576be00..db35b917aecf 100644 --- a/drivers/bluetooth/btmrvl_debugfs.c +++ b/drivers/bluetooth/btmrvl_debugfs.c @@ -1,4 +1,4 @@ -/** +/* * Marvell Bluetooth driver: debugfs related functions * * Copyright (C) 2009, Marvell International Ltd. diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c index 68378b42ea7f..b8ef66f89fc1 100644 --- a/drivers/bluetooth/btmrvl_sdio.c +++ b/drivers/bluetooth/btmrvl_sdio.c @@ -1,4 +1,4 @@ -/** +/* * Marvell BT-over-SDIO driver: SDIO interface related functions. * * Copyright (C) 2009, Marvell International Ltd. diff --git a/drivers/bluetooth/btmtk.h b/drivers/bluetooth/btmtk.h index 6e7b0c7567c0..fb76d9765ce0 100644 --- a/drivers/bluetooth/btmtk.h +++ b/drivers/bluetooth/btmtk.h @@ -7,8 +7,12 @@ #define HCI_WMT_MAX_EVENT_SIZE 64 +#define BTMTK_WMT_REG_WRITE 0x1 #define BTMTK_WMT_REG_READ 0x2 +#define MT7921_PINMUX_0 0x70005050 +#define MT7921_PINMUX_1 0x70005054 + enum { BTMTK_WMT_PATCH_DWNLD = 0x1, BTMTK_WMT_TEST = 0x2, @@ -68,6 +72,37 @@ struct btmtk_tci_sleep { u8 time_compensation; } __packed; +struct btmtk_wakeon { + u8 mode; + u8 gpo; + u8 active_high; + __le16 enable_delay; + __le16 wakeup_delay; +} __packed; + +struct btmtk_sco { + u8 clock_config; + u8 transmit_format_config; + u8 channel_format_config; + u8 channel_select_config; +} __packed; + +struct reg_read_cmd { + u8 type; + u8 rsv; + u8 num; + __le32 addr; +} __packed; + +struct reg_write_cmd { + u8 type; + u8 rsv; + u8 num; + __le32 addr; + __le32 data; + __le32 mask; +} __packed; + struct btmtk_hci_wmt_params { u8 op; u8 flag; diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index b5ea8d3bffaa..8be763ab3bf4 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -31,28 +31,32 @@ #define VERSION "0.1" -#define MTKBTSDIO_AUTOSUSPEND_DELAY 8000 +#define MTKBTSDIO_AUTOSUSPEND_DELAY 1000 -static bool enable_autosuspend; +static bool enable_autosuspend = true; struct btmtksdio_data { const char *fwname; u16 chipid; + bool lp_mbox_supported; }; static const struct btmtksdio_data mt7663_data = { .fwname = FIRMWARE_MT7663, .chipid = 0x7663, + .lp_mbox_supported = false, }; static const struct btmtksdio_data mt7668_data = { .fwname = FIRMWARE_MT7668, .chipid = 0x7668, + .lp_mbox_supported = false, }; static const struct btmtksdio_data mt7921_data = { .fwname = FIRMWARE_MT7961, .chipid = 0x7921, + .lp_mbox_supported = true, }; static const struct sdio_device_id btmtksdio_table[] = { @@ -87,8 +91,17 @@ MODULE_DEVICE_TABLE(sdio, btmtksdio_table); #define RX_DONE_INT BIT(1) #define TX_EMPTY BIT(2) #define TX_FIFO_OVERFLOW BIT(8) +#define FW_MAILBOX_INT BIT(15) +#define INT_MASK GENMASK(15, 0) #define RX_PKT_LEN GENMASK(31, 16) +#define MTK_REG_CSICR 0xc0 +#define CSICR_CLR_MBOX_ACK BIT(0) +#define MTK_REG_PH2DSM0R 0xc4 +#define PH2DSM0R_DRIVER_OWN BIT(0) +#define MTK_REG_PD2HRM0R 0xdc +#define PD2HRM0R_DRV_OWN BIT(0) + #define MTK_REG_CTDR 0x18 #define MTK_REG_CRDR 0x1c @@ -100,6 +113,7 @@ MODULE_DEVICE_TABLE(sdio, btmtksdio_table); #define BTMTKSDIO_TX_WAIT_VND_EVT 1 #define BTMTKSDIO_HW_TX_READY 2 #define BTMTKSDIO_FUNC_ENABLED 3 +#define BTMTKSDIO_PATCH_ENABLED 4 struct mtkbtsdio_hdr { __le16 len; @@ -278,6 +292,78 @@ static u32 btmtksdio_drv_own_query(struct btmtksdio_dev *bdev) return sdio_readl(bdev->func, MTK_REG_CHLPCR, NULL); } +static u32 btmtksdio_drv_own_query_79xx(struct btmtksdio_dev *bdev) +{ + return sdio_readl(bdev->func, MTK_REG_PD2HRM0R, NULL); +} + +static int btmtksdio_fw_pmctrl(struct btmtksdio_dev *bdev) +{ + u32 status; + int err; + + sdio_claim_host(bdev->func); + + if (bdev->data->lp_mbox_supported && + test_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state)) { + sdio_writel(bdev->func, CSICR_CLR_MBOX_ACK, MTK_REG_CSICR, + &err); + err = readx_poll_timeout(btmtksdio_drv_own_query_79xx, bdev, + status, !(status & PD2HRM0R_DRV_OWN), + 2000, 1000000); + if (err < 0) { + bt_dev_err(bdev->hdev, "mailbox ACK not cleared"); + goto out; + } + } + + /* Return ownership to the device */ + sdio_writel(bdev->func, C_FW_OWN_REQ_SET, MTK_REG_CHLPCR, &err); + if (err < 0) + goto out; + + err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status, + !(status & C_COM_DRV_OWN), 2000, 1000000); + +out: + sdio_release_host(bdev->func); + + if (err < 0) + bt_dev_err(bdev->hdev, "Cannot return ownership to device"); + + return err; +} + +static int btmtksdio_drv_pmctrl(struct btmtksdio_dev *bdev) +{ + u32 status; + int err; + + sdio_claim_host(bdev->func); + + /* Get ownership from the device */ + sdio_writel(bdev->func, C_FW_OWN_REQ_CLR, MTK_REG_CHLPCR, &err); + if (err < 0) + goto out; + + err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status, + status & C_COM_DRV_OWN, 2000, 1000000); + + if (!err && bdev->data->lp_mbox_supported && + test_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state)) + err = readx_poll_timeout(btmtksdio_drv_own_query_79xx, bdev, + status, status & PD2HRM0R_DRV_OWN, + 2000, 1000000); + +out: + sdio_release_host(bdev->func); + + if (err < 0) + bt_dev_err(bdev->hdev, "Cannot get ownership from device"); + + return err; +} + static int btmtksdio_recv_event(struct hci_dev *hdev, struct sk_buff *skb) { struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); @@ -480,6 +566,13 @@ static void btmtksdio_txrx_work(struct work_struct *work) * FIFO. */ sdio_writel(bdev->func, int_status, MTK_REG_CHISR, NULL); + int_status &= INT_MASK; + + if ((int_status & FW_MAILBOX_INT) && + bdev->data->chipid == 0x7921) { + sdio_writel(bdev->func, PH2DSM0R_DRIVER_OWN, + MTK_REG_PH2DSM0R, 0); + } if (int_status & FW_OWN_BACK_INT) bt_dev_dbg(bdev->hdev, "Get fw own back"); @@ -531,7 +624,7 @@ static void btmtksdio_interrupt(struct sdio_func *func) static int btmtksdio_open(struct hci_dev *hdev) { struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); - u32 status, val; + u32 val; int err; sdio_claim_host(bdev->func); @@ -542,18 +635,10 @@ static int btmtksdio_open(struct hci_dev *hdev) set_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state); - /* Get ownership from the device */ - sdio_writel(bdev->func, C_FW_OWN_REQ_CLR, MTK_REG_CHLPCR, &err); + err = btmtksdio_drv_pmctrl(bdev); if (err < 0) goto err_disable_func; - err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status, - status & C_COM_DRV_OWN, 2000, 1000000); - if (err < 0) { - bt_dev_err(bdev->hdev, "Cannot get ownership from device"); - goto err_disable_func; - } - /* Disable interrupt & mask out all interrupt sources */ sdio_writel(bdev->func, C_INT_EN_CLR, MTK_REG_CHLPCR, &err); if (err < 0) @@ -623,8 +708,6 @@ err_release_host: static int btmtksdio_close(struct hci_dev *hdev) { struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); - u32 status; - int err; sdio_claim_host(bdev->func); @@ -635,13 +718,7 @@ static int btmtksdio_close(struct hci_dev *hdev) cancel_work_sync(&bdev->txrx_work); - /* Return ownership to the device */ - sdio_writel(bdev->func, C_FW_OWN_REQ_SET, MTK_REG_CHLPCR, NULL); - - err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status, - !(status & C_COM_DRV_OWN), 2000, 1000000); - if (err < 0) - bt_dev_err(bdev->hdev, "Cannot return ownership to device"); + btmtksdio_fw_pmctrl(bdev); clear_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state); sdio_disable_func(bdev->func); @@ -686,6 +763,7 @@ static int btmtksdio_func_query(struct hci_dev *hdev) static int mt76xx_setup(struct hci_dev *hdev, const char *fwname) { + struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); struct btmtk_hci_wmt_params wmt_params; struct btmtk_tci_sleep tci_sleep; struct sk_buff *skb; @@ -746,6 +824,8 @@ ignore_setup_fw: return err; } + set_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state); + ignore_func_on: /* Apply the low power environment setup */ tci_sleep.mode = 0x5; @@ -768,6 +848,7 @@ ignore_func_on: static int mt79xx_setup(struct hci_dev *hdev, const char *fwname) { + struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); struct btmtk_hci_wmt_params wmt_params; u8 param = 0x1; int err; @@ -793,19 +874,15 @@ static int mt79xx_setup(struct hci_dev *hdev, const char *fwname) hci_set_msft_opcode(hdev, 0xFD30); hci_set_aosp_capable(hdev); + set_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state); return err; } -static int btsdio_mtk_reg_read(struct hci_dev *hdev, u32 reg, u32 *val) +static int btmtksdio_mtk_reg_read(struct hci_dev *hdev, u32 reg, u32 *val) { struct btmtk_hci_wmt_params wmt_params; - struct reg_read_cmd { - u8 type; - u8 rsv; - u8 num; - __le32 addr; - } __packed reg_read = { + struct reg_read_cmd reg_read = { .type = 1, .num = 1, }; @@ -821,7 +898,7 @@ static int btsdio_mtk_reg_read(struct hci_dev *hdev, u32 reg, u32 *val) err = mtk_hci_wmt_sync(hdev, &wmt_params); if (err < 0) { - bt_dev_err(hdev, "Failed to read reg(%d)", err); + bt_dev_err(hdev, "Failed to read reg (%d)", err); return err; } @@ -830,6 +907,66 @@ static int btsdio_mtk_reg_read(struct hci_dev *hdev, u32 reg, u32 *val) return err; } +static int btmtksdio_mtk_reg_write(struct hci_dev *hdev, u32 reg, u32 val, u32 mask) +{ + struct btmtk_hci_wmt_params wmt_params; + const struct reg_write_cmd reg_write = { + .type = 1, + .num = 1, + .addr = cpu_to_le32(reg), + .data = cpu_to_le32(val), + .mask = cpu_to_le32(mask), + }; + int err, status; + + wmt_params.op = BTMTK_WMT_REGISTER; + wmt_params.flag = BTMTK_WMT_REG_WRITE; + wmt_params.dlen = sizeof(reg_write); + wmt_params.data = ®_write; + wmt_params.status = &status; + + err = mtk_hci_wmt_sync(hdev, &wmt_params); + if (err < 0) + bt_dev_err(hdev, "Failed to write reg (%d)", err); + + return err; +} + +static int btmtksdio_sco_setting(struct hci_dev *hdev) +{ + const struct btmtk_sco sco_setting = { + .clock_config = 0x49, + .channel_format_config = 0x80, + }; + struct sk_buff *skb; + u32 val; + int err; + + /* Enable SCO over I2S/PCM for MediaTek chipset */ + skb = __hci_cmd_sync(hdev, 0xfc72, sizeof(sco_setting), + &sco_setting, HCI_CMD_TIMEOUT); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + kfree_skb(skb); + + err = btmtksdio_mtk_reg_read(hdev, MT7921_PINMUX_0, &val); + if (err < 0) + return err; + + val |= 0x11000000; + err = btmtksdio_mtk_reg_write(hdev, MT7921_PINMUX_0, val, ~0); + if (err < 0) + return err; + + err = btmtksdio_mtk_reg_read(hdev, MT7921_PINMUX_1, &val); + if (err < 0) + return err; + + val |= 0x00000101; + return btmtksdio_mtk_reg_write(hdev, MT7921_PINMUX_1, val, ~0); +} + static int btmtksdio_setup(struct hci_dev *hdev) { struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); @@ -844,13 +981,13 @@ static int btmtksdio_setup(struct hci_dev *hdev) switch (bdev->data->chipid) { case 0x7921: - err = btsdio_mtk_reg_read(hdev, 0x70010200, &dev_id); + err = btmtksdio_mtk_reg_read(hdev, 0x70010200, &dev_id); if (err < 0) { bt_dev_err(hdev, "Failed to get device id (%d)", err); return err; } - err = btsdio_mtk_reg_read(hdev, 0x80021004, &fw_version); + err = btmtksdio_mtk_reg_read(hdev, 0x80021004, &fw_version); if (err < 0) { bt_dev_err(hdev, "Failed to get fw version (%d)", err); return err; @@ -862,6 +999,22 @@ static int btmtksdio_setup(struct hci_dev *hdev) err = mt79xx_setup(hdev, fwname); if (err < 0) return err; + + err = btmtksdio_fw_pmctrl(bdev); + if (err < 0) + return err; + + err = btmtksdio_drv_pmctrl(bdev); + if (err < 0) + return err; + + /* Enable SCO over I2S/PCM */ + err = btmtksdio_sco_setting(hdev); + if (err < 0) { + bt_dev_err(hdev, "Failed to enable SCO setting (%d)", err); + return err; + } + break; case 0x7663: case 0x7668: @@ -958,6 +1111,32 @@ static int btmtksdio_send_frame(struct hci_dev *hdev, struct sk_buff *skb) return 0; } +static bool btmtksdio_sdio_wakeup(struct hci_dev *hdev) +{ + struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); + bool may_wakeup = device_may_wakeup(bdev->dev); + const struct btmtk_wakeon bt_awake = { + .mode = 0x1, + .gpo = 0, + .active_high = 0x1, + .enable_delay = cpu_to_le16(0xc80), + .wakeup_delay = cpu_to_le16(0x20), + }; + + if (may_wakeup && bdev->data->chipid == 0x7921) { + struct sk_buff *skb; + + skb = __hci_cmd_sync(hdev, 0xfc27, sizeof(bt_awake), + &bt_awake, HCI_CMD_TIMEOUT); + if (IS_ERR(skb)) + may_wakeup = false; + + kfree_skb(skb); + } + + return may_wakeup; +} + static int btmtksdio_probe(struct sdio_func *func, const struct sdio_device_id *id) { @@ -997,6 +1176,7 @@ static int btmtksdio_probe(struct sdio_func *func, hdev->setup = btmtksdio_setup; hdev->shutdown = btmtksdio_shutdown; hdev->send = btmtksdio_send_frame; + hdev->wakeup = btmtksdio_sdio_wakeup; hdev->set_bdaddr = btmtk_set_bdaddr; SET_HCIDEV_DEV(hdev, &func->dev); @@ -1032,7 +1212,11 @@ static int btmtksdio_probe(struct sdio_func *func, */ pm_runtime_put_noidle(bdev->dev); - return 0; + err = device_init_wakeup(bdev->dev, true); + if (err) + bt_dev_err(hdev, "failed to initialize device wakeup"); + + return err; } static void btmtksdio_remove(struct sdio_func *func) @@ -1058,7 +1242,6 @@ static int btmtksdio_runtime_suspend(struct device *dev) { struct sdio_func *func = dev_to_sdio_func(dev); struct btmtksdio_dev *bdev; - u32 status; int err; bdev = sdio_get_drvdata(func); @@ -1070,18 +1253,9 @@ static int btmtksdio_runtime_suspend(struct device *dev) sdio_set_host_pm_flags(func, MMC_PM_KEEP_POWER); - sdio_claim_host(bdev->func); + err = btmtksdio_fw_pmctrl(bdev); - sdio_writel(bdev->func, C_FW_OWN_REQ_SET, MTK_REG_CHLPCR, &err); - if (err < 0) - goto out; - - err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status, - !(status & C_COM_DRV_OWN), 2000, 1000000); -out: - bt_dev_info(bdev->hdev, "status (%d) return ownership to device", err); - - sdio_release_host(bdev->func); + bt_dev_dbg(bdev->hdev, "status (%d) return ownership to device", err); return err; } @@ -1090,7 +1264,6 @@ static int btmtksdio_runtime_resume(struct device *dev) { struct sdio_func *func = dev_to_sdio_func(dev); struct btmtksdio_dev *bdev; - u32 status; int err; bdev = sdio_get_drvdata(func); @@ -1100,18 +1273,9 @@ static int btmtksdio_runtime_resume(struct device *dev) if (!test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state)) return 0; - sdio_claim_host(bdev->func); + err = btmtksdio_drv_pmctrl(bdev); - sdio_writel(bdev->func, C_FW_OWN_REQ_CLR, MTK_REG_CHLPCR, &err); - if (err < 0) - goto out; - - err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status, - status & C_COM_DRV_OWN, 2000, 1000000); -out: - bt_dev_info(bdev->hdev, "status (%d) get ownership from device", err); - - sdio_release_host(bdev->func); + bt_dev_dbg(bdev->hdev, "status (%d) get ownership from device", err); return err; } diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index c2bdd1e6060e..c2030f7e25b4 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -149,6 +149,14 @@ static const struct id_table ic_id_table[] = { .cfg_name = "rtl_bt/rtl8761bu_config" }, /* 8822C with UART interface */ + { IC_INFO(RTL_ROM_LMP_8822B, 0xc, 0x8, HCI_UART), + .config_needed = true, + .has_rom_version = true, + .has_msft_ext = true, + .fw_name = "rtl_bt/rtl8822cs_fw.bin", + .cfg_name = "rtl_bt/rtl8822cs_config" }, + + /* 8822C with UART interface */ { IC_INFO(RTL_ROM_LMP_8822B, 0xc, 0xa, HCI_UART), .config_needed = true, .has_rom_version = true, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index c30d131da784..aefa0ee293f3 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -62,6 +62,7 @@ static struct usb_driver btusb_driver; #define BTUSB_QCA_WCN6855 0x1000000 #define BTUSB_INTEL_BROKEN_SHUTDOWN_LED 0x2000000 #define BTUSB_INTEL_BROKEN_INITIAL_NCMD 0x4000000 +#define BTUSB_INTEL_NO_WBS_SUPPORT 0x8000000 static const struct usb_device_id btusb_table[] = { /* Generic Bluetooth USB device */ @@ -385,9 +386,11 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x8087, 0x0033), .driver_info = BTUSB_INTEL_COMBINED }, { USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR }, { USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL_COMBINED | + BTUSB_INTEL_NO_WBS_SUPPORT | BTUSB_INTEL_BROKEN_INITIAL_NCMD | BTUSB_INTEL_BROKEN_SHUTDOWN_LED }, { USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL_COMBINED | + BTUSB_INTEL_NO_WBS_SUPPORT | BTUSB_INTEL_BROKEN_SHUTDOWN_LED }, { USB_DEVICE(0x8087, 0x0a2b), .driver_info = BTUSB_INTEL_COMBINED }, { USB_DEVICE(0x8087, 0x0aa7), .driver_info = BTUSB_INTEL_COMBINED | @@ -405,6 +408,8 @@ static const struct usb_device_id blacklist_table[] = { BTUSB_WIDEBAND_SPEECH }, /* Realtek 8852AE Bluetooth devices */ + { USB_DEVICE(0x0bda, 0x2852), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0xc852), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0x385a), .driver_info = BTUSB_REALTEK | @@ -2057,10 +2062,10 @@ static int btusb_setup_csr(struct hci_dev *hdev) * These controllers are really messed-up. * * 1. Their bulk RX endpoint will never report any data unless - * the device was suspended at least once (yes, really). + * the device was suspended at least once (yes, really). * 2. They will not wakeup when autosuspended and receiving data - * on their bulk RX endpoint from e.g. a keyboard or mouse - * (IOW remote-wakeup support is broken for the bulk endpoint). + * on their bulk RX endpoint from e.g. a keyboard or mouse + * (IOW remote-wakeup support is broken for the bulk endpoint). * * To fix 1. enable runtime-suspend, force-suspend the * HCI and then wake-it up by disabling runtime-suspend. @@ -3737,6 +3742,9 @@ static int btusb_probe(struct usb_interface *intf, hdev->send = btusb_send_frame_intel; hdev->cmd_timeout = btusb_intel_cmd_timeout; + if (id->driver_info & BTUSB_INTEL_NO_WBS_SUPPORT) + btintel_set_flag(hdev, INTEL_ROM_LEGACY_NO_WBS_SUPPORT); + if (id->driver_info & BTUSB_INTEL_BROKEN_INITIAL_NCMD) btintel_set_flag(hdev, INTEL_BROKEN_INITIAL_NCMD); diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c index 34286ffe0568..fdf504b0d265 100644 --- a/drivers/bluetooth/hci_h5.c +++ b/drivers/bluetooth/hci_h5.c @@ -966,6 +966,11 @@ static void h5_btrtl_open(struct h5 *h5) pm_runtime_enable(&h5->hu->serdev->dev); } + /* The controller needs reset to startup */ + gpiod_set_value_cansleep(h5->enable_gpio, 0); + gpiod_set_value_cansleep(h5->device_wake_gpio, 0); + msleep(100); + /* The controller needs up to 500ms to wakeup */ gpiod_set_value_cansleep(h5->enable_gpio, 1); gpiod_set_value_cansleep(h5->device_wake_gpio, 1); diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c index eb1e736efeeb..4eb420a9ed04 100644 --- a/drivers/bluetooth/hci_ll.c +++ b/drivers/bluetooth/hci_ll.c @@ -509,7 +509,7 @@ static int send_command_from_firmware(struct ll_device *lldev, return 0; } -/** +/* * download_firmware - * internal function which parses through the .bts firmware * script file intreprets SEND, DELAY actions only as of now diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c index 3b00d82d36cf..4cda890ce647 100644 --- a/drivers/bluetooth/hci_serdev.c +++ b/drivers/bluetooth/hci_serdev.c @@ -305,6 +305,8 @@ int hci_uart_register_device(struct hci_uart *hu, if (err) return err; + percpu_init_rwsem(&hu->proto_lock); + err = p->open(hu); if (err) goto err_open; @@ -327,7 +329,6 @@ int hci_uart_register_device(struct hci_uart *hu, INIT_WORK(&hu->init_ready, hci_uart_init_work); INIT_WORK(&hu->write_work, hci_uart_write_work); - percpu_init_rwsem(&hu->proto_lock); /* Only when vendor specific setup callback is provided, consider * the manufacturer information valid. This avoids filling in the diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 533e476988f2..c98a4b0a8453 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1269,6 +1269,27 @@ unwind: return res; } +/* determine if the packet is NA or NS */ +static bool alb_determine_nd(struct sk_buff *skb, struct bonding *bond) +{ + struct ipv6hdr *ip6hdr; + struct icmp6hdr *hdr; + + if (!pskb_network_may_pull(skb, sizeof(*ip6hdr))) + return true; + + ip6hdr = ipv6_hdr(skb); + if (ip6hdr->nexthdr != IPPROTO_ICMPV6) + return false; + + if (!pskb_network_may_pull(skb, sizeof(*ip6hdr) + sizeof(*hdr))) + return true; + + hdr = icmp6_hdr(skb); + return hdr->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT || + hdr->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION; +} + /************************ exported alb functions ************************/ int bond_alb_initialize(struct bonding *bond, int rlb_enabled) @@ -1348,8 +1369,11 @@ struct slave *bond_xmit_tlb_slave_get(struct bonding *bond, /* Do not TX balance any multicast or broadcast */ if (!is_multicast_ether_addr(eth_data->h_dest)) { switch (skb->protocol) { - case htons(ETH_P_IP): case htons(ETH_P_IPV6): + if (alb_determine_nd(skb, bond)) + break; + fallthrough; + case htons(ETH_P_IP): hash_index = bond_xmit_hash(bond, skb); if (bond->params.tlb_dynamic_lb) { tx_slave = tlb_choose_channel(bond, @@ -1432,10 +1456,12 @@ struct slave *bond_xmit_alb_slave_get(struct bonding *bond, break; } - if (!pskb_network_may_pull(skb, sizeof(*ip6hdr))) { + if (alb_determine_nd(skb, bond)) { do_tx_balance = false; break; } + + /* The IPv6 header is pulled by alb_determine_nd */ /* Additionally, DAD probes should not be tx-balanced as that * will lead to false positives for duplicate addresses and * prevent address configuration from working. diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index c0c91440340a..8d51c1019dcd 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -68,17 +68,7 @@ config NET_DSA_QCA8K This enables support for the Qualcomm Atheros QCA8K Ethernet switch chips. -config NET_DSA_REALTEK_SMI - tristate "Realtek SMI Ethernet switch family support" - select NET_DSA_TAG_RTL4_A - select NET_DSA_TAG_RTL8_4 - select FIXED_PHY - select IRQ_DOMAIN - select REALTEK_PHY - select REGMAP - help - This enables support for the Realtek SMI-based switch - chips, currently only RTL8366RB. +source "drivers/net/dsa/realtek/Kconfig" config NET_DSA_SMSC_LAN9303 tristate diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile index 8da1569a34e6..e73838c12256 100644 --- a/drivers/net/dsa/Makefile +++ b/drivers/net/dsa/Makefile @@ -9,8 +9,6 @@ obj-$(CONFIG_NET_DSA_LANTIQ_GSWIP) += lantiq_gswip.o obj-$(CONFIG_NET_DSA_MT7530) += mt7530.o obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o obj-$(CONFIG_NET_DSA_QCA8K) += qca8k.o -obj-$(CONFIG_NET_DSA_REALTEK_SMI) += realtek-smi.o -realtek-smi-objs := realtek-smi-core.o rtl8366.o rtl8366rb.o rtl8365mb.o obj-$(CONFIG_NET_DSA_SMSC_LAN9303) += lan9303-core.o obj-$(CONFIG_NET_DSA_SMSC_LAN9303_I2C) += lan9303_i2c.o obj-$(CONFIG_NET_DSA_SMSC_LAN9303_MDIO) += lan9303_mdio.o @@ -23,5 +21,6 @@ obj-y += microchip/ obj-y += mv88e6xxx/ obj-y += ocelot/ obj-y += qca/ +obj-y += realtek/ obj-y += sja1105/ obj-y += xrs700x/ diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 3867f3d4545f..a3b98992f180 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2186,7 +2186,7 @@ int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy) { int ret; - ret = phy_init_eee(phy, 0); + ret = phy_init_eee(phy, false); if (ret) return 0; diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 33499fcd8848..9161ce4ca352 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -709,49 +709,25 @@ static u32 bcm_sf2_sw_get_phy_flags(struct dsa_switch *ds, int port) PHY_BRCM_IDDQ_SUSPEND; } -static void bcm_sf2_sw_validate(struct dsa_switch *ds, int port, - unsigned long *supported, - struct phylink_link_state *state) +static void bcm_sf2_sw_get_caps(struct dsa_switch *ds, int port, + struct phylink_config *config) { + unsigned long *interfaces = config->supported_interfaces; struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; - - if (!phy_interface_mode_is_rgmii(state->interface) && - state->interface != PHY_INTERFACE_MODE_MII && - state->interface != PHY_INTERFACE_MODE_REVMII && - state->interface != PHY_INTERFACE_MODE_GMII && - state->interface != PHY_INTERFACE_MODE_INTERNAL && - state->interface != PHY_INTERFACE_MODE_MOCA) { - linkmode_zero(supported); - if (port != core_readl(priv, CORE_IMP0_PRT_ID)) - dev_err(ds->dev, - "Unsupported interface: %d for port %d\n", - state->interface, port); - return; - } - - /* Allow all the expected bits */ - phylink_set(mask, Autoneg); - phylink_set_port_modes(mask); - phylink_set(mask, Pause); - phylink_set(mask, Asym_Pause); - /* With the exclusion of MII and Reverse MII, we support Gigabit, - * including Half duplex - */ - if (state->interface != PHY_INTERFACE_MODE_MII && - state->interface != PHY_INTERFACE_MODE_REVMII) { - phylink_set(mask, 1000baseT_Full); - phylink_set(mask, 1000baseT_Half); + if (priv->int_phy_mask & BIT(port)) { + __set_bit(PHY_INTERFACE_MODE_INTERNAL, interfaces); + } else if (priv->moca_port == port) { + __set_bit(PHY_INTERFACE_MODE_MOCA, interfaces); + } else { + __set_bit(PHY_INTERFACE_MODE_MII, interfaces); + __set_bit(PHY_INTERFACE_MODE_REVMII, interfaces); + __set_bit(PHY_INTERFACE_MODE_GMII, interfaces); + phy_interface_set_rgmii(interfaces); } - phylink_set(mask, 10baseT_Half); - phylink_set(mask, 10baseT_Full); - phylink_set(mask, 100baseT_Half); - phylink_set(mask, 100baseT_Full); - - linkmode_and(supported, supported, mask); - linkmode_and(state->advertising, state->advertising, mask); + config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_10 | MAC_100 | MAC_1000; } static void bcm_sf2_sw_mac_config(struct dsa_switch *ds, int port, @@ -1218,7 +1194,7 @@ static const struct dsa_switch_ops bcm_sf2_ops = { .get_sset_count = bcm_sf2_sw_get_sset_count, .get_ethtool_phy_stats = b53_get_ethtool_phy_stats, .get_phy_flags = bcm_sf2_sw_get_phy_flags, - .phylink_validate = bcm_sf2_sw_validate, + .phylink_get_caps = bcm_sf2_sw_get_caps, .phylink_mac_config = bcm_sf2_sw_mac_config, .phylink_mac_link_down = bcm_sf2_sw_mac_link_down, .phylink_mac_link_up = bcm_sf2_sw_mac_link_up, diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index 991b9c6b6ce7..5dc9899bc0a6 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -1461,27 +1461,22 @@ static int ksz8_setup(struct dsa_switch *ds) return 0; } -static void ksz8_validate(struct dsa_switch *ds, int port, - unsigned long *supported, - struct phylink_link_state *state) +static void ksz8_get_caps(struct dsa_switch *ds, int port, + struct phylink_config *config) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; struct ksz_device *dev = ds->priv; if (port == dev->cpu_port) { - if (state->interface != PHY_INTERFACE_MODE_RMII && - state->interface != PHY_INTERFACE_MODE_MII && - state->interface != PHY_INTERFACE_MODE_NA) - goto unsupported; + __set_bit(PHY_INTERFACE_MODE_RMII, + config->supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_MII, + config->supported_interfaces); } else { - if (state->interface != PHY_INTERFACE_MODE_INTERNAL && - state->interface != PHY_INTERFACE_MODE_NA) - goto unsupported; + __set_bit(PHY_INTERFACE_MODE_INTERNAL, + config->supported_interfaces); } - /* Allow all the expected bits */ - phylink_set_port_modes(mask); - phylink_set(mask, Autoneg); + config->mac_capabilities = MAC_10 | MAC_100; /* Silicon Errata Sheet (DS80000830A): * "Port 1 does not respond to received flow control PAUSE frames" @@ -1489,27 +1484,11 @@ static void ksz8_validate(struct dsa_switch *ds, int port, * switches. */ if (!ksz_is_ksz88x3(dev) || port) - phylink_set(mask, Pause); + config->mac_capabilities |= MAC_SYM_PAUSE; /* Asym pause is not supported on KSZ8863 and KSZ8873 */ if (!ksz_is_ksz88x3(dev)) - phylink_set(mask, Asym_Pause); - - /* 10M and 100M are only supported */ - phylink_set(mask, 10baseT_Half); - phylink_set(mask, 10baseT_Full); - phylink_set(mask, 100baseT_Half); - phylink_set(mask, 100baseT_Full); - - linkmode_and(supported, supported, mask); - linkmode_and(state->advertising, state->advertising, mask); - - return; - -unsupported: - linkmode_zero(supported); - dev_err(ds->dev, "Unsupported interface: %s, port: %d\n", - phy_modes(state->interface), port); + config->mac_capabilities |= MAC_ASYM_PAUSE; } static const struct dsa_switch_ops ksz8_switch_ops = { @@ -1518,7 +1497,7 @@ static const struct dsa_switch_ops ksz8_switch_ops = { .setup = ksz8_setup, .phy_read = ksz_phy_read16, .phy_write = ksz_phy_write16, - .phylink_validate = ksz8_validate, + .phylink_get_caps = ksz8_get_caps, .phylink_mac_link_down = ksz_mac_link_down, .port_enable = ksz_enable_port, .get_strings = ksz8_get_strings, diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 353b5f981740..a85d990896b0 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -222,9 +222,12 @@ static int ksz9477_reset_switch(struct ksz_device *dev) (BROADCAST_STORM_VALUE * BROADCAST_STORM_PROT_RATE) / 100); - if (dev->synclko_125) - ksz_write8(dev, REG_SW_GLOBAL_OUTPUT_CTRL__1, - SW_ENABLE_REFCLKO | SW_REFCLKO_IS_125MHZ); + data8 = SW_ENABLE_REFCLKO; + if (dev->synclko_disable) + data8 = 0; + else if (dev->synclko_125) + data8 = SW_ENABLE_REFCLKO | SW_REFCLKO_IS_125MHZ; + ksz_write8(dev, REG_SW_GLOBAL_OUTPUT_CTRL__1, data8); return 0; } diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 55dbda04ea62..7e33ec73f803 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -434,6 +434,12 @@ int ksz_switch_register(struct ksz_device *dev, } dev->synclko_125 = of_property_read_bool(dev->dev->of_node, "microchip,synclko-125"); + dev->synclko_disable = of_property_read_bool(dev->dev->of_node, + "microchip,synclko-disable"); + if (dev->synclko_125 && dev->synclko_disable) { + dev_err(dev->dev, "inconsistent synclko settings\n"); + return -EINVAL; + } } ret = dsa_register_switch(dev->ds); diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index df8ae59c8525..3db63f62f0a1 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -75,6 +75,7 @@ struct ksz_device { u32 regs_size; bool phy_errata_9477; bool synclko_125; + bool synclko_disable; struct vlan_table *vlan_cache; diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index b82512e5b33b..bc77a26c825a 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2846,7 +2846,7 @@ static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port, mcr |= PMCR_RX_FC_EN; } - if (mode == MLO_AN_PHY && phydev && phy_init_eee(phydev, 0) >= 0) { + if (mode == MLO_AN_PHY && phydev && phy_init_eee(phydev, false) >= 0) { switch (speed) { case SPEED_1000: mcr |= PMCR_FORCE_EEE1G; diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 58ca684d73f7..7670796c2aa1 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -86,12 +86,16 @@ int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val) int mv88e6xxx_wait_mask(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask, u16 val) { + const unsigned long timeout = jiffies + msecs_to_jiffies(50); u16 data; int err; int i; - /* There's no bus specific operation to wait for a mask */ - for (i = 0; i < 16; i++) { + /* There's no bus specific operation to wait for a mask. Even + * if the initial poll takes longer than 50ms, always do at + * least one more attempt. + */ + for (i = 0; time_before(jiffies, timeout) || (i < 2); i++) { err = mv88e6xxx_read(chip, addr, reg, &data); if (err) return err; @@ -99,7 +103,10 @@ int mv88e6xxx_wait_mask(struct mv88e6xxx_chip *chip, int addr, int reg, if ((data & mask) == val) return 0; - usleep_range(1000, 2000); + if (i < 2) + cpu_relax(); + else + usleep_range(1000, 2000); } dev_err(chip->dev, "Timeout while waiting for switch\n"); @@ -563,133 +570,249 @@ static int mv88e6xxx_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port, return 0; } -static void mv88e6065_phylink_validate(struct mv88e6xxx_chip *chip, int port, - unsigned long *mask, - struct phylink_link_state *state) +static const u8 mv88e6185_phy_interface_modes[] = { + [MV88E6185_PORT_STS_CMODE_GMII_FD] = PHY_INTERFACE_MODE_GMII, + [MV88E6185_PORT_STS_CMODE_MII_100_FD_PS] = PHY_INTERFACE_MODE_MII, + [MV88E6185_PORT_STS_CMODE_MII_100] = PHY_INTERFACE_MODE_MII, + [MV88E6185_PORT_STS_CMODE_MII_10] = PHY_INTERFACE_MODE_MII, + [MV88E6185_PORT_STS_CMODE_SERDES] = PHY_INTERFACE_MODE_1000BASEX, + [MV88E6185_PORT_STS_CMODE_1000BASE_X] = PHY_INTERFACE_MODE_1000BASEX, + [MV88E6185_PORT_STS_CMODE_PHY] = PHY_INTERFACE_MODE_SGMII, +}; + +static void mv88e6185_phylink_get_caps(struct mv88e6xxx_chip *chip, int port, + struct phylink_config *config) { - if (!phy_interface_mode_is_8023z(state->interface)) { - /* 10M and 100M are only supported in non-802.3z mode */ - phylink_set(mask, 10baseT_Half); - phylink_set(mask, 10baseT_Full); - phylink_set(mask, 100baseT_Half); - phylink_set(mask, 100baseT_Full); - } + u8 cmode = chip->ports[port].cmode; + + if (cmode <= ARRAY_SIZE(mv88e6185_phy_interface_modes) && + mv88e6185_phy_interface_modes[cmode]) + __set_bit(mv88e6185_phy_interface_modes[cmode], + config->supported_interfaces); + + config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 | + MAC_1000FD; } -static void mv88e6185_phylink_validate(struct mv88e6xxx_chip *chip, int port, - unsigned long *mask, - struct phylink_link_state *state) -{ - /* FIXME: if the port is in 1000Base-X mode, then it only supports - * 1000M FD speeds. In this case, CMODE will indicate 5. +static const u8 mv88e6xxx_phy_interface_modes[] = { + [MV88E6XXX_PORT_STS_CMODE_MII_PHY] = PHY_INTERFACE_MODE_MII, + [MV88E6XXX_PORT_STS_CMODE_MII] = PHY_INTERFACE_MODE_MII, + [MV88E6XXX_PORT_STS_CMODE_GMII] = PHY_INTERFACE_MODE_GMII, + [MV88E6XXX_PORT_STS_CMODE_RMII_PHY] = PHY_INTERFACE_MODE_RMII, + [MV88E6XXX_PORT_STS_CMODE_RMII] = PHY_INTERFACE_MODE_RMII, + [MV88E6XXX_PORT_STS_CMODE_100BASEX] = PHY_INTERFACE_MODE_100BASEX, + [MV88E6XXX_PORT_STS_CMODE_1000BASEX] = PHY_INTERFACE_MODE_1000BASEX, + [MV88E6XXX_PORT_STS_CMODE_SGMII] = PHY_INTERFACE_MODE_SGMII, + /* higher interface modes are not needed here, since ports supporting + * them are writable, and so the supported interfaces are filled in the + * corresponding .phylink_set_interfaces() implementation below */ - phylink_set(mask, 1000baseT_Full); - phylink_set(mask, 1000baseX_Full); +}; - mv88e6065_phylink_validate(chip, port, mask, state); +static void mv88e6xxx_translate_cmode(u8 cmode, unsigned long *supported) +{ + if (cmode < ARRAY_SIZE(mv88e6xxx_phy_interface_modes) && + mv88e6xxx_phy_interface_modes[cmode]) + __set_bit(mv88e6xxx_phy_interface_modes[cmode], supported); + else if (cmode == MV88E6XXX_PORT_STS_CMODE_RGMII) + phy_interface_set_rgmii(supported); } -static void mv88e6341_phylink_validate(struct mv88e6xxx_chip *chip, int port, - unsigned long *mask, - struct phylink_link_state *state) +static void mv88e6250_phylink_get_caps(struct mv88e6xxx_chip *chip, int port, + struct phylink_config *config) { - if (port >= 5) - phylink_set(mask, 2500baseX_Full); + unsigned long *supported = config->supported_interfaces; - /* No ethtool bits for 200Mbps */ - phylink_set(mask, 1000baseT_Full); - phylink_set(mask, 1000baseX_Full); + /* Translate the default cmode */ + mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported); - mv88e6065_phylink_validate(chip, port, mask, state); + config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100; } -static void mv88e6352_phylink_validate(struct mv88e6xxx_chip *chip, int port, - unsigned long *mask, - struct phylink_link_state *state) +static int mv88e6352_get_port4_serdes_cmode(struct mv88e6xxx_chip *chip) { - /* No ethtool bits for 200Mbps */ - phylink_set(mask, 1000baseT_Full); - phylink_set(mask, 1000baseX_Full); + u16 reg, val; + int err; + + err = mv88e6xxx_port_read(chip, 4, MV88E6XXX_PORT_STS, ®); + if (err) + return err; + + /* If PHY_DETECT is zero, then we are not in auto-media mode */ + if (!(reg & MV88E6XXX_PORT_STS_PHY_DETECT)) + return 0xf; + + val = reg & ~MV88E6XXX_PORT_STS_PHY_DETECT; + err = mv88e6xxx_port_write(chip, 4, MV88E6XXX_PORT_STS, val); + if (err) + return err; + + err = mv88e6xxx_port_read(chip, 4, MV88E6XXX_PORT_STS, &val); + if (err) + return err; + + /* Restore PHY_DETECT value */ + err = mv88e6xxx_port_write(chip, 4, MV88E6XXX_PORT_STS, reg); + if (err) + return err; - mv88e6065_phylink_validate(chip, port, mask, state); + return val & MV88E6XXX_PORT_STS_CMODE_MASK; } -static void mv88e6390_phylink_validate(struct mv88e6xxx_chip *chip, int port, - unsigned long *mask, - struct phylink_link_state *state) +static void mv88e6352_phylink_get_caps(struct mv88e6xxx_chip *chip, int port, + struct phylink_config *config) { - if (port >= 9) { - phylink_set(mask, 2500baseX_Full); - phylink_set(mask, 2500baseT_Full); + unsigned long *supported = config->supported_interfaces; + int err, cmode; + + /* Translate the default cmode */ + mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported); + + config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 | + MAC_1000FD; + + /* Port 4 supports automedia if the serdes is associated with it. */ + if (port == 4) { + mv88e6xxx_reg_lock(chip); + err = mv88e6352_g2_scratch_port_has_serdes(chip, port); + if (err < 0) + dev_err(chip->dev, "p%d: failed to read scratch\n", + port); + if (err <= 0) + goto unlock; + + cmode = mv88e6352_get_port4_serdes_cmode(chip); + if (cmode < 0) + dev_err(chip->dev, "p%d: failed to read serdes cmode\n", + port); + else + mv88e6xxx_translate_cmode(cmode, supported); +unlock: + mv88e6xxx_reg_unlock(chip); } +} + +static void mv88e6341_phylink_get_caps(struct mv88e6xxx_chip *chip, int port, + struct phylink_config *config) +{ + unsigned long *supported = config->supported_interfaces; + + /* Translate the default cmode */ + mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported); /* No ethtool bits for 200Mbps */ - phylink_set(mask, 1000baseT_Full); - phylink_set(mask, 1000baseX_Full); + config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 | + MAC_1000FD; + + /* The C_Mode field is programmable on port 5 */ + if (port == 5) { + __set_bit(PHY_INTERFACE_MODE_SGMII, supported); + __set_bit(PHY_INTERFACE_MODE_1000BASEX, supported); + __set_bit(PHY_INTERFACE_MODE_2500BASEX, supported); - mv88e6065_phylink_validate(chip, port, mask, state); + config->mac_capabilities |= MAC_2500FD; + } } -static void mv88e6390x_phylink_validate(struct mv88e6xxx_chip *chip, int port, - unsigned long *mask, - struct phylink_link_state *state) +static void mv88e6390_phylink_get_caps(struct mv88e6xxx_chip *chip, int port, + struct phylink_config *config) { - if (port >= 9) { - phylink_set(mask, 10000baseT_Full); - phylink_set(mask, 10000baseKR_Full); + unsigned long *supported = config->supported_interfaces; + + /* Translate the default cmode */ + mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported); + + /* No ethtool bits for 200Mbps */ + config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 | + MAC_1000FD; + + /* The C_Mode field is programmable on ports 9 and 10 */ + if (port == 9 || port == 10) { + __set_bit(PHY_INTERFACE_MODE_SGMII, supported); + __set_bit(PHY_INTERFACE_MODE_1000BASEX, supported); + __set_bit(PHY_INTERFACE_MODE_2500BASEX, supported); + + config->mac_capabilities |= MAC_2500FD; } +} + +static void mv88e6390x_phylink_get_caps(struct mv88e6xxx_chip *chip, int port, + struct phylink_config *config) +{ + unsigned long *supported = config->supported_interfaces; - mv88e6390_phylink_validate(chip, port, mask, state); + mv88e6390_phylink_get_caps(chip, port, config); + + /* For the 6x90X, ports 2-7 can be in automedia mode. + * (Note that 6x90 doesn't support RXAUI nor XAUI). + * + * Port 2 can also support 1000BASE-X in automedia mode if port 9 is + * configured for 1000BASE-X, SGMII or 2500BASE-X. + * Port 3-4 can also support 1000BASE-X in automedia mode if port 9 is + * configured for RXAUI, 1000BASE-X, SGMII or 2500BASE-X. + * + * Port 5 can also support 1000BASE-X in automedia mode if port 10 is + * configured for 1000BASE-X, SGMII or 2500BASE-X. + * Port 6-7 can also support 1000BASE-X in automedia mode if port 10 is + * configured for RXAUI, 1000BASE-X, SGMII or 2500BASE-X. + * + * For now, be permissive (as the old code was) and allow 1000BASE-X + * on ports 2..7. + */ + if (port >= 2 && port <= 7) + __set_bit(PHY_INTERFACE_MODE_1000BASEX, supported); + + /* The C_Mode field can also be programmed for 10G speeds */ + if (port == 9 || port == 10) { + __set_bit(PHY_INTERFACE_MODE_XAUI, supported); + __set_bit(PHY_INTERFACE_MODE_RXAUI, supported); + + config->mac_capabilities |= MAC_10000FD; + } } -static void mv88e6393x_phylink_validate(struct mv88e6xxx_chip *chip, int port, - unsigned long *mask, - struct phylink_link_state *state) +static void mv88e6393x_phylink_get_caps(struct mv88e6xxx_chip *chip, int port, + struct phylink_config *config) { + unsigned long *supported = config->supported_interfaces; bool is_6191x = chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6191X; - if (((port == 0 || port == 9) && !is_6191x) || port == 10) { - phylink_set(mask, 10000baseT_Full); - phylink_set(mask, 10000baseKR_Full); - phylink_set(mask, 10000baseCR_Full); - phylink_set(mask, 10000baseSR_Full); - phylink_set(mask, 10000baseLR_Full); - phylink_set(mask, 10000baseLRM_Full); - phylink_set(mask, 10000baseER_Full); - phylink_set(mask, 5000baseT_Full); - phylink_set(mask, 2500baseX_Full); - phylink_set(mask, 2500baseT_Full); - } + mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported); + + config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 | + MAC_1000FD; - phylink_set(mask, 1000baseT_Full); - phylink_set(mask, 1000baseX_Full); + /* The C_Mode field can be programmed for ports 0, 9 and 10 */ + if (port == 0 || port == 9 || port == 10) { + __set_bit(PHY_INTERFACE_MODE_SGMII, supported); + __set_bit(PHY_INTERFACE_MODE_1000BASEX, supported); - mv88e6065_phylink_validate(chip, port, mask, state); + /* 6191X supports >1G modes only on port 10 */ + if (!is_6191x || port == 10) { + __set_bit(PHY_INTERFACE_MODE_2500BASEX, supported); + __set_bit(PHY_INTERFACE_MODE_5GBASER, supported); + __set_bit(PHY_INTERFACE_MODE_10GBASER, supported); + /* FIXME: USXGMII is not supported yet */ + /* __set_bit(PHY_INTERFACE_MODE_USXGMII, supported); */ + + config->mac_capabilities |= MAC_2500FD | MAC_5000FD | + MAC_10000FD; + } + } } -static void mv88e6xxx_validate(struct dsa_switch *ds, int port, - unsigned long *supported, - struct phylink_link_state *state) +static void mv88e6xxx_get_caps(struct dsa_switch *ds, int port, + struct phylink_config *config) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; struct mv88e6xxx_chip *chip = ds->priv; - /* Allow all the expected bits */ - phylink_set(mask, Autoneg); - phylink_set(mask, Pause); - phylink_set_port_modes(mask); + chip->info->ops->phylink_get_caps(chip, port, config); - if (chip->info->ops->phylink_validate) - chip->info->ops->phylink_validate(chip, port, mask, state); - - linkmode_and(supported, supported, mask); - linkmode_and(state->advertising, state->advertising, mask); - - /* We can only operate at 2500BaseX or 1000BaseX. If requested - * to advertise both, only report advertising at 2500BaseX. - */ - phylink_helper_basex_speed(state); + /* Internal ports need GMII for PHYLIB */ + if (mv88e6xxx_phy_is_internal(ds, port)) + __set_bit(PHY_INTERFACE_MODE_GMII, + config->supported_interfaces); } static void mv88e6xxx_mac_config(struct dsa_switch *ds, int port, @@ -1283,8 +1406,15 @@ static u16 mv88e6xxx_port_vlan(struct mv88e6xxx_chip *chip, int dev, int port) pvlan = 0; - /* Frames from user ports can egress any local DSA links and CPU ports, - * as well as any local member of their bridge group. + /* Frames from standalone user ports can only egress on the + * upstream port. + */ + if (!dsa_port_bridge_dev_get(dp)) + return BIT(dsa_switch_upstream_port(ds)); + + /* Frames from bridged user ports can egress any local DSA + * links and CPU ports, as well as any local member of their + * bridge group. */ dsa_switch_for_each_port(other_dp, ds) if (other_dp->type == DSA_PORT_TYPE_CPU || @@ -1616,21 +1746,11 @@ static int mv88e6xxx_fid_map_vlan(struct mv88e6xxx_chip *chip, int mv88e6xxx_fid_map(struct mv88e6xxx_chip *chip, unsigned long *fid_bitmap) { - int i, err; - u16 fid; - bitmap_zero(fid_bitmap, MV88E6XXX_N_FID); - /* Set every FID bit used by the (un)bridged ports */ - for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { - err = mv88e6xxx_port_get_fid(chip, i, &fid); - if (err) - return err; - - set_bit(fid, fid_bitmap); - } - - /* Set every FID bit used by the VLAN entries */ + /* Every FID has an associated VID, so walking the VTU + * will discover the full set of FIDs in use. + */ return mv88e6xxx_vtu_walk(chip, mv88e6xxx_fid_map_vlan, fid_bitmap); } @@ -1643,10 +1763,7 @@ static int mv88e6xxx_atu_new(struct mv88e6xxx_chip *chip, u16 *fid) if (err) return err; - /* The reset value 0x000 is used to indicate that multiple address - * databases are not needed. Return the next positive available. - */ - *fid = find_next_zero_bit(fid_bitmap, MV88E6XXX_N_FID, 1); + *fid = find_first_zero_bit(fid_bitmap, MV88E6XXX_N_FID); if (unlikely(*fid >= mv88e6xxx_num_databases(chip))) return -ENOSPC; @@ -2138,6 +2255,9 @@ static int mv88e6xxx_port_vlan_join(struct mv88e6xxx_chip *chip, int port, if (!vlan.valid) { memset(&vlan, 0, sizeof(vlan)); + if (vid == MV88E6XXX_VID_STANDALONE) + vlan.policy = true; + err = mv88e6xxx_atu_new(chip, &vlan.fid); if (err) return err; @@ -2480,6 +2600,10 @@ static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, if (err) goto unlock; + err = mv88e6xxx_port_set_map_da(chip, port, true); + if (err) + return err; + err = mv88e6xxx_port_commit_pvid(chip, port); if (err) goto unlock; @@ -2514,6 +2638,12 @@ static void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port, mv88e6xxx_port_vlan_map(chip, port)) dev_err(ds->dev, "failed to remap in-chip Port VLAN\n"); + err = mv88e6xxx_port_set_map_da(chip, port, false); + if (err) + dev_err(ds->dev, + "port %d failed to restore map-DA: %pe\n", + port, ERR_PTR(err)); + err = mv88e6xxx_port_commit_pvid(chip, port); if (err) dev_err(ds->dev, @@ -2911,12 +3041,13 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) return err; /* Port Control 2: don't force a good FCS, set the MTU size to - * 10222 bytes, disable 802.1q tags checking, don't discard tagged or - * untagged frames on this port, do a destination address lookup on all - * received packets as usual, disable ARP mirroring and don't send a - * copy of all transmitted/received frames on this port to the CPU. + * 10222 bytes, disable 802.1q tags checking, don't discard + * tagged or untagged frames on this port, skip destination + * address lookup on user ports, disable ARP mirroring and don't + * send a copy of all transmitted/received frames on this port + * to the CPU. */ - err = mv88e6xxx_port_set_map_da(chip, port); + err = mv88e6xxx_port_set_map_da(chip, port, !dsa_is_user_port(ds, port)); if (err) return err; @@ -2924,8 +3055,44 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) if (err) return err; + /* On chips that support it, set all downstream DSA ports' + * VLAN policy to TRAP. In combination with loading + * MV88E6XXX_VID_STANDALONE as a policy entry in the VTU, this + * provides a better isolation barrier between standalone + * ports, as the ATU is bypassed on any intermediate switches + * between the incoming port and the CPU. + */ + if (dsa_is_downstream_port(ds, port) && + chip->info->ops->port_set_policy) { + err = chip->info->ops->port_set_policy(chip, port, + MV88E6XXX_POLICY_MAPPING_VTU, + MV88E6XXX_POLICY_ACTION_TRAP); + if (err) + return err; + } + + /* User ports start out in standalone mode and 802.1Q is + * therefore disabled. On DSA ports, all valid VIDs are always + * loaded in the VTU - therefore, enable 802.1Q in order to take + * advantage of VLAN policy on chips that supports it. + */ err = mv88e6xxx_port_set_8021q_mode(chip, port, - MV88E6XXX_PORT_CTL2_8021Q_MODE_DISABLED); + dsa_is_user_port(ds, port) ? + MV88E6XXX_PORT_CTL2_8021Q_MODE_DISABLED : + MV88E6XXX_PORT_CTL2_8021Q_MODE_SECURE); + if (err) + return err; + + /* Bind MV88E6XXX_VID_STANDALONE to MV88E6XXX_FID_STANDALONE by + * virtue of the fact that mv88e6xxx_atu_new() will pick it as + * the first free FID. This will be used as the private PVID for + * unbridged ports. Shared (DSA and CPU) ports must also be + * members of this VID, in order to trap all frames assigned to + * it to the CPU. + */ + err = mv88e6xxx_port_vlan_join(chip, port, MV88E6XXX_VID_STANDALONE, + MV88E6XXX_G1_VTU_DATA_MEMBER_TAG_UNMODIFIED, + false); if (err) return err; @@ -2938,7 +3105,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) * relying on their port default FID. */ err = mv88e6xxx_port_vlan_join(chip, port, MV88E6XXX_VID_BRIDGED, - MV88E6XXX_G1_VTU_DATA_MEMBER_TAG_UNTAGGED, + MV88E6XXX_G1_VTU_DATA_MEMBER_TAG_UNMODIFIED, false); if (err) return err; @@ -3577,7 +3744,7 @@ static const struct mv88e6xxx_ops mv88e6085_ops = { .rmu_disable = mv88e6085_g1_rmu_disable, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, - .phylink_validate = mv88e6185_phylink_validate, + .phylink_get_caps = mv88e6185_phylink_get_caps, .set_max_frame_size = mv88e6185_g1_set_max_frame_size, }; @@ -3611,7 +3778,7 @@ static const struct mv88e6xxx_ops mv88e6095_ops = { .reset = mv88e6185_g1_reset, .vtu_getnext = mv88e6185_g1_vtu_getnext, .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge, - .phylink_validate = mv88e6185_phylink_validate, + .phylink_get_caps = mv88e6185_phylink_get_caps, .set_max_frame_size = mv88e6185_g1_set_max_frame_size, }; @@ -3627,6 +3794,7 @@ static const struct mv88e6xxx_ops mv88e6097_ops = { .port_sync_link = mv88e6185_port_sync_link, .port_set_speed_duplex = mv88e6185_port_set_speed_duplex, .port_tag_remap = mv88e6095_port_tag_remap, + .port_set_policy = mv88e6352_port_set_policy, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_ucast_flood = mv88e6352_port_set_ucast_flood, .port_set_mcast_flood = mv88e6352_port_set_mcast_flood, @@ -3657,7 +3825,7 @@ static const struct mv88e6xxx_ops mv88e6097_ops = { .rmu_disable = mv88e6085_g1_rmu_disable, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, - .phylink_validate = mv88e6185_phylink_validate, + .phylink_get_caps = mv88e6185_phylink_get_caps, .set_max_frame_size = mv88e6185_g1_set_max_frame_size, }; @@ -3694,7 +3862,7 @@ static const struct mv88e6xxx_ops mv88e6123_ops = { .atu_set_hash = mv88e6165_g1_atu_set_hash, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, - .phylink_validate = mv88e6185_phylink_validate, + .phylink_get_caps = mv88e6185_phylink_get_caps, .set_max_frame_size = mv88e6185_g1_set_max_frame_size, }; @@ -3735,7 +3903,7 @@ static const struct mv88e6xxx_ops mv88e6131_ops = { .reset = mv88e6185_g1_reset, .vtu_getnext = mv88e6185_g1_vtu_getnext, .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge, - .phylink_validate = mv88e6185_phylink_validate, + .phylink_get_caps = mv88e6185_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6141_ops = { @@ -3799,7 +3967,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .serdes_get_stats = mv88e6390_serdes_get_stats, .serdes_get_regs_len = mv88e6390_serdes_get_regs_len, .serdes_get_regs = mv88e6390_serdes_get_regs, - .phylink_validate = mv88e6341_phylink_validate, + .phylink_get_caps = mv88e6341_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6161_ops = { @@ -3841,7 +4009,7 @@ static const struct mv88e6xxx_ops mv88e6161_ops = { .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .avb_ops = &mv88e6165_avb_ops, .ptp_ops = &mv88e6165_ptp_ops, - .phylink_validate = mv88e6185_phylink_validate, + .phylink_get_caps = mv88e6185_phylink_get_caps, .set_max_frame_size = mv88e6185_g1_set_max_frame_size, }; @@ -3877,7 +4045,7 @@ static const struct mv88e6xxx_ops mv88e6165_ops = { .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .avb_ops = &mv88e6165_avb_ops, .ptp_ops = &mv88e6165_ptp_ops, - .phylink_validate = mv88e6185_phylink_validate, + .phylink_get_caps = mv88e6185_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6171_ops = { @@ -3919,7 +4087,7 @@ static const struct mv88e6xxx_ops mv88e6171_ops = { .atu_set_hash = mv88e6165_g1_atu_set_hash, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, - .phylink_validate = mv88e6185_phylink_validate, + .phylink_get_caps = mv88e6185_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6172_ops = { @@ -3974,7 +4142,7 @@ static const struct mv88e6xxx_ops mv88e6172_ops = { .serdes_get_regs_len = mv88e6352_serdes_get_regs_len, .serdes_get_regs = mv88e6352_serdes_get_regs, .gpio_ops = &mv88e6352_gpio_ops, - .phylink_validate = mv88e6352_phylink_validate, + .phylink_get_caps = mv88e6352_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6175_ops = { @@ -4016,7 +4184,7 @@ static const struct mv88e6xxx_ops mv88e6175_ops = { .atu_set_hash = mv88e6165_g1_atu_set_hash, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, - .phylink_validate = mv88e6185_phylink_validate, + .phylink_get_caps = mv88e6185_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6176_ops = { @@ -4074,7 +4242,7 @@ static const struct mv88e6xxx_ops mv88e6176_ops = { .serdes_get_regs_len = mv88e6352_serdes_get_regs_len, .serdes_get_regs = mv88e6352_serdes_get_regs, .gpio_ops = &mv88e6352_gpio_ops, - .phylink_validate = mv88e6352_phylink_validate, + .phylink_get_caps = mv88e6352_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6185_ops = { @@ -4113,7 +4281,7 @@ static const struct mv88e6xxx_ops mv88e6185_ops = { .reset = mv88e6185_g1_reset, .vtu_getnext = mv88e6185_g1_vtu_getnext, .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge, - .phylink_validate = mv88e6185_phylink_validate, + .phylink_get_caps = mv88e6185_phylink_get_caps, .set_max_frame_size = mv88e6185_g1_set_max_frame_size, }; @@ -4175,7 +4343,7 @@ static const struct mv88e6xxx_ops mv88e6190_ops = { .serdes_get_regs_len = mv88e6390_serdes_get_regs_len, .serdes_get_regs = mv88e6390_serdes_get_regs, .gpio_ops = &mv88e6352_gpio_ops, - .phylink_validate = mv88e6390_phylink_validate, + .phylink_get_caps = mv88e6390_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6190x_ops = { @@ -4236,7 +4404,7 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = { .serdes_get_regs_len = mv88e6390_serdes_get_regs_len, .serdes_get_regs = mv88e6390_serdes_get_regs, .gpio_ops = &mv88e6352_gpio_ops, - .phylink_validate = mv88e6390x_phylink_validate, + .phylink_get_caps = mv88e6390x_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6191_ops = { @@ -4296,7 +4464,7 @@ static const struct mv88e6xxx_ops mv88e6191_ops = { .serdes_get_regs = mv88e6390_serdes_get_regs, .avb_ops = &mv88e6390_avb_ops, .ptp_ops = &mv88e6352_ptp_ops, - .phylink_validate = mv88e6390_phylink_validate, + .phylink_get_caps = mv88e6390_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6240_ops = { @@ -4356,7 +4524,7 @@ static const struct mv88e6xxx_ops mv88e6240_ops = { .gpio_ops = &mv88e6352_gpio_ops, .avb_ops = &mv88e6352_avb_ops, .ptp_ops = &mv88e6352_ptp_ops, - .phylink_validate = mv88e6352_phylink_validate, + .phylink_get_caps = mv88e6352_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6250_ops = { @@ -4396,7 +4564,7 @@ static const struct mv88e6xxx_ops mv88e6250_ops = { .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge, .avb_ops = &mv88e6352_avb_ops, .ptp_ops = &mv88e6250_ptp_ops, - .phylink_validate = mv88e6065_phylink_validate, + .phylink_get_caps = mv88e6250_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6290_ops = { @@ -4458,7 +4626,7 @@ static const struct mv88e6xxx_ops mv88e6290_ops = { .gpio_ops = &mv88e6352_gpio_ops, .avb_ops = &mv88e6390_avb_ops, .ptp_ops = &mv88e6352_ptp_ops, - .phylink_validate = mv88e6390_phylink_validate, + .phylink_get_caps = mv88e6390_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6320_ops = { @@ -4502,7 +4670,7 @@ static const struct mv88e6xxx_ops mv88e6320_ops = { .gpio_ops = &mv88e6352_gpio_ops, .avb_ops = &mv88e6352_avb_ops, .ptp_ops = &mv88e6352_ptp_ops, - .phylink_validate = mv88e6185_phylink_validate, + .phylink_get_caps = mv88e6185_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6321_ops = { @@ -4544,7 +4712,7 @@ static const struct mv88e6xxx_ops mv88e6321_ops = { .gpio_ops = &mv88e6352_gpio_ops, .avb_ops = &mv88e6352_avb_ops, .ptp_ops = &mv88e6352_ptp_ops, - .phylink_validate = mv88e6185_phylink_validate, + .phylink_get_caps = mv88e6185_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6341_ops = { @@ -4610,7 +4778,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .serdes_get_stats = mv88e6390_serdes_get_stats, .serdes_get_regs_len = mv88e6390_serdes_get_regs_len, .serdes_get_regs = mv88e6390_serdes_get_regs, - .phylink_validate = mv88e6341_phylink_validate, + .phylink_get_caps = mv88e6341_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6350_ops = { @@ -4652,7 +4820,7 @@ static const struct mv88e6xxx_ops mv88e6350_ops = { .atu_set_hash = mv88e6165_g1_atu_set_hash, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, - .phylink_validate = mv88e6185_phylink_validate, + .phylink_get_caps = mv88e6185_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6351_ops = { @@ -4696,7 +4864,7 @@ static const struct mv88e6xxx_ops mv88e6351_ops = { .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .avb_ops = &mv88e6352_avb_ops, .ptp_ops = &mv88e6352_ptp_ops, - .phylink_validate = mv88e6185_phylink_validate, + .phylink_get_caps = mv88e6185_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6352_ops = { @@ -4759,7 +4927,7 @@ static const struct mv88e6xxx_ops mv88e6352_ops = { .serdes_get_stats = mv88e6352_serdes_get_stats, .serdes_get_regs_len = mv88e6352_serdes_get_regs_len, .serdes_get_regs = mv88e6352_serdes_get_regs, - .phylink_validate = mv88e6352_phylink_validate, + .phylink_get_caps = mv88e6352_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6390_ops = { @@ -4824,7 +4992,7 @@ static const struct mv88e6xxx_ops mv88e6390_ops = { .serdes_get_stats = mv88e6390_serdes_get_stats, .serdes_get_regs_len = mv88e6390_serdes_get_regs_len, .serdes_get_regs = mv88e6390_serdes_get_regs, - .phylink_validate = mv88e6390_phylink_validate, + .phylink_get_caps = mv88e6390_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6390x_ops = { @@ -4888,7 +5056,7 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = { .gpio_ops = &mv88e6352_gpio_ops, .avb_ops = &mv88e6390_avb_ops, .ptp_ops = &mv88e6352_ptp_ops, - .phylink_validate = mv88e6390x_phylink_validate, + .phylink_get_caps = mv88e6390x_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6393x_ops = { @@ -4952,7 +5120,7 @@ static const struct mv88e6xxx_ops mv88e6393x_ops = { .gpio_ops = &mv88e6352_gpio_ops, .avb_ops = &mv88e6390_avb_ops, .ptp_ops = &mv88e6352_ptp_ops, - .phylink_validate = mv88e6393x_phylink_validate, + .phylink_get_caps = mv88e6393x_phylink_get_caps, }; static const struct mv88e6xxx_info mv88e6xxx_table[] = { @@ -6221,7 +6389,7 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = { .teardown = mv88e6xxx_teardown, .port_setup = mv88e6xxx_port_setup, .port_teardown = mv88e6xxx_port_teardown, - .phylink_validate = mv88e6xxx_validate, + .phylink_get_caps = mv88e6xxx_get_caps, .phylink_mac_link_state = mv88e6xxx_serdes_pcs_get_state, .phylink_mac_config = mv88e6xxx_mac_config, .phylink_mac_an_restart = mv88e6xxx_serdes_pcs_an_restart, diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 8271b8aa7b71..12aa637779f5 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -179,6 +179,7 @@ struct mv88e6xxx_vtu_entry { u16 fid; u8 sid; bool valid; + bool policy; u8 member[DSA_MAX_PORTS]; u8 state[DSA_MAX_PORTS]; }; @@ -392,6 +393,7 @@ struct mv88e6xxx_chip { struct mv88e6xxx_bus_ops { int (*read)(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val); int (*write)(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val); + int (*init)(struct mv88e6xxx_chip *chip); }; struct mv88e6xxx_mdio_bus { @@ -609,9 +611,8 @@ struct mv88e6xxx_ops { const struct mv88e6xxx_ptp_ops *ptp_ops; /* Phylink */ - void (*phylink_validate)(struct mv88e6xxx_chip *chip, int port, - unsigned long *mask, - struct phylink_link_state *state); + void (*phylink_get_caps)(struct mv88e6xxx_chip *chip, int port, + struct phylink_config *config); /* Max Frame Size */ int (*set_max_frame_size)(struct mv88e6xxx_chip *chip, int mtu); diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h index 4f3dbb015f77..2c1607c858a1 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.h +++ b/drivers/net/dsa/mv88e6xxx/global1.h @@ -46,6 +46,7 @@ /* Offset 0x02: VTU FID Register */ #define MV88E6352_G1_VTU_FID 0x02 +#define MV88E6352_G1_VTU_FID_VID_POLICY 0x1000 #define MV88E6352_G1_VTU_FID_MASK 0x0fff /* Offset 0x03: VTU SID Register */ diff --git a/drivers/net/dsa/mv88e6xxx/global1_vtu.c b/drivers/net/dsa/mv88e6xxx/global1_vtu.c index ae12c981923e..b1bd9274a562 100644 --- a/drivers/net/dsa/mv88e6xxx/global1_vtu.c +++ b/drivers/net/dsa/mv88e6xxx/global1_vtu.c @@ -27,7 +27,7 @@ static int mv88e6xxx_g1_vtu_fid_read(struct mv88e6xxx_chip *chip, return err; entry->fid = val & MV88E6352_G1_VTU_FID_MASK; - + entry->policy = !!(val & MV88E6352_G1_VTU_FID_VID_POLICY); return 0; } @@ -36,6 +36,9 @@ static int mv88e6xxx_g1_vtu_fid_write(struct mv88e6xxx_chip *chip, { u16 val = entry->fid & MV88E6352_G1_VTU_FID_MASK; + if (entry->policy) + val |= MV88E6352_G1_VTU_FID_VID_POLICY; + return mv88e6xxx_g1_write(chip, MV88E6352_G1_VTU_FID, val); } diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h index f3e27573a386..807aeaad9830 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.h +++ b/drivers/net/dsa/mv88e6xxx/global2.h @@ -299,6 +299,8 @@ #define MV88E6352_G2_SCRATCH_CONFIG_DATA1_NO_CPU BIT(2) #define MV88E6352_G2_SCRATCH_CONFIG_DATA2 0x72 #define MV88E6352_G2_SCRATCH_CONFIG_DATA2_P0_MODE_MASK 0x3 +#define MV88E6352_G2_SCRATCH_CONFIG_DATA3 0x73 +#define MV88E6352_G2_SCRATCH_CONFIG_DATA3_S_SEL BIT(1) #define MV88E6352_G2_SCRATCH_GPIO_PCTL_GPIO 0 #define MV88E6352_G2_SCRATCH_GPIO_PCTL_TRIG 1 @@ -370,6 +372,7 @@ extern const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops; int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip, bool external); +int mv88e6352_g2_scratch_port_has_serdes(struct mv88e6xxx_chip *chip, int port); int mv88e6xxx_g2_atu_stats_set(struct mv88e6xxx_chip *chip, u16 kind, u16 bin); int mv88e6xxx_g2_atu_stats_get(struct mv88e6xxx_chip *chip, u16 *stats); diff --git a/drivers/net/dsa/mv88e6xxx/global2_scratch.c b/drivers/net/dsa/mv88e6xxx/global2_scratch.c index eda710062933..a9d6e40321a2 100644 --- a/drivers/net/dsa/mv88e6xxx/global2_scratch.c +++ b/drivers/net/dsa/mv88e6xxx/global2_scratch.c @@ -289,3 +289,31 @@ int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip, return mv88e6xxx_g2_scratch_write(chip, misc_cfg, val); } + +/** + * mv88e6352_g2_scratch_port_has_serdes - indicate if a port can have a serdes + * @chip: chip private data + * @port: port number to check for serdes + * + * Indicates whether the port may have a serdes attached according to the + * pin strapping. Returns negative error number, 0 if the port is not + * configured to have a serdes, and 1 if the port is configured to have a + * serdes attached. + */ +int mv88e6352_g2_scratch_port_has_serdes(struct mv88e6xxx_chip *chip, int port) +{ + u8 config3, p; + int err; + + err = mv88e6xxx_g2_scratch_read(chip, MV88E6352_G2_SCRATCH_CONFIG_DATA3, + &config3); + if (err) + return err; + + if (config3 & MV88E6352_G2_SCRATCH_CONFIG_DATA3_S_SEL) + p = 5; + else + p = 4; + + return port == p; +} diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index ab41619a809b..ceb450113f88 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -1278,7 +1278,7 @@ int mv88e6xxx_port_drop_untagged(struct mv88e6xxx_chip *chip, int port, return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_CTL2, new); } -int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port) +int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port, bool map) { u16 reg; int err; @@ -1287,7 +1287,10 @@ int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port) if (err) return err; - reg |= MV88E6XXX_PORT_CTL2_MAP_DA; + if (map) + reg |= MV88E6XXX_PORT_CTL2_MAP_DA; + else + reg &= ~MV88E6XXX_PORT_CTL2_MAP_DA; return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_CTL2, reg); } diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h index 03382b66f800..3a13db2ec27b 100644 --- a/drivers/net/dsa/mv88e6xxx/port.h +++ b/drivers/net/dsa/mv88e6xxx/port.h @@ -42,6 +42,11 @@ #define MV88E6XXX_PORT_STS_TX_PAUSED 0x0020 #define MV88E6XXX_PORT_STS_FLOW_CTL 0x0010 #define MV88E6XXX_PORT_STS_CMODE_MASK 0x000f +#define MV88E6XXX_PORT_STS_CMODE_MII_PHY 0x0001 +#define MV88E6XXX_PORT_STS_CMODE_MII 0x0002 +#define MV88E6XXX_PORT_STS_CMODE_GMII 0x0003 +#define MV88E6XXX_PORT_STS_CMODE_RMII_PHY 0x0004 +#define MV88E6XXX_PORT_STS_CMODE_RMII 0x0005 #define MV88E6XXX_PORT_STS_CMODE_RGMII 0x0007 #define MV88E6XXX_PORT_STS_CMODE_100BASEX 0x0008 #define MV88E6XXX_PORT_STS_CMODE_1000BASEX 0x0009 @@ -425,7 +430,7 @@ int mv88e6185_port_get_cmode(struct mv88e6xxx_chip *chip, int port, u8 *cmode); int mv88e6352_port_get_cmode(struct mv88e6xxx_chip *chip, int port, u8 *cmode); int mv88e6xxx_port_drop_untagged(struct mv88e6xxx_chip *chip, int port, bool drop_untagged); -int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port); +int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port, bool map); int mv88e6095_port_set_upstream_port(struct mv88e6xxx_chip *chip, int port, int upstream_port); int mv88e6xxx_port_set_mirror(struct mv88e6xxx_chip *chip, int port, diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c index 2b05ead515cd..6a177bf654ee 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.c +++ b/drivers/net/dsa/mv88e6xxx/serdes.c @@ -272,14 +272,6 @@ int mv88e6352_serdes_get_lane(struct mv88e6xxx_chip *chip, int port) return lane; } -static bool mv88e6352_port_has_serdes(struct mv88e6xxx_chip *chip, int port) -{ - if (mv88e6xxx_serdes_get_lane(chip, port) >= 0) - return true; - - return false; -} - struct mv88e6352_serdes_hw_stat { char string[ETH_GSTRING_LEN]; int sizeof_stat; @@ -293,20 +285,24 @@ static struct mv88e6352_serdes_hw_stat mv88e6352_serdes_hw_stats[] = { int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port) { - if (mv88e6352_port_has_serdes(chip, port)) - return ARRAY_SIZE(mv88e6352_serdes_hw_stats); + int err; - return 0; + err = mv88e6352_g2_scratch_port_has_serdes(chip, port); + if (err <= 0) + return err; + + return ARRAY_SIZE(mv88e6352_serdes_hw_stats); } int mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip, int port, uint8_t *data) { struct mv88e6352_serdes_hw_stat *stat; - int i; + int err, i; - if (!mv88e6352_port_has_serdes(chip, port)) - return 0; + err = mv88e6352_g2_scratch_port_has_serdes(chip, port); + if (err <= 0) + return err; for (i = 0; i < ARRAY_SIZE(mv88e6352_serdes_hw_stats); i++) { stat = &mv88e6352_serdes_hw_stats[i]; @@ -348,11 +344,12 @@ int mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port, { struct mv88e6xxx_port *mv88e6xxx_port = &chip->ports[port]; struct mv88e6352_serdes_hw_stat *stat; + int i, err; u64 value; - int i; - if (!mv88e6352_port_has_serdes(chip, port)) - return 0; + err = mv88e6352_g2_scratch_port_has_serdes(chip, port); + if (err <= 0) + return err; BUILD_BUG_ON(ARRAY_SIZE(mv88e6352_serdes_hw_stats) > ARRAY_SIZE(mv88e6xxx_port->serdes_stats)); @@ -419,8 +416,13 @@ unsigned int mv88e6352_serdes_irq_mapping(struct mv88e6xxx_chip *chip, int port) int mv88e6352_serdes_get_regs_len(struct mv88e6xxx_chip *chip, int port) { - if (!mv88e6352_port_has_serdes(chip, port)) - return 0; + int err; + + mv88e6xxx_reg_lock(chip); + err = mv88e6352_g2_scratch_port_has_serdes(chip, port); + mv88e6xxx_reg_unlock(chip); + if (err <= 0) + return err; return 32 * sizeof(u16); } @@ -432,7 +434,8 @@ void mv88e6352_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p) int err; int i; - if (!mv88e6352_port_has_serdes(chip, port)) + err = mv88e6352_g2_scratch_port_has_serdes(chip, port); + if (err <= 0) return; for (i = 0 ; i < 32; i++) { diff --git a/drivers/net/dsa/mv88e6xxx/smi.c b/drivers/net/dsa/mv88e6xxx/smi.c index 282fe08db050..a990271b7482 100644 --- a/drivers/net/dsa/mv88e6xxx/smi.c +++ b/drivers/net/dsa/mv88e6xxx/smi.c @@ -55,11 +55,15 @@ static int mv88e6xxx_smi_direct_write(struct mv88e6xxx_chip *chip, static int mv88e6xxx_smi_direct_wait(struct mv88e6xxx_chip *chip, int dev, int reg, int bit, int val) { + const unsigned long timeout = jiffies + msecs_to_jiffies(50); u16 data; int err; int i; - for (i = 0; i < 16; i++) { + /* Even if the initial poll takes longer than 50ms, always do + * at least one more attempt. + */ + for (i = 0; time_before(jiffies, timeout) || (i < 2); i++) { err = mv88e6xxx_smi_direct_read(chip, dev, reg, &data); if (err) return err; @@ -67,7 +71,10 @@ static int mv88e6xxx_smi_direct_wait(struct mv88e6xxx_chip *chip, if (!!(data & BIT(bit)) == !!val) return 0; - usleep_range(1000, 2000); + if (i < 2) + cpu_relax(); + else + usleep_range(1000, 2000); } return -ETIMEDOUT; @@ -104,11 +111,6 @@ static int mv88e6xxx_smi_indirect_read(struct mv88e6xxx_chip *chip, { int err; - err = mv88e6xxx_smi_direct_wait(chip, chip->sw_addr, - MV88E6XXX_SMI_CMD, 15, 0); - if (err) - return err; - err = mv88e6xxx_smi_direct_write(chip, chip->sw_addr, MV88E6XXX_SMI_CMD, MV88E6XXX_SMI_CMD_BUSY | @@ -132,11 +134,6 @@ static int mv88e6xxx_smi_indirect_write(struct mv88e6xxx_chip *chip, { int err; - err = mv88e6xxx_smi_direct_wait(chip, chip->sw_addr, - MV88E6XXX_SMI_CMD, 15, 0); - if (err) - return err; - err = mv88e6xxx_smi_direct_write(chip, chip->sw_addr, MV88E6XXX_SMI_DATA, data); if (err) @@ -155,9 +152,20 @@ static int mv88e6xxx_smi_indirect_write(struct mv88e6xxx_chip *chip, MV88E6XXX_SMI_CMD, 15, 0); } +static int mv88e6xxx_smi_indirect_init(struct mv88e6xxx_chip *chip) +{ + /* Ensure that the chip starts out in the ready state. As both + * reads and writes always ensure this on return, they can + * safely depend on the chip not being busy on entry. + */ + return mv88e6xxx_smi_direct_wait(chip, chip->sw_addr, + MV88E6XXX_SMI_CMD, 15, 0); +} + static const struct mv88e6xxx_bus_ops mv88e6xxx_smi_indirect_ops = { .read = mv88e6xxx_smi_indirect_read, .write = mv88e6xxx_smi_indirect_write, + .init = mv88e6xxx_smi_indirect_init, }; int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip, @@ -175,5 +183,8 @@ int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip, chip->bus = bus; chip->sw_addr = sw_addr; + if (chip->smi_ops->init) + return chip->smi_ops->init(chip); + return 0; } diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c index da0d7e68643a..3bda7015f0c1 100644 --- a/drivers/net/dsa/qca/ar9331.c +++ b/drivers/net/dsa/qca/ar9331.c @@ -499,52 +499,27 @@ static enum dsa_tag_protocol ar9331_sw_get_tag_protocol(struct dsa_switch *ds, return DSA_TAG_PROTO_AR9331; } -static void ar9331_sw_phylink_validate(struct dsa_switch *ds, int port, - unsigned long *supported, - struct phylink_link_state *state) +static void ar9331_sw_phylink_get_caps(struct dsa_switch *ds, int port, + struct phylink_config *config) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; + config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_10 | MAC_100; switch (port) { case 0: - if (state->interface != PHY_INTERFACE_MODE_GMII) - goto unsupported; - - phylink_set(mask, 1000baseT_Full); - phylink_set(mask, 1000baseT_Half); + __set_bit(PHY_INTERFACE_MODE_GMII, + config->supported_interfaces); + config->mac_capabilities |= MAC_1000; break; case 1: case 2: case 3: case 4: case 5: - if (state->interface != PHY_INTERFACE_MODE_INTERNAL) - goto unsupported; + __set_bit(PHY_INTERFACE_MODE_INTERNAL, + config->supported_interfaces); break; - default: - linkmode_zero(supported); - dev_err(ds->dev, "Unsupported port: %i\n", port); - return; } - - phylink_set_port_modes(mask); - phylink_set(mask, Pause); - phylink_set(mask, Asym_Pause); - - phylink_set(mask, 10baseT_Half); - phylink_set(mask, 10baseT_Full); - phylink_set(mask, 100baseT_Half); - phylink_set(mask, 100baseT_Full); - - linkmode_and(supported, supported, mask); - linkmode_and(state->advertising, state->advertising, mask); - - return; - -unsupported: - linkmode_zero(supported); - dev_err(ds->dev, "Unsupported interface: %d, port: %d\n", - state->interface, port); } static void ar9331_sw_phylink_mac_config(struct dsa_switch *ds, int port, @@ -697,7 +672,7 @@ static const struct dsa_switch_ops ar9331_sw_ops = { .get_tag_protocol = ar9331_sw_get_tag_protocol, .setup = ar9331_sw_setup, .port_disable = ar9331_sw_port_disable, - .phylink_validate = ar9331_sw_phylink_validate, + .phylink_get_caps = ar9331_sw_phylink_get_caps, .phylink_mac_config = ar9331_sw_phylink_mac_config, .phylink_mac_link_down = ar9331_sw_phylink_mac_link_down, .phylink_mac_link_up = ar9331_sw_phylink_mac_link_up, diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 039694518788..83066af3757f 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -20,6 +20,7 @@ #include <linux/phylink.h> #include <linux/gpio/consumer.h> #include <linux/etherdevice.h> +#include <linux/dsa/tag_qca.h> #include "qca8k.h" @@ -74,12 +75,6 @@ static const struct qca8k_mib_desc ar8327_mib[] = { MIB_DESC(1, 0xac, "TXUnicast"), }; -/* The 32bit switch registers are accessed indirectly. To achieve this we need - * to set the page of the register. Track the last page that was set to reduce - * mdio writes - */ -static u16 qca8k_current_page = 0xffff; - static void qca8k_split_addr(u32 regaddr, u16 *r1, u16 *r2, u16 *page) { @@ -94,6 +89,44 @@ qca8k_split_addr(u32 regaddr, u16 *r1, u16 *r2, u16 *page) } static int +qca8k_set_lo(struct qca8k_priv *priv, int phy_id, u32 regnum, u16 lo) +{ + u16 *cached_lo = &priv->mdio_cache.lo; + struct mii_bus *bus = priv->bus; + int ret; + + if (lo == *cached_lo) + return 0; + + ret = bus->write(bus, phy_id, regnum, lo); + if (ret < 0) + dev_err_ratelimited(&bus->dev, + "failed to write qca8k 32bit lo register\n"); + + *cached_lo = lo; + return 0; +} + +static int +qca8k_set_hi(struct qca8k_priv *priv, int phy_id, u32 regnum, u16 hi) +{ + u16 *cached_hi = &priv->mdio_cache.hi; + struct mii_bus *bus = priv->bus; + int ret; + + if (hi == *cached_hi) + return 0; + + ret = bus->write(bus, phy_id, regnum, hi); + if (ret < 0) + dev_err_ratelimited(&bus->dev, + "failed to write qca8k 32bit hi register\n"); + + *cached_hi = hi; + return 0; +} + +static int qca8k_mii_read32(struct mii_bus *bus, int phy_id, u32 regnum, u32 *val) { int ret; @@ -116,7 +149,7 @@ qca8k_mii_read32(struct mii_bus *bus, int phy_id, u32 regnum, u32 *val) } static void -qca8k_mii_write32(struct mii_bus *bus, int phy_id, u32 regnum, u32 val) +qca8k_mii_write32(struct qca8k_priv *priv, int phy_id, u32 regnum, u32 val) { u16 lo, hi; int ret; @@ -124,20 +157,19 @@ qca8k_mii_write32(struct mii_bus *bus, int phy_id, u32 regnum, u32 val) lo = val & 0xffff; hi = (u16)(val >> 16); - ret = bus->write(bus, phy_id, regnum, lo); + ret = qca8k_set_lo(priv, phy_id, regnum, lo); if (ret >= 0) - ret = bus->write(bus, phy_id, regnum + 1, hi); - if (ret < 0) - dev_err_ratelimited(&bus->dev, - "failed to write qca8k 32bit register\n"); + ret = qca8k_set_hi(priv, phy_id, regnum + 1, hi); } static int -qca8k_set_page(struct mii_bus *bus, u16 page) +qca8k_set_page(struct qca8k_priv *priv, u16 page) { + u16 *cached_page = &priv->mdio_cache.page; + struct mii_bus *bus = priv->bus; int ret; - if (page == qca8k_current_page) + if (page == *cached_page) return 0; ret = bus->write(bus, 0x18, 0, page); @@ -147,7 +179,7 @@ qca8k_set_page(struct mii_bus *bus, u16 page) return ret; } - qca8k_current_page = page; + *cached_page = page; usleep_range(1000, 2000); return 0; } @@ -170,6 +202,252 @@ qca8k_rmw(struct qca8k_priv *priv, u32 reg, u32 mask, u32 write_val) return regmap_update_bits(priv->regmap, reg, mask, write_val); } +static void qca8k_rw_reg_ack_handler(struct dsa_switch *ds, struct sk_buff *skb) +{ + struct qca8k_mgmt_eth_data *mgmt_eth_data; + struct qca8k_priv *priv = ds->priv; + struct qca_mgmt_ethhdr *mgmt_ethhdr; + u8 len, cmd; + + mgmt_ethhdr = (struct qca_mgmt_ethhdr *)skb_mac_header(skb); + mgmt_eth_data = &priv->mgmt_eth_data; + + cmd = FIELD_GET(QCA_HDR_MGMT_CMD, mgmt_ethhdr->command); + len = FIELD_GET(QCA_HDR_MGMT_LENGTH, mgmt_ethhdr->command); + + /* Make sure the seq match the requested packet */ + if (mgmt_ethhdr->seq == mgmt_eth_data->seq) + mgmt_eth_data->ack = true; + + if (cmd == MDIO_READ) { + mgmt_eth_data->data[0] = mgmt_ethhdr->mdio_data; + + /* Get the rest of the 12 byte of data. + * The read/write function will extract the requested data. + */ + if (len > QCA_HDR_MGMT_DATA1_LEN) + memcpy(mgmt_eth_data->data + 1, skb->data, + QCA_HDR_MGMT_DATA2_LEN); + } + + complete(&mgmt_eth_data->rw_done); +} + +static struct sk_buff *qca8k_alloc_mdio_header(enum mdio_cmd cmd, u32 reg, u32 *val, + int priority, unsigned int len) +{ + struct qca_mgmt_ethhdr *mgmt_ethhdr; + unsigned int real_len; + struct sk_buff *skb; + u32 *data2; + u16 hdr; + + skb = dev_alloc_skb(QCA_HDR_MGMT_PKT_LEN); + if (!skb) + return NULL; + + /* Max value for len reg is 15 (0xf) but the switch actually return 16 byte + * Actually for some reason the steps are: + * 0: nothing + * 1-4: first 4 byte + * 5-6: first 12 byte + * 7-15: all 16 byte + */ + if (len == 16) + real_len = 15; + else + real_len = len; + + skb_reset_mac_header(skb); + skb_set_network_header(skb, skb->len); + + mgmt_ethhdr = skb_push(skb, QCA_HDR_MGMT_HEADER_LEN + QCA_HDR_LEN); + + hdr = FIELD_PREP(QCA_HDR_XMIT_VERSION, QCA_HDR_VERSION); + hdr |= FIELD_PREP(QCA_HDR_XMIT_PRIORITY, priority); + hdr |= QCA_HDR_XMIT_FROM_CPU; + hdr |= FIELD_PREP(QCA_HDR_XMIT_DP_BIT, BIT(0)); + hdr |= FIELD_PREP(QCA_HDR_XMIT_CONTROL, QCA_HDR_XMIT_TYPE_RW_REG); + + mgmt_ethhdr->command = FIELD_PREP(QCA_HDR_MGMT_ADDR, reg); + mgmt_ethhdr->command |= FIELD_PREP(QCA_HDR_MGMT_LENGTH, real_len); + mgmt_ethhdr->command |= FIELD_PREP(QCA_HDR_MGMT_CMD, cmd); + mgmt_ethhdr->command |= FIELD_PREP(QCA_HDR_MGMT_CHECK_CODE, + QCA_HDR_MGMT_CHECK_CODE_VAL); + + if (cmd == MDIO_WRITE) + mgmt_ethhdr->mdio_data = *val; + + mgmt_ethhdr->hdr = htons(hdr); + + data2 = skb_put_zero(skb, QCA_HDR_MGMT_DATA2_LEN + QCA_HDR_MGMT_PADDING_LEN); + if (cmd == MDIO_WRITE && len > QCA_HDR_MGMT_DATA1_LEN) + memcpy(data2, val + 1, len - QCA_HDR_MGMT_DATA1_LEN); + + return skb; +} + +static void qca8k_mdio_header_fill_seq_num(struct sk_buff *skb, u32 seq_num) +{ + struct qca_mgmt_ethhdr *mgmt_ethhdr; + + mgmt_ethhdr = (struct qca_mgmt_ethhdr *)skb->data; + mgmt_ethhdr->seq = FIELD_PREP(QCA_HDR_MGMT_SEQ_NUM, seq_num); +} + +static int qca8k_read_eth(struct qca8k_priv *priv, u32 reg, u32 *val, int len) +{ + struct qca8k_mgmt_eth_data *mgmt_eth_data = &priv->mgmt_eth_data; + struct sk_buff *skb; + bool ack; + int ret; + + skb = qca8k_alloc_mdio_header(MDIO_READ, reg, NULL, + QCA8K_ETHERNET_MDIO_PRIORITY, len); + if (!skb) + return -ENOMEM; + + mutex_lock(&mgmt_eth_data->mutex); + + /* Check mgmt_master if is operational */ + if (!priv->mgmt_master) { + kfree_skb(skb); + mutex_unlock(&mgmt_eth_data->mutex); + return -EINVAL; + } + + skb->dev = priv->mgmt_master; + + reinit_completion(&mgmt_eth_data->rw_done); + + /* Increment seq_num and set it in the mdio pkt */ + mgmt_eth_data->seq++; + qca8k_mdio_header_fill_seq_num(skb, mgmt_eth_data->seq); + mgmt_eth_data->ack = false; + + dev_queue_xmit(skb); + + ret = wait_for_completion_timeout(&mgmt_eth_data->rw_done, + msecs_to_jiffies(QCA8K_ETHERNET_TIMEOUT)); + + *val = mgmt_eth_data->data[0]; + if (len > QCA_HDR_MGMT_DATA1_LEN) + memcpy(val + 1, mgmt_eth_data->data + 1, len - QCA_HDR_MGMT_DATA1_LEN); + + ack = mgmt_eth_data->ack; + + mutex_unlock(&mgmt_eth_data->mutex); + + if (ret <= 0) + return -ETIMEDOUT; + + if (!ack) + return -EINVAL; + + return 0; +} + +static int qca8k_write_eth(struct qca8k_priv *priv, u32 reg, u32 *val, int len) +{ + struct qca8k_mgmt_eth_data *mgmt_eth_data = &priv->mgmt_eth_data; + struct sk_buff *skb; + bool ack; + int ret; + + skb = qca8k_alloc_mdio_header(MDIO_WRITE, reg, val, + QCA8K_ETHERNET_MDIO_PRIORITY, len); + if (!skb) + return -ENOMEM; + + mutex_lock(&mgmt_eth_data->mutex); + + /* Check mgmt_master if is operational */ + if (!priv->mgmt_master) { + kfree_skb(skb); + mutex_unlock(&mgmt_eth_data->mutex); + return -EINVAL; + } + + skb->dev = priv->mgmt_master; + + reinit_completion(&mgmt_eth_data->rw_done); + + /* Increment seq_num and set it in the mdio pkt */ + mgmt_eth_data->seq++; + qca8k_mdio_header_fill_seq_num(skb, mgmt_eth_data->seq); + mgmt_eth_data->ack = false; + + dev_queue_xmit(skb); + + ret = wait_for_completion_timeout(&mgmt_eth_data->rw_done, + msecs_to_jiffies(QCA8K_ETHERNET_TIMEOUT)); + + ack = mgmt_eth_data->ack; + + mutex_unlock(&mgmt_eth_data->mutex); + + if (ret <= 0) + return -ETIMEDOUT; + + if (!ack) + return -EINVAL; + + return 0; +} + +static int +qca8k_regmap_update_bits_eth(struct qca8k_priv *priv, u32 reg, u32 mask, u32 write_val) +{ + u32 val = 0; + int ret; + + ret = qca8k_read_eth(priv, reg, &val, sizeof(val)); + if (ret) + return ret; + + val &= ~mask; + val |= write_val; + + return qca8k_write_eth(priv, reg, &val, sizeof(val)); +} + +static int +qca8k_bulk_read(struct qca8k_priv *priv, u32 reg, u32 *val, int len) +{ + int i, count = len / sizeof(u32), ret; + + if (priv->mgmt_master && !qca8k_read_eth(priv, reg, val, len)) + return 0; + + for (i = 0; i < count; i++) { + ret = regmap_read(priv->regmap, reg + (i * 4), val + i); + if (ret < 0) + return ret; + } + + return 0; +} + +static int +qca8k_bulk_write(struct qca8k_priv *priv, u32 reg, u32 *val, int len) +{ + int i, count = len / sizeof(u32), ret; + u32 tmp; + + if (priv->mgmt_master && !qca8k_write_eth(priv, reg, val, len)) + return 0; + + for (i = 0; i < count; i++) { + tmp = val[i]; + + ret = regmap_write(priv->regmap, reg + (i * 4), tmp); + if (ret < 0) + return ret; + } + + return 0; +} + static int qca8k_regmap_read(void *ctx, uint32_t reg, uint32_t *val) { @@ -178,11 +456,14 @@ qca8k_regmap_read(void *ctx, uint32_t reg, uint32_t *val) u16 r1, r2, page; int ret; + if (!qca8k_read_eth(priv, reg, val, sizeof(val))) + return 0; + qca8k_split_addr(reg, &r1, &r2, &page); mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED); - ret = qca8k_set_page(bus, page); + ret = qca8k_set_page(priv, page); if (ret < 0) goto exit; @@ -201,15 +482,18 @@ qca8k_regmap_write(void *ctx, uint32_t reg, uint32_t val) u16 r1, r2, page; int ret; + if (!qca8k_write_eth(priv, reg, &val, sizeof(val))) + return 0; + qca8k_split_addr(reg, &r1, &r2, &page); mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED); - ret = qca8k_set_page(bus, page); + ret = qca8k_set_page(priv, page); if (ret < 0) goto exit; - qca8k_mii_write32(bus, 0x10 | r2, r1, val); + qca8k_mii_write32(priv, 0x10 | r2, r1, val); exit: mutex_unlock(&bus->mdio_lock); @@ -225,11 +509,14 @@ qca8k_regmap_update_bits(void *ctx, uint32_t reg, uint32_t mask, uint32_t write_ u32 val; int ret; + if (!qca8k_regmap_update_bits_eth(priv, reg, mask, write_val)) + return 0; + qca8k_split_addr(reg, &r1, &r2, &page); mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED); - ret = qca8k_set_page(bus, page); + ret = qca8k_set_page(priv, page); if (ret < 0) goto exit; @@ -239,7 +526,7 @@ qca8k_regmap_update_bits(void *ctx, uint32_t reg, uint32_t mask, uint32_t write_ val &= ~mask; val |= write_val; - qca8k_mii_write32(bus, 0x10 | r2, r1, val); + qca8k_mii_write32(priv, 0x10 | r2, r1, val); exit: mutex_unlock(&bus->mdio_lock); @@ -296,17 +583,13 @@ qca8k_busy_wait(struct qca8k_priv *priv, u32 reg, u32 mask) static int qca8k_fdb_read(struct qca8k_priv *priv, struct qca8k_fdb *fdb) { - u32 reg[4], val; - int i, ret; + u32 reg[3]; + int ret; /* load the ARL table into an array */ - for (i = 0; i < 4; i++) { - ret = qca8k_read(priv, QCA8K_REG_ATU_DATA0 + (i * 4), &val); - if (ret < 0) - return ret; - - reg[i] = val; - } + ret = qca8k_bulk_read(priv, QCA8K_REG_ATU_DATA0, reg, sizeof(reg)); + if (ret) + return ret; /* vid - 83:72 */ fdb->vid = FIELD_GET(QCA8K_ATU_VID_MASK, reg[2]); @@ -330,7 +613,6 @@ qca8k_fdb_write(struct qca8k_priv *priv, u16 vid, u8 port_mask, const u8 *mac, u8 aging) { u32 reg[3] = { 0 }; - int i; /* vid - 83:72 */ reg[2] = FIELD_PREP(QCA8K_ATU_VID_MASK, vid); @@ -347,8 +629,7 @@ qca8k_fdb_write(struct qca8k_priv *priv, u16 vid, u8 port_mask, const u8 *mac, reg[0] |= FIELD_PREP(QCA8K_ATU_ADDR5_MASK, mac[5]); /* load the array into the ARL table */ - for (i = 0; i < 3; i++) - qca8k_write(priv, QCA8K_REG_ATU_DATA0 + (i * 4), reg[i]); + qca8k_bulk_write(priv, QCA8K_REG_ATU_DATA0, reg, sizeof(reg)); } static int @@ -632,7 +913,10 @@ qca8k_mib_init(struct qca8k_priv *priv) int ret; mutex_lock(&priv->reg_mutex); - ret = regmap_set_bits(priv->regmap, QCA8K_REG_MIB, QCA8K_MIB_FLUSH | QCA8K_MIB_BUSY); + ret = regmap_update_bits(priv->regmap, QCA8K_REG_MIB, + QCA8K_MIB_FUNC | QCA8K_MIB_BUSY, + FIELD_PREP(QCA8K_MIB_FUNC, QCA8K_MIB_FLUSH) | + QCA8K_MIB_BUSY); if (ret) goto exit; @@ -666,6 +950,199 @@ qca8k_port_set_status(struct qca8k_priv *priv, int port, int enable) regmap_clear_bits(priv->regmap, QCA8K_REG_PORT_STATUS(port), mask); } +static int +qca8k_phy_eth_busy_wait(struct qca8k_mgmt_eth_data *mgmt_eth_data, + struct sk_buff *read_skb, u32 *val) +{ + struct sk_buff *skb = skb_copy(read_skb, GFP_KERNEL); + bool ack; + int ret; + + reinit_completion(&mgmt_eth_data->rw_done); + + /* Increment seq_num and set it in the copy pkt */ + mgmt_eth_data->seq++; + qca8k_mdio_header_fill_seq_num(skb, mgmt_eth_data->seq); + mgmt_eth_data->ack = false; + + dev_queue_xmit(skb); + + ret = wait_for_completion_timeout(&mgmt_eth_data->rw_done, + QCA8K_ETHERNET_TIMEOUT); + + ack = mgmt_eth_data->ack; + + if (ret <= 0) + return -ETIMEDOUT; + + if (!ack) + return -EINVAL; + + *val = mgmt_eth_data->data[0]; + + return 0; +} + +static int +qca8k_phy_eth_command(struct qca8k_priv *priv, bool read, int phy, + int regnum, u16 data) +{ + struct sk_buff *write_skb, *clear_skb, *read_skb; + struct qca8k_mgmt_eth_data *mgmt_eth_data; + u32 write_val, clear_val = 0, val; + struct net_device *mgmt_master; + int ret, ret1; + bool ack; + + if (regnum >= QCA8K_MDIO_MASTER_MAX_REG) + return -EINVAL; + + mgmt_eth_data = &priv->mgmt_eth_data; + + write_val = QCA8K_MDIO_MASTER_BUSY | QCA8K_MDIO_MASTER_EN | + QCA8K_MDIO_MASTER_PHY_ADDR(phy) | + QCA8K_MDIO_MASTER_REG_ADDR(regnum); + + if (read) { + write_val |= QCA8K_MDIO_MASTER_READ; + } else { + write_val |= QCA8K_MDIO_MASTER_WRITE; + write_val |= QCA8K_MDIO_MASTER_DATA(data); + } + + /* Prealloc all the needed skb before the lock */ + write_skb = qca8k_alloc_mdio_header(MDIO_WRITE, QCA8K_MDIO_MASTER_CTRL, &write_val, + QCA8K_ETHERNET_PHY_PRIORITY, sizeof(write_val)); + if (!write_skb) + return -ENOMEM; + + clear_skb = qca8k_alloc_mdio_header(MDIO_WRITE, QCA8K_MDIO_MASTER_CTRL, &clear_val, + QCA8K_ETHERNET_PHY_PRIORITY, sizeof(clear_val)); + if (!write_skb) { + ret = -ENOMEM; + goto err_clear_skb; + } + + read_skb = qca8k_alloc_mdio_header(MDIO_READ, QCA8K_MDIO_MASTER_CTRL, &clear_val, + QCA8K_ETHERNET_PHY_PRIORITY, sizeof(clear_val)); + if (!read_skb) { + ret = -ENOMEM; + goto err_read_skb; + } + + /* Actually start the request: + * 1. Send mdio master packet + * 2. Busy Wait for mdio master command + * 3. Get the data if we are reading + * 4. Reset the mdio master (even with error) + */ + mutex_lock(&mgmt_eth_data->mutex); + + /* Check if mgmt_master is operational */ + mgmt_master = priv->mgmt_master; + if (!mgmt_master) { + mutex_unlock(&mgmt_eth_data->mutex); + ret = -EINVAL; + goto err_mgmt_master; + } + + read_skb->dev = mgmt_master; + clear_skb->dev = mgmt_master; + write_skb->dev = mgmt_master; + + reinit_completion(&mgmt_eth_data->rw_done); + + /* Increment seq_num and set it in the write pkt */ + mgmt_eth_data->seq++; + qca8k_mdio_header_fill_seq_num(write_skb, mgmt_eth_data->seq); + mgmt_eth_data->ack = false; + + dev_queue_xmit(write_skb); + + ret = wait_for_completion_timeout(&mgmt_eth_data->rw_done, + QCA8K_ETHERNET_TIMEOUT); + + ack = mgmt_eth_data->ack; + + if (ret <= 0) { + ret = -ETIMEDOUT; + kfree_skb(read_skb); + goto exit; + } + + if (!ack) { + ret = -EINVAL; + kfree_skb(read_skb); + goto exit; + } + + ret = read_poll_timeout(qca8k_phy_eth_busy_wait, ret1, + !(val & QCA8K_MDIO_MASTER_BUSY), 0, + QCA8K_BUSY_WAIT_TIMEOUT * USEC_PER_MSEC, false, + mgmt_eth_data, read_skb, &val); + + if (ret < 0 && ret1 < 0) { + ret = ret1; + goto exit; + } + + if (read) { + reinit_completion(&mgmt_eth_data->rw_done); + + /* Increment seq_num and set it in the read pkt */ + mgmt_eth_data->seq++; + qca8k_mdio_header_fill_seq_num(read_skb, mgmt_eth_data->seq); + mgmt_eth_data->ack = false; + + dev_queue_xmit(read_skb); + + ret = wait_for_completion_timeout(&mgmt_eth_data->rw_done, + QCA8K_ETHERNET_TIMEOUT); + + ack = mgmt_eth_data->ack; + + if (ret <= 0) { + ret = -ETIMEDOUT; + goto exit; + } + + if (!ack) { + ret = -EINVAL; + goto exit; + } + + ret = mgmt_eth_data->data[0] & QCA8K_MDIO_MASTER_DATA_MASK; + } else { + kfree_skb(read_skb); + } +exit: + reinit_completion(&mgmt_eth_data->rw_done); + + /* Increment seq_num and set it in the clear pkt */ + mgmt_eth_data->seq++; + qca8k_mdio_header_fill_seq_num(clear_skb, mgmt_eth_data->seq); + mgmt_eth_data->ack = false; + + dev_queue_xmit(clear_skb); + + wait_for_completion_timeout(&mgmt_eth_data->rw_done, + QCA8K_ETHERNET_TIMEOUT); + + mutex_unlock(&mgmt_eth_data->mutex); + + return ret; + + /* Error handling before lock */ +err_mgmt_master: + kfree_skb(read_skb); +err_read_skb: + kfree_skb(clear_skb); +err_clear_skb: + kfree_skb(write_skb); + + return ret; +} + static u32 qca8k_port_to_phy(int port) { @@ -704,8 +1181,9 @@ qca8k_mdio_busy_wait(struct mii_bus *bus, u32 reg, u32 mask) } static int -qca8k_mdio_write(struct mii_bus *bus, int phy, int regnum, u16 data) +qca8k_mdio_write(struct qca8k_priv *priv, int phy, int regnum, u16 data) { + struct mii_bus *bus = priv->bus; u16 r1, r2, page; u32 val; int ret; @@ -722,18 +1200,18 @@ qca8k_mdio_write(struct mii_bus *bus, int phy, int regnum, u16 data) mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED); - ret = qca8k_set_page(bus, page); + ret = qca8k_set_page(priv, page); if (ret) goto exit; - qca8k_mii_write32(bus, 0x10 | r2, r1, val); + qca8k_mii_write32(priv, 0x10 | r2, r1, val); ret = qca8k_mdio_busy_wait(bus, QCA8K_MDIO_MASTER_CTRL, QCA8K_MDIO_MASTER_BUSY); exit: /* even if the busy_wait timeouts try to clear the MASTER_EN */ - qca8k_mii_write32(bus, 0x10 | r2, r1, 0); + qca8k_mii_write32(priv, 0x10 | r2, r1, 0); mutex_unlock(&bus->mdio_lock); @@ -741,8 +1219,9 @@ exit: } static int -qca8k_mdio_read(struct mii_bus *bus, int phy, int regnum) +qca8k_mdio_read(struct qca8k_priv *priv, int phy, int regnum) { + struct mii_bus *bus = priv->bus; u16 r1, r2, page; u32 val; int ret; @@ -758,11 +1237,11 @@ qca8k_mdio_read(struct mii_bus *bus, int phy, int regnum) mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED); - ret = qca8k_set_page(bus, page); + ret = qca8k_set_page(priv, page); if (ret) goto exit; - qca8k_mii_write32(bus, 0x10 | r2, r1, val); + qca8k_mii_write32(priv, 0x10 | r2, r1, val); ret = qca8k_mdio_busy_wait(bus, QCA8K_MDIO_MASTER_CTRL, QCA8K_MDIO_MASTER_BUSY); @@ -773,7 +1252,7 @@ qca8k_mdio_read(struct mii_bus *bus, int phy, int regnum) exit: /* even if the busy_wait timeouts try to clear the MASTER_EN */ - qca8k_mii_write32(bus, 0x10 | r2, r1, 0); + qca8k_mii_write32(priv, 0x10 | r2, r1, 0); mutex_unlock(&bus->mdio_lock); @@ -787,24 +1266,35 @@ static int qca8k_internal_mdio_write(struct mii_bus *slave_bus, int phy, int regnum, u16 data) { struct qca8k_priv *priv = slave_bus->priv; - struct mii_bus *bus = priv->bus; + int ret; - return qca8k_mdio_write(bus, phy, regnum, data); + /* Use mdio Ethernet when available, fallback to legacy one on error */ + ret = qca8k_phy_eth_command(priv, false, phy, regnum, data); + if (!ret) + return 0; + + return qca8k_mdio_write(priv, phy, regnum, data); } static int qca8k_internal_mdio_read(struct mii_bus *slave_bus, int phy, int regnum) { struct qca8k_priv *priv = slave_bus->priv; - struct mii_bus *bus = priv->bus; + int ret; - return qca8k_mdio_read(bus, phy, regnum); + /* Use mdio Ethernet when available, fallback to legacy one on error */ + ret = qca8k_phy_eth_command(priv, true, phy, regnum, 0); + if (ret >= 0) + return ret; + + return qca8k_mdio_read(priv, phy, regnum); } static int qca8k_phy_write(struct dsa_switch *ds, int port, int regnum, u16 data) { struct qca8k_priv *priv = ds->priv; + int ret; /* Check if the legacy mapping should be used and the * port is not correctly mapped to the right PHY in the @@ -813,7 +1303,12 @@ qca8k_phy_write(struct dsa_switch *ds, int port, int regnum, u16 data) if (priv->legacy_phy_port_mapping) port = qca8k_port_to_phy(port) % PHY_MAX_ADDR; - return qca8k_mdio_write(priv->bus, port, regnum, data); + /* Use mdio Ethernet when available, fallback to legacy one on error */ + ret = qca8k_phy_eth_command(priv, false, port, regnum, 0); + if (!ret) + return ret; + + return qca8k_mdio_write(priv, port, regnum, data); } static int @@ -829,7 +1324,12 @@ qca8k_phy_read(struct dsa_switch *ds, int port, int regnum) if (priv->legacy_phy_port_mapping) port = qca8k_port_to_phy(port) % PHY_MAX_ADDR; - ret = qca8k_mdio_read(priv->bus, port, regnum); + /* Use mdio Ethernet when available, fallback to legacy one on error */ + ret = qca8k_phy_eth_command(priv, true, port, regnum, 0); + if (ret >= 0) + return ret; + + ret = qca8k_mdio_read(priv, port, regnum); if (ret < 0) return 0xffff; @@ -1531,67 +2031,39 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, } } -static void -qca8k_phylink_validate(struct dsa_switch *ds, int port, - unsigned long *supported, - struct phylink_link_state *state) +static void qca8k_phylink_get_caps(struct dsa_switch *ds, int port, + struct phylink_config *config) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; - switch (port) { case 0: /* 1st CPU port */ - if (state->interface != PHY_INTERFACE_MODE_NA && - state->interface != PHY_INTERFACE_MODE_RGMII && - state->interface != PHY_INTERFACE_MODE_RGMII_ID && - state->interface != PHY_INTERFACE_MODE_RGMII_TXID && - state->interface != PHY_INTERFACE_MODE_RGMII_RXID && - state->interface != PHY_INTERFACE_MODE_SGMII) - goto unsupported; + phy_interface_set_rgmii(config->supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_SGMII, + config->supported_interfaces); break; + case 1: case 2: case 3: case 4: case 5: /* Internal PHY */ - if (state->interface != PHY_INTERFACE_MODE_NA && - state->interface != PHY_INTERFACE_MODE_GMII && - state->interface != PHY_INTERFACE_MODE_INTERNAL) - goto unsupported; + __set_bit(PHY_INTERFACE_MODE_GMII, + config->supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_INTERNAL, + config->supported_interfaces); break; + case 6: /* 2nd CPU port / external PHY */ - if (state->interface != PHY_INTERFACE_MODE_NA && - state->interface != PHY_INTERFACE_MODE_RGMII && - state->interface != PHY_INTERFACE_MODE_RGMII_ID && - state->interface != PHY_INTERFACE_MODE_RGMII_TXID && - state->interface != PHY_INTERFACE_MODE_RGMII_RXID && - state->interface != PHY_INTERFACE_MODE_SGMII && - state->interface != PHY_INTERFACE_MODE_1000BASEX) - goto unsupported; + phy_interface_set_rgmii(config->supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_SGMII, + config->supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_1000BASEX, + config->supported_interfaces); break; - default: -unsupported: - linkmode_zero(supported); - return; } - phylink_set_port_modes(mask); - phylink_set(mask, Autoneg); - - phylink_set(mask, 1000baseT_Full); - phylink_set(mask, 10baseT_Half); - phylink_set(mask, 10baseT_Full); - phylink_set(mask, 100baseT_Half); - phylink_set(mask, 100baseT_Full); - - if (state->interface == PHY_INTERFACE_MODE_1000BASEX) - phylink_set(mask, 1000baseX_Full); - - phylink_set(mask, Pause); - phylink_set(mask, Asym_Pause); - - linkmode_and(supported, supported, mask); - linkmode_and(state->advertising, state->advertising, mask); + config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_10 | MAC_100 | MAC_1000FD; } static int @@ -1703,6 +2175,97 @@ qca8k_get_strings(struct dsa_switch *ds, int port, u32 stringset, uint8_t *data) ETH_GSTRING_LEN); } +static void qca8k_mib_autocast_handler(struct dsa_switch *ds, struct sk_buff *skb) +{ + const struct qca8k_match_data *match_data; + struct qca8k_mib_eth_data *mib_eth_data; + struct qca8k_priv *priv = ds->priv; + const struct qca8k_mib_desc *mib; + struct mib_ethhdr *mib_ethhdr; + int i, mib_len, offset = 0; + u64 *data; + u8 port; + + mib_ethhdr = (struct mib_ethhdr *)skb_mac_header(skb); + mib_eth_data = &priv->mib_eth_data; + + /* The switch autocast every port. Ignore other packet and + * parse only the requested one. + */ + port = FIELD_GET(QCA_HDR_RECV_SOURCE_PORT, ntohs(mib_ethhdr->hdr)); + if (port != mib_eth_data->req_port) + goto exit; + + match_data = device_get_match_data(priv->dev); + data = mib_eth_data->data; + + for (i = 0; i < match_data->mib_count; i++) { + mib = &ar8327_mib[i]; + + /* First 3 mib are present in the skb head */ + if (i < 3) { + data[i] = mib_ethhdr->data[i]; + continue; + } + + mib_len = sizeof(uint32_t); + + /* Some mib are 64 bit wide */ + if (mib->size == 2) + mib_len = sizeof(uint64_t); + + /* Copy the mib value from packet to the */ + memcpy(data + i, skb->data + offset, mib_len); + + /* Set the offset for the next mib */ + offset += mib_len; + } + +exit: + /* Complete on receiving all the mib packet */ + if (refcount_dec_and_test(&mib_eth_data->port_parsed)) + complete(&mib_eth_data->rw_done); +} + +static int +qca8k_get_ethtool_stats_eth(struct dsa_switch *ds, int port, u64 *data) +{ + struct dsa_port *dp = dsa_to_port(ds, port); + struct qca8k_mib_eth_data *mib_eth_data; + struct qca8k_priv *priv = ds->priv; + int ret; + + mib_eth_data = &priv->mib_eth_data; + + mutex_lock(&mib_eth_data->mutex); + + reinit_completion(&mib_eth_data->rw_done); + + mib_eth_data->req_port = dp->index; + mib_eth_data->data = data; + refcount_set(&mib_eth_data->port_parsed, QCA8K_NUM_PORTS); + + mutex_lock(&priv->reg_mutex); + + /* Send mib autocast request */ + ret = regmap_update_bits(priv->regmap, QCA8K_REG_MIB, + QCA8K_MIB_FUNC | QCA8K_MIB_BUSY, + FIELD_PREP(QCA8K_MIB_FUNC, QCA8K_MIB_CAST) | + QCA8K_MIB_BUSY); + + mutex_unlock(&priv->reg_mutex); + + if (ret) + goto exit; + + ret = wait_for_completion_timeout(&mib_eth_data->rw_done, QCA8K_ETHERNET_TIMEOUT); + +exit: + mutex_unlock(&mib_eth_data->mutex); + + return ret; +} + static void qca8k_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data) @@ -1714,6 +2277,10 @@ qca8k_get_ethtool_stats(struct dsa_switch *ds, int port, u32 hi = 0; int ret; + if (priv->mgmt_master && + qca8k_get_ethtool_stats_eth(ds, port, data) > 0) + return; + match_data = of_device_get_match_data(priv->dev); for (i = 0; i < match_data->mib_count; i++) { @@ -2383,6 +2950,46 @@ qca8k_port_lag_leave(struct dsa_switch *ds, int port, return qca8k_lag_refresh_portmap(ds, port, lag, true); } +static void +qca8k_master_change(struct dsa_switch *ds, const struct net_device *master, + bool operational) +{ + struct dsa_port *dp = master->dsa_ptr; + struct qca8k_priv *priv = ds->priv; + + /* Ethernet MIB/MDIO is only supported for CPU port 0 */ + if (dp->index != 0) + return; + + mutex_lock(&priv->mgmt_eth_data.mutex); + mutex_lock(&priv->mib_eth_data.mutex); + + priv->mgmt_master = operational ? (struct net_device *)master : NULL; + + mutex_unlock(&priv->mib_eth_data.mutex); + mutex_unlock(&priv->mgmt_eth_data.mutex); +} + +static int qca8k_connect_tag_protocol(struct dsa_switch *ds, + enum dsa_tag_protocol proto) +{ + struct qca_tagger_data *tagger_data; + + switch (proto) { + case DSA_TAG_PROTO_QCA: + tagger_data = ds->tagger_data; + + tagger_data->rw_reg_ack_handler = qca8k_rw_reg_ack_handler; + tagger_data->mib_autocast_handler = qca8k_mib_autocast_handler; + + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + static const struct dsa_switch_ops qca8k_switch_ops = { .get_tag_protocol = qca8k_get_tag_protocol, .setup = qca8k_setup, @@ -2410,7 +3017,7 @@ static const struct dsa_switch_ops qca8k_switch_ops = { .port_vlan_filtering = qca8k_port_vlan_filtering, .port_vlan_add = qca8k_port_vlan_add, .port_vlan_del = qca8k_port_vlan_del, - .phylink_validate = qca8k_phylink_validate, + .phylink_get_caps = qca8k_phylink_get_caps, .phylink_mac_link_state = qca8k_phylink_mac_link_state, .phylink_mac_config = qca8k_phylink_mac_config, .phylink_mac_link_down = qca8k_phylink_mac_link_down, @@ -2418,6 +3025,8 @@ static const struct dsa_switch_ops qca8k_switch_ops = { .get_phy_flags = qca8k_get_phy_flags, .port_lag_join = qca8k_port_lag_join, .port_lag_leave = qca8k_port_lag_leave, + .master_state_change = qca8k_master_change, + .connect_tag_protocol = qca8k_connect_tag_protocol, }; static int qca8k_read_switch_id(struct qca8k_priv *priv) @@ -2488,6 +3097,10 @@ qca8k_sw_probe(struct mdio_device *mdiodev) return PTR_ERR(priv->regmap); } + priv->mdio_cache.page = 0xffff; + priv->mdio_cache.lo = 0xffff; + priv->mdio_cache.hi = 0xffff; + /* Check the detected switch id */ ret = qca8k_read_switch_id(priv); if (ret) @@ -2497,6 +3110,12 @@ qca8k_sw_probe(struct mdio_device *mdiodev) if (!priv->ds) return -ENOMEM; + mutex_init(&priv->mgmt_eth_data.mutex); + init_completion(&priv->mgmt_eth_data.rw_done); + + mutex_init(&priv->mib_eth_data.mutex); + init_completion(&priv->mib_eth_data.rw_done); + priv->ds->dev = &mdiodev->dev; priv->ds->num_ports = QCA8K_NUM_PORTS; priv->ds->priv = priv; diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h index ab4a417b25a9..c3d3c2269b1d 100644 --- a/drivers/net/dsa/qca8k.h +++ b/drivers/net/dsa/qca8k.h @@ -11,6 +11,11 @@ #include <linux/delay.h> #include <linux/regmap.h> #include <linux/gpio.h> +#include <linux/dsa/tag_qca.h> + +#define QCA8K_ETHERNET_MDIO_PRIORITY 7 +#define QCA8K_ETHERNET_PHY_PRIORITY 6 +#define QCA8K_ETHERNET_TIMEOUT 100 #define QCA8K_NUM_PORTS 7 #define QCA8K_NUM_CPU_PORTS 2 @@ -63,7 +68,7 @@ #define QCA8K_REG_MODULE_EN 0x030 #define QCA8K_MODULE_EN_MIB BIT(0) #define QCA8K_REG_MIB 0x034 -#define QCA8K_MIB_FLUSH BIT(24) +#define QCA8K_MIB_FUNC GENMASK(26, 24) #define QCA8K_MIB_CPU_KEEP BIT(20) #define QCA8K_MIB_BUSY BIT(17) #define QCA8K_MDIO_MASTER_CTRL 0x3c @@ -313,6 +318,12 @@ enum qca8k_vlan_cmd { QCA8K_VLAN_READ = 6, }; +enum qca8k_mid_cmd { + QCA8K_MIB_FLUSH = 1, + QCA8K_MIB_FLUSH_PORT = 2, + QCA8K_MIB_CAST = 3, +}; + struct ar8xxx_port_status { int enabled; }; @@ -328,6 +339,22 @@ enum { QCA8K_CPU_PORT6, }; +struct qca8k_mgmt_eth_data { + struct completion rw_done; + struct mutex mutex; /* Enforce one mdio read/write at time */ + bool ack; + u32 seq; + u32 data[4]; +}; + +struct qca8k_mib_eth_data { + struct completion rw_done; + struct mutex mutex; /* Process one command at time */ + refcount_t port_parsed; /* Counter to track parsed port */ + u8 req_port; + u64 *data; /* pointer to ethtool data */ +}; + struct qca8k_ports_config { bool sgmii_rx_clk_falling_edge; bool sgmii_tx_clk_falling_edge; @@ -336,6 +363,19 @@ struct qca8k_ports_config { u8 rgmii_tx_delay[QCA8K_NUM_CPU_PORTS]; /* 0: CPU port0, 1: CPU port6 */ }; +struct qca8k_mdio_cache { +/* The 32bit switch registers are accessed indirectly. To achieve this we need + * to set the page of the register. Track the last page that was set to reduce + * mdio writes + */ + u16 page; +/* lo and hi can also be cached and from Documentation we can skip one + * extra mdio write if lo or hi is didn't change. + */ + u16 lo; + u16 hi; +}; + struct qca8k_priv { u8 switch_id; u8 switch_revision; @@ -353,6 +393,10 @@ struct qca8k_priv { struct dsa_switch_ops ops; struct gpio_desc *reset_gpio; unsigned int port_mtu[QCA8K_NUM_PORTS]; + struct net_device *mgmt_master; /* Track if mdio/mib Ethernet is available */ + struct qca8k_mgmt_eth_data mgmt_eth_data; + struct qca8k_mib_eth_data mib_eth_data; + struct qca8k_mdio_cache mdio_cache; }; struct qca8k_mib_desc { diff --git a/drivers/net/dsa/realtek-smi-core.c b/drivers/net/dsa/realtek-smi-core.c deleted file mode 100644 index aae46ada8d83..000000000000 --- a/drivers/net/dsa/realtek-smi-core.c +++ /dev/null @@ -1,523 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* Realtek Simple Management Interface (SMI) driver - * It can be discussed how "simple" this interface is. - * - * The SMI protocol piggy-backs the MDIO MDC and MDIO signals levels - * but the protocol is not MDIO at all. Instead it is a Realtek - * pecularity that need to bit-bang the lines in a special way to - * communicate with the switch. - * - * ASICs we intend to support with this driver: - * - * RTL8366 - The original version, apparently - * RTL8369 - Similar enough to have the same datsheet as RTL8366 - * RTL8366RB - Probably reads out "RTL8366 revision B", has a quite - * different register layout from the other two - * RTL8366S - Is this "RTL8366 super"? - * RTL8367 - Has an OpenWRT driver as well - * RTL8368S - Seems to be an alternative name for RTL8366RB - * RTL8370 - Also uses SMI - * - * Copyright (C) 2017 Linus Walleij <linus.walleij@linaro.org> - * Copyright (C) 2010 Antti Seppälä <a.seppala@gmail.com> - * Copyright (C) 2010 Roman Yeryomin <roman@advem.lv> - * Copyright (C) 2011 Colin Leitner <colin.leitner@googlemail.com> - * Copyright (C) 2009-2010 Gabor Juhos <juhosg@openwrt.org> - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/device.h> -#include <linux/spinlock.h> -#include <linux/skbuff.h> -#include <linux/of.h> -#include <linux/of_device.h> -#include <linux/of_mdio.h> -#include <linux/delay.h> -#include <linux/gpio/consumer.h> -#include <linux/platform_device.h> -#include <linux/regmap.h> -#include <linux/bitops.h> -#include <linux/if_bridge.h> - -#include "realtek-smi-core.h" - -#define REALTEK_SMI_ACK_RETRY_COUNT 5 -#define REALTEK_SMI_HW_STOP_DELAY 25 /* msecs */ -#define REALTEK_SMI_HW_START_DELAY 100 /* msecs */ - -static inline void realtek_smi_clk_delay(struct realtek_smi *smi) -{ - ndelay(smi->clk_delay); -} - -static void realtek_smi_start(struct realtek_smi *smi) -{ - /* Set GPIO pins to output mode, with initial state: - * SCK = 0, SDA = 1 - */ - gpiod_direction_output(smi->mdc, 0); - gpiod_direction_output(smi->mdio, 1); - realtek_smi_clk_delay(smi); - - /* CLK 1: 0 -> 1, 1 -> 0 */ - gpiod_set_value(smi->mdc, 1); - realtek_smi_clk_delay(smi); - gpiod_set_value(smi->mdc, 0); - realtek_smi_clk_delay(smi); - - /* CLK 2: */ - gpiod_set_value(smi->mdc, 1); - realtek_smi_clk_delay(smi); - gpiod_set_value(smi->mdio, 0); - realtek_smi_clk_delay(smi); - gpiod_set_value(smi->mdc, 0); - realtek_smi_clk_delay(smi); - gpiod_set_value(smi->mdio, 1); -} - -static void realtek_smi_stop(struct realtek_smi *smi) -{ - realtek_smi_clk_delay(smi); - gpiod_set_value(smi->mdio, 0); - gpiod_set_value(smi->mdc, 1); - realtek_smi_clk_delay(smi); - gpiod_set_value(smi->mdio, 1); - realtek_smi_clk_delay(smi); - gpiod_set_value(smi->mdc, 1); - realtek_smi_clk_delay(smi); - gpiod_set_value(smi->mdc, 0); - realtek_smi_clk_delay(smi); - gpiod_set_value(smi->mdc, 1); - - /* Add a click */ - realtek_smi_clk_delay(smi); - gpiod_set_value(smi->mdc, 0); - realtek_smi_clk_delay(smi); - gpiod_set_value(smi->mdc, 1); - - /* Set GPIO pins to input mode */ - gpiod_direction_input(smi->mdio); - gpiod_direction_input(smi->mdc); -} - -static void realtek_smi_write_bits(struct realtek_smi *smi, u32 data, u32 len) -{ - for (; len > 0; len--) { - realtek_smi_clk_delay(smi); - - /* Prepare data */ - gpiod_set_value(smi->mdio, !!(data & (1 << (len - 1)))); - realtek_smi_clk_delay(smi); - - /* Clocking */ - gpiod_set_value(smi->mdc, 1); - realtek_smi_clk_delay(smi); - gpiod_set_value(smi->mdc, 0); - } -} - -static void realtek_smi_read_bits(struct realtek_smi *smi, u32 len, u32 *data) -{ - gpiod_direction_input(smi->mdio); - - for (*data = 0; len > 0; len--) { - u32 u; - - realtek_smi_clk_delay(smi); - - /* Clocking */ - gpiod_set_value(smi->mdc, 1); - realtek_smi_clk_delay(smi); - u = !!gpiod_get_value(smi->mdio); - gpiod_set_value(smi->mdc, 0); - - *data |= (u << (len - 1)); - } - - gpiod_direction_output(smi->mdio, 0); -} - -static int realtek_smi_wait_for_ack(struct realtek_smi *smi) -{ - int retry_cnt; - - retry_cnt = 0; - do { - u32 ack; - - realtek_smi_read_bits(smi, 1, &ack); - if (ack == 0) - break; - - if (++retry_cnt > REALTEK_SMI_ACK_RETRY_COUNT) { - dev_err(smi->dev, "ACK timeout\n"); - return -ETIMEDOUT; - } - } while (1); - - return 0; -} - -static int realtek_smi_write_byte(struct realtek_smi *smi, u8 data) -{ - realtek_smi_write_bits(smi, data, 8); - return realtek_smi_wait_for_ack(smi); -} - -static int realtek_smi_write_byte_noack(struct realtek_smi *smi, u8 data) -{ - realtek_smi_write_bits(smi, data, 8); - return 0; -} - -static int realtek_smi_read_byte0(struct realtek_smi *smi, u8 *data) -{ - u32 t; - - /* Read data */ - realtek_smi_read_bits(smi, 8, &t); - *data = (t & 0xff); - - /* Send an ACK */ - realtek_smi_write_bits(smi, 0x00, 1); - - return 0; -} - -static int realtek_smi_read_byte1(struct realtek_smi *smi, u8 *data) -{ - u32 t; - - /* Read data */ - realtek_smi_read_bits(smi, 8, &t); - *data = (t & 0xff); - - /* Send an ACK */ - realtek_smi_write_bits(smi, 0x01, 1); - - return 0; -} - -static int realtek_smi_read_reg(struct realtek_smi *smi, u32 addr, u32 *data) -{ - unsigned long flags; - u8 lo = 0; - u8 hi = 0; - int ret; - - spin_lock_irqsave(&smi->lock, flags); - - realtek_smi_start(smi); - - /* Send READ command */ - ret = realtek_smi_write_byte(smi, smi->cmd_read); - if (ret) - goto out; - - /* Set ADDR[7:0] */ - ret = realtek_smi_write_byte(smi, addr & 0xff); - if (ret) - goto out; - - /* Set ADDR[15:8] */ - ret = realtek_smi_write_byte(smi, addr >> 8); - if (ret) - goto out; - - /* Read DATA[7:0] */ - realtek_smi_read_byte0(smi, &lo); - /* Read DATA[15:8] */ - realtek_smi_read_byte1(smi, &hi); - - *data = ((u32)lo) | (((u32)hi) << 8); - - ret = 0; - - out: - realtek_smi_stop(smi); - spin_unlock_irqrestore(&smi->lock, flags); - - return ret; -} - -static int realtek_smi_write_reg(struct realtek_smi *smi, - u32 addr, u32 data, bool ack) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(&smi->lock, flags); - - realtek_smi_start(smi); - - /* Send WRITE command */ - ret = realtek_smi_write_byte(smi, smi->cmd_write); - if (ret) - goto out; - - /* Set ADDR[7:0] */ - ret = realtek_smi_write_byte(smi, addr & 0xff); - if (ret) - goto out; - - /* Set ADDR[15:8] */ - ret = realtek_smi_write_byte(smi, addr >> 8); - if (ret) - goto out; - - /* Write DATA[7:0] */ - ret = realtek_smi_write_byte(smi, data & 0xff); - if (ret) - goto out; - - /* Write DATA[15:8] */ - if (ack) - ret = realtek_smi_write_byte(smi, data >> 8); - else - ret = realtek_smi_write_byte_noack(smi, data >> 8); - if (ret) - goto out; - - ret = 0; - - out: - realtek_smi_stop(smi); - spin_unlock_irqrestore(&smi->lock, flags); - - return ret; -} - -/* There is one single case when we need to use this accessor and that - * is when issueing soft reset. Since the device reset as soon as we write - * that bit, no ACK will come back for natural reasons. - */ -int realtek_smi_write_reg_noack(struct realtek_smi *smi, u32 addr, - u32 data) -{ - return realtek_smi_write_reg(smi, addr, data, false); -} -EXPORT_SYMBOL_GPL(realtek_smi_write_reg_noack); - -/* Regmap accessors */ - -static int realtek_smi_write(void *ctx, u32 reg, u32 val) -{ - struct realtek_smi *smi = ctx; - - return realtek_smi_write_reg(smi, reg, val, true); -} - -static int realtek_smi_read(void *ctx, u32 reg, u32 *val) -{ - struct realtek_smi *smi = ctx; - - return realtek_smi_read_reg(smi, reg, val); -} - -static const struct regmap_config realtek_smi_mdio_regmap_config = { - .reg_bits = 10, /* A4..A0 R4..R0 */ - .val_bits = 16, - .reg_stride = 1, - /* PHY regs are at 0x8000 */ - .max_register = 0xffff, - .reg_format_endian = REGMAP_ENDIAN_BIG, - .reg_read = realtek_smi_read, - .reg_write = realtek_smi_write, - .cache_type = REGCACHE_NONE, -}; - -static int realtek_smi_mdio_read(struct mii_bus *bus, int addr, int regnum) -{ - struct realtek_smi *smi = bus->priv; - - return smi->ops->phy_read(smi, addr, regnum); -} - -static int realtek_smi_mdio_write(struct mii_bus *bus, int addr, int regnum, - u16 val) -{ - struct realtek_smi *smi = bus->priv; - - return smi->ops->phy_write(smi, addr, regnum, val); -} - -int realtek_smi_setup_mdio(struct realtek_smi *smi) -{ - struct device_node *mdio_np; - int ret; - - mdio_np = of_get_compatible_child(smi->dev->of_node, "realtek,smi-mdio"); - if (!mdio_np) { - dev_err(smi->dev, "no MDIO bus node\n"); - return -ENODEV; - } - - smi->slave_mii_bus = devm_mdiobus_alloc(smi->dev); - if (!smi->slave_mii_bus) { - ret = -ENOMEM; - goto err_put_node; - } - smi->slave_mii_bus->priv = smi; - smi->slave_mii_bus->name = "SMI slave MII"; - smi->slave_mii_bus->read = realtek_smi_mdio_read; - smi->slave_mii_bus->write = realtek_smi_mdio_write; - snprintf(smi->slave_mii_bus->id, MII_BUS_ID_SIZE, "SMI-%d", - smi->ds->index); - smi->slave_mii_bus->dev.of_node = mdio_np; - smi->slave_mii_bus->parent = smi->dev; - smi->ds->slave_mii_bus = smi->slave_mii_bus; - - ret = devm_of_mdiobus_register(smi->dev, smi->slave_mii_bus, mdio_np); - if (ret) { - dev_err(smi->dev, "unable to register MDIO bus %s\n", - smi->slave_mii_bus->id); - goto err_put_node; - } - - return 0; - -err_put_node: - of_node_put(mdio_np); - - return ret; -} - -static int realtek_smi_probe(struct platform_device *pdev) -{ - const struct realtek_smi_variant *var; - struct device *dev = &pdev->dev; - struct realtek_smi *smi; - struct device_node *np; - int ret; - - var = of_device_get_match_data(dev); - np = dev->of_node; - - smi = devm_kzalloc(dev, sizeof(*smi) + var->chip_data_sz, GFP_KERNEL); - if (!smi) - return -ENOMEM; - smi->chip_data = (void *)smi + sizeof(*smi); - smi->map = devm_regmap_init(dev, NULL, smi, - &realtek_smi_mdio_regmap_config); - if (IS_ERR(smi->map)) { - ret = PTR_ERR(smi->map); - dev_err(dev, "regmap init failed: %d\n", ret); - return ret; - } - - /* Link forward and backward */ - smi->dev = dev; - smi->clk_delay = var->clk_delay; - smi->cmd_read = var->cmd_read; - smi->cmd_write = var->cmd_write; - smi->ops = var->ops; - - dev_set_drvdata(dev, smi); - spin_lock_init(&smi->lock); - - /* TODO: if power is software controlled, set up any regulators here */ - - /* Assert then deassert RESET */ - smi->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); - if (IS_ERR(smi->reset)) { - dev_err(dev, "failed to get RESET GPIO\n"); - return PTR_ERR(smi->reset); - } - msleep(REALTEK_SMI_HW_STOP_DELAY); - gpiod_set_value(smi->reset, 0); - msleep(REALTEK_SMI_HW_START_DELAY); - dev_info(dev, "deasserted RESET\n"); - - /* Fetch MDIO pins */ - smi->mdc = devm_gpiod_get_optional(dev, "mdc", GPIOD_OUT_LOW); - if (IS_ERR(smi->mdc)) - return PTR_ERR(smi->mdc); - smi->mdio = devm_gpiod_get_optional(dev, "mdio", GPIOD_OUT_LOW); - if (IS_ERR(smi->mdio)) - return PTR_ERR(smi->mdio); - - smi->leds_disabled = of_property_read_bool(np, "realtek,disable-leds"); - - ret = smi->ops->detect(smi); - if (ret) { - dev_err(dev, "unable to detect switch\n"); - return ret; - } - - smi->ds = devm_kzalloc(dev, sizeof(*smi->ds), GFP_KERNEL); - if (!smi->ds) - return -ENOMEM; - - smi->ds->dev = dev; - smi->ds->num_ports = smi->num_ports; - smi->ds->priv = smi; - - smi->ds->ops = var->ds_ops; - ret = dsa_register_switch(smi->ds); - if (ret) { - dev_err_probe(dev, ret, "unable to register switch\n"); - return ret; - } - return 0; -} - -static int realtek_smi_remove(struct platform_device *pdev) -{ - struct realtek_smi *smi = platform_get_drvdata(pdev); - - if (!smi) - return 0; - - dsa_unregister_switch(smi->ds); - if (smi->slave_mii_bus) - of_node_put(smi->slave_mii_bus->dev.of_node); - gpiod_set_value(smi->reset, 1); - - platform_set_drvdata(pdev, NULL); - - return 0; -} - -static void realtek_smi_shutdown(struct platform_device *pdev) -{ - struct realtek_smi *smi = platform_get_drvdata(pdev); - - if (!smi) - return; - - dsa_switch_shutdown(smi->ds); - - platform_set_drvdata(pdev, NULL); -} - -static const struct of_device_id realtek_smi_of_match[] = { - { - .compatible = "realtek,rtl8366rb", - .data = &rtl8366rb_variant, - }, - { - /* FIXME: add support for RTL8366S and more */ - .compatible = "realtek,rtl8366s", - .data = NULL, - }, - { - .compatible = "realtek,rtl8365mb", - .data = &rtl8365mb_variant, - }, - { /* sentinel */ }, -}; -MODULE_DEVICE_TABLE(of, realtek_smi_of_match); - -static struct platform_driver realtek_smi_driver = { - .driver = { - .name = "realtek-smi", - .of_match_table = of_match_ptr(realtek_smi_of_match), - }, - .probe = realtek_smi_probe, - .remove = realtek_smi_remove, - .shutdown = realtek_smi_shutdown, -}; -module_platform_driver(realtek_smi_driver); - -MODULE_LICENSE("GPL"); diff --git a/drivers/net/dsa/realtek/Kconfig b/drivers/net/dsa/realtek/Kconfig new file mode 100644 index 000000000000..5242698143d9 --- /dev/null +++ b/drivers/net/dsa/realtek/Kconfig @@ -0,0 +1,44 @@ +# SPDX-License-Identifier: GPL-2.0-only +menuconfig NET_DSA_REALTEK + tristate "Realtek Ethernet switch family support" + depends on NET_DSA + select FIXED_PHY + select IRQ_DOMAIN + select REALTEK_PHY + select REGMAP + help + Select to enable support for Realtek Ethernet switch chips. + +config NET_DSA_REALTEK_MDIO + tristate "Realtek MDIO connected switch driver" + depends on NET_DSA_REALTEK + default y + help + Select to enable support for registering switches configured + through MDIO. + +config NET_DSA_REALTEK_SMI + tristate "Realtek SMI connected switch driver" + depends on NET_DSA_REALTEK + default y + help + Select to enable support for registering switches connected + through SMI. + +config NET_DSA_REALTEK_RTL8365MB + tristate "Realtek RTL8365MB switch subdriver" + default y + depends on NET_DSA_REALTEK + depends on NET_DSA_REALTEK_SMI || NET_DSA_REALTEK_MDIO + select NET_DSA_TAG_RTL8_4 + help + Select to enable support for Realtek RTL8365MB-VC and RTL8367S. + +config NET_DSA_REALTEK_RTL8366RB + tristate "Realtek RTL8366RB switch subdriver" + default y + depends on NET_DSA_REALTEK + depends on NET_DSA_REALTEK_SMI || NET_DSA_REALTEK_MDIO + select NET_DSA_TAG_RTL4_A + help + Select to enable support for Realtek RTL8366RB diff --git a/drivers/net/dsa/realtek/Makefile b/drivers/net/dsa/realtek/Makefile new file mode 100644 index 000000000000..0aab57252a7c --- /dev/null +++ b/drivers/net/dsa/realtek/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_NET_DSA_REALTEK_MDIO) += realtek-mdio.o +obj-$(CONFIG_NET_DSA_REALTEK_SMI) += realtek-smi.o +obj-$(CONFIG_NET_DSA_REALTEK_RTL8366RB) += rtl8366.o +rtl8366-objs := rtl8366-core.o rtl8366rb.o +obj-$(CONFIG_NET_DSA_REALTEK_RTL8365MB) += rtl8365mb.o diff --git a/drivers/net/dsa/realtek/realtek-mdio.c b/drivers/net/dsa/realtek/realtek-mdio.c new file mode 100644 index 000000000000..0c5f2bdced9d --- /dev/null +++ b/drivers/net/dsa/realtek/realtek-mdio.c @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Realtek MDIO interface driver + * + * ASICs we intend to support with this driver: + * + * RTL8366 - The original version, apparently + * RTL8369 - Similar enough to have the same datsheet as RTL8366 + * RTL8366RB - Probably reads out "RTL8366 revision B", has a quite + * different register layout from the other two + * RTL8366S - Is this "RTL8366 super"? + * RTL8367 - Has an OpenWRT driver as well + * RTL8368S - Seems to be an alternative name for RTL8366RB + * RTL8370 - Also uses SMI + * + * Copyright (C) 2017 Linus Walleij <linus.walleij@linaro.org> + * Copyright (C) 2010 Antti Seppälä <a.seppala@gmail.com> + * Copyright (C) 2010 Roman Yeryomin <roman@advem.lv> + * Copyright (C) 2011 Colin Leitner <colin.leitner@googlemail.com> + * Copyright (C) 2009-2010 Gabor Juhos <juhosg@openwrt.org> + */ + +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/regmap.h> + +#include "realtek.h" + +/* Read/write via mdiobus */ +#define REALTEK_MDIO_CTRL0_REG 31 +#define REALTEK_MDIO_START_REG 29 +#define REALTEK_MDIO_CTRL1_REG 21 +#define REALTEK_MDIO_ADDRESS_REG 23 +#define REALTEK_MDIO_DATA_WRITE_REG 24 +#define REALTEK_MDIO_DATA_READ_REG 25 + +#define REALTEK_MDIO_START_OP 0xFFFF +#define REALTEK_MDIO_ADDR_OP 0x000E +#define REALTEK_MDIO_READ_OP 0x0001 +#define REALTEK_MDIO_WRITE_OP 0x0003 + +static int realtek_mdio_write(void *ctx, u32 reg, u32 val) +{ + struct realtek_priv *priv = ctx; + struct mii_bus *bus = priv->bus; + int ret; + + mutex_lock(&bus->mdio_lock); + + ret = bus->write(bus, priv->mdio_addr, REALTEK_MDIO_CTRL0_REG, REALTEK_MDIO_ADDR_OP); + if (ret) + goto out_unlock; + + ret = bus->write(bus, priv->mdio_addr, REALTEK_MDIO_ADDRESS_REG, reg); + if (ret) + goto out_unlock; + + ret = bus->write(bus, priv->mdio_addr, REALTEK_MDIO_DATA_WRITE_REG, val); + if (ret) + goto out_unlock; + + ret = bus->write(bus, priv->mdio_addr, REALTEK_MDIO_CTRL1_REG, REALTEK_MDIO_WRITE_OP); + +out_unlock: + mutex_unlock(&bus->mdio_lock); + + return ret; +} + +static int realtek_mdio_read(void *ctx, u32 reg, u32 *val) +{ + struct realtek_priv *priv = ctx; + struct mii_bus *bus = priv->bus; + int ret; + + mutex_lock(&bus->mdio_lock); + + ret = bus->write(bus, priv->mdio_addr, REALTEK_MDIO_CTRL0_REG, REALTEK_MDIO_ADDR_OP); + if (ret) + goto out_unlock; + + ret = bus->write(bus, priv->mdio_addr, REALTEK_MDIO_ADDRESS_REG, reg); + if (ret) + goto out_unlock; + + ret = bus->write(bus, priv->mdio_addr, REALTEK_MDIO_CTRL1_REG, REALTEK_MDIO_READ_OP); + if (ret) + goto out_unlock; + + ret = bus->read(bus, priv->mdio_addr, REALTEK_MDIO_DATA_READ_REG); + if (ret >= 0) { + *val = ret; + ret = 0; + } + +out_unlock: + mutex_unlock(&bus->mdio_lock); + + return ret; +} + +static const struct regmap_config realtek_mdio_regmap_config = { + .reg_bits = 10, /* A4..A0 R4..R0 */ + .val_bits = 16, + .reg_stride = 1, + /* PHY regs are at 0x8000 */ + .max_register = 0xffff, + .reg_format_endian = REGMAP_ENDIAN_BIG, + .reg_read = realtek_mdio_read, + .reg_write = realtek_mdio_write, + .cache_type = REGCACHE_NONE, +}; + +static int realtek_mdio_probe(struct mdio_device *mdiodev) +{ + struct realtek_priv *priv; + struct device *dev = &mdiodev->dev; + const struct realtek_variant *var; + int ret; + struct device_node *np; + + var = of_device_get_match_data(dev); + if (!var) + return -EINVAL; + + priv = devm_kzalloc(&mdiodev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->map = devm_regmap_init(dev, NULL, priv, &realtek_mdio_regmap_config); + if (IS_ERR(priv->map)) { + ret = PTR_ERR(priv->map); + dev_err(dev, "regmap init failed: %d\n", ret); + return ret; + } + + priv->mdio_addr = mdiodev->addr; + priv->bus = mdiodev->bus; + priv->dev = &mdiodev->dev; + priv->chip_data = (void *)priv + sizeof(*priv); + + priv->clk_delay = var->clk_delay; + priv->cmd_read = var->cmd_read; + priv->cmd_write = var->cmd_write; + priv->ops = var->ops; + + priv->write_reg_noack = realtek_mdio_write; + + np = dev->of_node; + + dev_set_drvdata(dev, priv); + + /* TODO: if power is software controlled, set up any regulators here */ + priv->leds_disabled = of_property_read_bool(np, "realtek,disable-leds"); + + ret = priv->ops->detect(priv); + if (ret) { + dev_err(dev, "unable to detect switch\n"); + return ret; + } + + priv->ds = devm_kzalloc(dev, sizeof(*priv->ds), GFP_KERNEL); + if (!priv->ds) + return -ENOMEM; + + priv->ds->dev = dev; + priv->ds->num_ports = priv->num_ports; + priv->ds->priv = priv; + priv->ds->ops = var->ds_ops_mdio; + + ret = dsa_register_switch(priv->ds); + if (ret) { + dev_err(priv->dev, "unable to register switch ret = %d\n", ret); + return ret; + } + + return 0; +} + +static void realtek_mdio_remove(struct mdio_device *mdiodev) +{ + struct realtek_priv *priv = dev_get_drvdata(&mdiodev->dev); + + if (!priv) + return; + + dsa_unregister_switch(priv->ds); + + dev_set_drvdata(&mdiodev->dev, NULL); +} + +static void realtek_mdio_shutdown(struct mdio_device *mdiodev) +{ + struct realtek_priv *priv = dev_get_drvdata(&mdiodev->dev); + + if (!priv) + return; + + dsa_switch_shutdown(priv->ds); + + dev_set_drvdata(&mdiodev->dev, NULL); +} + +static const struct of_device_id realtek_mdio_of_match[] = { +#if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8366RB) + { .compatible = "realtek,rtl8366rb", .data = &rtl8366rb_variant, }, +#endif +#if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8365MB) + { .compatible = "realtek,rtl8365mb", .data = &rtl8365mb_variant, }, + { .compatible = "realtek,rtl8367s", .data = &rtl8365mb_variant, }, +#endif + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, realtek_mdio_of_match); + +static struct mdio_driver realtek_mdio_driver = { + .mdiodrv.driver = { + .name = "realtek-mdio", + .of_match_table = of_match_ptr(realtek_mdio_of_match), + }, + .probe = realtek_mdio_probe, + .remove = realtek_mdio_remove, + .shutdown = realtek_mdio_shutdown, +}; + +mdio_module_driver(realtek_mdio_driver); + +MODULE_AUTHOR("Luiz Angelo Daros de Luca <luizluca@gmail.com>"); +MODULE_DESCRIPTION("Driver for Realtek ethernet switch connected via MDIO interface"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/dsa/realtek/realtek-smi.c b/drivers/net/dsa/realtek/realtek-smi.c new file mode 100644 index 000000000000..946fbbd70153 --- /dev/null +++ b/drivers/net/dsa/realtek/realtek-smi.c @@ -0,0 +1,535 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Realtek Simple Management Interface (SMI) driver + * It can be discussed how "simple" this interface is. + * + * The SMI protocol piggy-backs the MDIO MDC and MDIO signals levels + * but the protocol is not MDIO at all. Instead it is a Realtek + * pecularity that need to bit-bang the lines in a special way to + * communicate with the switch. + * + * ASICs we intend to support with this driver: + * + * RTL8366 - The original version, apparently + * RTL8369 - Similar enough to have the same datsheet as RTL8366 + * RTL8366RB - Probably reads out "RTL8366 revision B", has a quite + * different register layout from the other two + * RTL8366S - Is this "RTL8366 super"? + * RTL8367 - Has an OpenWRT driver as well + * RTL8368S - Seems to be an alternative name for RTL8366RB + * RTL8370 - Also uses SMI + * + * Copyright (C) 2017 Linus Walleij <linus.walleij@linaro.org> + * Copyright (C) 2010 Antti Seppälä <a.seppala@gmail.com> + * Copyright (C) 2010 Roman Yeryomin <roman@advem.lv> + * Copyright (C) 2011 Colin Leitner <colin.leitner@googlemail.com> + * Copyright (C) 2009-2010 Gabor Juhos <juhosg@openwrt.org> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/spinlock.h> +#include <linux/skbuff.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/of_mdio.h> +#include <linux/delay.h> +#include <linux/gpio/consumer.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/bitops.h> +#include <linux/if_bridge.h> + +#include "realtek.h" + +#define REALTEK_SMI_ACK_RETRY_COUNT 5 +#define REALTEK_SMI_HW_STOP_DELAY 25 /* msecs */ +#define REALTEK_SMI_HW_START_DELAY 100 /* msecs */ + +static inline void realtek_smi_clk_delay(struct realtek_priv *priv) +{ + ndelay(priv->clk_delay); +} + +static void realtek_smi_start(struct realtek_priv *priv) +{ + /* Set GPIO pins to output mode, with initial state: + * SCK = 0, SDA = 1 + */ + gpiod_direction_output(priv->mdc, 0); + gpiod_direction_output(priv->mdio, 1); + realtek_smi_clk_delay(priv); + + /* CLK 1: 0 -> 1, 1 -> 0 */ + gpiod_set_value(priv->mdc, 1); + realtek_smi_clk_delay(priv); + gpiod_set_value(priv->mdc, 0); + realtek_smi_clk_delay(priv); + + /* CLK 2: */ + gpiod_set_value(priv->mdc, 1); + realtek_smi_clk_delay(priv); + gpiod_set_value(priv->mdio, 0); + realtek_smi_clk_delay(priv); + gpiod_set_value(priv->mdc, 0); + realtek_smi_clk_delay(priv); + gpiod_set_value(priv->mdio, 1); +} + +static void realtek_smi_stop(struct realtek_priv *priv) +{ + realtek_smi_clk_delay(priv); + gpiod_set_value(priv->mdio, 0); + gpiod_set_value(priv->mdc, 1); + realtek_smi_clk_delay(priv); + gpiod_set_value(priv->mdio, 1); + realtek_smi_clk_delay(priv); + gpiod_set_value(priv->mdc, 1); + realtek_smi_clk_delay(priv); + gpiod_set_value(priv->mdc, 0); + realtek_smi_clk_delay(priv); + gpiod_set_value(priv->mdc, 1); + + /* Add a click */ + realtek_smi_clk_delay(priv); + gpiod_set_value(priv->mdc, 0); + realtek_smi_clk_delay(priv); + gpiod_set_value(priv->mdc, 1); + + /* Set GPIO pins to input mode */ + gpiod_direction_input(priv->mdio); + gpiod_direction_input(priv->mdc); +} + +static void realtek_smi_write_bits(struct realtek_priv *priv, u32 data, u32 len) +{ + for (; len > 0; len--) { + realtek_smi_clk_delay(priv); + + /* Prepare data */ + gpiod_set_value(priv->mdio, !!(data & (1 << (len - 1)))); + realtek_smi_clk_delay(priv); + + /* Clocking */ + gpiod_set_value(priv->mdc, 1); + realtek_smi_clk_delay(priv); + gpiod_set_value(priv->mdc, 0); + } +} + +static void realtek_smi_read_bits(struct realtek_priv *priv, u32 len, u32 *data) +{ + gpiod_direction_input(priv->mdio); + + for (*data = 0; len > 0; len--) { + u32 u; + + realtek_smi_clk_delay(priv); + + /* Clocking */ + gpiod_set_value(priv->mdc, 1); + realtek_smi_clk_delay(priv); + u = !!gpiod_get_value(priv->mdio); + gpiod_set_value(priv->mdc, 0); + + *data |= (u << (len - 1)); + } + + gpiod_direction_output(priv->mdio, 0); +} + +static int realtek_smi_wait_for_ack(struct realtek_priv *priv) +{ + int retry_cnt; + + retry_cnt = 0; + do { + u32 ack; + + realtek_smi_read_bits(priv, 1, &ack); + if (ack == 0) + break; + + if (++retry_cnt > REALTEK_SMI_ACK_RETRY_COUNT) { + dev_err(priv->dev, "ACK timeout\n"); + return -ETIMEDOUT; + } + } while (1); + + return 0; +} + +static int realtek_smi_write_byte(struct realtek_priv *priv, u8 data) +{ + realtek_smi_write_bits(priv, data, 8); + return realtek_smi_wait_for_ack(priv); +} + +static int realtek_smi_write_byte_noack(struct realtek_priv *priv, u8 data) +{ + realtek_smi_write_bits(priv, data, 8); + return 0; +} + +static int realtek_smi_read_byte0(struct realtek_priv *priv, u8 *data) +{ + u32 t; + + /* Read data */ + realtek_smi_read_bits(priv, 8, &t); + *data = (t & 0xff); + + /* Send an ACK */ + realtek_smi_write_bits(priv, 0x00, 1); + + return 0; +} + +static int realtek_smi_read_byte1(struct realtek_priv *priv, u8 *data) +{ + u32 t; + + /* Read data */ + realtek_smi_read_bits(priv, 8, &t); + *data = (t & 0xff); + + /* Send an ACK */ + realtek_smi_write_bits(priv, 0x01, 1); + + return 0; +} + +static int realtek_smi_read_reg(struct realtek_priv *priv, u32 addr, u32 *data) +{ + unsigned long flags; + u8 lo = 0; + u8 hi = 0; + int ret; + + spin_lock_irqsave(&priv->lock, flags); + + realtek_smi_start(priv); + + /* Send READ command */ + ret = realtek_smi_write_byte(priv, priv->cmd_read); + if (ret) + goto out; + + /* Set ADDR[7:0] */ + ret = realtek_smi_write_byte(priv, addr & 0xff); + if (ret) + goto out; + + /* Set ADDR[15:8] */ + ret = realtek_smi_write_byte(priv, addr >> 8); + if (ret) + goto out; + + /* Read DATA[7:0] */ + realtek_smi_read_byte0(priv, &lo); + /* Read DATA[15:8] */ + realtek_smi_read_byte1(priv, &hi); + + *data = ((u32)lo) | (((u32)hi) << 8); + + ret = 0; + + out: + realtek_smi_stop(priv); + spin_unlock_irqrestore(&priv->lock, flags); + + return ret; +} + +static int realtek_smi_write_reg(struct realtek_priv *priv, + u32 addr, u32 data, bool ack) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&priv->lock, flags); + + realtek_smi_start(priv); + + /* Send WRITE command */ + ret = realtek_smi_write_byte(priv, priv->cmd_write); + if (ret) + goto out; + + /* Set ADDR[7:0] */ + ret = realtek_smi_write_byte(priv, addr & 0xff); + if (ret) + goto out; + + /* Set ADDR[15:8] */ + ret = realtek_smi_write_byte(priv, addr >> 8); + if (ret) + goto out; + + /* Write DATA[7:0] */ + ret = realtek_smi_write_byte(priv, data & 0xff); + if (ret) + goto out; + + /* Write DATA[15:8] */ + if (ack) + ret = realtek_smi_write_byte(priv, data >> 8); + else + ret = realtek_smi_write_byte_noack(priv, data >> 8); + if (ret) + goto out; + + ret = 0; + + out: + realtek_smi_stop(priv); + spin_unlock_irqrestore(&priv->lock, flags); + + return ret; +} + +/* There is one single case when we need to use this accessor and that + * is when issueing soft reset. Since the device reset as soon as we write + * that bit, no ACK will come back for natural reasons. + */ +static int realtek_smi_write_reg_noack(void *ctx, u32 reg, u32 val) +{ + return realtek_smi_write_reg(ctx, reg, val, false); +} + +/* Regmap accessors */ + +static int realtek_smi_write(void *ctx, u32 reg, u32 val) +{ + struct realtek_priv *priv = ctx; + + return realtek_smi_write_reg(priv, reg, val, true); +} + +static int realtek_smi_read(void *ctx, u32 reg, u32 *val) +{ + struct realtek_priv *priv = ctx; + + return realtek_smi_read_reg(priv, reg, val); +} + +static const struct regmap_config realtek_smi_mdio_regmap_config = { + .reg_bits = 10, /* A4..A0 R4..R0 */ + .val_bits = 16, + .reg_stride = 1, + /* PHY regs are at 0x8000 */ + .max_register = 0xffff, + .reg_format_endian = REGMAP_ENDIAN_BIG, + .reg_read = realtek_smi_read, + .reg_write = realtek_smi_write, + .cache_type = REGCACHE_NONE, +}; + +static int realtek_smi_mdio_read(struct mii_bus *bus, int addr, int regnum) +{ + struct realtek_priv *priv = bus->priv; + + return priv->ops->phy_read(priv, addr, regnum); +} + +static int realtek_smi_mdio_write(struct mii_bus *bus, int addr, int regnum, + u16 val) +{ + struct realtek_priv *priv = bus->priv; + + return priv->ops->phy_write(priv, addr, regnum, val); +} + +static int realtek_smi_setup_mdio(struct dsa_switch *ds) +{ + struct realtek_priv *priv = ds->priv; + struct device_node *mdio_np; + int ret; + + mdio_np = of_get_compatible_child(priv->dev->of_node, "realtek,smi-mdio"); + if (!mdio_np) { + dev_err(priv->dev, "no MDIO bus node\n"); + return -ENODEV; + } + + priv->slave_mii_bus = devm_mdiobus_alloc(priv->dev); + if (!priv->slave_mii_bus) { + ret = -ENOMEM; + goto err_put_node; + } + priv->slave_mii_bus->priv = priv; + priv->slave_mii_bus->name = "SMI slave MII"; + priv->slave_mii_bus->read = realtek_smi_mdio_read; + priv->slave_mii_bus->write = realtek_smi_mdio_write; + snprintf(priv->slave_mii_bus->id, MII_BUS_ID_SIZE, "SMI-%d", + ds->index); + priv->slave_mii_bus->dev.of_node = mdio_np; + priv->slave_mii_bus->parent = priv->dev; + ds->slave_mii_bus = priv->slave_mii_bus; + + ret = devm_of_mdiobus_register(priv->dev, priv->slave_mii_bus, mdio_np); + if (ret) { + dev_err(priv->dev, "unable to register MDIO bus %s\n", + priv->slave_mii_bus->id); + goto err_put_node; + } + + return 0; + +err_put_node: + of_node_put(mdio_np); + + return ret; +} + +static int realtek_smi_probe(struct platform_device *pdev) +{ + const struct realtek_variant *var; + struct device *dev = &pdev->dev; + struct realtek_priv *priv; + struct device_node *np; + int ret; + + var = of_device_get_match_data(dev); + np = dev->of_node; + + priv = devm_kzalloc(dev, sizeof(*priv) + var->chip_data_sz, GFP_KERNEL); + if (!priv) + return -ENOMEM; + priv->chip_data = (void *)priv + sizeof(*priv); + priv->map = devm_regmap_init(dev, NULL, priv, + &realtek_smi_mdio_regmap_config); + if (IS_ERR(priv->map)) { + ret = PTR_ERR(priv->map); + dev_err(dev, "regmap init failed: %d\n", ret); + return ret; + } + + /* Link forward and backward */ + priv->dev = dev; + priv->clk_delay = var->clk_delay; + priv->cmd_read = var->cmd_read; + priv->cmd_write = var->cmd_write; + priv->ops = var->ops; + + priv->setup_interface = realtek_smi_setup_mdio; + priv->write_reg_noack = realtek_smi_write_reg_noack; + + dev_set_drvdata(dev, priv); + spin_lock_init(&priv->lock); + + /* TODO: if power is software controlled, set up any regulators here */ + + /* Assert then deassert RESET */ + priv->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(priv->reset)) { + dev_err(dev, "failed to get RESET GPIO\n"); + return PTR_ERR(priv->reset); + } + msleep(REALTEK_SMI_HW_STOP_DELAY); + gpiod_set_value(priv->reset, 0); + msleep(REALTEK_SMI_HW_START_DELAY); + dev_info(dev, "deasserted RESET\n"); + + /* Fetch MDIO pins */ + priv->mdc = devm_gpiod_get_optional(dev, "mdc", GPIOD_OUT_LOW); + if (IS_ERR(priv->mdc)) + return PTR_ERR(priv->mdc); + priv->mdio = devm_gpiod_get_optional(dev, "mdio", GPIOD_OUT_LOW); + if (IS_ERR(priv->mdio)) + return PTR_ERR(priv->mdio); + + priv->leds_disabled = of_property_read_bool(np, "realtek,disable-leds"); + + ret = priv->ops->detect(priv); + if (ret) { + dev_err(dev, "unable to detect switch\n"); + return ret; + } + + priv->ds = devm_kzalloc(dev, sizeof(*priv->ds), GFP_KERNEL); + if (!priv->ds) + return -ENOMEM; + + priv->ds->dev = dev; + priv->ds->num_ports = priv->num_ports; + priv->ds->priv = priv; + + priv->ds->ops = var->ds_ops_smi; + ret = dsa_register_switch(priv->ds); + if (ret) { + dev_err_probe(dev, ret, "unable to register switch\n"); + return ret; + } + return 0; +} + +static int realtek_smi_remove(struct platform_device *pdev) +{ + struct realtek_priv *priv = platform_get_drvdata(pdev); + + if (!priv) + return 0; + + dsa_unregister_switch(priv->ds); + if (priv->slave_mii_bus) + of_node_put(priv->slave_mii_bus->dev.of_node); + gpiod_set_value(priv->reset, 1); + + platform_set_drvdata(pdev, NULL); + + return 0; +} + +static void realtek_smi_shutdown(struct platform_device *pdev) +{ + struct realtek_priv *priv = platform_get_drvdata(pdev); + + if (!priv) + return; + + dsa_switch_shutdown(priv->ds); + + platform_set_drvdata(pdev, NULL); +} + +static const struct of_device_id realtek_smi_of_match[] = { +#if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8366RB) + { + .compatible = "realtek,rtl8366rb", + .data = &rtl8366rb_variant, + }, +#endif + { + /* FIXME: add support for RTL8366S and more */ + .compatible = "realtek,rtl8366s", + .data = NULL, + }, +#if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8365MB) + { + .compatible = "realtek,rtl8365mb", + .data = &rtl8365mb_variant, + }, + { + .compatible = "realtek,rtl8367s", + .data = &rtl8365mb_variant, + }, +#endif + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, realtek_smi_of_match); + +static struct platform_driver realtek_smi_driver = { + .driver = { + .name = "realtek-smi", + .of_match_table = of_match_ptr(realtek_smi_of_match), + }, + .probe = realtek_smi_probe, + .remove = realtek_smi_remove, + .shutdown = realtek_smi_shutdown, +}; +module_platform_driver(realtek_smi_driver); + +MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>"); +MODULE_DESCRIPTION("Driver for Realtek ethernet switch connected via SMI interface"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/dsa/realtek-smi-core.h b/drivers/net/dsa/realtek/realtek.h index 5bfa53e2480a..ed5abf6cb3d6 100644 --- a/drivers/net/dsa/realtek-smi-core.h +++ b/drivers/net/dsa/realtek/realtek.h @@ -13,7 +13,7 @@ #include <linux/gpio/consumer.h> #include <net/dsa.h> -struct realtek_smi_ops; +struct realtek_ops; struct dentry; struct inode; struct file; @@ -25,7 +25,7 @@ struct rtl8366_mib_counter { const char *name; }; -/** +/* * struct rtl8366_vlan_mc - Virtual LAN member configuration */ struct rtl8366_vlan_mc { @@ -43,13 +43,15 @@ struct rtl8366_vlan_4k { u8 fid; }; -struct realtek_smi { +struct realtek_priv { struct device *dev; struct gpio_desc *reset; struct gpio_desc *mdc; struct gpio_desc *mdio; struct regmap *map; struct mii_bus *slave_mii_bus; + struct mii_bus *bus; + int mdio_addr; unsigned int clk_delay; u8 cmd_read; @@ -65,7 +67,9 @@ struct realtek_smi { unsigned int num_mib_counters; struct rtl8366_mib_counter *mib_counters; - const struct realtek_smi_ops *ops; + const struct realtek_ops *ops; + int (*setup_interface)(struct dsa_switch *ds); + int (*write_reg_noack)(void *ctx, u32 addr, u32 data); int vlan_enabled; int vlan4k_enabled; @@ -74,61 +78,57 @@ struct realtek_smi { void *chip_data; /* Per-chip extra variant data */ }; -/** - * struct realtek_smi_ops - vtable for the per-SMI-chiptype operations +/* + * struct realtek_ops - vtable for the per-SMI-chiptype operations * @detect: detects the chiptype */ -struct realtek_smi_ops { - int (*detect)(struct realtek_smi *smi); - int (*reset_chip)(struct realtek_smi *smi); - int (*setup)(struct realtek_smi *smi); - void (*cleanup)(struct realtek_smi *smi); - int (*get_mib_counter)(struct realtek_smi *smi, +struct realtek_ops { + int (*detect)(struct realtek_priv *priv); + int (*reset_chip)(struct realtek_priv *priv); + int (*setup)(struct realtek_priv *priv); + void (*cleanup)(struct realtek_priv *priv); + int (*get_mib_counter)(struct realtek_priv *priv, int port, struct rtl8366_mib_counter *mib, u64 *mibvalue); - int (*get_vlan_mc)(struct realtek_smi *smi, u32 index, + int (*get_vlan_mc)(struct realtek_priv *priv, u32 index, struct rtl8366_vlan_mc *vlanmc); - int (*set_vlan_mc)(struct realtek_smi *smi, u32 index, + int (*set_vlan_mc)(struct realtek_priv *priv, u32 index, const struct rtl8366_vlan_mc *vlanmc); - int (*get_vlan_4k)(struct realtek_smi *smi, u32 vid, + int (*get_vlan_4k)(struct realtek_priv *priv, u32 vid, struct rtl8366_vlan_4k *vlan4k); - int (*set_vlan_4k)(struct realtek_smi *smi, + int (*set_vlan_4k)(struct realtek_priv *priv, const struct rtl8366_vlan_4k *vlan4k); - int (*get_mc_index)(struct realtek_smi *smi, int port, int *val); - int (*set_mc_index)(struct realtek_smi *smi, int port, int index); - bool (*is_vlan_valid)(struct realtek_smi *smi, unsigned int vlan); - int (*enable_vlan)(struct realtek_smi *smi, bool enable); - int (*enable_vlan4k)(struct realtek_smi *smi, bool enable); - int (*enable_port)(struct realtek_smi *smi, int port, bool enable); - int (*phy_read)(struct realtek_smi *smi, int phy, int regnum); - int (*phy_write)(struct realtek_smi *smi, int phy, int regnum, + int (*get_mc_index)(struct realtek_priv *priv, int port, int *val); + int (*set_mc_index)(struct realtek_priv *priv, int port, int index); + bool (*is_vlan_valid)(struct realtek_priv *priv, unsigned int vlan); + int (*enable_vlan)(struct realtek_priv *priv, bool enable); + int (*enable_vlan4k)(struct realtek_priv *priv, bool enable); + int (*enable_port)(struct realtek_priv *priv, int port, bool enable); + int (*phy_read)(struct realtek_priv *priv, int phy, int regnum); + int (*phy_write)(struct realtek_priv *priv, int phy, int regnum, u16 val); }; -struct realtek_smi_variant { - const struct dsa_switch_ops *ds_ops; - const struct realtek_smi_ops *ops; +struct realtek_variant { + const struct dsa_switch_ops *ds_ops_smi; + const struct dsa_switch_ops *ds_ops_mdio; + const struct realtek_ops *ops; unsigned int clk_delay; u8 cmd_read; u8 cmd_write; size_t chip_data_sz; }; -/* SMI core calls */ -int realtek_smi_write_reg_noack(struct realtek_smi *smi, u32 addr, - u32 data); -int realtek_smi_setup_mdio(struct realtek_smi *smi); - /* RTL8366 library helpers */ -int rtl8366_mc_is_used(struct realtek_smi *smi, int mc_index, int *used); -int rtl8366_set_vlan(struct realtek_smi *smi, int vid, u32 member, +int rtl8366_mc_is_used(struct realtek_priv *priv, int mc_index, int *used); +int rtl8366_set_vlan(struct realtek_priv *priv, int vid, u32 member, u32 untag, u32 fid); -int rtl8366_set_pvid(struct realtek_smi *smi, unsigned int port, +int rtl8366_set_pvid(struct realtek_priv *priv, unsigned int port, unsigned int vid); -int rtl8366_enable_vlan4k(struct realtek_smi *smi, bool enable); -int rtl8366_enable_vlan(struct realtek_smi *smi, bool enable); -int rtl8366_reset_vlan(struct realtek_smi *smi); +int rtl8366_enable_vlan4k(struct realtek_priv *priv, bool enable); +int rtl8366_enable_vlan(struct realtek_priv *priv, bool enable); +int rtl8366_reset_vlan(struct realtek_priv *priv); int rtl8366_vlan_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan, struct netlink_ext_ack *extack); @@ -139,7 +139,7 @@ void rtl8366_get_strings(struct dsa_switch *ds, int port, u32 stringset, int rtl8366_get_sset_count(struct dsa_switch *ds, int port, int sset); void rtl8366_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data); -extern const struct realtek_smi_variant rtl8366rb_variant; -extern const struct realtek_smi_variant rtl8365mb_variant; +extern const struct realtek_variant rtl8366rb_variant; +extern const struct realtek_variant rtl8365mb_variant; #endif /* _REALTEK_SMI_H */ diff --git a/drivers/net/dsa/rtl8365mb.c b/drivers/net/dsa/realtek/rtl8365mb.c index 3b729544798b..e1c5a67a21c4 100644 --- a/drivers/net/dsa/rtl8365mb.c +++ b/drivers/net/dsa/realtek/rtl8365mb.c @@ -99,18 +99,28 @@ #include <linux/regmap.h> #include <linux/if_bridge.h> -#include "realtek-smi-core.h" +#include "realtek.h" /* Chip-specific data and limits */ -#define RTL8365MB_CHIP_ID_8365MB_VC 0x6367 -#define RTL8365MB_CPU_PORT_NUM_8365MB_VC 6 -#define RTL8365MB_LEARN_LIMIT_MAX_8365MB_VC 2112 +#define RTL8365MB_CHIP_ID_8365MB_VC 0x6367 +#define RTL8365MB_CHIP_VER_8365MB_VC 0x0040 + +#define RTL8365MB_CHIP_ID_8367S 0x6367 +#define RTL8365MB_CHIP_VER_8367S 0x00A0 + +#define RTL8365MB_CHIP_ID_8367RB 0x6367 +#define RTL8365MB_CHIP_VER_8367RB 0x0020 /* Family-specific data and limits */ -#define RTL8365MB_PHYADDRMAX 7 -#define RTL8365MB_NUM_PHYREGS 32 -#define RTL8365MB_PHYREGMAX (RTL8365MB_NUM_PHYREGS - 1) -#define RTL8365MB_MAX_NUM_PORTS (RTL8365MB_CPU_PORT_NUM_8365MB_VC + 1) +#define RTL8365MB_PHYADDRMAX 7 +#define RTL8365MB_NUM_PHYREGS 32 +#define RTL8365MB_PHYREGMAX (RTL8365MB_NUM_PHYREGS - 1) +/* RTL8370MB and RTL8310SR, possibly suportable by this driver, have 10 ports */ +#define RTL8365MB_MAX_NUM_PORTS 10 +#define RTL8365MB_LEARN_LIMIT_MAX 2112 + +/* valid for all 6-port or less variants */ +static const int rtl8365mb_extint_port_map[] = { -1, -1, -1, -1, -1, -1, 1, 2, -1, -1}; /* Chip identification registers */ #define RTL8365MB_CHIP_ID_REG 0x1300 @@ -191,7 +201,7 @@ /* The PHY OCP addresses of PHY registers 0~31 start here */ #define RTL8365MB_PHY_OCP_ADDR_PHYREG_BASE 0xA400 -/* EXT port interface mode values - used in DIGITAL_INTERFACE_SELECT */ +/* EXT interface port mode values - used in DIGITAL_INTERFACE_SELECT */ #define RTL8365MB_EXT_PORT_MODE_DISABLE 0 #define RTL8365MB_EXT_PORT_MODE_RGMII 1 #define RTL8365MB_EXT_PORT_MODE_MII_MAC 2 @@ -207,39 +217,56 @@ #define RTL8365MB_EXT_PORT_MODE_1000X 12 #define RTL8365MB_EXT_PORT_MODE_100FX 13 -/* EXT port interface mode configuration registers 0~1 */ -#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG0 0x1305 -#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG1 0x13C3 -#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG(_extport) \ - (RTL8365MB_DIGITAL_INTERFACE_SELECT_REG0 + \ - ((_extport) >> 1) * (0x13C3 - 0x1305)) -#define RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_MASK(_extport) \ - (0xF << (((_extport) % 2))) -#define RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_OFFSET(_extport) \ - (((_extport) % 2) * 4) - -/* EXT port RGMII TX/RX delay configuration registers 1~2 */ -#define RTL8365MB_EXT_RGMXF_REG1 0x1307 -#define RTL8365MB_EXT_RGMXF_REG2 0x13C5 -#define RTL8365MB_EXT_RGMXF_REG(_extport) \ - (RTL8365MB_EXT_RGMXF_REG1 + \ - (((_extport) >> 1) * (0x13C5 - 0x1307))) +/* Realtek docs and driver uses logic number as EXT_PORT0=16, EXT_PORT1=17, + * EXT_PORT2=18, to interact with switch ports. That logic number is internally + * converted to either a physical port number (0..9) or an external interface id (0..2), + * depending on which function was called. The external interface id is calculated as + * (ext_id=logic_port-15), while the logical to physical map depends on the chip id/version. + * + * EXT_PORT0 mentioned in datasheets and rtl8367c driver is used in this driver + * as extid==1, EXT_PORT2, mentioned in Realtek rtl8367c driver for 10-port switches, + * would have an ext_id of 3 (out of range for most extint macros) and ext_id 0 does + * not seem to be used as well for this family. + */ + +/* EXT interface mode configuration registers 0~1 */ +#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG0 0x1305 /* EXT1 */ +#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG1 0x13C3 /* EXT2 */ +#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG(_extint) \ + ((_extint) == 1 ? RTL8365MB_DIGITAL_INTERFACE_SELECT_REG0 : \ + (_extint) == 2 ? RTL8365MB_DIGITAL_INTERFACE_SELECT_REG1 : \ + 0x0) +#define RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_MASK(_extint) \ + (0xF << (((_extint) % 2))) +#define RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_OFFSET(_extint) \ + (((_extint) % 2) * 4) + +/* EXT interface RGMII TX/RX delay configuration registers 0~2 */ +#define RTL8365MB_EXT_RGMXF_REG0 0x1306 /* EXT0 */ +#define RTL8365MB_EXT_RGMXF_REG1 0x1307 /* EXT1 */ +#define RTL8365MB_EXT_RGMXF_REG2 0x13C5 /* EXT2 */ +#define RTL8365MB_EXT_RGMXF_REG(_extint) \ + ((_extint) == 0 ? RTL8365MB_EXT_RGMXF_REG0 : \ + (_extint) == 1 ? RTL8365MB_EXT_RGMXF_REG1 : \ + (_extint) == 2 ? RTL8365MB_EXT_RGMXF_REG2 : \ + 0x0) #define RTL8365MB_EXT_RGMXF_RXDELAY_MASK 0x0007 #define RTL8365MB_EXT_RGMXF_TXDELAY_MASK 0x0008 -/* External port speed values - used in DIGITAL_INTERFACE_FORCE */ +/* External interface port speed values - used in DIGITAL_INTERFACE_FORCE */ #define RTL8365MB_PORT_SPEED_10M 0 #define RTL8365MB_PORT_SPEED_100M 1 #define RTL8365MB_PORT_SPEED_1000M 2 -/* EXT port force configuration registers 0~2 */ -#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG0 0x1310 -#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG1 0x1311 -#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG2 0x13C4 -#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG(_extport) \ - (RTL8365MB_DIGITAL_INTERFACE_FORCE_REG0 + \ - ((_extport) & 0x1) + \ - ((((_extport) >> 1) & 0x1) * (0x13C4 - 0x1310))) +/* EXT interface force configuration registers 0~2 */ +#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG0 0x1310 /* EXT0 */ +#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG1 0x1311 /* EXT1 */ +#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG2 0x13C4 /* EXT2 */ +#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG(_extint) \ + ((_extint) == 0 ? RTL8365MB_DIGITAL_INTERFACE_FORCE_REG0 : \ + (_extint) == 1 ? RTL8365MB_DIGITAL_INTERFACE_FORCE_REG1 : \ + (_extint) == 2 ? RTL8365MB_DIGITAL_INTERFACE_FORCE_REG2 : \ + 0x0) #define RTL8365MB_DIGITAL_INTERFACE_FORCE_EN_MASK 0x1000 #define RTL8365MB_DIGITAL_INTERFACE_FORCE_NWAY_MASK 0x0080 #define RTL8365MB_DIGITAL_INTERFACE_FORCE_TXPAUSE_MASK 0x0040 @@ -516,7 +543,7 @@ struct rtl8365mb_cpu { /** * struct rtl8365mb_port - private per-port data - * @smi: pointer to parent realtek_smi data + * @priv: pointer to parent realtek_priv data * @index: DSA port index, same as dsa_port::index * @stats: link statistics populated by rtl8365mb_stats_poll, ready for atomic * access via rtl8365mb_get_stats64 @@ -524,7 +551,7 @@ struct rtl8365mb_cpu { * @mib_work: delayed work for polling MIB counters */ struct rtl8365mb_port { - struct realtek_smi *smi; + struct realtek_priv *priv; unsigned int index; struct rtnl_link_stats64 stats; spinlock_t stats_lock; @@ -533,13 +560,12 @@ struct rtl8365mb_port { /** * struct rtl8365mb - private chip-specific driver data - * @smi: pointer to parent realtek_smi data + * @priv: pointer to parent realtek_priv data * @irq: registered IRQ or zero * @chip_id: chip identifier * @chip_ver: chip silicon revision * @port_mask: mask of all ports * @learn_limit_max: maximum number of L2 addresses the chip can learn - * @cpu: CPU tagging and CPU port configuration for this chip * @mib_lock: prevent concurrent reads of MIB counters * @ports: per-port data * @jam_table: chip-specific initialization jam table @@ -548,29 +574,28 @@ struct rtl8365mb_port { * Private data for this driver. */ struct rtl8365mb { - struct realtek_smi *smi; + struct realtek_priv *priv; int irq; u32 chip_id; u32 chip_ver; u32 port_mask; u32 learn_limit_max; - struct rtl8365mb_cpu cpu; struct mutex mib_lock; struct rtl8365mb_port ports[RTL8365MB_MAX_NUM_PORTS]; const struct rtl8365mb_jam_tbl_entry *jam_table; size_t jam_size; }; -static int rtl8365mb_phy_poll_busy(struct realtek_smi *smi) +static int rtl8365mb_phy_poll_busy(struct realtek_priv *priv) { u32 val; - return regmap_read_poll_timeout(smi->map, + return regmap_read_poll_timeout(priv->map, RTL8365MB_INDIRECT_ACCESS_STATUS_REG, val, !val, 10, 100); } -static int rtl8365mb_phy_ocp_prepare(struct realtek_smi *smi, int phy, +static int rtl8365mb_phy_ocp_prepare(struct realtek_priv *priv, int phy, u32 ocp_addr) { u32 val; @@ -579,7 +604,7 @@ static int rtl8365mb_phy_ocp_prepare(struct realtek_smi *smi, int phy, /* Set OCP prefix */ val = FIELD_GET(RTL8365MB_PHY_OCP_ADDR_PREFIX_MASK, ocp_addr); ret = regmap_update_bits( - smi->map, RTL8365MB_GPHY_OCP_MSB_0_REG, + priv->map, RTL8365MB_GPHY_OCP_MSB_0_REG, RTL8365MB_GPHY_OCP_MSB_0_CFG_CPU_OCPADR_MASK, FIELD_PREP(RTL8365MB_GPHY_OCP_MSB_0_CFG_CPU_OCPADR_MASK, val)); if (ret) @@ -592,7 +617,7 @@ static int rtl8365mb_phy_ocp_prepare(struct realtek_smi *smi, int phy, ocp_addr >> 1); val |= FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_ADDRESS_OCPADR_9_6_MASK, ocp_addr >> 6); - ret = regmap_write(smi->map, RTL8365MB_INDIRECT_ACCESS_ADDRESS_REG, + ret = regmap_write(priv->map, RTL8365MB_INDIRECT_ACCESS_ADDRESS_REG, val); if (ret) return ret; @@ -600,17 +625,17 @@ static int rtl8365mb_phy_ocp_prepare(struct realtek_smi *smi, int phy, return 0; } -static int rtl8365mb_phy_ocp_read(struct realtek_smi *smi, int phy, +static int rtl8365mb_phy_ocp_read(struct realtek_priv *priv, int phy, u32 ocp_addr, u16 *data) { u32 val; int ret; - ret = rtl8365mb_phy_poll_busy(smi); + ret = rtl8365mb_phy_poll_busy(priv); if (ret) return ret; - ret = rtl8365mb_phy_ocp_prepare(smi, phy, ocp_addr); + ret = rtl8365mb_phy_ocp_prepare(priv, phy, ocp_addr); if (ret) return ret; @@ -619,16 +644,16 @@ static int rtl8365mb_phy_ocp_read(struct realtek_smi *smi, int phy, RTL8365MB_INDIRECT_ACCESS_CTRL_CMD_VALUE) | FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_CTRL_RW_MASK, RTL8365MB_INDIRECT_ACCESS_CTRL_RW_READ); - ret = regmap_write(smi->map, RTL8365MB_INDIRECT_ACCESS_CTRL_REG, val); + ret = regmap_write(priv->map, RTL8365MB_INDIRECT_ACCESS_CTRL_REG, val); if (ret) return ret; - ret = rtl8365mb_phy_poll_busy(smi); + ret = rtl8365mb_phy_poll_busy(priv); if (ret) return ret; /* Get PHY register data */ - ret = regmap_read(smi->map, RTL8365MB_INDIRECT_ACCESS_READ_DATA_REG, + ret = regmap_read(priv->map, RTL8365MB_INDIRECT_ACCESS_READ_DATA_REG, &val); if (ret) return ret; @@ -638,22 +663,22 @@ static int rtl8365mb_phy_ocp_read(struct realtek_smi *smi, int phy, return 0; } -static int rtl8365mb_phy_ocp_write(struct realtek_smi *smi, int phy, +static int rtl8365mb_phy_ocp_write(struct realtek_priv *priv, int phy, u32 ocp_addr, u16 data) { u32 val; int ret; - ret = rtl8365mb_phy_poll_busy(smi); + ret = rtl8365mb_phy_poll_busy(priv); if (ret) return ret; - ret = rtl8365mb_phy_ocp_prepare(smi, phy, ocp_addr); + ret = rtl8365mb_phy_ocp_prepare(priv, phy, ocp_addr); if (ret) return ret; /* Set PHY register data */ - ret = regmap_write(smi->map, RTL8365MB_INDIRECT_ACCESS_WRITE_DATA_REG, + ret = regmap_write(priv->map, RTL8365MB_INDIRECT_ACCESS_WRITE_DATA_REG, data); if (ret) return ret; @@ -663,18 +688,18 @@ static int rtl8365mb_phy_ocp_write(struct realtek_smi *smi, int phy, RTL8365MB_INDIRECT_ACCESS_CTRL_CMD_VALUE) | FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_CTRL_RW_MASK, RTL8365MB_INDIRECT_ACCESS_CTRL_RW_WRITE); - ret = regmap_write(smi->map, RTL8365MB_INDIRECT_ACCESS_CTRL_REG, val); + ret = regmap_write(priv->map, RTL8365MB_INDIRECT_ACCESS_CTRL_REG, val); if (ret) return ret; - ret = rtl8365mb_phy_poll_busy(smi); + ret = rtl8365mb_phy_poll_busy(priv); if (ret) return ret; return 0; } -static int rtl8365mb_phy_read(struct realtek_smi *smi, int phy, int regnum) +static int rtl8365mb_phy_read(struct realtek_priv *priv, int phy, int regnum) { u32 ocp_addr; u16 val; @@ -688,21 +713,21 @@ static int rtl8365mb_phy_read(struct realtek_smi *smi, int phy, int regnum) ocp_addr = RTL8365MB_PHY_OCP_ADDR_PHYREG_BASE + regnum * 2; - ret = rtl8365mb_phy_ocp_read(smi, phy, ocp_addr, &val); + ret = rtl8365mb_phy_ocp_read(priv, phy, ocp_addr, &val); if (ret) { - dev_err(smi->dev, + dev_err(priv->dev, "failed to read PHY%d reg %02x @ %04x, ret %d\n", phy, regnum, ocp_addr, ret); return ret; } - dev_dbg(smi->dev, "read PHY%d register 0x%02x @ %04x, val <- %04x\n", + dev_dbg(priv->dev, "read PHY%d register 0x%02x @ %04x, val <- %04x\n", phy, regnum, ocp_addr, val); return val; } -static int rtl8365mb_phy_write(struct realtek_smi *smi, int phy, int regnum, +static int rtl8365mb_phy_write(struct realtek_priv *priv, int phy, int regnum, u16 val) { u32 ocp_addr; @@ -716,20 +741,31 @@ static int rtl8365mb_phy_write(struct realtek_smi *smi, int phy, int regnum, ocp_addr = RTL8365MB_PHY_OCP_ADDR_PHYREG_BASE + regnum * 2; - ret = rtl8365mb_phy_ocp_write(smi, phy, ocp_addr, val); + ret = rtl8365mb_phy_ocp_write(priv, phy, ocp_addr, val); if (ret) { - dev_err(smi->dev, + dev_err(priv->dev, "failed to write PHY%d reg %02x @ %04x, ret %d\n", phy, regnum, ocp_addr, ret); return ret; } - dev_dbg(smi->dev, "write PHY%d register 0x%02x @ %04x, val -> %04x\n", + dev_dbg(priv->dev, "write PHY%d register 0x%02x @ %04x, val -> %04x\n", phy, regnum, ocp_addr, val); return 0; } +static int rtl8365mb_dsa_phy_read(struct dsa_switch *ds, int phy, int regnum) +{ + return rtl8365mb_phy_read(ds->priv, phy, regnum); +} + +static int rtl8365mb_dsa_phy_write(struct dsa_switch *ds, int phy, int regnum, + u16 val) +{ + return rtl8365mb_phy_write(ds->priv, phy, regnum, val); +} + static enum dsa_tag_protocol rtl8365mb_get_tag_protocol(struct dsa_switch *ds, int port, enum dsa_tag_protocol mp) @@ -737,25 +773,25 @@ rtl8365mb_get_tag_protocol(struct dsa_switch *ds, int port, return DSA_TAG_PROTO_RTL8_4; } -static int rtl8365mb_ext_config_rgmii(struct realtek_smi *smi, int port, +static int rtl8365mb_ext_config_rgmii(struct realtek_priv *priv, int port, phy_interface_t interface) { struct device_node *dn; struct dsa_port *dp; int tx_delay = 0; int rx_delay = 0; - int ext_port; + int ext_int; u32 val; int ret; - if (port == smi->cpu_port) { - ext_port = 1; - } else { - dev_err(smi->dev, "only one EXT port is currently supported\n"); + ext_int = rtl8365mb_extint_port_map[port]; + + if (ext_int <= 0) { + dev_err(priv->dev, "Port %d is not an external interface port\n", port); return -EINVAL; } - dp = dsa_to_port(smi->ds, port); + dp = dsa_to_port(priv->ds, port); dn = dp->dn; /* Set the RGMII TX/RX delay @@ -786,8 +822,8 @@ static int rtl8365mb_ext_config_rgmii(struct realtek_smi *smi, int port, if (val == 0 || val == 2) tx_delay = val / 2; else - dev_warn(smi->dev, - "EXT port TX delay must be 0 or 2 ns\n"); + dev_warn(priv->dev, + "EXT interface TX delay must be 0 or 2 ns\n"); } if (!of_property_read_u32(dn, "rx-internal-delay-ps", &val)) { @@ -796,12 +832,12 @@ static int rtl8365mb_ext_config_rgmii(struct realtek_smi *smi, int port, if (val <= 7) rx_delay = val; else - dev_warn(smi->dev, - "EXT port RX delay must be 0 to 2.1 ns\n"); + dev_warn(priv->dev, + "EXT interface RX delay must be 0 to 2.1 ns\n"); } ret = regmap_update_bits( - smi->map, RTL8365MB_EXT_RGMXF_REG(ext_port), + priv->map, RTL8365MB_EXT_RGMXF_REG(ext_int), RTL8365MB_EXT_RGMXF_TXDELAY_MASK | RTL8365MB_EXT_RGMXF_RXDELAY_MASK, FIELD_PREP(RTL8365MB_EXT_RGMXF_TXDELAY_MASK, tx_delay) | @@ -810,18 +846,18 @@ static int rtl8365mb_ext_config_rgmii(struct realtek_smi *smi, int port, return ret; ret = regmap_update_bits( - smi->map, RTL8365MB_DIGITAL_INTERFACE_SELECT_REG(ext_port), - RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_MASK(ext_port), + priv->map, RTL8365MB_DIGITAL_INTERFACE_SELECT_REG(ext_int), + RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_MASK(ext_int), RTL8365MB_EXT_PORT_MODE_RGMII << RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_OFFSET( - ext_port)); + ext_int)); if (ret) return ret; return 0; } -static int rtl8365mb_ext_config_forcemode(struct realtek_smi *smi, int port, +static int rtl8365mb_ext_config_forcemode(struct realtek_priv *priv, int port, bool link, int speed, int duplex, bool tx_pause, bool rx_pause) { @@ -830,14 +866,14 @@ static int rtl8365mb_ext_config_forcemode(struct realtek_smi *smi, int port, u32 r_duplex; u32 r_speed; u32 r_link; - int ext_port; + int ext_int; int val; int ret; - if (port == smi->cpu_port) { - ext_port = 1; - } else { - dev_err(smi->dev, "only one EXT port is currently supported\n"); + ext_int = rtl8365mb_extint_port_map[port]; + + if (ext_int <= 0) { + dev_err(priv->dev, "Port %d is not an external interface port\n", port); return -EINVAL; } @@ -854,7 +890,7 @@ static int rtl8365mb_ext_config_forcemode(struct realtek_smi *smi, int port, } else if (speed == SPEED_10) { r_speed = RTL8365MB_PORT_SPEED_10M; } else { - dev_err(smi->dev, "unsupported port speed %s\n", + dev_err(priv->dev, "unsupported port speed %s\n", phy_speed_to_str(speed)); return -EINVAL; } @@ -864,7 +900,7 @@ static int rtl8365mb_ext_config_forcemode(struct realtek_smi *smi, int port, } else if (duplex == DUPLEX_HALF) { r_duplex = 0; } else { - dev_err(smi->dev, "unsupported duplex %s\n", + dev_err(priv->dev, "unsupported duplex %s\n", phy_duplex_to_str(duplex)); return -EINVAL; } @@ -886,8 +922,8 @@ static int rtl8365mb_ext_config_forcemode(struct realtek_smi *smi, int port, FIELD_PREP(RTL8365MB_DIGITAL_INTERFACE_FORCE_DUPLEX_MASK, r_duplex) | FIELD_PREP(RTL8365MB_DIGITAL_INTERFACE_FORCE_SPEED_MASK, r_speed); - ret = regmap_write(smi->map, - RTL8365MB_DIGITAL_INTERFACE_FORCE_REG(ext_port), + ret = regmap_write(priv->map, + RTL8365MB_DIGITAL_INTERFACE_FORCE_REG(ext_int), val); if (ret) return ret; @@ -898,13 +934,17 @@ static int rtl8365mb_ext_config_forcemode(struct realtek_smi *smi, int port, static bool rtl8365mb_phy_mode_supported(struct dsa_switch *ds, int port, phy_interface_t interface) { - if (dsa_is_user_port(ds, port) && + int ext_int; + + ext_int = rtl8365mb_extint_port_map[port]; + + if (ext_int < 0 && (interface == PHY_INTERFACE_MODE_NA || interface == PHY_INTERFACE_MODE_INTERNAL || interface == PHY_INTERFACE_MODE_GMII)) /* Internal PHY */ return true; - else if (dsa_is_cpu_port(ds, port) && + else if ((ext_int >= 1) && phy_interface_mode_is_rgmii(interface)) /* Extension MAC */ return true; @@ -916,7 +956,7 @@ static void rtl8365mb_phylink_validate(struct dsa_switch *ds, int port, unsigned long *supported, struct phylink_link_state *state) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0 }; /* include/linux/phylink.h says: @@ -925,7 +965,7 @@ static void rtl8365mb_phylink_validate(struct dsa_switch *ds, int port, */ if (state->interface != PHY_INTERFACE_MODE_NA && !rtl8365mb_phy_mode_supported(ds, port, state->interface)) { - dev_err(smi->dev, "phy mode %s is unsupported on port %d\n", + dev_err(priv->dev, "phy mode %s is unsupported on port %d\n", phy_modes(state->interface), port); linkmode_zero(supported); return; @@ -951,26 +991,26 @@ static void rtl8365mb_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, const struct phylink_link_state *state) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; int ret; if (!rtl8365mb_phy_mode_supported(ds, port, state->interface)) { - dev_err(smi->dev, "phy mode %s is unsupported on port %d\n", + dev_err(priv->dev, "phy mode %s is unsupported on port %d\n", phy_modes(state->interface), port); return; } if (mode != MLO_AN_PHY && mode != MLO_AN_FIXED) { - dev_err(smi->dev, + dev_err(priv->dev, "port %d supports only conventional PHY or fixed-link\n", port); return; } if (phy_interface_mode_is_rgmii(state->interface)) { - ret = rtl8365mb_ext_config_rgmii(smi, port, state->interface); + ret = rtl8365mb_ext_config_rgmii(priv, port, state->interface); if (ret) - dev_err(smi->dev, + dev_err(priv->dev, "failed to configure RGMII mode on port %d: %d\n", port, ret); return; @@ -985,20 +1025,20 @@ static void rtl8365mb_phylink_mac_link_down(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; struct rtl8365mb_port *p; struct rtl8365mb *mb; int ret; - mb = smi->chip_data; + mb = priv->chip_data; p = &mb->ports[port]; cancel_delayed_work_sync(&p->mib_work); if (phy_interface_mode_is_rgmii(interface)) { - ret = rtl8365mb_ext_config_forcemode(smi, port, false, 0, 0, + ret = rtl8365mb_ext_config_forcemode(priv, port, false, 0, 0, false, false); if (ret) - dev_err(smi->dev, + dev_err(priv->dev, "failed to reset forced mode on port %d: %d\n", port, ret); @@ -1013,21 +1053,21 @@ static void rtl8365mb_phylink_mac_link_up(struct dsa_switch *ds, int port, int duplex, bool tx_pause, bool rx_pause) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; struct rtl8365mb_port *p; struct rtl8365mb *mb; int ret; - mb = smi->chip_data; + mb = priv->chip_data; p = &mb->ports[port]; schedule_delayed_work(&p->mib_work, 0); if (phy_interface_mode_is_rgmii(interface)) { - ret = rtl8365mb_ext_config_forcemode(smi, port, true, speed, + ret = rtl8365mb_ext_config_forcemode(priv, port, true, speed, duplex, tx_pause, rx_pause); if (ret) - dev_err(smi->dev, + dev_err(priv->dev, "failed to force mode on port %d: %d\n", port, ret); @@ -1038,7 +1078,7 @@ static void rtl8365mb_phylink_mac_link_up(struct dsa_switch *ds, int port, static void rtl8365mb_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; enum rtl8365mb_stp_state val; int msti = 0; @@ -1057,36 +1097,36 @@ static void rtl8365mb_port_stp_state_set(struct dsa_switch *ds, int port, val = RTL8365MB_STP_STATE_FORWARDING; break; default: - dev_err(smi->dev, "invalid STP state: %u\n", state); + dev_err(priv->dev, "invalid STP state: %u\n", state); return; } - regmap_update_bits(smi->map, RTL8365MB_MSTI_CTRL_REG(msti, port), + regmap_update_bits(priv->map, RTL8365MB_MSTI_CTRL_REG(msti, port), RTL8365MB_MSTI_CTRL_PORT_STATE_MASK(port), val << RTL8365MB_MSTI_CTRL_PORT_STATE_OFFSET(port)); } -static int rtl8365mb_port_set_learning(struct realtek_smi *smi, int port, +static int rtl8365mb_port_set_learning(struct realtek_priv *priv, int port, bool enable) { - struct rtl8365mb *mb = smi->chip_data; + struct rtl8365mb *mb = priv->chip_data; /* Enable/disable learning by limiting the number of L2 addresses the * port can learn. Realtek documentation states that a limit of zero * disables learning. When enabling learning, set it to the chip's * maximum. */ - return regmap_write(smi->map, RTL8365MB_LUT_PORT_LEARN_LIMIT_REG(port), + return regmap_write(priv->map, RTL8365MB_LUT_PORT_LEARN_LIMIT_REG(port), enable ? mb->learn_limit_max : 0); } -static int rtl8365mb_port_set_isolation(struct realtek_smi *smi, int port, +static int rtl8365mb_port_set_isolation(struct realtek_priv *priv, int port, u32 mask) { - return regmap_write(smi->map, RTL8365MB_PORT_ISOLATION_REG(port), mask); + return regmap_write(priv->map, RTL8365MB_PORT_ISOLATION_REG(port), mask); } -static int rtl8365mb_mib_counter_read(struct realtek_smi *smi, int port, +static int rtl8365mb_mib_counter_read(struct realtek_priv *priv, int port, u32 offset, u32 length, u64 *mibvalue) { u64 tmpvalue = 0; @@ -1098,13 +1138,13 @@ static int rtl8365mb_mib_counter_read(struct realtek_smi *smi, int port, * and then poll the control register before reading the value from some * counter registers. */ - ret = regmap_write(smi->map, RTL8365MB_MIB_ADDRESS_REG, + ret = regmap_write(priv->map, RTL8365MB_MIB_ADDRESS_REG, RTL8365MB_MIB_ADDRESS(port, offset)); if (ret) return ret; /* Poll for completion */ - ret = regmap_read_poll_timeout(smi->map, RTL8365MB_MIB_CTRL0_REG, val, + ret = regmap_read_poll_timeout(priv->map, RTL8365MB_MIB_CTRL0_REG, val, !(val & RTL8365MB_MIB_CTRL0_BUSY_MASK), 10, 100); if (ret) @@ -1126,7 +1166,7 @@ static int rtl8365mb_mib_counter_read(struct realtek_smi *smi, int port, /* Read the MIB counter 16 bits at a time */ for (i = 0; i < length; i++) { - ret = regmap_read(smi->map, + ret = regmap_read(priv->map, RTL8365MB_MIB_COUNTER_REG(offset - i), &val); if (ret) return ret; @@ -1142,21 +1182,21 @@ static int rtl8365mb_mib_counter_read(struct realtek_smi *smi, int port, static void rtl8365mb_get_ethtool_stats(struct dsa_switch *ds, int port, u64 *data) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; struct rtl8365mb *mb; int ret; int i; - mb = smi->chip_data; + mb = priv->chip_data; mutex_lock(&mb->mib_lock); for (i = 0; i < RTL8365MB_MIB_END; i++) { struct rtl8365mb_mib_counter *mib = &rtl8365mb_mib_counters[i]; - ret = rtl8365mb_mib_counter_read(smi, port, mib->offset, + ret = rtl8365mb_mib_counter_read(priv, port, mib->offset, mib->length, &data[i]); if (ret) { - dev_err(smi->dev, + dev_err(priv->dev, "failed to read port %d counters: %d\n", port, ret); break; @@ -1190,15 +1230,15 @@ static int rtl8365mb_get_sset_count(struct dsa_switch *ds, int port, int sset) static void rtl8365mb_get_phy_stats(struct dsa_switch *ds, int port, struct ethtool_eth_phy_stats *phy_stats) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; struct rtl8365mb_mib_counter *mib; struct rtl8365mb *mb; - mb = smi->chip_data; + mb = priv->chip_data; mib = &rtl8365mb_mib_counters[RTL8365MB_MIB_dot3StatsSymbolErrors]; mutex_lock(&mb->mib_lock); - rtl8365mb_mib_counter_read(smi, port, mib->offset, mib->length, + rtl8365mb_mib_counter_read(priv, port, mib->offset, mib->length, &phy_stats->SymbolErrorDuringCarrier); mutex_unlock(&mb->mib_lock); } @@ -1226,12 +1266,12 @@ static void rtl8365mb_get_mac_stats(struct dsa_switch *ds, int port, [RTL8365MB_MIB_dot3StatsExcessiveCollisions] = 1, }; - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; struct rtl8365mb *mb; int ret; int i; - mb = smi->chip_data; + mb = priv->chip_data; mutex_lock(&mb->mib_lock); for (i = 0; i < RTL8365MB_MIB_END; i++) { @@ -1241,7 +1281,7 @@ static void rtl8365mb_get_mac_stats(struct dsa_switch *ds, int port, if (!cnt[i]) continue; - ret = rtl8365mb_mib_counter_read(smi, port, mib->offset, + ret = rtl8365mb_mib_counter_read(priv, port, mib->offset, mib->length, &cnt[i]); if (ret) break; @@ -1291,20 +1331,20 @@ static void rtl8365mb_get_mac_stats(struct dsa_switch *ds, int port, static void rtl8365mb_get_ctrl_stats(struct dsa_switch *ds, int port, struct ethtool_eth_ctrl_stats *ctrl_stats) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; struct rtl8365mb_mib_counter *mib; struct rtl8365mb *mb; - mb = smi->chip_data; + mb = priv->chip_data; mib = &rtl8365mb_mib_counters[RTL8365MB_MIB_dot3ControlInUnknownOpcodes]; mutex_lock(&mb->mib_lock); - rtl8365mb_mib_counter_read(smi, port, mib->offset, mib->length, + rtl8365mb_mib_counter_read(priv, port, mib->offset, mib->length, &ctrl_stats->UnsupportedOpcodesReceived); mutex_unlock(&mb->mib_lock); } -static void rtl8365mb_stats_update(struct realtek_smi *smi, int port) +static void rtl8365mb_stats_update(struct realtek_priv *priv, int port) { u64 cnt[RTL8365MB_MIB_END] = { [RTL8365MB_MIB_ifOutOctets] = 1, @@ -1323,7 +1363,7 @@ static void rtl8365mb_stats_update(struct realtek_smi *smi, int port) [RTL8365MB_MIB_dot3StatsFCSErrors] = 1, [RTL8365MB_MIB_dot3StatsLateCollisions] = 1, }; - struct rtl8365mb *mb = smi->chip_data; + struct rtl8365mb *mb = priv->chip_data; struct rtnl_link_stats64 *stats; int ret; int i; @@ -1338,7 +1378,7 @@ static void rtl8365mb_stats_update(struct realtek_smi *smi, int port) if (!cnt[i]) continue; - ret = rtl8365mb_mib_counter_read(smi, port, c->offset, + ret = rtl8365mb_mib_counter_read(priv, port, c->offset, c->length, &cnt[i]); if (ret) break; @@ -1388,9 +1428,9 @@ static void rtl8365mb_stats_poll(struct work_struct *work) struct rtl8365mb_port *p = container_of(to_delayed_work(work), struct rtl8365mb_port, mib_work); - struct realtek_smi *smi = p->smi; + struct realtek_priv *priv = p->priv; - rtl8365mb_stats_update(smi, p->index); + rtl8365mb_stats_update(priv, p->index); schedule_delayed_work(&p->mib_work, RTL8365MB_STATS_INTERVAL_JIFFIES); } @@ -1398,11 +1438,11 @@ static void rtl8365mb_stats_poll(struct work_struct *work) static void rtl8365mb_get_stats64(struct dsa_switch *ds, int port, struct rtnl_link_stats64 *s) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; struct rtl8365mb_port *p; struct rtl8365mb *mb; - mb = smi->chip_data; + mb = priv->chip_data; p = &mb->ports[port]; spin_lock(&p->stats_lock); @@ -1410,9 +1450,9 @@ static void rtl8365mb_get_stats64(struct dsa_switch *ds, int port, spin_unlock(&p->stats_lock); } -static void rtl8365mb_stats_setup(struct realtek_smi *smi) +static void rtl8365mb_stats_setup(struct realtek_priv *priv) { - struct rtl8365mb *mb = smi->chip_data; + struct rtl8365mb *mb = priv->chip_data; int i; /* Per-chip global mutex to protect MIB counter access, since doing @@ -1420,10 +1460,10 @@ static void rtl8365mb_stats_setup(struct realtek_smi *smi) */ mutex_init(&mb->mib_lock); - for (i = 0; i < smi->num_ports; i++) { + for (i = 0; i < priv->num_ports; i++) { struct rtl8365mb_port *p = &mb->ports[i]; - if (dsa_is_unused_port(smi->ds, i)) + if (dsa_is_unused_port(priv->ds, i)) continue; /* Per-port spinlock to protect the stats64 data */ @@ -1436,45 +1476,45 @@ static void rtl8365mb_stats_setup(struct realtek_smi *smi) } } -static void rtl8365mb_stats_teardown(struct realtek_smi *smi) +static void rtl8365mb_stats_teardown(struct realtek_priv *priv) { - struct rtl8365mb *mb = smi->chip_data; + struct rtl8365mb *mb = priv->chip_data; int i; - for (i = 0; i < smi->num_ports; i++) { + for (i = 0; i < priv->num_ports; i++) { struct rtl8365mb_port *p = &mb->ports[i]; - if (dsa_is_unused_port(smi->ds, i)) + if (dsa_is_unused_port(priv->ds, i)) continue; cancel_delayed_work_sync(&p->mib_work); } } -static int rtl8365mb_get_and_clear_status_reg(struct realtek_smi *smi, u32 reg, +static int rtl8365mb_get_and_clear_status_reg(struct realtek_priv *priv, u32 reg, u32 *val) { int ret; - ret = regmap_read(smi->map, reg, val); + ret = regmap_read(priv->map, reg, val); if (ret) return ret; - return regmap_write(smi->map, reg, *val); + return regmap_write(priv->map, reg, *val); } static irqreturn_t rtl8365mb_irq(int irq, void *data) { - struct realtek_smi *smi = data; + struct realtek_priv *priv = data; unsigned long line_changes = 0; struct rtl8365mb *mb; u32 stat; int line; int ret; - mb = smi->chip_data; + mb = priv->chip_data; - ret = rtl8365mb_get_and_clear_status_reg(smi, RTL8365MB_INTR_STATUS_REG, + ret = rtl8365mb_get_and_clear_status_reg(priv, RTL8365MB_INTR_STATUS_REG, &stat); if (ret) goto out_error; @@ -1485,14 +1525,14 @@ static irqreturn_t rtl8365mb_irq(int irq, void *data) u32 val; ret = rtl8365mb_get_and_clear_status_reg( - smi, RTL8365MB_PORT_LINKUP_IND_REG, &val); + priv, RTL8365MB_PORT_LINKUP_IND_REG, &val); if (ret) goto out_error; linkup_ind = FIELD_GET(RTL8365MB_PORT_LINKUP_IND_MASK, val); ret = rtl8365mb_get_and_clear_status_reg( - smi, RTL8365MB_PORT_LINKDOWN_IND_REG, &val); + priv, RTL8365MB_PORT_LINKDOWN_IND_REG, &val); if (ret) goto out_error; @@ -1504,8 +1544,8 @@ static irqreturn_t rtl8365mb_irq(int irq, void *data) if (!line_changes) goto out_none; - for_each_set_bit(line, &line_changes, smi->num_ports) { - int child_irq = irq_find_mapping(smi->irqdomain, line); + for_each_set_bit(line, &line_changes, priv->num_ports) { + int child_irq = irq_find_mapping(priv->irqdomain, line); handle_nested_irq(child_irq); } @@ -1513,7 +1553,7 @@ static irqreturn_t rtl8365mb_irq(int irq, void *data) return IRQ_HANDLED; out_error: - dev_err(smi->dev, "failed to read interrupt status: %d\n", ret); + dev_err(priv->dev, "failed to read interrupt status: %d\n", ret); out_none: return IRQ_NONE; @@ -1548,27 +1588,27 @@ static const struct irq_domain_ops rtl8365mb_irqdomain_ops = { .xlate = irq_domain_xlate_onecell, }; -static int rtl8365mb_set_irq_enable(struct realtek_smi *smi, bool enable) +static int rtl8365mb_set_irq_enable(struct realtek_priv *priv, bool enable) { - return regmap_update_bits(smi->map, RTL8365MB_INTR_CTRL_REG, + return regmap_update_bits(priv->map, RTL8365MB_INTR_CTRL_REG, RTL8365MB_INTR_LINK_CHANGE_MASK, FIELD_PREP(RTL8365MB_INTR_LINK_CHANGE_MASK, enable ? 1 : 0)); } -static int rtl8365mb_irq_enable(struct realtek_smi *smi) +static int rtl8365mb_irq_enable(struct realtek_priv *priv) { - return rtl8365mb_set_irq_enable(smi, true); + return rtl8365mb_set_irq_enable(priv, true); } -static int rtl8365mb_irq_disable(struct realtek_smi *smi) +static int rtl8365mb_irq_disable(struct realtek_priv *priv) { - return rtl8365mb_set_irq_enable(smi, false); + return rtl8365mb_set_irq_enable(priv, false); } -static int rtl8365mb_irq_setup(struct realtek_smi *smi) +static int rtl8365mb_irq_setup(struct realtek_priv *priv) { - struct rtl8365mb *mb = smi->chip_data; + struct rtl8365mb *mb = priv->chip_data; struct device_node *intc; u32 irq_trig; int virq; @@ -1577,9 +1617,9 @@ static int rtl8365mb_irq_setup(struct realtek_smi *smi) int ret; int i; - intc = of_get_child_by_name(smi->dev->of_node, "interrupt-controller"); + intc = of_get_child_by_name(priv->dev->of_node, "interrupt-controller"); if (!intc) { - dev_err(smi->dev, "missing child interrupt-controller node\n"); + dev_err(priv->dev, "missing child interrupt-controller node\n"); return -EINVAL; } @@ -1587,24 +1627,24 @@ static int rtl8365mb_irq_setup(struct realtek_smi *smi) irq = of_irq_get(intc, 0); if (irq <= 0) { if (irq != -EPROBE_DEFER) - dev_err(smi->dev, "failed to get parent irq: %d\n", + dev_err(priv->dev, "failed to get parent irq: %d\n", irq); ret = irq ? irq : -EINVAL; goto out_put_node; } - smi->irqdomain = irq_domain_add_linear(intc, smi->num_ports, - &rtl8365mb_irqdomain_ops, smi); - if (!smi->irqdomain) { - dev_err(smi->dev, "failed to add irq domain\n"); + priv->irqdomain = irq_domain_add_linear(intc, priv->num_ports, + &rtl8365mb_irqdomain_ops, priv); + if (!priv->irqdomain) { + dev_err(priv->dev, "failed to add irq domain\n"); ret = -ENOMEM; goto out_put_node; } - for (i = 0; i < smi->num_ports; i++) { - virq = irq_create_mapping(smi->irqdomain, i); + for (i = 0; i < priv->num_ports; i++) { + virq = irq_create_mapping(priv->irqdomain, i); if (!virq) { - dev_err(smi->dev, + dev_err(priv->dev, "failed to create irq domain mapping\n"); ret = -EINVAL; goto out_remove_irqdomain; @@ -1625,40 +1665,40 @@ static int rtl8365mb_irq_setup(struct realtek_smi *smi) val = RTL8365MB_INTR_POLARITY_LOW; break; default: - dev_err(smi->dev, "unsupported irq trigger type %u\n", + dev_err(priv->dev, "unsupported irq trigger type %u\n", irq_trig); ret = -EINVAL; goto out_remove_irqdomain; } - ret = regmap_update_bits(smi->map, RTL8365MB_INTR_POLARITY_REG, + ret = regmap_update_bits(priv->map, RTL8365MB_INTR_POLARITY_REG, RTL8365MB_INTR_POLARITY_MASK, FIELD_PREP(RTL8365MB_INTR_POLARITY_MASK, val)); if (ret) goto out_remove_irqdomain; /* Disable the interrupt in case the chip has it enabled on reset */ - ret = rtl8365mb_irq_disable(smi); + ret = rtl8365mb_irq_disable(priv); if (ret) goto out_remove_irqdomain; /* Clear the interrupt status register */ - ret = regmap_write(smi->map, RTL8365MB_INTR_STATUS_REG, + ret = regmap_write(priv->map, RTL8365MB_INTR_STATUS_REG, RTL8365MB_INTR_ALL_MASK); if (ret) goto out_remove_irqdomain; ret = request_threaded_irq(irq, NULL, rtl8365mb_irq, IRQF_ONESHOT, - "rtl8365mb", smi); + "rtl8365mb", priv); if (ret) { - dev_err(smi->dev, "failed to request irq: %d\n", ret); + dev_err(priv->dev, "failed to request irq: %d\n", ret); goto out_remove_irqdomain; } /* Store the irq so that we know to free it during teardown */ mb->irq = irq; - ret = rtl8365mb_irq_enable(smi); + ret = rtl8365mb_irq_enable(priv); if (ret) goto out_free_irq; @@ -1667,17 +1707,17 @@ static int rtl8365mb_irq_setup(struct realtek_smi *smi) return 0; out_free_irq: - free_irq(mb->irq, smi); + free_irq(mb->irq, priv); mb->irq = 0; out_remove_irqdomain: - for (i = 0; i < smi->num_ports; i++) { - virq = irq_find_mapping(smi->irqdomain, i); + for (i = 0; i < priv->num_ports; i++) { + virq = irq_find_mapping(priv->irqdomain, i); irq_dispose_mapping(virq); } - irq_domain_remove(smi->irqdomain); - smi->irqdomain = NULL; + irq_domain_remove(priv->irqdomain); + priv->irqdomain = NULL; out_put_node: of_node_put(intc); @@ -1685,36 +1725,34 @@ out_put_node: return ret; } -static void rtl8365mb_irq_teardown(struct realtek_smi *smi) +static void rtl8365mb_irq_teardown(struct realtek_priv *priv) { - struct rtl8365mb *mb = smi->chip_data; + struct rtl8365mb *mb = priv->chip_data; int virq; int i; if (mb->irq) { - free_irq(mb->irq, smi); + free_irq(mb->irq, priv); mb->irq = 0; } - if (smi->irqdomain) { - for (i = 0; i < smi->num_ports; i++) { - virq = irq_find_mapping(smi->irqdomain, i); + if (priv->irqdomain) { + for (i = 0; i < priv->num_ports; i++) { + virq = irq_find_mapping(priv->irqdomain, i); irq_dispose_mapping(virq); } - irq_domain_remove(smi->irqdomain); - smi->irqdomain = NULL; + irq_domain_remove(priv->irqdomain); + priv->irqdomain = NULL; } } -static int rtl8365mb_cpu_config(struct realtek_smi *smi) +static int rtl8365mb_cpu_config(struct realtek_priv *priv, const struct rtl8365mb_cpu *cpu) { - struct rtl8365mb *mb = smi->chip_data; - struct rtl8365mb_cpu *cpu = &mb->cpu; u32 val; int ret; - ret = regmap_update_bits(smi->map, RTL8365MB_CPU_PORT_MASK_REG, + ret = regmap_update_bits(priv->map, RTL8365MB_CPU_PORT_MASK_REG, RTL8365MB_CPU_PORT_MASK_MASK, FIELD_PREP(RTL8365MB_CPU_PORT_MASK_MASK, cpu->mask)); @@ -1726,26 +1764,26 @@ static int rtl8365mb_cpu_config(struct realtek_smi *smi) FIELD_PREP(RTL8365MB_CPU_CTRL_TAG_POSITION_MASK, cpu->position) | FIELD_PREP(RTL8365MB_CPU_CTRL_RXBYTECOUNT_MASK, cpu->rx_length) | FIELD_PREP(RTL8365MB_CPU_CTRL_TAG_FORMAT_MASK, cpu->format) | - FIELD_PREP(RTL8365MB_CPU_CTRL_TRAP_PORT_MASK, cpu->trap_port) | + FIELD_PREP(RTL8365MB_CPU_CTRL_TRAP_PORT_MASK, cpu->trap_port & 0x7) | FIELD_PREP(RTL8365MB_CPU_CTRL_TRAP_PORT_EXT_MASK, - cpu->trap_port >> 3); - ret = regmap_write(smi->map, RTL8365MB_CPU_CTRL_REG, val); + cpu->trap_port >> 3 & 0x1); + ret = regmap_write(priv->map, RTL8365MB_CPU_CTRL_REG, val); if (ret) return ret; return 0; } -static int rtl8365mb_switch_init(struct realtek_smi *smi) +static int rtl8365mb_switch_init(struct realtek_priv *priv) { - struct rtl8365mb *mb = smi->chip_data; + struct rtl8365mb *mb = priv->chip_data; int ret; int i; /* Do any chip-specific init jam before getting to the common stuff */ if (mb->jam_table) { for (i = 0; i < mb->jam_size; i++) { - ret = regmap_write(smi->map, mb->jam_table[i].reg, + ret = regmap_write(priv->map, mb->jam_table[i].reg, mb->jam_table[i].val); if (ret) return ret; @@ -1754,7 +1792,7 @@ static int rtl8365mb_switch_init(struct realtek_smi *smi) /* Common init jam */ for (i = 0; i < ARRAY_SIZE(rtl8365mb_init_jam_common); i++) { - ret = regmap_write(smi->map, rtl8365mb_init_jam_common[i].reg, + ret = regmap_write(priv->map, rtl8365mb_init_jam_common[i].reg, rtl8365mb_init_jam_common[i].val); if (ret) return ret; @@ -1763,75 +1801,86 @@ static int rtl8365mb_switch_init(struct realtek_smi *smi) return 0; } -static int rtl8365mb_reset_chip(struct realtek_smi *smi) +static int rtl8365mb_reset_chip(struct realtek_priv *priv) { u32 val; - realtek_smi_write_reg_noack(smi, RTL8365MB_CHIP_RESET_REG, - FIELD_PREP(RTL8365MB_CHIP_RESET_HW_MASK, - 1)); + priv->write_reg_noack(priv, RTL8365MB_CHIP_RESET_REG, + FIELD_PREP(RTL8365MB_CHIP_RESET_HW_MASK, 1)); /* Realtek documentation says the chip needs 1 second to reset. Sleep * for 100 ms before accessing any registers to prevent ACK timeouts. */ msleep(100); - return regmap_read_poll_timeout(smi->map, RTL8365MB_CHIP_RESET_REG, val, + return regmap_read_poll_timeout(priv->map, RTL8365MB_CHIP_RESET_REG, val, !(val & RTL8365MB_CHIP_RESET_HW_MASK), 20000, 1e6); } static int rtl8365mb_setup(struct dsa_switch *ds) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; + struct rtl8365mb_cpu cpu = {0}; + struct dsa_port *cpu_dp; struct rtl8365mb *mb; int ret; int i; - mb = smi->chip_data; + mb = priv->chip_data; - ret = rtl8365mb_reset_chip(smi); + ret = rtl8365mb_reset_chip(priv); if (ret) { - dev_err(smi->dev, "failed to reset chip: %d\n", ret); + dev_err(priv->dev, "failed to reset chip: %d\n", ret); goto out_error; } /* Configure switch to vendor-defined initial state */ - ret = rtl8365mb_switch_init(smi); + ret = rtl8365mb_switch_init(priv); if (ret) { - dev_err(smi->dev, "failed to initialize switch: %d\n", ret); + dev_err(priv->dev, "failed to initialize switch: %d\n", ret); goto out_error; } /* Set up cascading IRQs */ - ret = rtl8365mb_irq_setup(smi); + ret = rtl8365mb_irq_setup(priv); if (ret == -EPROBE_DEFER) return ret; else if (ret) - dev_info(smi->dev, "no interrupt support\n"); + dev_info(priv->dev, "no interrupt support\n"); /* Configure CPU tagging */ - ret = rtl8365mb_cpu_config(smi); + cpu.trap_port = RTL8365MB_MAX_NUM_PORTS; + dsa_switch_for_each_cpu_port(cpu_dp, priv->ds) { + cpu.mask |= BIT(cpu_dp->index); + + if (cpu.trap_port == RTL8365MB_MAX_NUM_PORTS) + cpu.trap_port = cpu_dp->index; + } + + cpu.enable = cpu.mask > 0; + cpu.insert = RTL8365MB_CPU_INSERT_TO_ALL; + cpu.position = RTL8365MB_CPU_POS_AFTER_SA; + cpu.rx_length = RTL8365MB_CPU_RXLEN_64BYTES; + cpu.format = RTL8365MB_CPU_FORMAT_8BYTES; + + ret = rtl8365mb_cpu_config(priv, &cpu); if (ret) goto out_teardown_irq; /* Configure ports */ - for (i = 0; i < smi->num_ports; i++) { + for (i = 0; i < priv->num_ports; i++) { struct rtl8365mb_port *p = &mb->ports[i]; - if (dsa_is_unused_port(smi->ds, i)) + if (dsa_is_unused_port(priv->ds, i)) continue; - /* Set up per-port private data */ - p->smi = smi; - p->index = i; - /* Forward only to the CPU */ - ret = rtl8365mb_port_set_isolation(smi, i, BIT(smi->cpu_port)); + ret = rtl8365mb_port_set_isolation(priv, i, cpu.mask); if (ret) goto out_teardown_irq; /* Disable learning */ - ret = rtl8365mb_port_set_learning(smi, i, false); + ret = rtl8365mb_port_set_learning(priv, i, false); if (ret) goto out_teardown_irq; @@ -1839,29 +1888,35 @@ static int rtl8365mb_setup(struct dsa_switch *ds) * ports will still forward frames to the CPU despite being * administratively down by default. */ - rtl8365mb_port_stp_state_set(smi->ds, i, BR_STATE_DISABLED); + rtl8365mb_port_stp_state_set(priv->ds, i, BR_STATE_DISABLED); + + /* Set up per-port private data */ + p->priv = priv; + p->index = i; } /* Set maximum packet length to 1536 bytes */ - ret = regmap_update_bits(smi->map, RTL8365MB_CFG0_MAX_LEN_REG, + ret = regmap_update_bits(priv->map, RTL8365MB_CFG0_MAX_LEN_REG, RTL8365MB_CFG0_MAX_LEN_MASK, FIELD_PREP(RTL8365MB_CFG0_MAX_LEN_MASK, 1536)); if (ret) goto out_teardown_irq; - ret = realtek_smi_setup_mdio(smi); - if (ret) { - dev_err(smi->dev, "could not set up MDIO bus\n"); - goto out_teardown_irq; + if (priv->setup_interface) { + ret = priv->setup_interface(ds); + if (ret) { + dev_err(priv->dev, "could not set up MDIO bus\n"); + goto out_teardown_irq; + } } /* Start statistics counter polling */ - rtl8365mb_stats_setup(smi); + rtl8365mb_stats_setup(priv); return 0; out_teardown_irq: - rtl8365mb_irq_teardown(smi); + rtl8365mb_irq_teardown(priv); out_error: return ret; @@ -1869,10 +1924,10 @@ out_error: static void rtl8365mb_teardown(struct dsa_switch *ds) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; - rtl8365mb_stats_teardown(smi); - rtl8365mb_irq_teardown(smi); + rtl8365mb_stats_teardown(priv); + rtl8365mb_irq_teardown(priv); } static int rtl8365mb_get_chip_id_and_ver(struct regmap *map, u32 *id, u32 *ver) @@ -1902,48 +1957,57 @@ static int rtl8365mb_get_chip_id_and_ver(struct regmap *map, u32 *id, u32 *ver) return 0; } -static int rtl8365mb_detect(struct realtek_smi *smi) +static int rtl8365mb_detect(struct realtek_priv *priv) { - struct rtl8365mb *mb = smi->chip_data; + struct rtl8365mb *mb = priv->chip_data; u32 chip_id; u32 chip_ver; int ret; - ret = rtl8365mb_get_chip_id_and_ver(smi->map, &chip_id, &chip_ver); + ret = rtl8365mb_get_chip_id_and_ver(priv->map, &chip_id, &chip_ver); if (ret) { - dev_err(smi->dev, "failed to read chip id and version: %d\n", + dev_err(priv->dev, "failed to read chip id and version: %d\n", ret); return ret; } switch (chip_id) { case RTL8365MB_CHIP_ID_8365MB_VC: - dev_info(smi->dev, - "found an RTL8365MB-VC switch (ver=0x%04x)\n", - chip_ver); + switch (chip_ver) { + case RTL8365MB_CHIP_VER_8365MB_VC: + dev_info(priv->dev, + "found an RTL8365MB-VC switch (ver=0x%04x)\n", + chip_ver); + break; + case RTL8365MB_CHIP_VER_8367RB: + dev_info(priv->dev, + "found an RTL8367RB-VB switch (ver=0x%04x)\n", + chip_ver); + break; + case RTL8365MB_CHIP_VER_8367S: + dev_info(priv->dev, + "found an RTL8367S switch (ver=0x%04x)\n", + chip_ver); + break; + default: + dev_err(priv->dev, "unrecognized switch version (ver=0x%04x)", + chip_ver); + return -ENODEV; + } - smi->cpu_port = RTL8365MB_CPU_PORT_NUM_8365MB_VC; - smi->num_ports = smi->cpu_port + 1; + priv->num_ports = RTL8365MB_MAX_NUM_PORTS; - mb->smi = smi; + mb->priv = priv; mb->chip_id = chip_id; mb->chip_ver = chip_ver; - mb->port_mask = BIT(smi->num_ports) - 1; - mb->learn_limit_max = RTL8365MB_LEARN_LIMIT_MAX_8365MB_VC; + mb->port_mask = GENMASK(priv->num_ports - 1, 0); + mb->learn_limit_max = RTL8365MB_LEARN_LIMIT_MAX; mb->jam_table = rtl8365mb_init_jam_8365mb_vc; mb->jam_size = ARRAY_SIZE(rtl8365mb_init_jam_8365mb_vc); - mb->cpu.enable = 1; - mb->cpu.mask = BIT(smi->cpu_port); - mb->cpu.trap_port = smi->cpu_port; - mb->cpu.insert = RTL8365MB_CPU_INSERT_TO_ALL; - mb->cpu.position = RTL8365MB_CPU_POS_AFTER_SA; - mb->cpu.rx_length = RTL8365MB_CPU_RXLEN_64BYTES; - mb->cpu.format = RTL8365MB_CPU_FORMAT_8BYTES; - break; default: - dev_err(smi->dev, + dev_err(priv->dev, "found an unknown Realtek switch (id=0x%04x, ver=0x%04x)\n", chip_id, chip_ver); return -ENODEV; @@ -1952,7 +2016,25 @@ static int rtl8365mb_detect(struct realtek_smi *smi) return 0; } -static const struct dsa_switch_ops rtl8365mb_switch_ops = { +static const struct dsa_switch_ops rtl8365mb_switch_ops_smi = { + .get_tag_protocol = rtl8365mb_get_tag_protocol, + .setup = rtl8365mb_setup, + .teardown = rtl8365mb_teardown, + .phylink_validate = rtl8365mb_phylink_validate, + .phylink_mac_config = rtl8365mb_phylink_mac_config, + .phylink_mac_link_down = rtl8365mb_phylink_mac_link_down, + .phylink_mac_link_up = rtl8365mb_phylink_mac_link_up, + .port_stp_state_set = rtl8365mb_port_stp_state_set, + .get_strings = rtl8365mb_get_strings, + .get_ethtool_stats = rtl8365mb_get_ethtool_stats, + .get_sset_count = rtl8365mb_get_sset_count, + .get_eth_phy_stats = rtl8365mb_get_phy_stats, + .get_eth_mac_stats = rtl8365mb_get_mac_stats, + .get_eth_ctrl_stats = rtl8365mb_get_ctrl_stats, + .get_stats64 = rtl8365mb_get_stats64, +}; + +static const struct dsa_switch_ops rtl8365mb_switch_ops_mdio = { .get_tag_protocol = rtl8365mb_get_tag_protocol, .setup = rtl8365mb_setup, .teardown = rtl8365mb_teardown, @@ -1960,6 +2042,8 @@ static const struct dsa_switch_ops rtl8365mb_switch_ops = { .phylink_mac_config = rtl8365mb_phylink_mac_config, .phylink_mac_link_down = rtl8365mb_phylink_mac_link_down, .phylink_mac_link_up = rtl8365mb_phylink_mac_link_up, + .phy_read = rtl8365mb_dsa_phy_read, + .phy_write = rtl8365mb_dsa_phy_write, .port_stp_state_set = rtl8365mb_port_stp_state_set, .get_strings = rtl8365mb_get_strings, .get_ethtool_stats = rtl8365mb_get_ethtool_stats, @@ -1970,18 +2054,23 @@ static const struct dsa_switch_ops rtl8365mb_switch_ops = { .get_stats64 = rtl8365mb_get_stats64, }; -static const struct realtek_smi_ops rtl8365mb_smi_ops = { +static const struct realtek_ops rtl8365mb_ops = { .detect = rtl8365mb_detect, .phy_read = rtl8365mb_phy_read, .phy_write = rtl8365mb_phy_write, }; -const struct realtek_smi_variant rtl8365mb_variant = { - .ds_ops = &rtl8365mb_switch_ops, - .ops = &rtl8365mb_smi_ops, +const struct realtek_variant rtl8365mb_variant = { + .ds_ops_smi = &rtl8365mb_switch_ops_smi, + .ds_ops_mdio = &rtl8365mb_switch_ops_mdio, + .ops = &rtl8365mb_ops, .clk_delay = 10, .cmd_read = 0xb9, .cmd_write = 0xb8, .chip_data_sz = sizeof(struct rtl8365mb), }; EXPORT_SYMBOL_GPL(rtl8365mb_variant); + +MODULE_AUTHOR("Alvin Å ipraga <alsi@bang-olufsen.dk>"); +MODULE_DESCRIPTION("Driver for RTL8365MB-VC ethernet switch"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/dsa/rtl8366.c b/drivers/net/dsa/realtek/rtl8366-core.c index bdb8d8d34880..dc5f75be3017 100644 --- a/drivers/net/dsa/rtl8366.c +++ b/drivers/net/dsa/realtek/rtl8366-core.c @@ -11,18 +11,18 @@ #include <linux/if_bridge.h> #include <net/dsa.h> -#include "realtek-smi-core.h" +#include "realtek.h" -int rtl8366_mc_is_used(struct realtek_smi *smi, int mc_index, int *used) +int rtl8366_mc_is_used(struct realtek_priv *priv, int mc_index, int *used) { int ret; int i; *used = 0; - for (i = 0; i < smi->num_ports; i++) { + for (i = 0; i < priv->num_ports; i++) { int index = 0; - ret = smi->ops->get_mc_index(smi, i, &index); + ret = priv->ops->get_mc_index(priv, i, &index); if (ret) return ret; @@ -38,13 +38,13 @@ EXPORT_SYMBOL_GPL(rtl8366_mc_is_used); /** * rtl8366_obtain_mc() - retrieve or allocate a VLAN member configuration - * @smi: the Realtek SMI device instance + * @priv: the Realtek SMI device instance * @vid: the VLAN ID to look up or allocate * @vlanmc: the pointer will be assigned to a pointer to a valid member config * if successful * @return: index of a new member config or negative error number */ -static int rtl8366_obtain_mc(struct realtek_smi *smi, int vid, +static int rtl8366_obtain_mc(struct realtek_priv *priv, int vid, struct rtl8366_vlan_mc *vlanmc) { struct rtl8366_vlan_4k vlan4k; @@ -52,10 +52,10 @@ static int rtl8366_obtain_mc(struct realtek_smi *smi, int vid, int i; /* Try to find an existing member config entry for this VID */ - for (i = 0; i < smi->num_vlan_mc; i++) { - ret = smi->ops->get_vlan_mc(smi, i, vlanmc); + for (i = 0; i < priv->num_vlan_mc; i++) { + ret = priv->ops->get_vlan_mc(priv, i, vlanmc); if (ret) { - dev_err(smi->dev, "error searching for VLAN MC %d for VID %d\n", + dev_err(priv->dev, "error searching for VLAN MC %d for VID %d\n", i, vid); return ret; } @@ -65,19 +65,19 @@ static int rtl8366_obtain_mc(struct realtek_smi *smi, int vid, } /* We have no MC entry for this VID, try to find an empty one */ - for (i = 0; i < smi->num_vlan_mc; i++) { - ret = smi->ops->get_vlan_mc(smi, i, vlanmc); + for (i = 0; i < priv->num_vlan_mc; i++) { + ret = priv->ops->get_vlan_mc(priv, i, vlanmc); if (ret) { - dev_err(smi->dev, "error searching for VLAN MC %d for VID %d\n", + dev_err(priv->dev, "error searching for VLAN MC %d for VID %d\n", i, vid); return ret; } if (vlanmc->vid == 0 && vlanmc->member == 0) { /* Update the entry from the 4K table */ - ret = smi->ops->get_vlan_4k(smi, vid, &vlan4k); + ret = priv->ops->get_vlan_4k(priv, vid, &vlan4k); if (ret) { - dev_err(smi->dev, "error looking for 4K VLAN MC %d for VID %d\n", + dev_err(priv->dev, "error looking for 4K VLAN MC %d for VID %d\n", i, vid); return ret; } @@ -86,30 +86,30 @@ static int rtl8366_obtain_mc(struct realtek_smi *smi, int vid, vlanmc->member = vlan4k.member; vlanmc->untag = vlan4k.untag; vlanmc->fid = vlan4k.fid; - ret = smi->ops->set_vlan_mc(smi, i, vlanmc); + ret = priv->ops->set_vlan_mc(priv, i, vlanmc); if (ret) { - dev_err(smi->dev, "unable to set/update VLAN MC %d for VID %d\n", + dev_err(priv->dev, "unable to set/update VLAN MC %d for VID %d\n", i, vid); return ret; } - dev_dbg(smi->dev, "created new MC at index %d for VID %d\n", + dev_dbg(priv->dev, "created new MC at index %d for VID %d\n", i, vid); return i; } } /* MC table is full, try to find an unused entry and replace it */ - for (i = 0; i < smi->num_vlan_mc; i++) { + for (i = 0; i < priv->num_vlan_mc; i++) { int used; - ret = rtl8366_mc_is_used(smi, i, &used); + ret = rtl8366_mc_is_used(priv, i, &used); if (ret) return ret; if (!used) { /* Update the entry from the 4K table */ - ret = smi->ops->get_vlan_4k(smi, vid, &vlan4k); + ret = priv->ops->get_vlan_4k(priv, vid, &vlan4k); if (ret) return ret; @@ -117,23 +117,23 @@ static int rtl8366_obtain_mc(struct realtek_smi *smi, int vid, vlanmc->member = vlan4k.member; vlanmc->untag = vlan4k.untag; vlanmc->fid = vlan4k.fid; - ret = smi->ops->set_vlan_mc(smi, i, vlanmc); + ret = priv->ops->set_vlan_mc(priv, i, vlanmc); if (ret) { - dev_err(smi->dev, "unable to set/update VLAN MC %d for VID %d\n", + dev_err(priv->dev, "unable to set/update VLAN MC %d for VID %d\n", i, vid); return ret; } - dev_dbg(smi->dev, "recycled MC at index %i for VID %d\n", + dev_dbg(priv->dev, "recycled MC at index %i for VID %d\n", i, vid); return i; } } - dev_err(smi->dev, "all VLAN member configurations are in use\n"); + dev_err(priv->dev, "all VLAN member configurations are in use\n"); return -ENOSPC; } -int rtl8366_set_vlan(struct realtek_smi *smi, int vid, u32 member, +int rtl8366_set_vlan(struct realtek_priv *priv, int vid, u32 member, u32 untag, u32 fid) { struct rtl8366_vlan_mc vlanmc; @@ -141,31 +141,31 @@ int rtl8366_set_vlan(struct realtek_smi *smi, int vid, u32 member, int mc; int ret; - if (!smi->ops->is_vlan_valid(smi, vid)) + if (!priv->ops->is_vlan_valid(priv, vid)) return -EINVAL; - dev_dbg(smi->dev, + dev_dbg(priv->dev, "setting VLAN%d 4k members: 0x%02x, untagged: 0x%02x\n", vid, member, untag); /* Update the 4K table */ - ret = smi->ops->get_vlan_4k(smi, vid, &vlan4k); + ret = priv->ops->get_vlan_4k(priv, vid, &vlan4k); if (ret) return ret; vlan4k.member |= member; vlan4k.untag |= untag; vlan4k.fid = fid; - ret = smi->ops->set_vlan_4k(smi, &vlan4k); + ret = priv->ops->set_vlan_4k(priv, &vlan4k); if (ret) return ret; - dev_dbg(smi->dev, + dev_dbg(priv->dev, "resulting VLAN%d 4k members: 0x%02x, untagged: 0x%02x\n", vid, vlan4k.member, vlan4k.untag); /* Find or allocate a member config for this VID */ - ret = rtl8366_obtain_mc(smi, vid, &vlanmc); + ret = rtl8366_obtain_mc(priv, vid, &vlanmc); if (ret < 0) return ret; mc = ret; @@ -176,12 +176,12 @@ int rtl8366_set_vlan(struct realtek_smi *smi, int vid, u32 member, vlanmc.fid = fid; /* Commit updates to the MC entry */ - ret = smi->ops->set_vlan_mc(smi, mc, &vlanmc); + ret = priv->ops->set_vlan_mc(priv, mc, &vlanmc); if (ret) - dev_err(smi->dev, "failed to commit changes to VLAN MC index %d for VID %d\n", + dev_err(priv->dev, "failed to commit changes to VLAN MC index %d for VID %d\n", mc, vid); else - dev_dbg(smi->dev, + dev_dbg(priv->dev, "resulting VLAN%d MC members: 0x%02x, untagged: 0x%02x\n", vid, vlanmc.member, vlanmc.untag); @@ -189,37 +189,37 @@ int rtl8366_set_vlan(struct realtek_smi *smi, int vid, u32 member, } EXPORT_SYMBOL_GPL(rtl8366_set_vlan); -int rtl8366_set_pvid(struct realtek_smi *smi, unsigned int port, +int rtl8366_set_pvid(struct realtek_priv *priv, unsigned int port, unsigned int vid) { struct rtl8366_vlan_mc vlanmc; int mc; int ret; - if (!smi->ops->is_vlan_valid(smi, vid)) + if (!priv->ops->is_vlan_valid(priv, vid)) return -EINVAL; /* Find or allocate a member config for this VID */ - ret = rtl8366_obtain_mc(smi, vid, &vlanmc); + ret = rtl8366_obtain_mc(priv, vid, &vlanmc); if (ret < 0) return ret; mc = ret; - ret = smi->ops->set_mc_index(smi, port, mc); + ret = priv->ops->set_mc_index(priv, port, mc); if (ret) { - dev_err(smi->dev, "set PVID: failed to set MC index %d for port %d\n", + dev_err(priv->dev, "set PVID: failed to set MC index %d for port %d\n", mc, port); return ret; } - dev_dbg(smi->dev, "set PVID: the PVID for port %d set to %d using existing MC index %d\n", + dev_dbg(priv->dev, "set PVID: the PVID for port %d set to %d using existing MC index %d\n", port, vid, mc); return 0; } EXPORT_SYMBOL_GPL(rtl8366_set_pvid); -int rtl8366_enable_vlan4k(struct realtek_smi *smi, bool enable) +int rtl8366_enable_vlan4k(struct realtek_priv *priv, bool enable) { int ret; @@ -229,52 +229,52 @@ int rtl8366_enable_vlan4k(struct realtek_smi *smi, bool enable) */ if (enable) { /* Make sure VLAN is ON */ - ret = smi->ops->enable_vlan(smi, true); + ret = priv->ops->enable_vlan(priv, true); if (ret) return ret; - smi->vlan_enabled = true; + priv->vlan_enabled = true; } - ret = smi->ops->enable_vlan4k(smi, enable); + ret = priv->ops->enable_vlan4k(priv, enable); if (ret) return ret; - smi->vlan4k_enabled = enable; + priv->vlan4k_enabled = enable; return 0; } EXPORT_SYMBOL_GPL(rtl8366_enable_vlan4k); -int rtl8366_enable_vlan(struct realtek_smi *smi, bool enable) +int rtl8366_enable_vlan(struct realtek_priv *priv, bool enable) { int ret; - ret = smi->ops->enable_vlan(smi, enable); + ret = priv->ops->enable_vlan(priv, enable); if (ret) return ret; - smi->vlan_enabled = enable; + priv->vlan_enabled = enable; /* If we turn VLAN off, make sure that we turn off * 4k VLAN as well, if that happened to be on. */ if (!enable) { - smi->vlan4k_enabled = false; - ret = smi->ops->enable_vlan4k(smi, false); + priv->vlan4k_enabled = false; + ret = priv->ops->enable_vlan4k(priv, false); } return ret; } EXPORT_SYMBOL_GPL(rtl8366_enable_vlan); -int rtl8366_reset_vlan(struct realtek_smi *smi) +int rtl8366_reset_vlan(struct realtek_priv *priv) { struct rtl8366_vlan_mc vlanmc; int ret; int i; - rtl8366_enable_vlan(smi, false); - rtl8366_enable_vlan4k(smi, false); + rtl8366_enable_vlan(priv, false); + rtl8366_enable_vlan4k(priv, false); /* Clear the 16 VLAN member configurations */ vlanmc.vid = 0; @@ -282,8 +282,8 @@ int rtl8366_reset_vlan(struct realtek_smi *smi) vlanmc.member = 0; vlanmc.untag = 0; vlanmc.fid = 0; - for (i = 0; i < smi->num_vlan_mc; i++) { - ret = smi->ops->set_vlan_mc(smi, i, &vlanmc); + for (i = 0; i < priv->num_vlan_mc; i++) { + ret = priv->ops->set_vlan_mc(priv, i, &vlanmc); if (ret) return ret; } @@ -298,12 +298,12 @@ int rtl8366_vlan_add(struct dsa_switch *ds, int port, { bool untagged = !!(vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED); bool pvid = !!(vlan->flags & BRIDGE_VLAN_INFO_PVID); - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; u32 member = 0; u32 untag = 0; int ret; - if (!smi->ops->is_vlan_valid(smi, vlan->vid)) { + if (!priv->ops->is_vlan_valid(priv, vlan->vid)) { NL_SET_ERR_MSG_MOD(extack, "VLAN ID not valid"); return -EINVAL; } @@ -312,13 +312,13 @@ int rtl8366_vlan_add(struct dsa_switch *ds, int port, * FIXME: what's with this 4k business? * Just rtl8366_enable_vlan() seems inconclusive. */ - ret = rtl8366_enable_vlan4k(smi, true); + ret = rtl8366_enable_vlan4k(priv, true); if (ret) { NL_SET_ERR_MSG_MOD(extack, "Failed to enable VLAN 4K"); return ret; } - dev_dbg(smi->dev, "add VLAN %d on port %d, %s, %s\n", + dev_dbg(priv->dev, "add VLAN %d on port %d, %s, %s\n", vlan->vid, port, untagged ? "untagged" : "tagged", pvid ? "PVID" : "no PVID"); @@ -327,18 +327,18 @@ int rtl8366_vlan_add(struct dsa_switch *ds, int port, if (untagged) untag |= BIT(port); - ret = rtl8366_set_vlan(smi, vlan->vid, member, untag, 0); + ret = rtl8366_set_vlan(priv, vlan->vid, member, untag, 0); if (ret) { - dev_err(smi->dev, "failed to set up VLAN %04x", vlan->vid); + dev_err(priv->dev, "failed to set up VLAN %04x", vlan->vid); return ret; } if (!pvid) return 0; - ret = rtl8366_set_pvid(smi, port, vlan->vid); + ret = rtl8366_set_pvid(priv, port, vlan->vid); if (ret) { - dev_err(smi->dev, "failed to set PVID on port %d to VLAN %04x", + dev_err(priv->dev, "failed to set PVID on port %d to VLAN %04x", port, vlan->vid); return ret; } @@ -350,15 +350,15 @@ EXPORT_SYMBOL_GPL(rtl8366_vlan_add); int rtl8366_vlan_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; int ret, i; - dev_dbg(smi->dev, "del VLAN %d on port %d\n", vlan->vid, port); + dev_dbg(priv->dev, "del VLAN %d on port %d\n", vlan->vid, port); - for (i = 0; i < smi->num_vlan_mc; i++) { + for (i = 0; i < priv->num_vlan_mc; i++) { struct rtl8366_vlan_mc vlanmc; - ret = smi->ops->get_vlan_mc(smi, i, &vlanmc); + ret = priv->ops->get_vlan_mc(priv, i, &vlanmc); if (ret) return ret; @@ -376,9 +376,9 @@ int rtl8366_vlan_del(struct dsa_switch *ds, int port, vlanmc.priority = 0; vlanmc.fid = 0; } - ret = smi->ops->set_vlan_mc(smi, i, &vlanmc); + ret = priv->ops->set_vlan_mc(priv, i, &vlanmc); if (ret) { - dev_err(smi->dev, + dev_err(priv->dev, "failed to remove VLAN %04x\n", vlan->vid); return ret; @@ -394,15 +394,15 @@ EXPORT_SYMBOL_GPL(rtl8366_vlan_del); void rtl8366_get_strings(struct dsa_switch *ds, int port, u32 stringset, uint8_t *data) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; struct rtl8366_mib_counter *mib; int i; - if (port >= smi->num_ports) + if (port >= priv->num_ports) return; - for (i = 0; i < smi->num_mib_counters; i++) { - mib = &smi->mib_counters[i]; + for (i = 0; i < priv->num_mib_counters; i++) { + mib = &priv->mib_counters[i]; strncpy(data + i * ETH_GSTRING_LEN, mib->name, ETH_GSTRING_LEN); } @@ -411,35 +411,35 @@ EXPORT_SYMBOL_GPL(rtl8366_get_strings); int rtl8366_get_sset_count(struct dsa_switch *ds, int port, int sset) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; /* We only support SS_STATS */ if (sset != ETH_SS_STATS) return 0; - if (port >= smi->num_ports) + if (port >= priv->num_ports) return -EINVAL; - return smi->num_mib_counters; + return priv->num_mib_counters; } EXPORT_SYMBOL_GPL(rtl8366_get_sset_count); void rtl8366_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; int i; int ret; - if (port >= smi->num_ports) + if (port >= priv->num_ports) return; - for (i = 0; i < smi->num_mib_counters; i++) { + for (i = 0; i < priv->num_mib_counters; i++) { struct rtl8366_mib_counter *mib; u64 mibvalue = 0; - mib = &smi->mib_counters[i]; - ret = smi->ops->get_mib_counter(smi, port, mib, &mibvalue); + mib = &priv->mib_counters[i]; + ret = priv->ops->get_mib_counter(priv, port, mib, &mibvalue); if (ret) { - dev_err(smi->dev, "error reading MIB counter %s\n", + dev_err(priv->dev, "error reading MIB counter %s\n", mib->name); } data[i] = mibvalue; diff --git a/drivers/net/dsa/rtl8366rb.c b/drivers/net/dsa/realtek/rtl8366rb.c index ecc19bd5115f..fb6565e68401 100644 --- a/drivers/net/dsa/rtl8366rb.c +++ b/drivers/net/dsa/realtek/rtl8366rb.c @@ -21,7 +21,7 @@ #include <linux/of_irq.h> #include <linux/regmap.h> -#include "realtek-smi-core.h" +#include "realtek.h" #define RTL8366RB_PORT_NUM_CPU 5 #define RTL8366RB_NUM_PORTS 6 @@ -396,7 +396,7 @@ static struct rtl8366_mib_counter rtl8366rb_mib_counters[] = { { 0, 70, 2, "IfOutBroadcastPkts" }, }; -static int rtl8366rb_get_mib_counter(struct realtek_smi *smi, +static int rtl8366rb_get_mib_counter(struct realtek_priv *priv, int port, struct rtl8366_mib_counter *mib, u64 *mibvalue) @@ -412,12 +412,12 @@ static int rtl8366rb_get_mib_counter(struct realtek_smi *smi, /* Writing access counter address first * then ASIC will prepare 64bits counter wait for being retrived */ - ret = regmap_write(smi->map, addr, 0); /* Write whatever */ + ret = regmap_write(priv->map, addr, 0); /* Write whatever */ if (ret) return ret; /* Read MIB control register */ - ret = regmap_read(smi->map, RTL8366RB_MIB_CTRL_REG, &val); + ret = regmap_read(priv->map, RTL8366RB_MIB_CTRL_REG, &val); if (ret) return -EIO; @@ -430,7 +430,7 @@ static int rtl8366rb_get_mib_counter(struct realtek_smi *smi, /* Read each individual MIB 16 bits at the time */ *mibvalue = 0; for (i = mib->length; i > 0; i--) { - ret = regmap_read(smi->map, addr + (i - 1), &val); + ret = regmap_read(priv->map, addr + (i - 1), &val); if (ret) return ret; *mibvalue = (*mibvalue << 16) | (val & 0xFFFF); @@ -455,38 +455,38 @@ static u32 rtl8366rb_get_irqmask(struct irq_data *d) static void rtl8366rb_mask_irq(struct irq_data *d) { - struct realtek_smi *smi = irq_data_get_irq_chip_data(d); + struct realtek_priv *priv = irq_data_get_irq_chip_data(d); int ret; - ret = regmap_update_bits(smi->map, RTL8366RB_INTERRUPT_MASK_REG, + ret = regmap_update_bits(priv->map, RTL8366RB_INTERRUPT_MASK_REG, rtl8366rb_get_irqmask(d), 0); if (ret) - dev_err(smi->dev, "could not mask IRQ\n"); + dev_err(priv->dev, "could not mask IRQ\n"); } static void rtl8366rb_unmask_irq(struct irq_data *d) { - struct realtek_smi *smi = irq_data_get_irq_chip_data(d); + struct realtek_priv *priv = irq_data_get_irq_chip_data(d); int ret; - ret = regmap_update_bits(smi->map, RTL8366RB_INTERRUPT_MASK_REG, + ret = regmap_update_bits(priv->map, RTL8366RB_INTERRUPT_MASK_REG, rtl8366rb_get_irqmask(d), rtl8366rb_get_irqmask(d)); if (ret) - dev_err(smi->dev, "could not unmask IRQ\n"); + dev_err(priv->dev, "could not unmask IRQ\n"); } static irqreturn_t rtl8366rb_irq(int irq, void *data) { - struct realtek_smi *smi = data; + struct realtek_priv *priv = data; u32 stat; int ret; /* This clears the IRQ status register */ - ret = regmap_read(smi->map, RTL8366RB_INTERRUPT_STATUS_REG, + ret = regmap_read(priv->map, RTL8366RB_INTERRUPT_STATUS_REG, &stat); if (ret) { - dev_err(smi->dev, "can't read interrupt status\n"); + dev_err(priv->dev, "can't read interrupt status\n"); return IRQ_NONE; } stat &= RTL8366RB_INTERRUPT_VALID; @@ -502,7 +502,7 @@ static irqreturn_t rtl8366rb_irq(int irq, void *data) */ if (line < 12 && line > 5) line -= 5; - child_irq = irq_find_mapping(smi->irqdomain, line); + child_irq = irq_find_mapping(priv->irqdomain, line); handle_nested_irq(child_irq); } return IRQ_HANDLED; @@ -538,7 +538,7 @@ static const struct irq_domain_ops rtl8366rb_irqdomain_ops = { .xlate = irq_domain_xlate_onecell, }; -static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi) +static int rtl8366rb_setup_cascaded_irq(struct realtek_priv *priv) { struct device_node *intc; unsigned long irq_trig; @@ -547,24 +547,24 @@ static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi) u32 val; int i; - intc = of_get_child_by_name(smi->dev->of_node, "interrupt-controller"); + intc = of_get_child_by_name(priv->dev->of_node, "interrupt-controller"); if (!intc) { - dev_err(smi->dev, "missing child interrupt-controller node\n"); + dev_err(priv->dev, "missing child interrupt-controller node\n"); return -EINVAL; } /* RB8366RB IRQs cascade off this one */ irq = of_irq_get(intc, 0); if (irq <= 0) { - dev_err(smi->dev, "failed to get parent IRQ\n"); + dev_err(priv->dev, "failed to get parent IRQ\n"); ret = irq ? irq : -EINVAL; goto out_put_node; } /* This clears the IRQ status register */ - ret = regmap_read(smi->map, RTL8366RB_INTERRUPT_STATUS_REG, + ret = regmap_read(priv->map, RTL8366RB_INTERRUPT_STATUS_REG, &val); if (ret) { - dev_err(smi->dev, "can't read interrupt status\n"); + dev_err(priv->dev, "can't read interrupt status\n"); goto out_put_node; } @@ -573,48 +573,48 @@ static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi) switch (irq_trig) { case IRQF_TRIGGER_RISING: case IRQF_TRIGGER_HIGH: - dev_info(smi->dev, "active high/rising IRQ\n"); + dev_info(priv->dev, "active high/rising IRQ\n"); val = 0; break; case IRQF_TRIGGER_FALLING: case IRQF_TRIGGER_LOW: - dev_info(smi->dev, "active low/falling IRQ\n"); + dev_info(priv->dev, "active low/falling IRQ\n"); val = RTL8366RB_INTERRUPT_POLARITY; break; } - ret = regmap_update_bits(smi->map, RTL8366RB_INTERRUPT_CONTROL_REG, + ret = regmap_update_bits(priv->map, RTL8366RB_INTERRUPT_CONTROL_REG, RTL8366RB_INTERRUPT_POLARITY, val); if (ret) { - dev_err(smi->dev, "could not configure IRQ polarity\n"); + dev_err(priv->dev, "could not configure IRQ polarity\n"); goto out_put_node; } - ret = devm_request_threaded_irq(smi->dev, irq, NULL, + ret = devm_request_threaded_irq(priv->dev, irq, NULL, rtl8366rb_irq, IRQF_ONESHOT, - "RTL8366RB", smi); + "RTL8366RB", priv); if (ret) { - dev_err(smi->dev, "unable to request irq: %d\n", ret); + dev_err(priv->dev, "unable to request irq: %d\n", ret); goto out_put_node; } - smi->irqdomain = irq_domain_add_linear(intc, - RTL8366RB_NUM_INTERRUPT, - &rtl8366rb_irqdomain_ops, - smi); - if (!smi->irqdomain) { - dev_err(smi->dev, "failed to create IRQ domain\n"); + priv->irqdomain = irq_domain_add_linear(intc, + RTL8366RB_NUM_INTERRUPT, + &rtl8366rb_irqdomain_ops, + priv); + if (!priv->irqdomain) { + dev_err(priv->dev, "failed to create IRQ domain\n"); ret = -EINVAL; goto out_put_node; } - for (i = 0; i < smi->num_ports; i++) - irq_set_parent(irq_create_mapping(smi->irqdomain, i), irq); + for (i = 0; i < priv->num_ports; i++) + irq_set_parent(irq_create_mapping(priv->irqdomain, i), irq); out_put_node: of_node_put(intc); return ret; } -static int rtl8366rb_set_addr(struct realtek_smi *smi) +static int rtl8366rb_set_addr(struct realtek_priv *priv) { u8 addr[ETH_ALEN]; u16 val; @@ -622,18 +622,18 @@ static int rtl8366rb_set_addr(struct realtek_smi *smi) eth_random_addr(addr); - dev_info(smi->dev, "set MAC: %02X:%02X:%02X:%02X:%02X:%02X\n", + dev_info(priv->dev, "set MAC: %02X:%02X:%02X:%02X:%02X:%02X\n", addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); val = addr[0] << 8 | addr[1]; - ret = regmap_write(smi->map, RTL8366RB_SMAR0, val); + ret = regmap_write(priv->map, RTL8366RB_SMAR0, val); if (ret) return ret; val = addr[2] << 8 | addr[3]; - ret = regmap_write(smi->map, RTL8366RB_SMAR1, val); + ret = regmap_write(priv->map, RTL8366RB_SMAR1, val); if (ret) return ret; val = addr[4] << 8 | addr[5]; - ret = regmap_write(smi->map, RTL8366RB_SMAR2, val); + ret = regmap_write(priv->map, RTL8366RB_SMAR2, val); if (ret) return ret; @@ -765,7 +765,7 @@ static const struct rtl8366rb_jam_tbl_entry rtl8366rb_green_jam[] = { /* Function that jams the tables in the proper registers */ static int rtl8366rb_jam_table(const struct rtl8366rb_jam_tbl_entry *jam_table, - int jam_size, struct realtek_smi *smi, + int jam_size, struct realtek_priv *priv, bool write_dbg) { u32 val; @@ -774,24 +774,24 @@ static int rtl8366rb_jam_table(const struct rtl8366rb_jam_tbl_entry *jam_table, for (i = 0; i < jam_size; i++) { if ((jam_table[i].reg & 0xBE00) == 0xBE00) { - ret = regmap_read(smi->map, + ret = regmap_read(priv->map, RTL8366RB_PHY_ACCESS_BUSY_REG, &val); if (ret) return ret; if (!(val & RTL8366RB_PHY_INT_BUSY)) { - ret = regmap_write(smi->map, - RTL8366RB_PHY_ACCESS_CTRL_REG, - RTL8366RB_PHY_CTRL_WRITE); + ret = regmap_write(priv->map, + RTL8366RB_PHY_ACCESS_CTRL_REG, + RTL8366RB_PHY_CTRL_WRITE); if (ret) return ret; } } if (write_dbg) - dev_dbg(smi->dev, "jam %04x into register %04x\n", + dev_dbg(priv->dev, "jam %04x into register %04x\n", jam_table[i].val, jam_table[i].reg); - ret = regmap_write(smi->map, + ret = regmap_write(priv->map, jam_table[i].reg, jam_table[i].val); if (ret) @@ -802,7 +802,7 @@ static int rtl8366rb_jam_table(const struct rtl8366rb_jam_tbl_entry *jam_table, static int rtl8366rb_setup(struct dsa_switch *ds) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; const struct rtl8366rb_jam_tbl_entry *jam_table; struct rtl8366rb *rb; u32 chip_ver = 0; @@ -812,11 +812,11 @@ static int rtl8366rb_setup(struct dsa_switch *ds) int ret; int i; - rb = smi->chip_data; + rb = priv->chip_data; - ret = regmap_read(smi->map, RTL8366RB_CHIP_ID_REG, &chip_id); + ret = regmap_read(priv->map, RTL8366RB_CHIP_ID_REG, &chip_id); if (ret) { - dev_err(smi->dev, "unable to read chip id\n"); + dev_err(priv->dev, "unable to read chip id\n"); return ret; } @@ -824,18 +824,18 @@ static int rtl8366rb_setup(struct dsa_switch *ds) case RTL8366RB_CHIP_ID_8366: break; default: - dev_err(smi->dev, "unknown chip id (%04x)\n", chip_id); + dev_err(priv->dev, "unknown chip id (%04x)\n", chip_id); return -ENODEV; } - ret = regmap_read(smi->map, RTL8366RB_CHIP_VERSION_CTRL_REG, + ret = regmap_read(priv->map, RTL8366RB_CHIP_VERSION_CTRL_REG, &chip_ver); if (ret) { - dev_err(smi->dev, "unable to read chip version\n"); + dev_err(priv->dev, "unable to read chip version\n"); return ret; } - dev_info(smi->dev, "RTL%04x ver %u chip found\n", + dev_info(priv->dev, "RTL%04x ver %u chip found\n", chip_id, chip_ver & RTL8366RB_CHIP_VERSION_MASK); /* Do the init dance using the right jam table */ @@ -872,20 +872,20 @@ static int rtl8366rb_setup(struct dsa_switch *ds) jam_size = ARRAY_SIZE(rtl8366rb_init_jam_dgn3500); } - ret = rtl8366rb_jam_table(jam_table, jam_size, smi, true); + ret = rtl8366rb_jam_table(jam_table, jam_size, priv, true); if (ret) return ret; /* Isolate all user ports so they can only send packets to itself and the CPU port */ for (i = 0; i < RTL8366RB_PORT_NUM_CPU; i++) { - ret = regmap_write(smi->map, RTL8366RB_PORT_ISO(i), + ret = regmap_write(priv->map, RTL8366RB_PORT_ISO(i), RTL8366RB_PORT_ISO_PORTS(BIT(RTL8366RB_PORT_NUM_CPU)) | RTL8366RB_PORT_ISO_EN); if (ret) return ret; } /* CPU port can send packets to all ports */ - ret = regmap_write(smi->map, RTL8366RB_PORT_ISO(RTL8366RB_PORT_NUM_CPU), + ret = regmap_write(priv->map, RTL8366RB_PORT_ISO(RTL8366RB_PORT_NUM_CPU), RTL8366RB_PORT_ISO_PORTS(dsa_user_ports(ds)) | RTL8366RB_PORT_ISO_EN); if (ret) @@ -893,26 +893,26 @@ static int rtl8366rb_setup(struct dsa_switch *ds) /* Set up the "green ethernet" feature */ ret = rtl8366rb_jam_table(rtl8366rb_green_jam, - ARRAY_SIZE(rtl8366rb_green_jam), smi, false); + ARRAY_SIZE(rtl8366rb_green_jam), priv, false); if (ret) return ret; - ret = regmap_write(smi->map, + ret = regmap_write(priv->map, RTL8366RB_GREEN_FEATURE_REG, (chip_ver == 1) ? 0x0007 : 0x0003); if (ret) return ret; /* Vendor driver sets 0x240 in registers 0xc and 0xd (undocumented) */ - ret = regmap_write(smi->map, 0x0c, 0x240); + ret = regmap_write(priv->map, 0x0c, 0x240); if (ret) return ret; - ret = regmap_write(smi->map, 0x0d, 0x240); + ret = regmap_write(priv->map, 0x0d, 0x240); if (ret) return ret; /* Set some random MAC address */ - ret = rtl8366rb_set_addr(smi); + ret = rtl8366rb_set_addr(priv); if (ret) return ret; @@ -921,21 +921,21 @@ static int rtl8366rb_setup(struct dsa_switch *ds) * If you set RTL8368RB_CPU_NO_TAG (bit 15) in this registers * the custom tag is turned off. */ - ret = regmap_update_bits(smi->map, RTL8368RB_CPU_CTRL_REG, + ret = regmap_update_bits(priv->map, RTL8368RB_CPU_CTRL_REG, 0xFFFF, - BIT(smi->cpu_port)); + BIT(priv->cpu_port)); if (ret) return ret; /* Make sure we default-enable the fixed CPU port */ - ret = regmap_update_bits(smi->map, RTL8366RB_PECR, - BIT(smi->cpu_port), + ret = regmap_update_bits(priv->map, RTL8366RB_PECR, + BIT(priv->cpu_port), 0); if (ret) return ret; /* Set maximum packet length to 1536 bytes */ - ret = regmap_update_bits(smi->map, RTL8366RB_SGCR, + ret = regmap_update_bits(priv->map, RTL8366RB_SGCR, RTL8366RB_SGCR_MAX_LENGTH_MASK, RTL8366RB_SGCR_MAX_LENGTH_1536); if (ret) @@ -945,13 +945,13 @@ static int rtl8366rb_setup(struct dsa_switch *ds) rb->max_mtu[i] = 1532; /* Disable learning for all ports */ - ret = regmap_write(smi->map, RTL8366RB_PORT_LEARNDIS_CTRL, + ret = regmap_write(priv->map, RTL8366RB_PORT_LEARNDIS_CTRL, RTL8366RB_PORT_ALL); if (ret) return ret; /* Enable auto ageing for all ports */ - ret = regmap_write(smi->map, RTL8366RB_SECURITY_CTRL, 0); + ret = regmap_write(priv->map, RTL8366RB_SECURITY_CTRL, 0); if (ret) return ret; @@ -962,30 +962,30 @@ static int rtl8366rb_setup(struct dsa_switch *ds) * connected to something exotic such as fiber, then this might * be worth experimenting with. */ - ret = regmap_update_bits(smi->map, RTL8366RB_PMC0, + ret = regmap_update_bits(priv->map, RTL8366RB_PMC0, RTL8366RB_PMC0_P4_IOMODE_MASK, 0 << RTL8366RB_PMC0_P4_IOMODE_SHIFT); if (ret) return ret; /* Accept all packets by default, we enable filtering on-demand */ - ret = regmap_write(smi->map, RTL8366RB_VLAN_INGRESS_CTRL1_REG, + ret = regmap_write(priv->map, RTL8366RB_VLAN_INGRESS_CTRL1_REG, 0); if (ret) return ret; - ret = regmap_write(smi->map, RTL8366RB_VLAN_INGRESS_CTRL2_REG, + ret = regmap_write(priv->map, RTL8366RB_VLAN_INGRESS_CTRL2_REG, 0); if (ret) return ret; /* Don't drop packets whose DA has not been learned */ - ret = regmap_update_bits(smi->map, RTL8366RB_SSCR2, + ret = regmap_update_bits(priv->map, RTL8366RB_SSCR2, RTL8366RB_SSCR2_DROP_UNKNOWN_DA, 0); if (ret) return ret; /* Set blinking, TODO: make this configurable */ - ret = regmap_update_bits(smi->map, RTL8366RB_LED_BLINKRATE_REG, + ret = regmap_update_bits(priv->map, RTL8366RB_LED_BLINKRATE_REG, RTL8366RB_LED_BLINKRATE_MASK, RTL8366RB_LED_BLINKRATE_56MS); if (ret) @@ -996,15 +996,15 @@ static int rtl8366rb_setup(struct dsa_switch *ds) * behaviour (no individual config) but we can set up each * LED separately. */ - if (smi->leds_disabled) { + if (priv->leds_disabled) { /* Turn everything off */ - regmap_update_bits(smi->map, + regmap_update_bits(priv->map, RTL8366RB_LED_0_1_CTRL_REG, 0x0FFF, 0); - regmap_update_bits(smi->map, + regmap_update_bits(priv->map, RTL8366RB_LED_2_3_CTRL_REG, 0x0FFF, 0); - regmap_update_bits(smi->map, + regmap_update_bits(priv->map, RTL8366RB_INTERRUPT_CONTROL_REG, RTL8366RB_P4_RGMII_LED, 0); @@ -1014,7 +1014,7 @@ static int rtl8366rb_setup(struct dsa_switch *ds) val = RTL8366RB_LED_FORCE; } for (i = 0; i < 4; i++) { - ret = regmap_update_bits(smi->map, + ret = regmap_update_bits(priv->map, RTL8366RB_LED_CTRL_REG, 0xf << (i * 4), val << (i * 4)); @@ -1022,18 +1022,20 @@ static int rtl8366rb_setup(struct dsa_switch *ds) return ret; } - ret = rtl8366_reset_vlan(smi); + ret = rtl8366_reset_vlan(priv); if (ret) return ret; - ret = rtl8366rb_setup_cascaded_irq(smi); + ret = rtl8366rb_setup_cascaded_irq(priv); if (ret) - dev_info(smi->dev, "no interrupt support\n"); + dev_info(priv->dev, "no interrupt support\n"); - ret = realtek_smi_setup_mdio(smi); - if (ret) { - dev_info(smi->dev, "could not set up MDIO bus\n"); - return -ENODEV; + if (priv->setup_interface) { + ret = priv->setup_interface(ds); + if (ret) { + dev_err(priv->dev, "could not set up MDIO bus\n"); + return -ENODEV; + } } return 0; @@ -1052,35 +1054,35 @@ rtl8366rb_mac_link_up(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface, struct phy_device *phydev, int speed, int duplex, bool tx_pause, bool rx_pause) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; int ret; - if (port != smi->cpu_port) + if (port != priv->cpu_port) return; - dev_dbg(smi->dev, "MAC link up on CPU port (%d)\n", port); + dev_dbg(priv->dev, "MAC link up on CPU port (%d)\n", port); /* Force the fixed CPU port into 1Gbit mode, no autonegotiation */ - ret = regmap_update_bits(smi->map, RTL8366RB_MAC_FORCE_CTRL_REG, + ret = regmap_update_bits(priv->map, RTL8366RB_MAC_FORCE_CTRL_REG, BIT(port), BIT(port)); if (ret) { - dev_err(smi->dev, "failed to force 1Gbit on CPU port\n"); + dev_err(priv->dev, "failed to force 1Gbit on CPU port\n"); return; } - ret = regmap_update_bits(smi->map, RTL8366RB_PAACR2, + ret = regmap_update_bits(priv->map, RTL8366RB_PAACR2, 0xFF00U, RTL8366RB_PAACR_CPU_PORT << 8); if (ret) { - dev_err(smi->dev, "failed to set PAACR on CPU port\n"); + dev_err(priv->dev, "failed to set PAACR on CPU port\n"); return; } /* Enable the CPU port */ - ret = regmap_update_bits(smi->map, RTL8366RB_PECR, BIT(port), + ret = regmap_update_bits(priv->map, RTL8366RB_PECR, BIT(port), 0); if (ret) { - dev_err(smi->dev, "failed to enable the CPU port\n"); + dev_err(priv->dev, "failed to enable the CPU port\n"); return; } } @@ -1089,99 +1091,99 @@ static void rtl8366rb_mac_link_down(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; int ret; - if (port != smi->cpu_port) + if (port != priv->cpu_port) return; - dev_dbg(smi->dev, "MAC link down on CPU port (%d)\n", port); + dev_dbg(priv->dev, "MAC link down on CPU port (%d)\n", port); /* Disable the CPU port */ - ret = regmap_update_bits(smi->map, RTL8366RB_PECR, BIT(port), + ret = regmap_update_bits(priv->map, RTL8366RB_PECR, BIT(port), BIT(port)); if (ret) { - dev_err(smi->dev, "failed to disable the CPU port\n"); + dev_err(priv->dev, "failed to disable the CPU port\n"); return; } } -static void rb8366rb_set_port_led(struct realtek_smi *smi, +static void rb8366rb_set_port_led(struct realtek_priv *priv, int port, bool enable) { u16 val = enable ? 0x3f : 0; int ret; - if (smi->leds_disabled) + if (priv->leds_disabled) return; switch (port) { case 0: - ret = regmap_update_bits(smi->map, + ret = regmap_update_bits(priv->map, RTL8366RB_LED_0_1_CTRL_REG, 0x3F, val); break; case 1: - ret = regmap_update_bits(smi->map, + ret = regmap_update_bits(priv->map, RTL8366RB_LED_0_1_CTRL_REG, 0x3F << RTL8366RB_LED_1_OFFSET, val << RTL8366RB_LED_1_OFFSET); break; case 2: - ret = regmap_update_bits(smi->map, + ret = regmap_update_bits(priv->map, RTL8366RB_LED_2_3_CTRL_REG, 0x3F, val); break; case 3: - ret = regmap_update_bits(smi->map, + ret = regmap_update_bits(priv->map, RTL8366RB_LED_2_3_CTRL_REG, 0x3F << RTL8366RB_LED_3_OFFSET, val << RTL8366RB_LED_3_OFFSET); break; case 4: - ret = regmap_update_bits(smi->map, + ret = regmap_update_bits(priv->map, RTL8366RB_INTERRUPT_CONTROL_REG, RTL8366RB_P4_RGMII_LED, enable ? RTL8366RB_P4_RGMII_LED : 0); break; default: - dev_err(smi->dev, "no LED for port %d\n", port); + dev_err(priv->dev, "no LED for port %d\n", port); return; } if (ret) - dev_err(smi->dev, "error updating LED on port %d\n", port); + dev_err(priv->dev, "error updating LED on port %d\n", port); } static int rtl8366rb_port_enable(struct dsa_switch *ds, int port, struct phy_device *phy) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; int ret; - dev_dbg(smi->dev, "enable port %d\n", port); - ret = regmap_update_bits(smi->map, RTL8366RB_PECR, BIT(port), + dev_dbg(priv->dev, "enable port %d\n", port); + ret = regmap_update_bits(priv->map, RTL8366RB_PECR, BIT(port), 0); if (ret) return ret; - rb8366rb_set_port_led(smi, port, true); + rb8366rb_set_port_led(priv, port, true); return 0; } static void rtl8366rb_port_disable(struct dsa_switch *ds, int port) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; int ret; - dev_dbg(smi->dev, "disable port %d\n", port); - ret = regmap_update_bits(smi->map, RTL8366RB_PECR, BIT(port), + dev_dbg(priv->dev, "disable port %d\n", port); + ret = regmap_update_bits(priv->map, RTL8366RB_PECR, BIT(port), BIT(port)); if (ret) return; - rb8366rb_set_port_led(smi, port, false); + rb8366rb_set_port_led(priv, port, false); } static int @@ -1189,7 +1191,7 @@ rtl8366rb_port_bridge_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, bool *tx_fwd_offload) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; unsigned int port_bitmap = 0; int ret, i; @@ -1202,17 +1204,17 @@ rtl8366rb_port_bridge_join(struct dsa_switch *ds, int port, if (!dsa_port_offloads_bridge(dsa_to_port(ds, i), &bridge)) continue; /* Join this port to each other port on the bridge */ - ret = regmap_update_bits(smi->map, RTL8366RB_PORT_ISO(i), + ret = regmap_update_bits(priv->map, RTL8366RB_PORT_ISO(i), RTL8366RB_PORT_ISO_PORTS(BIT(port)), RTL8366RB_PORT_ISO_PORTS(BIT(port))); if (ret) - dev_err(smi->dev, "failed to join port %d\n", port); + dev_err(priv->dev, "failed to join port %d\n", port); port_bitmap |= BIT(i); } /* Set the bits for the ports we can access */ - return regmap_update_bits(smi->map, RTL8366RB_PORT_ISO(port), + return regmap_update_bits(priv->map, RTL8366RB_PORT_ISO(port), RTL8366RB_PORT_ISO_PORTS(port_bitmap), RTL8366RB_PORT_ISO_PORTS(port_bitmap)); } @@ -1221,7 +1223,7 @@ static void rtl8366rb_port_bridge_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; unsigned int port_bitmap = 0; int ret, i; @@ -1234,28 +1236,30 @@ rtl8366rb_port_bridge_leave(struct dsa_switch *ds, int port, if (!dsa_port_offloads_bridge(dsa_to_port(ds, i), &bridge)) continue; /* Remove this port from any other port on the bridge */ - ret = regmap_update_bits(smi->map, RTL8366RB_PORT_ISO(i), + ret = regmap_update_bits(priv->map, RTL8366RB_PORT_ISO(i), RTL8366RB_PORT_ISO_PORTS(BIT(port)), 0); if (ret) - dev_err(smi->dev, "failed to leave port %d\n", port); + dev_err(priv->dev, "failed to leave port %d\n", port); port_bitmap |= BIT(i); } /* Clear the bits for the ports we can not access, leave ourselves */ - regmap_update_bits(smi->map, RTL8366RB_PORT_ISO(port), + regmap_update_bits(priv->map, RTL8366RB_PORT_ISO(port), RTL8366RB_PORT_ISO_PORTS(port_bitmap), 0); } /** * rtl8366rb_drop_untagged() - make the switch drop untagged and C-tagged frames - * @smi: SMI state container + * @priv: SMI state container * @port: the port to drop untagged and C-tagged frames on * @drop: whether to drop or pass untagged and C-tagged frames + * + * Return: zero for success, a negative number on error. */ -static int rtl8366rb_drop_untagged(struct realtek_smi *smi, int port, bool drop) +static int rtl8366rb_drop_untagged(struct realtek_priv *priv, int port, bool drop) { - return regmap_update_bits(smi->map, RTL8366RB_VLAN_INGRESS_CTRL1_REG, + return regmap_update_bits(priv->map, RTL8366RB_VLAN_INGRESS_CTRL1_REG, RTL8366RB_VLAN_INGRESS_CTRL1_DROP(port), drop ? RTL8366RB_VLAN_INGRESS_CTRL1_DROP(port) : 0); } @@ -1264,17 +1268,17 @@ static int rtl8366rb_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering, struct netlink_ext_ack *extack) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; struct rtl8366rb *rb; int ret; - rb = smi->chip_data; + rb = priv->chip_data; - dev_dbg(smi->dev, "port %d: %s VLAN filtering\n", port, + dev_dbg(priv->dev, "port %d: %s VLAN filtering\n", port, vlan_filtering ? "enable" : "disable"); /* If the port is not in the member set, the frame will be dropped */ - ret = regmap_update_bits(smi->map, RTL8366RB_VLAN_INGRESS_CTRL2_REG, + ret = regmap_update_bits(priv->map, RTL8366RB_VLAN_INGRESS_CTRL2_REG, BIT(port), vlan_filtering ? BIT(port) : 0); if (ret) return ret; @@ -1284,9 +1288,9 @@ static int rtl8366rb_vlan_filtering(struct dsa_switch *ds, int port, * filtering on a port, we need to accept any frames. */ if (vlan_filtering) - ret = rtl8366rb_drop_untagged(smi, port, !rb->pvid_enabled[port]); + ret = rtl8366rb_drop_untagged(priv, port, !rb->pvid_enabled[port]); else - ret = rtl8366rb_drop_untagged(smi, port, false); + ret = rtl8366rb_drop_untagged(priv, port, false); return ret; } @@ -1308,11 +1312,11 @@ rtl8366rb_port_bridge_flags(struct dsa_switch *ds, int port, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; int ret; if (flags.mask & BR_LEARNING) { - ret = regmap_update_bits(smi->map, RTL8366RB_PORT_LEARNDIS_CTRL, + ret = regmap_update_bits(priv->map, RTL8366RB_PORT_LEARNDIS_CTRL, BIT(port), (flags.val & BR_LEARNING) ? 0 : BIT(port)); if (ret) @@ -1325,7 +1329,7 @@ rtl8366rb_port_bridge_flags(struct dsa_switch *ds, int port, static void rtl8366rb_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; u32 val; int i; @@ -1344,13 +1348,13 @@ rtl8366rb_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) val = RTL8366RB_STP_STATE_FORWARDING; break; default: - dev_err(smi->dev, "unknown bridge state requested\n"); + dev_err(priv->dev, "unknown bridge state requested\n"); return; } /* Set the same status for the port on all the FIDs */ for (i = 0; i < RTL8366RB_NUM_FIDS; i++) { - regmap_update_bits(smi->map, RTL8366RB_STP_STATE_BASE + i, + regmap_update_bits(priv->map, RTL8366RB_STP_STATE_BASE + i, RTL8366RB_STP_STATE_MASK(port), RTL8366RB_STP_STATE(port, val)); } @@ -1359,26 +1363,26 @@ rtl8366rb_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) static void rtl8366rb_port_fast_age(struct dsa_switch *ds, int port) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; /* This will age out any learned L2 entries */ - regmap_update_bits(smi->map, RTL8366RB_SECURITY_CTRL, + regmap_update_bits(priv->map, RTL8366RB_SECURITY_CTRL, BIT(port), BIT(port)); /* Restore the normal state of things */ - regmap_update_bits(smi->map, RTL8366RB_SECURITY_CTRL, + regmap_update_bits(priv->map, RTL8366RB_SECURITY_CTRL, BIT(port), 0); } static int rtl8366rb_change_mtu(struct dsa_switch *ds, int port, int new_mtu) { - struct realtek_smi *smi = ds->priv; + struct realtek_priv *priv = ds->priv; struct rtl8366rb *rb; unsigned int max_mtu; u32 len; int i; /* Cache the per-port MTU setting */ - rb = smi->chip_data; + rb = priv->chip_data; rb->max_mtu[port] = new_mtu; /* Roof out the MTU for the entire switch to the greatest @@ -1406,7 +1410,7 @@ static int rtl8366rb_change_mtu(struct dsa_switch *ds, int port, int new_mtu) else len = RTL8366RB_SGCR_MAX_LENGTH_16000; - return regmap_update_bits(smi->map, RTL8366RB_SGCR, + return regmap_update_bits(priv->map, RTL8366RB_SGCR, RTL8366RB_SGCR_MAX_LENGTH_MASK, len); } @@ -1419,7 +1423,7 @@ static int rtl8366rb_max_mtu(struct dsa_switch *ds, int port) return 15996; } -static int rtl8366rb_get_vlan_4k(struct realtek_smi *smi, u32 vid, +static int rtl8366rb_get_vlan_4k(struct realtek_priv *priv, u32 vid, struct rtl8366_vlan_4k *vlan4k) { u32 data[3]; @@ -1432,19 +1436,19 @@ static int rtl8366rb_get_vlan_4k(struct realtek_smi *smi, u32 vid, return -EINVAL; /* write VID */ - ret = regmap_write(smi->map, RTL8366RB_VLAN_TABLE_WRITE_BASE, + ret = regmap_write(priv->map, RTL8366RB_VLAN_TABLE_WRITE_BASE, vid & RTL8366RB_VLAN_VID_MASK); if (ret) return ret; /* write table access control word */ - ret = regmap_write(smi->map, RTL8366RB_TABLE_ACCESS_CTRL_REG, + ret = regmap_write(priv->map, RTL8366RB_TABLE_ACCESS_CTRL_REG, RTL8366RB_TABLE_VLAN_READ_CTRL); if (ret) return ret; for (i = 0; i < 3; i++) { - ret = regmap_read(smi->map, + ret = regmap_read(priv->map, RTL8366RB_VLAN_TABLE_READ_BASE + i, &data[i]); if (ret) @@ -1460,7 +1464,7 @@ static int rtl8366rb_get_vlan_4k(struct realtek_smi *smi, u32 vid, return 0; } -static int rtl8366rb_set_vlan_4k(struct realtek_smi *smi, +static int rtl8366rb_set_vlan_4k(struct realtek_priv *priv, const struct rtl8366_vlan_4k *vlan4k) { u32 data[3]; @@ -1480,7 +1484,7 @@ static int rtl8366rb_set_vlan_4k(struct realtek_smi *smi, data[2] = vlan4k->fid & RTL8366RB_VLAN_FID_MASK; for (i = 0; i < 3; i++) { - ret = regmap_write(smi->map, + ret = regmap_write(priv->map, RTL8366RB_VLAN_TABLE_WRITE_BASE + i, data[i]); if (ret) @@ -1488,13 +1492,13 @@ static int rtl8366rb_set_vlan_4k(struct realtek_smi *smi, } /* write table access control word */ - ret = regmap_write(smi->map, RTL8366RB_TABLE_ACCESS_CTRL_REG, + ret = regmap_write(priv->map, RTL8366RB_TABLE_ACCESS_CTRL_REG, RTL8366RB_TABLE_VLAN_WRITE_CTRL); return ret; } -static int rtl8366rb_get_vlan_mc(struct realtek_smi *smi, u32 index, +static int rtl8366rb_get_vlan_mc(struct realtek_priv *priv, u32 index, struct rtl8366_vlan_mc *vlanmc) { u32 data[3]; @@ -1507,7 +1511,7 @@ static int rtl8366rb_get_vlan_mc(struct realtek_smi *smi, u32 index, return -EINVAL; for (i = 0; i < 3; i++) { - ret = regmap_read(smi->map, + ret = regmap_read(priv->map, RTL8366RB_VLAN_MC_BASE(index) + i, &data[i]); if (ret) @@ -1525,7 +1529,7 @@ static int rtl8366rb_get_vlan_mc(struct realtek_smi *smi, u32 index, return 0; } -static int rtl8366rb_set_vlan_mc(struct realtek_smi *smi, u32 index, +static int rtl8366rb_set_vlan_mc(struct realtek_priv *priv, u32 index, const struct rtl8366_vlan_mc *vlanmc) { u32 data[3]; @@ -1549,7 +1553,7 @@ static int rtl8366rb_set_vlan_mc(struct realtek_smi *smi, u32 index, data[2] = vlanmc->fid & RTL8366RB_VLAN_FID_MASK; for (i = 0; i < 3; i++) { - ret = regmap_write(smi->map, + ret = regmap_write(priv->map, RTL8366RB_VLAN_MC_BASE(index) + i, data[i]); if (ret) @@ -1559,15 +1563,15 @@ static int rtl8366rb_set_vlan_mc(struct realtek_smi *smi, u32 index, return 0; } -static int rtl8366rb_get_mc_index(struct realtek_smi *smi, int port, int *val) +static int rtl8366rb_get_mc_index(struct realtek_priv *priv, int port, int *val) { u32 data; int ret; - if (port >= smi->num_ports) + if (port >= priv->num_ports) return -EINVAL; - ret = regmap_read(smi->map, RTL8366RB_PORT_VLAN_CTRL_REG(port), + ret = regmap_read(priv->map, RTL8366RB_PORT_VLAN_CTRL_REG(port), &data); if (ret) return ret; @@ -1578,22 +1582,22 @@ static int rtl8366rb_get_mc_index(struct realtek_smi *smi, int port, int *val) return 0; } -static int rtl8366rb_set_mc_index(struct realtek_smi *smi, int port, int index) +static int rtl8366rb_set_mc_index(struct realtek_priv *priv, int port, int index) { struct rtl8366rb *rb; bool pvid_enabled; int ret; - rb = smi->chip_data; + rb = priv->chip_data; pvid_enabled = !!index; - if (port >= smi->num_ports || index >= RTL8366RB_NUM_VLANS) + if (port >= priv->num_ports || index >= RTL8366RB_NUM_VLANS) return -EINVAL; - ret = regmap_update_bits(smi->map, RTL8366RB_PORT_VLAN_CTRL_REG(port), - RTL8366RB_PORT_VLAN_CTRL_MASK << + ret = regmap_update_bits(priv->map, RTL8366RB_PORT_VLAN_CTRL_REG(port), + RTL8366RB_PORT_VLAN_CTRL_MASK << RTL8366RB_PORT_VLAN_CTRL_SHIFT(port), - (index & RTL8366RB_PORT_VLAN_CTRL_MASK) << + (index & RTL8366RB_PORT_VLAN_CTRL_MASK) << RTL8366RB_PORT_VLAN_CTRL_SHIFT(port)); if (ret) return ret; @@ -1604,17 +1608,17 @@ static int rtl8366rb_set_mc_index(struct realtek_smi *smi, int port, int index) * not drop any untagged or C-tagged frames. Make sure to update the * filtering setting. */ - if (dsa_port_is_vlan_filtering(dsa_to_port(smi->ds, port))) - ret = rtl8366rb_drop_untagged(smi, port, !pvid_enabled); + if (dsa_port_is_vlan_filtering(dsa_to_port(priv->ds, port))) + ret = rtl8366rb_drop_untagged(priv, port, !pvid_enabled); return ret; } -static bool rtl8366rb_is_vlan_valid(struct realtek_smi *smi, unsigned int vlan) +static bool rtl8366rb_is_vlan_valid(struct realtek_priv *priv, unsigned int vlan) { unsigned int max = RTL8366RB_NUM_VLANS - 1; - if (smi->vlan4k_enabled) + if (priv->vlan4k_enabled) max = RTL8366RB_NUM_VIDS - 1; if (vlan > max) @@ -1623,23 +1627,23 @@ static bool rtl8366rb_is_vlan_valid(struct realtek_smi *smi, unsigned int vlan) return true; } -static int rtl8366rb_enable_vlan(struct realtek_smi *smi, bool enable) +static int rtl8366rb_enable_vlan(struct realtek_priv *priv, bool enable) { - dev_dbg(smi->dev, "%s VLAN\n", enable ? "enable" : "disable"); - return regmap_update_bits(smi->map, + dev_dbg(priv->dev, "%s VLAN\n", enable ? "enable" : "disable"); + return regmap_update_bits(priv->map, RTL8366RB_SGCR, RTL8366RB_SGCR_EN_VLAN, enable ? RTL8366RB_SGCR_EN_VLAN : 0); } -static int rtl8366rb_enable_vlan4k(struct realtek_smi *smi, bool enable) +static int rtl8366rb_enable_vlan4k(struct realtek_priv *priv, bool enable) { - dev_dbg(smi->dev, "%s VLAN 4k\n", enable ? "enable" : "disable"); - return regmap_update_bits(smi->map, RTL8366RB_SGCR, + dev_dbg(priv->dev, "%s VLAN 4k\n", enable ? "enable" : "disable"); + return regmap_update_bits(priv->map, RTL8366RB_SGCR, RTL8366RB_SGCR_EN_VLAN_4KTB, enable ? RTL8366RB_SGCR_EN_VLAN_4KTB : 0); } -static int rtl8366rb_phy_read(struct realtek_smi *smi, int phy, int regnum) +static int rtl8366rb_phy_read(struct realtek_priv *priv, int phy, int regnum) { u32 val; u32 reg; @@ -1648,32 +1652,32 @@ static int rtl8366rb_phy_read(struct realtek_smi *smi, int phy, int regnum) if (phy > RTL8366RB_PHY_NO_MAX) return -EINVAL; - ret = regmap_write(smi->map, RTL8366RB_PHY_ACCESS_CTRL_REG, + ret = regmap_write(priv->map, RTL8366RB_PHY_ACCESS_CTRL_REG, RTL8366RB_PHY_CTRL_READ); if (ret) return ret; reg = 0x8000 | (1 << (phy + RTL8366RB_PHY_NO_OFFSET)) | regnum; - ret = regmap_write(smi->map, reg, 0); + ret = regmap_write(priv->map, reg, 0); if (ret) { - dev_err(smi->dev, + dev_err(priv->dev, "failed to write PHY%d reg %04x @ %04x, ret %d\n", phy, regnum, reg, ret); return ret; } - ret = regmap_read(smi->map, RTL8366RB_PHY_ACCESS_DATA_REG, &val); + ret = regmap_read(priv->map, RTL8366RB_PHY_ACCESS_DATA_REG, &val); if (ret) return ret; - dev_dbg(smi->dev, "read PHY%d register 0x%04x @ %08x, val <- %04x\n", + dev_dbg(priv->dev, "read PHY%d register 0x%04x @ %08x, val <- %04x\n", phy, regnum, reg, val); return val; } -static int rtl8366rb_phy_write(struct realtek_smi *smi, int phy, int regnum, +static int rtl8366rb_phy_write(struct realtek_priv *priv, int phy, int regnum, u16 val) { u32 reg; @@ -1682,34 +1686,45 @@ static int rtl8366rb_phy_write(struct realtek_smi *smi, int phy, int regnum, if (phy > RTL8366RB_PHY_NO_MAX) return -EINVAL; - ret = regmap_write(smi->map, RTL8366RB_PHY_ACCESS_CTRL_REG, + ret = regmap_write(priv->map, RTL8366RB_PHY_ACCESS_CTRL_REG, RTL8366RB_PHY_CTRL_WRITE); if (ret) return ret; reg = 0x8000 | (1 << (phy + RTL8366RB_PHY_NO_OFFSET)) | regnum; - dev_dbg(smi->dev, "write PHY%d register 0x%04x @ %04x, val -> %04x\n", + dev_dbg(priv->dev, "write PHY%d register 0x%04x @ %04x, val -> %04x\n", phy, regnum, reg, val); - ret = regmap_write(smi->map, reg, val); + ret = regmap_write(priv->map, reg, val); if (ret) return ret; return 0; } -static int rtl8366rb_reset_chip(struct realtek_smi *smi) +static int rtl8366rb_dsa_phy_read(struct dsa_switch *ds, int phy, int regnum) +{ + return rtl8366rb_phy_read(ds->priv, phy, regnum); +} + +static int rtl8366rb_dsa_phy_write(struct dsa_switch *ds, int phy, int regnum, + u16 val) +{ + return rtl8366rb_phy_write(ds->priv, phy, regnum, val); +} + +static int rtl8366rb_reset_chip(struct realtek_priv *priv) { int timeout = 10; u32 val; int ret; - realtek_smi_write_reg_noack(smi, RTL8366RB_RESET_CTRL_REG, - RTL8366RB_CHIP_CTRL_RESET_HW); + priv->write_reg_noack(priv, RTL8366RB_RESET_CTRL_REG, + RTL8366RB_CHIP_CTRL_RESET_HW); do { usleep_range(20000, 25000); - ret = regmap_read(smi->map, RTL8366RB_RESET_CTRL_REG, &val); + ret = regmap_read(priv->map, RTL8366RB_RESET_CTRL_REG, &val); if (ret) return ret; @@ -1718,21 +1733,21 @@ static int rtl8366rb_reset_chip(struct realtek_smi *smi) } while (--timeout); if (!timeout) { - dev_err(smi->dev, "timeout waiting for the switch to reset\n"); + dev_err(priv->dev, "timeout waiting for the switch to reset\n"); return -EIO; } return 0; } -static int rtl8366rb_detect(struct realtek_smi *smi) +static int rtl8366rb_detect(struct realtek_priv *priv) { - struct device *dev = smi->dev; + struct device *dev = priv->dev; int ret; u32 val; /* Detect device */ - ret = regmap_read(smi->map, 0x5c, &val); + ret = regmap_read(priv->map, 0x5c, &val); if (ret) { dev_err(dev, "can't get chip ID (%d)\n", ret); return ret; @@ -1745,11 +1760,11 @@ static int rtl8366rb_detect(struct realtek_smi *smi) return -ENODEV; case 0x5937: dev_info(dev, "found an RTL8366RB switch\n"); - smi->cpu_port = RTL8366RB_PORT_NUM_CPU; - smi->num_ports = RTL8366RB_NUM_PORTS; - smi->num_vlan_mc = RTL8366RB_NUM_VLANS; - smi->mib_counters = rtl8366rb_mib_counters; - smi->num_mib_counters = ARRAY_SIZE(rtl8366rb_mib_counters); + priv->cpu_port = RTL8366RB_PORT_NUM_CPU; + priv->num_ports = RTL8366RB_NUM_PORTS; + priv->num_vlan_mc = RTL8366RB_NUM_VLANS; + priv->mib_counters = rtl8366rb_mib_counters; + priv->num_mib_counters = ARRAY_SIZE(rtl8366rb_mib_counters); break; default: dev_info(dev, "found an Unknown Realtek switch (id=0x%04x)\n", @@ -1757,14 +1772,14 @@ static int rtl8366rb_detect(struct realtek_smi *smi) break; } - ret = rtl8366rb_reset_chip(smi); + ret = rtl8366rb_reset_chip(priv); if (ret) return ret; return 0; } -static const struct dsa_switch_ops rtl8366rb_switch_ops = { +static const struct dsa_switch_ops rtl8366rb_switch_ops_smi = { .get_tag_protocol = rtl8366_get_tag_protocol, .setup = rtl8366rb_setup, .phylink_mac_link_up = rtl8366rb_mac_link_up, @@ -1787,7 +1802,32 @@ static const struct dsa_switch_ops rtl8366rb_switch_ops = { .port_max_mtu = rtl8366rb_max_mtu, }; -static const struct realtek_smi_ops rtl8366rb_smi_ops = { +static const struct dsa_switch_ops rtl8366rb_switch_ops_mdio = { + .get_tag_protocol = rtl8366_get_tag_protocol, + .setup = rtl8366rb_setup, + .phy_read = rtl8366rb_dsa_phy_read, + .phy_write = rtl8366rb_dsa_phy_write, + .phylink_mac_link_up = rtl8366rb_mac_link_up, + .phylink_mac_link_down = rtl8366rb_mac_link_down, + .get_strings = rtl8366_get_strings, + .get_ethtool_stats = rtl8366_get_ethtool_stats, + .get_sset_count = rtl8366_get_sset_count, + .port_bridge_join = rtl8366rb_port_bridge_join, + .port_bridge_leave = rtl8366rb_port_bridge_leave, + .port_vlan_filtering = rtl8366rb_vlan_filtering, + .port_vlan_add = rtl8366_vlan_add, + .port_vlan_del = rtl8366_vlan_del, + .port_enable = rtl8366rb_port_enable, + .port_disable = rtl8366rb_port_disable, + .port_pre_bridge_flags = rtl8366rb_port_pre_bridge_flags, + .port_bridge_flags = rtl8366rb_port_bridge_flags, + .port_stp_state_set = rtl8366rb_port_stp_state_set, + .port_fast_age = rtl8366rb_port_fast_age, + .port_change_mtu = rtl8366rb_change_mtu, + .port_max_mtu = rtl8366rb_max_mtu, +}; + +static const struct realtek_ops rtl8366rb_ops = { .detect = rtl8366rb_detect, .get_vlan_mc = rtl8366rb_get_vlan_mc, .set_vlan_mc = rtl8366rb_set_vlan_mc, @@ -1803,12 +1843,17 @@ static const struct realtek_smi_ops rtl8366rb_smi_ops = { .phy_write = rtl8366rb_phy_write, }; -const struct realtek_smi_variant rtl8366rb_variant = { - .ds_ops = &rtl8366rb_switch_ops, - .ops = &rtl8366rb_smi_ops, +const struct realtek_variant rtl8366rb_variant = { + .ds_ops_smi = &rtl8366rb_switch_ops_smi, + .ds_ops_mdio = &rtl8366rb_switch_ops_mdio, + .ops = &rtl8366rb_ops, .clk_delay = 10, .cmd_read = 0xa9, .cmd_write = 0xa8, .chip_data_sz = sizeof(struct rtl8366rb), }; EXPORT_SYMBOL_GPL(rtl8366rb_variant); + +MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>"); +MODULE_DESCRIPTION("Driver for RTL8366RB ethernet switch"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/dsa/xrs700x/xrs700x.c b/drivers/net/dsa/xrs700x/xrs700x.c index 0730352cdd57..bc06fe6bac6b 100644 --- a/drivers/net/dsa/xrs700x/xrs700x.c +++ b/drivers/net/dsa/xrs700x/xrs700x.c @@ -442,34 +442,27 @@ static void xrs700x_teardown(struct dsa_switch *ds) cancel_delayed_work_sync(&priv->mib_work); } -static void xrs700x_phylink_validate(struct dsa_switch *ds, int port, - unsigned long *supported, - struct phylink_link_state *state) +static void xrs700x_phylink_get_caps(struct dsa_switch *ds, int port, + struct phylink_config *config) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; - switch (port) { case 0: + __set_bit(PHY_INTERFACE_MODE_RMII, + config->supported_interfaces); + config->mac_capabilities = MAC_10FD | MAC_100FD; break; + case 1: case 2: case 3: - phylink_set(mask, 1000baseT_Full); + phy_interface_set_rgmii(config->supported_interfaces); + config->mac_capabilities = MAC_10FD | MAC_100FD | MAC_1000FD; break; + default: - linkmode_zero(supported); dev_err(ds->dev, "Unsupported port: %i\n", port); - return; + break; } - - phylink_set_port_modes(mask); - - /* The switch only supports full duplex. */ - phylink_set(mask, 10baseT_Full); - phylink_set(mask, 100baseT_Full); - - linkmode_and(supported, supported, mask); - linkmode_and(state->advertising, state->advertising, mask); } static void xrs700x_mac_link_up(struct dsa_switch *ds, int port, @@ -703,7 +696,7 @@ static const struct dsa_switch_ops xrs700x_ops = { .setup = xrs700x_setup, .teardown = xrs700x_teardown, .port_stp_state_set = xrs700x_port_stp_state_set, - .phylink_validate = xrs700x_phylink_validate, + .phylink_get_caps = xrs700x_phylink_get_caps, .phylink_mac_link_up = xrs700x_mac_link_up, .get_strings = xrs700x_get_strings, .get_sset_count = xrs700x_get_sset_count, diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 53080fd143dc..07444aead3fd 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1400,10 +1400,9 @@ static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, void *first_frag) struct sk_buff *skb; if (!first_frag) - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - rx_ring->rx_copybreak); + skb = napi_alloc_skb(rx_ring->napi, rx_ring->rx_copybreak); else - skb = build_skb(first_frag, ENA_PAGE_SIZE); + skb = napi_build_skb(first_frag, ENA_PAGE_SIZE); if (unlikely(!skb)) { ena_increase_stat(&rx_ring->rx_stats.skb_alloc_fail, 1, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4f94136a011a..c313221348c5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -233,6 +233,7 @@ static const u16 bnxt_async_events_arr[] = { ASYNC_EVENT_CMPL_EVENT_ID_ECHO_REQUEST, ASYNC_EVENT_CMPL_EVENT_ID_PPS_TIMESTAMP, ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT, + ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE, }; static struct workqueue_struct *bnxt_pf_wq; @@ -2079,6 +2080,16 @@ static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2) (BNXT_EVENT_RING_TYPE(data2) == \ ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_DATA2_DISABLE_RING_TYPE_RX) +#define BNXT_EVENT_PHC_EVENT_TYPE(data1) \ + (((data1) & ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_MASK) >>\ + ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_SFT) + +#define BNXT_EVENT_PHC_RTC_UPDATE(data1) \ + (((data1) & ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_MASK) >>\ + ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_SFT) + +#define BNXT_PHC_BITS 48 + static int bnxt_async_event_process(struct bnxt *bp, struct hwrm_async_event_cmpl *cmpl) { @@ -2258,6 +2269,24 @@ static int bnxt_async_event_process(struct bnxt *bp, bnxt_event_error_report(bp, data1, data2); goto async_event_process_exit; } + case ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE: { + switch (BNXT_EVENT_PHC_EVENT_TYPE(data1)) { + case ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE: + if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) { + struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + u64 ns; + + spin_lock_bh(&ptp->ptp_lock); + bnxt_ptp_update_current_time(bp); + ns = (((u64)BNXT_EVENT_PHC_RTC_UPDATE(data1) << + BNXT_PHC_BITS) | ptp->current_time); + bnxt_ptp_rtc_timecounter_init(ptp, ns); + spin_unlock_bh(&ptp->ptp_lock); + } + break; + } + goto async_event_process_exit; + } case ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE: { u16 seq_id = le32_to_cpu(cmpl->event_data2) & 0xffff; @@ -7414,6 +7443,7 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp) struct hwrm_port_mac_ptp_qcfg_output *resp; struct hwrm_port_mac_ptp_qcfg_input *req; struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + bool phc_cfg; u8 flags; int rc; @@ -7456,7 +7486,8 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp) rc = -ENODEV; goto exit; } - rc = bnxt_ptp_init(bp); + phc_cfg = (flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_RTC_CONFIGURED) != 0; + rc = bnxt_ptp_init(bp, phc_cfg); if (rc) netdev_warn(bp->dev, "PTP initialization failed.\n"); exit: @@ -7514,6 +7545,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) bp->fw_cap |= BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED; if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_PTP_PPS_SUPPORTED)) bp->fw_cap |= BNXT_FW_CAP_PTP_PPS; + if (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_PTP_64BIT_RTC_SUPPORTED) + bp->fw_cap |= BNXT_FW_CAP_PTP_RTC; if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_HOT_RESET_IF_SUPPORT)) bp->fw_cap |= BNXT_FW_CAP_HOT_RESET_IF; if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_FW_LIVEPATCH_SUPPORTED)) @@ -10288,6 +10321,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) /* VF-reps may need to be re-opened after the PF is re-opened */ if (BNXT_PF(bp)) bnxt_vf_reps_open(bp); + bnxt_ptp_init_rtc(bp, true); return 0; open_err_irq: diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 440dfeb4948b..4b023e35c765 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1957,6 +1957,7 @@ struct bnxt { #define BNXT_FW_CAP_EXT_STATS_SUPPORTED 0x00040000 #define BNXT_FW_CAP_ERR_RECOVER_RELOAD 0x00100000 #define BNXT_FW_CAP_HOT_RESET 0x00200000 + #define BNXT_FW_CAP_PTP_RTC 0x00400000 #define BNXT_FW_CAP_VLAN_RX_STRIP 0x01000000 #define BNXT_FW_CAP_VLAN_TX_INSERT 0x02000000 #define BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED 0x04000000 diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 003330e8cd58..5edbee92f5c4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -11,6 +11,7 @@ #include <linux/ctype.h> #include <linux/stringify.h> #include <linux/ethtool.h> +#include <linux/ethtool_netlink.h> #include <linux/linkmode.h> #include <linux/interrupt.h> #include <linux/pci.h> @@ -802,9 +803,11 @@ static void bnxt_get_ringparam(struct net_device *dev, if (bp->flags & BNXT_FLAG_AGG_RINGS) { ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT_JUM_ENA; ering->rx_jumbo_max_pending = BNXT_MAX_RX_JUM_DESC_CNT; + kernel_ering->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_ENABLED; } else { ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT; ering->rx_jumbo_max_pending = 0; + kernel_ering->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_DISABLED; } ering->tx_max_pending = BNXT_MAX_TX_DESC_CNT; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index ea86c54247c7..b7100edbd6dd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -369,6 +369,12 @@ struct cmd_nums { #define HWRM_FUNC_PTP_EXT_CFG 0x1a0UL #define HWRM_FUNC_PTP_EXT_QCFG 0x1a1UL #define HWRM_FUNC_KEY_CTX_ALLOC 0x1a2UL + #define HWRM_FUNC_BACKING_STORE_CFG_V2 0x1a3UL + #define HWRM_FUNC_BACKING_STORE_QCFG_V2 0x1a4UL + #define HWRM_FUNC_DBR_PACING_CFG 0x1a5UL + #define HWRM_FUNC_DBR_PACING_QCFG 0x1a6UL + #define HWRM_FUNC_DBR_PACING_BROADCAST_EVENT 0x1a7UL + #define HWRM_FUNC_BACKING_STORE_QCAPS_V2 0x1a8UL #define HWRM_SELFTEST_QLIST 0x200UL #define HWRM_SELFTEST_EXEC 0x201UL #define HWRM_SELFTEST_IRQ 0x202UL @@ -390,6 +396,9 @@ struct cmd_nums { #define HWRM_MFG_PRVSN_IMPORT_CERT 0x212UL #define HWRM_MFG_PRVSN_GET_STATE 0x213UL #define HWRM_MFG_GET_NVM_MEASUREMENT 0x214UL + #define HWRM_MFG_PSOC_QSTATUS 0x215UL + #define HWRM_MFG_SELFTEST_QLIST 0x216UL + #define HWRM_MFG_SELFTEST_EXEC 0x217UL #define HWRM_TF 0x2bcUL #define HWRM_TF_VERSION_GET 0x2bdUL #define HWRM_TF_SESSION_OPEN 0x2c6UL @@ -532,8 +541,8 @@ struct hwrm_err_output { #define HWRM_VERSION_MAJOR 1 #define HWRM_VERSION_MINOR 10 #define HWRM_VERSION_UPDATE 2 -#define HWRM_VERSION_RSVD 63 -#define HWRM_VERSION_STR "1.10.2.63" +#define HWRM_VERSION_RSVD 73 +#define HWRM_VERSION_STR "1.10.2.73" /* hwrm_ver_get_input (size:192b/24B) */ struct hwrm_ver_get_input { @@ -757,10 +766,11 @@ struct hwrm_async_event_cmpl { #define ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE 0x40UL #define ASYNC_EVENT_CMPL_EVENT_ID_PFC_WATCHDOG_CFG_CHANGE 0x41UL #define ASYNC_EVENT_CMPL_EVENT_ID_ECHO_REQUEST 0x42UL - #define ASYNC_EVENT_CMPL_EVENT_ID_PHC_MASTER 0x43UL + #define ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE 0x43UL #define ASYNC_EVENT_CMPL_EVENT_ID_PPS_TIMESTAMP 0x44UL #define ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT 0x45UL - #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x46UL + #define ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_THRESHOLD 0x46UL + #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x47UL #define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG 0xfeUL #define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL #define ASYNC_EVENT_CMPL_EVENT_ID_LAST ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR @@ -1112,34 +1122,37 @@ struct hwrm_async_event_cmpl_echo_request { __le32 event_data1; }; -/* hwrm_async_event_cmpl_phc_master (size:128b/16B) */ -struct hwrm_async_event_cmpl_phc_master { +/* hwrm_async_event_cmpl_phc_update (size:128b/16B) */ +struct hwrm_async_event_cmpl_phc_update { __le16 type; - #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_MASK 0x3fUL - #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_SFT 0 - #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_HWRM_ASYNC_EVENT 0x2eUL - #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_LAST ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_HWRM_ASYNC_EVENT + #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_LAST ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_HWRM_ASYNC_EVENT __le16 event_id; - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_PHC_MASTER 0x43UL - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_LAST ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_PHC_MASTER + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_PHC_UPDATE 0x43UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_LAST ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_PHC_UPDATE __le32 event_data2; - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_MASTER_FID_MASK 0xffffUL - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_MASTER_FID_SFT 0 - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_SEC_FID_MASK 0xffff0000UL - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_SEC_FID_SFT 16 + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_MASTER_FID_MASK 0xffffUL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_MASTER_FID_SFT 0 + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_SEC_FID_MASK 0xffff0000UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_SEC_FID_SFT 16 u8 opaque_v; - #define ASYNC_EVENT_CMPL_PHC_MASTER_V 0x1UL - #define ASYNC_EVENT_CMPL_PHC_MASTER_OPAQUE_MASK 0xfeUL - #define ASYNC_EVENT_CMPL_PHC_MASTER_OPAQUE_SFT 1 + #define ASYNC_EVENT_CMPL_PHC_UPDATE_V 0x1UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_OPAQUE_SFT 1 u8 timestamp_lo; __le16 timestamp_hi; __le32 event_data1; - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_MASK 0xfUL - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_SFT 0 - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_MASTER 0x1UL - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_SECONDARY 0x2UL - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_FAILOVER 0x3UL - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_LAST ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_FAILOVER + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_MASK 0xfUL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_SFT 0 + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_MASTER 0x1UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_SECONDARY 0x2UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_FAILOVER 0x3UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE 0x4UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_LAST ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_MASK 0xffff0UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_SFT 4 }; /* hwrm_async_event_cmpl_pps_timestamp (size:128b/16B) */ @@ -1330,6 +1343,30 @@ struct hwrm_async_event_cmpl_error_report_nvm { #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_ERASE }; +/* hwrm_async_event_cmpl_error_report_doorbell_drop_threshold (size:128b/16B) */ +struct hwrm_async_event_cmpl_error_report_doorbell_drop_threshold { + __le16 type; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_ERROR_REPORT 0x45UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_ERROR_REPORT + __le32 event_data2; + u8 opaque_v; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_V 0x1UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD 0x4UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD +}; + /* hwrm_func_reset_input (size:192b/24B) */ struct hwrm_func_reset_input { __le16 req_type; @@ -1589,6 +1626,10 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_FLAGS_EXT_EP_RATE_CONTROL 0x800000UL #define FUNC_QCAPS_RESP_FLAGS_EXT_MIN_BW_SUPPORTED 0x1000000UL #define FUNC_QCAPS_RESP_FLAGS_EXT_TX_COAL_CMPL_CAP 0x2000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_BS_V2_SUPPORTED 0x4000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_BS_V2_REQUIRED 0x8000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_PTP_64BIT_RTC_SUPPORTED 0x10000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_DBR_PACING_SUPPORTED 0x20000000UL u8 max_schqs; u8 mpc_chnls_cap; #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_TCE 0x1UL @@ -2455,7 +2496,7 @@ struct hwrm_func_backing_store_qcaps_output { __le16 rkc_entry_size; __le32 tkc_max_entries; __le32 rkc_max_entries; - u8 rsvd[7]; + u8 rsvd1[7]; u8 valid; }; @@ -3164,7 +3205,7 @@ struct hwrm_func_ptp_pin_cfg_output { u8 valid; }; -/* hwrm_func_ptp_cfg_input (size:320b/40B) */ +/* hwrm_func_ptp_cfg_input (size:384b/48B) */ struct hwrm_func_ptp_cfg_input { __le16 req_type; __le16 cmpl_ring; @@ -3178,6 +3219,7 @@ struct hwrm_func_ptp_cfg_input { #define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PERIOD 0x8UL #define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_UP 0x10UL #define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PHASE 0x20UL + #define FUNC_PTP_CFG_REQ_ENABLES_PTP_SET_TIME 0x40UL u8 ptp_pps_event; #define FUNC_PTP_CFG_REQ_PTP_PPS_EVENT_INTERNAL 0x1UL #define FUNC_PTP_CFG_REQ_PTP_PPS_EVENT_EXTERNAL 0x2UL @@ -3204,6 +3246,7 @@ struct hwrm_func_ptp_cfg_input { __le32 ptp_freq_adj_ext_up; __le32 ptp_freq_adj_ext_phase_lower; __le32 ptp_freq_adj_ext_phase_upper; + __le64 ptp_set_time; }; /* hwrm_func_ptp_cfg_output (size:128b/16B) */ @@ -3243,6 +3286,308 @@ struct hwrm_func_ptp_ts_query_output { u8 valid; }; +/* hwrm_func_ptp_ext_cfg_input (size:256b/32B) */ +struct hwrm_func_ptp_ext_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 enables; + #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_MASTER_FID 0x1UL + #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_SEC_FID 0x2UL + #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_SEC_MODE 0x4UL + #define FUNC_PTP_EXT_CFG_REQ_ENABLES_FAILOVER_TIMER 0x8UL + __le16 phc_master_fid; + __le16 phc_sec_fid; + u8 phc_sec_mode; + #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_SWITCH 0x0UL + #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_ALL 0x1UL + #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_PF_ONLY 0x2UL + #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_LAST FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_PF_ONLY + u8 unused_0; + __le32 failover_timer; + u8 unused_1[4]; +}; + +/* hwrm_func_ptp_ext_cfg_output (size:128b/16B) */ +struct hwrm_func_ptp_ext_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_func_ptp_ext_qcfg_input (size:192b/24B) */ +struct hwrm_func_ptp_ext_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 unused_0[8]; +}; + +/* hwrm_func_ptp_ext_qcfg_output (size:256b/32B) */ +struct hwrm_func_ptp_ext_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 phc_master_fid; + __le16 phc_sec_fid; + __le16 phc_active_fid0; + __le16 phc_active_fid1; + __le32 last_failover_event; + __le16 from_fid; + __le16 to_fid; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_func_backing_store_cfg_v2_input (size:448b/56B) */ +struct hwrm_func_backing_store_cfg_v2_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 type; + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TKC 0x13UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RKC 0x14UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID 0xffffUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID + __le16 instance; + __le32 flags; + #define FUNC_BACKING_STORE_CFG_V2_REQ_FLAGS_PREBOOT_MODE 0x1UL + __le64 page_dir; + __le32 num_entries; + __le16 entry_size; + u8 page_size_pbl_level; + #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_SFT 0 + #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LAST FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_2 + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_LAST FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_1G + u8 subtype_valid_cnt; + __le32 split_entry_0; + __le32 split_entry_1; + __le32 split_entry_2; + __le32 split_entry_3; +}; + +/* hwrm_func_backing_store_cfg_v2_output (size:128b/16B) */ +struct hwrm_func_backing_store_cfg_v2_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 rsvd0[7]; + u8 valid; +}; + +/* hwrm_func_backing_store_qcfg_v2_input (size:192b/24B) */ +struct hwrm_func_backing_store_qcfg_v2_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 type; + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_TKC 0x13UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_RKC 0x14UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID 0xffffUL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID + __le16 instance; + u8 rsvd[4]; +}; + +/* hwrm_func_backing_store_qcfg_v2_output (size:448b/56B) */ +struct hwrm_func_backing_store_qcfg_v2_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 type; + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TKC 0x13UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RKC 0x14UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID 0xffffUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID + __le16 instance; + __le32 flags; + __le64 page_dir; + __le32 num_entries; + u8 page_size_pbl_level; + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_MASK 0xfUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_SFT 0 + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_2 + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_SFT 4 + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_1G + u8 subtype_valid_cnt; + u8 rsvd[2]; + __le32 split_entry_0; + __le32 split_entry_1; + __le32 split_entry_2; + __le32 split_entry_3; + u8 rsvd2[7]; + u8 valid; +}; + +/* qpc_split_entries (size:128b/16B) */ +struct qpc_split_entries { + __le32 qp_num_l2_entries; + __le32 qp_num_qp1_entries; + __le32 rsvd[2]; +}; + +/* srq_split_entries (size:128b/16B) */ +struct srq_split_entries { + __le32 srq_num_l2_entries; + __le32 rsvd; + __le32 rsvd2[2]; +}; + +/* cq_split_entries (size:128b/16B) */ +struct cq_split_entries { + __le32 cq_num_l2_entries; + __le32 rsvd; + __le32 rsvd2[2]; +}; + +/* vnic_split_entries (size:128b/16B) */ +struct vnic_split_entries { + __le32 vnic_num_vnic_entries; + __le32 rsvd; + __le32 rsvd2[2]; +}; + +/* mrav_split_entries (size:128b/16B) */ +struct mrav_split_entries { + __le32 mrav_num_av_entries; + __le32 rsvd; + __le32 rsvd2[2]; +}; + +/* hwrm_func_backing_store_qcaps_v2_input (size:192b/24B) */ +struct hwrm_func_backing_store_qcaps_v2_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 type; + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TKC 0x13UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RKC 0x14UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID 0xffffUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID + u8 rsvd[6]; +}; + +/* hwrm_func_backing_store_qcaps_v2_output (size:448b/56B) */ +struct hwrm_func_backing_store_qcaps_v2_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 type; + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TKC 0x13UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RKC 0x14UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID 0xffffUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID + __le16 entry_size; + __le32 flags; + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_ENABLE_CTX_KIND_INIT 0x1UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_TYPE_VALID 0x2UL + __le32 instance_bit_map; + u8 ctx_init_value; + u8 ctx_init_offset; + u8 entry_multiple; + u8 rsvd; + __le32 max_num_entries; + __le32 min_num_entries; + __le16 next_valid_type; + u8 subtype_valid_cnt; + u8 rsvd2; + __le32 split_entry_0; + __le32 split_entry_1; + __le32 split_entry_2; + __le32 split_entry_3; + u8 rsvd3[3]; + u8 valid; +}; + /* hwrm_func_drv_if_change_input (size:192b/24B) */ struct hwrm_func_drv_if_change_input { __le16 req_type; @@ -3741,7 +4086,7 @@ struct hwrm_port_phy_qcfg_output { u8 valid; }; -/* hwrm_port_mac_cfg_input (size:384b/48B) */ +/* hwrm_port_mac_cfg_input (size:448b/56B) */ struct hwrm_port_mac_cfg_input { __le16 req_type; __le16 cmpl_ring; @@ -3807,7 +4152,8 @@ struct hwrm_port_mac_cfg_input { #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_DEFAULT_COS_SFT 5 u8 unused_0[3]; __le32 ptp_freq_adj_ppb; - __le32 ptp_adj_phase; + u8 unused_1[4]; + __le64 ptp_adj_phase; }; /* hwrm_port_mac_cfg_output (size:128b/16B) */ @@ -3850,6 +4196,7 @@ struct hwrm_port_mac_ptp_qcfg_output { #define PORT_MAC_PTP_QCFG_RESP_FLAGS_ONE_STEP_TX_TS 0x4UL #define PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS 0x8UL #define PORT_MAC_PTP_QCFG_RESP_FLAGS_PARTIAL_DIRECT_ACCESS_REF_CLOCK 0x10UL + #define PORT_MAC_PTP_QCFG_RESP_FLAGS_RTC_CONFIGURED 0x20UL u8 unused_0[3]; __le32 rx_ts_reg_off_lower; __le32 rx_ts_reg_off_upper; @@ -4339,7 +4686,8 @@ struct hwrm_port_phy_qcaps_output { #define PORT_PHY_QCAPS_RESP_PORT_CNT_2 0x2UL #define PORT_PHY_QCAPS_RESP_PORT_CNT_3 0x3UL #define PORT_PHY_QCAPS_RESP_PORT_CNT_4 0x4UL - #define PORT_PHY_QCAPS_RESP_PORT_CNT_LAST PORT_PHY_QCAPS_RESP_PORT_CNT_4 + #define PORT_PHY_QCAPS_RESP_PORT_CNT_12 0xcUL + #define PORT_PHY_QCAPS_RESP_PORT_CNT_LAST PORT_PHY_QCAPS_RESP_PORT_CNT_12 __le16 supported_speeds_force_mode; #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_100MBHD 0x1UL #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_100MB 0x2UL @@ -4399,7 +4747,7 @@ struct hwrm_port_phy_qcaps_output { __le16 flags2; #define PORT_PHY_QCAPS_RESP_FLAGS2_PAUSE_UNSUPPORTED 0x1UL #define PORT_PHY_QCAPS_RESP_FLAGS2_PFC_UNSUPPORTED 0x2UL - u8 unused_0[1]; + u8 internal_port_cnt; u8 valid; }; @@ -6221,12 +6569,13 @@ struct hwrm_vnic_rss_cfg_input { __le16 target_id; __le64 resp_addr; __le32 hash_type; - #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4 0x1UL - #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4 0x2UL - #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4 0x4UL - #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 0x8UL - #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6 0x10UL - #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6 0x20UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4 0x1UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4 0x2UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4 0x4UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 0x8UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6 0x10UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6 0x20UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL 0x40UL __le16 vnic_id; u8 ring_table_pair_index; u8 hash_mode_flags; @@ -7898,6 +8247,7 @@ struct hwrm_cfa_adv_flow_mgnt_qcaps_output { u8 valid; }; +/* hwrm_tunnel_dst_port_query_input (size:192b/24B) */ struct hwrm_tunnel_dst_port_query_input { __le16 req_type; __le16 cmpl_ring; @@ -8909,6 +9259,50 @@ struct hwrm_dbg_qcfg_output { u8 valid; }; +/* hwrm_dbg_crashdump_medium_cfg_input (size:320b/40B) */ +struct hwrm_dbg_crashdump_medium_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 output_dest_flags; + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_TYPE_DDR 0x1UL + __le16 pg_size_lvl; + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_MASK 0x3UL + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_SFT 0 + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_0 0x0UL + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_1 0x1UL + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_2 0x2UL + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LAST DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_2 + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_MASK 0x1cUL + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_SFT 2 + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_4K (0x0UL << 2) + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_8K (0x1UL << 2) + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_64K (0x2UL << 2) + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_2M (0x3UL << 2) + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_8M (0x4UL << 2) + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_1G (0x5UL << 2) + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_LAST DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_1G + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_UNUSED11_MASK 0xffe0UL + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_UNUSED11_SFT 5 + __le32 size; + __le32 coredump_component_disable_flags; + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_NVRAM 0x1UL + __le32 unused_0; + __le64 pbl; +}; + +/* hwrm_dbg_crashdump_medium_cfg_output (size:128b/16B) */ +struct hwrm_dbg_crashdump_medium_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_1[7]; + u8 valid; +}; + /* coredump_segment_record (size:128b/16B) */ struct coredump_segment_record { __le16 component_id; @@ -9372,8 +9766,35 @@ struct hwrm_nvm_install_update_output { __le16 resp_len; __le64 installed_items; u8 result; - #define NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS 0x0UL - #define NVM_INSTALL_UPDATE_RESP_RESULT_LAST NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS + #define NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS 0x0UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_FAILURE 0xffUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_MALLOC_FAILURE 0xfdUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_INDEX_PARAMETER 0xfbUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_TYPE_PARAMETER 0xf3UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PREREQUISITE 0xf2UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_FILE_HEADER 0xecUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_SIGNATURE 0xebUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PROP_STREAM 0xeaUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PROP_LENGTH 0xe9UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_MANIFEST 0xe8UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_TRAILER 0xe7UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_CHECKSUM 0xe6UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_ITEM_CHECKSUM 0xe5UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_DATA_LENGTH 0xe4UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_DIRECTIVE 0xe1UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_CHIP_REV 0xceUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_DEVICE_ID 0xcdUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_SUBSYS_VENDOR 0xccUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_SUBSYS_ID 0xcbUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_PLATFORM 0xc5UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_DUPLICATE_ITEM 0xc4UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_ZERO_LENGTH_ITEM 0xc3UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_CHECKSUM_ERROR 0xb9UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_DATA_ERROR 0xb8UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_AUTHENTICATION_ERROR 0xb7UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_NOT_FOUND 0xb0UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_LOCKED 0xa7UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_LAST NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_LOCKED u8 problem_item; #define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_NONE 0x0UL #define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_PACKAGE 0xffUL diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 48520967746f..a0b321a19361 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -19,6 +19,20 @@ #include "bnxt_hwrm.h" #include "bnxt_ptp.h" +static int bnxt_ptp_cfg_settime(struct bnxt *bp, u64 time) +{ + struct hwrm_func_ptp_cfg_input *req; + int rc; + + rc = hwrm_req_init(bp, req, HWRM_FUNC_PTP_CFG); + if (rc) + return rc; + + req->enables = cpu_to_le16(FUNC_PTP_CFG_REQ_ENABLES_PTP_SET_TIME); + req->ptp_set_time = cpu_to_le64(time); + return hwrm_req_send(bp, req); +} + int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off) { unsigned int ptp_class; @@ -48,6 +62,9 @@ static int bnxt_ptp_settime(struct ptp_clock_info *ptp_info, ptp_info); u64 ns = timespec64_to_ns(ts); + if (ptp->bp->fw_cap & BNXT_FW_CAP_PTP_RTC) + return bnxt_ptp_cfg_settime(ptp->bp, ns); + spin_lock_bh(&ptp->ptp_lock); timecounter_init(&ptp->tc, &ptp->cc, ns); spin_unlock_bh(&ptp->ptp_lock); @@ -131,11 +148,47 @@ static int bnxt_ptp_gettimex(struct ptp_clock_info *ptp_info, return 0; } +/* Caller holds ptp_lock */ +void bnxt_ptp_update_current_time(struct bnxt *bp) +{ + struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + + bnxt_refclk_read(ptp->bp, NULL, &ptp->current_time); + WRITE_ONCE(ptp->old_time, ptp->current_time); +} + +static int bnxt_ptp_adjphc(struct bnxt_ptp_cfg *ptp, s64 delta) +{ + struct hwrm_port_mac_cfg_input *req; + int rc; + + rc = hwrm_req_init(ptp->bp, req, HWRM_PORT_MAC_CFG); + if (rc) + return rc; + + req->enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_PTP_ADJ_PHASE); + req->ptp_adj_phase = cpu_to_le64(delta); + + rc = hwrm_req_send(ptp->bp, req); + if (rc) { + netdev_err(ptp->bp->dev, "ptp adjphc failed. rc = %x\n", rc); + } else { + spin_lock_bh(&ptp->ptp_lock); + bnxt_ptp_update_current_time(ptp->bp); + spin_unlock_bh(&ptp->ptp_lock); + } + + return rc; +} + static int bnxt_ptp_adjtime(struct ptp_clock_info *ptp_info, s64 delta) { struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg, ptp_info); + if (ptp->bp->fw_cap & BNXT_FW_CAP_PTP_RTC) + return bnxt_ptp_adjphc(ptp, delta); + spin_lock_bh(&ptp->ptp_lock); timecounter_adjtime(&ptp->tc, delta); spin_unlock_bh(&ptp->ptp_lock); @@ -714,7 +767,70 @@ static bool bnxt_pps_config_ok(struct bnxt *bp) return !(bp->fw_cap & BNXT_FW_CAP_PTP_PPS) == !ptp->ptp_info.pin_config; } -int bnxt_ptp_init(struct bnxt *bp) +static void bnxt_ptp_timecounter_init(struct bnxt *bp, bool init_tc) +{ + struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + + if (!ptp->ptp_clock) { + memset(&ptp->cc, 0, sizeof(ptp->cc)); + ptp->cc.read = bnxt_cc_read; + ptp->cc.mask = CYCLECOUNTER_MASK(48); + ptp->cc.shift = 0; + ptp->cc.mult = 1; + ptp->next_overflow_check = jiffies + BNXT_PHC_OVERFLOW_PERIOD; + } + if (init_tc) + timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real())); +} + +/* Caller holds ptp_lock */ +void bnxt_ptp_rtc_timecounter_init(struct bnxt_ptp_cfg *ptp, u64 ns) +{ + timecounter_init(&ptp->tc, &ptp->cc, ns); + /* For RTC, cycle_last must be in sync with the timecounter value. */ + ptp->tc.cycle_last = ns & ptp->cc.mask; +} + +int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg) +{ + struct timespec64 tsp; + u64 ns; + int rc; + + if (!bp->ptp_cfg || !(bp->fw_cap & BNXT_FW_CAP_PTP_RTC)) + return -ENODEV; + + if (!phc_cfg) { + ktime_get_real_ts64(&tsp); + ns = timespec64_to_ns(&tsp); + rc = bnxt_ptp_cfg_settime(bp, ns); + if (rc) + return rc; + } else { + rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_CURRENT_TIME, &ns); + if (rc) + return rc; + } + spin_lock_bh(&bp->ptp_cfg->ptp_lock); + bnxt_ptp_rtc_timecounter_init(bp->ptp_cfg, ns); + spin_unlock_bh(&bp->ptp_cfg->ptp_lock); + + return 0; +} + +static void bnxt_ptp_free(struct bnxt *bp) +{ + struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + + if (ptp->ptp_clock) { + ptp_clock_unregister(ptp->ptp_clock); + ptp->ptp_clock = NULL; + kfree(ptp->ptp_info.pin_config); + ptp->ptp_info.pin_config = NULL; + } +} + +int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg) { struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; int rc; @@ -726,26 +842,23 @@ int bnxt_ptp_init(struct bnxt *bp) if (rc) return rc; + if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) { + bnxt_ptp_timecounter_init(bp, false); + rc = bnxt_ptp_init_rtc(bp, phc_cfg); + if (rc) + goto out; + } + if (ptp->ptp_clock && bnxt_pps_config_ok(bp)) return 0; - if (ptp->ptp_clock) { - ptp_clock_unregister(ptp->ptp_clock); - ptp->ptp_clock = NULL; - kfree(ptp->ptp_info.pin_config); - ptp->ptp_info.pin_config = NULL; - } + bnxt_ptp_free(bp); + atomic_set(&ptp->tx_avail, BNXT_MAX_TX_TS); spin_lock_init(&ptp->ptp_lock); - memset(&ptp->cc, 0, sizeof(ptp->cc)); - ptp->cc.read = bnxt_cc_read; - ptp->cc.mask = CYCLECOUNTER_MASK(48); - ptp->cc.shift = 0; - ptp->cc.mult = 1; - - ptp->next_overflow_check = jiffies + BNXT_PHC_OVERFLOW_PERIOD; - timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real())); + if (!(bp->fw_cap & BNXT_FW_CAP_PTP_RTC)) + bnxt_ptp_timecounter_init(bp, true); ptp->ptp_info = bnxt_ptp_caps; if ((bp->fw_cap & BNXT_FW_CAP_PTP_PPS)) { @@ -757,8 +870,8 @@ int bnxt_ptp_init(struct bnxt *bp) int err = PTR_ERR(ptp->ptp_clock); ptp->ptp_clock = NULL; - bnxt_unmap_ptp_regs(bp); - return err; + rc = err; + goto out; } if (bp->flags & BNXT_FLAG_CHIP_P5) { spin_lock_bh(&ptp->ptp_lock); @@ -768,6 +881,11 @@ int bnxt_ptp_init(struct bnxt *bp) ptp_schedule_worker(ptp->ptp_clock, 0); } return 0; + +out: + bnxt_ptp_free(bp); + bnxt_unmap_ptp_regs(bp); + return rc; } void bnxt_ptp_clear(struct bnxt *bp) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index 7c528e1f8713..373baf45884b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -131,12 +131,15 @@ do { \ #endif int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off); +void bnxt_ptp_update_current_time(struct bnxt *bp); void bnxt_ptp_pps_event(struct bnxt *bp, u32 data1, u32 data2); void bnxt_ptp_reapply_pps(struct bnxt *bp); int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr); int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr); int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb); int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts); -int bnxt_ptp_init(struct bnxt *bp); +void bnxt_ptp_rtc_timecounter_init(struct bnxt_ptp_cfg *ptp, u64 ns); +int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg); +int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg); void bnxt_ptp_clear(struct bnxt *bp); #endif diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 87f1056e29ff..cfe09117fe6c 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1368,7 +1368,7 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) if (!p->eee_enabled) { bcmgenet_eee_enable_set(dev, false); } else { - ret = phy_init_eee(dev->phydev, 0); + ret = phy_init_eee(dev->phydev, false); if (ret) { netif_err(priv, hw, dev, "EEE initialization failed\n"); return ret; diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 9ddbee7de72b..f0a7d8396a4a 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -12,6 +12,7 @@ #include <linux/ptp_clock_kernel.h> #include <linux/net_tstamp.h> #include <linux/interrupt.h> +#include <linux/phy/phy.h> #if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) || defined(CONFIG_MACB_USE_HWSTAMP) #define MACB_EXT_DESC @@ -1291,6 +1292,9 @@ struct macb { u32 wol; struct macb_ptp_info *ptp_info; /* macb-ptp interface */ + + struct phy *sgmii_phy; /* for ZynqMP SGMII mode */ + #ifdef MACB_EXT_DESC uint8_t hw_dma_cap; #endif diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index a363da928e8b..1ce20bf52f72 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -34,7 +34,9 @@ #include <linux/udp.h> #include <linux/tcp.h> #include <linux/iopoll.h> +#include <linux/phy/phy.h> #include <linux/pm_runtime.h> +#include <linux/reset.h> #include "macb.h" /* This structure is only used for MACB on SiFive FU540 devices */ @@ -2739,10 +2741,14 @@ static int macb_open(struct net_device *dev) macb_init_hw(bp); - err = macb_phylink_connect(bp); + err = phy_power_on(bp->sgmii_phy); if (err) goto reset_hw; + err = macb_phylink_connect(bp); + if (err) + goto phy_off; + netif_tx_start_all_queues(dev); if (bp->ptp_info) @@ -2750,6 +2756,9 @@ static int macb_open(struct net_device *dev) return 0; +phy_off: + phy_power_off(bp->sgmii_phy); + reset_hw: macb_reset_hw(bp); for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) @@ -2775,6 +2784,8 @@ static int macb_close(struct net_device *dev) phylink_stop(bp->phylink); phylink_disconnect_phy(bp->phylink); + phy_power_off(bp->sgmii_phy); + spin_lock_irqsave(&bp->lock, flags); macb_reset_hw(bp); netif_carrier_off(dev); @@ -4544,13 +4555,55 @@ static const struct macb_config np4_config = { .usrio = &macb_default_usrio, }; +static int zynqmp_init(struct platform_device *pdev) +{ + struct net_device *dev = platform_get_drvdata(pdev); + struct macb *bp = netdev_priv(dev); + int ret; + + if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII) { + /* Ensure PS-GTR PHY device used in SGMII mode is ready */ + bp->sgmii_phy = devm_phy_get(&pdev->dev, "sgmii-phy"); + + if (IS_ERR(bp->sgmii_phy)) { + ret = PTR_ERR(bp->sgmii_phy); + dev_err_probe(&pdev->dev, ret, + "failed to get PS-GTR PHY\n"); + return ret; + } + + ret = phy_init(bp->sgmii_phy); + if (ret) { + dev_err(&pdev->dev, "failed to init PS-GTR PHY: %d\n", + ret); + return ret; + } + } + + /* Fully reset GEM controller at hardware level using zynqmp-reset driver, + * if mapped in device tree. + */ + ret = device_reset_optional(&pdev->dev); + if (ret) { + dev_err_probe(&pdev->dev, ret, "failed to reset controller"); + phy_exit(bp->sgmii_phy); + return ret; + } + + ret = macb_init(pdev); + if (ret) + phy_exit(bp->sgmii_phy); + + return ret; +} + static const struct macb_config zynqmp_config = { .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO | MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_BD_RD_PREFETCH, .dma_burst_length = 16, .clk_init = macb_clk_init, - .init = macb_init, + .init = zynqmp_init, .jumbo_max_len = 10240, .usrio = &macb_default_usrio, }; @@ -4767,7 +4820,7 @@ static int macb_probe(struct platform_device *pdev) err = macb_mii_init(bp); if (err) - goto err_out_free_netdev; + goto err_out_phy_exit; netif_carrier_off(dev); @@ -4792,6 +4845,9 @@ err_out_unregister_mdio: mdiobus_unregister(bp->mii_bus); mdiobus_free(bp->mii_bus); +err_out_phy_exit: + phy_exit(bp->sgmii_phy); + err_out_free_netdev: free_netdev(dev); @@ -4813,6 +4869,7 @@ static int macb_remove(struct platform_device *pdev) if (dev) { bp = netdev_priv(dev); + phy_exit(bp->sgmii_phy); mdiobus_unregister(bp->mii_bus); mdiobus_free(bp->mii_bus); diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 574a32f23f96..2f6484dc186a 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1409,7 +1409,8 @@ static acpi_status bgx_acpi_register_phy(acpi_handle handle, struct device *dev = &bgx->pdev->dev; struct acpi_device *adev; - if (acpi_bus_get_device(handle, &adev)) + adev = acpi_fetch_acpi_dev(handle); + if (!adev) goto out; acpi_get_mac_address(dev, adev, bgx->lmac[bgx->acpi_lmac_idx].mac); diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index c78b99a497df..8014eb33937c 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -2363,11 +2363,13 @@ static void gemini_port_save_mac_addr(struct gemini_ethernet_port *port) static int gemini_ethernet_port_probe(struct platform_device *pdev) { char *port_names[2] = { "ethernet0", "ethernet1" }; + struct device_node *np = pdev->dev.of_node; struct gemini_ethernet_port *port; struct device *dev = &pdev->dev; struct gemini_ethernet *geth; struct net_device *netdev; struct device *parent; + u8 mac[ETH_ALEN]; unsigned int id; int irq; int ret; @@ -2473,6 +2475,12 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev) netif_napi_add(netdev, &port->napi, gmac_napi_poll, DEFAULT_NAPI_WEIGHT); + ret = of_get_mac_address(np, mac); + if (!ret) { + dev_info(dev, "Setting macaddr from DT %pM\n", mac); + memcpy(port->mac_addr, mac, ETH_ALEN); + } + if (is_valid_ether_addr((void *)port->mac_addr)) { eth_hw_addr_set(netdev, (u8 *)port->mac_addr); } else { diff --git a/drivers/net/ethernet/dec/tulip/pnic.c b/drivers/net/ethernet/dec/tulip/pnic.c index 3fb39e32e1b4..653bde48ef44 100644 --- a/drivers/net/ethernet/dec/tulip/pnic.c +++ b/drivers/net/ethernet/dec/tulip/pnic.c @@ -21,7 +21,7 @@ void pnic_do_nway(struct net_device *dev) struct tulip_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp->base_addr; u32 phy_reg = ioread32(ioaddr + 0xB8); - u32 new_csr6 = tp->csr6 & ~0x40C40200; + u32 new_csr6; if (phy_reg & 0x78000000) { /* Ignore baseT4 */ if (phy_reg & 0x20000000) dev->if_port = 5; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c index 623d113b6581..521f036d1c00 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c @@ -100,6 +100,14 @@ static int dpaa2_mac_get_if_mode(struct fwnode_handle *dpmac_node, return err; } +static struct phylink_pcs *dpaa2_mac_select_pcs(struct phylink_config *config, + phy_interface_t interface) +{ + struct dpaa2_mac *mac = phylink_to_dpaa2_mac(config); + + return mac->pcs; +} + static void dpaa2_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) { @@ -172,6 +180,7 @@ static void dpaa2_mac_link_down(struct phylink_config *config, static const struct phylink_mac_ops dpaa2_mac_phylink_ops = { .validate = phylink_generic_validate, + .mac_select_pcs = dpaa2_mac_select_pcs, .mac_config = dpaa2_mac_config, .mac_link_up = dpaa2_mac_link_up, .mac_link_down = dpaa2_mac_link_down, @@ -303,9 +312,6 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac) } mac->phylink = phylink; - if (mac->pcs) - phylink_set_pcs(mac->phylink, mac->pcs); - err = phylink_fwnode_phy_connect(mac->phylink, dpmac_node, 0); if (err) { netdev_err(net_dev, "phylink_fwnode_phy_connect() = %d\n", err); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index ed16a5ac9ad0..a0c75c717073 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -934,18 +934,21 @@ static void enetc_mdiobus_destroy(struct enetc_pf *pf) enetc_imdio_remove(pf); } +static struct phylink_pcs * +enetc_pl_mac_select_pcs(struct phylink_config *config, phy_interface_t iface) +{ + struct enetc_pf *pf = phylink_to_enetc_pf(config); + + return pf->pcs; +} + static void enetc_pl_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) { struct enetc_pf *pf = phylink_to_enetc_pf(config); - struct enetc_ndev_priv *priv; enetc_mac_config(&pf->si->hw, state->interface); - - priv = netdev_priv(pf->si->ndev); - if (pf->pcs) - phylink_set_pcs(priv->phylink, pf->pcs); } static void enetc_force_rgmii_mac(struct enetc_hw *hw, int speed, int duplex) @@ -1062,6 +1065,7 @@ static void enetc_pl_mac_link_down(struct phylink_config *config, static const struct phylink_mac_ops enetc_mac_phylink_ops = { .validate = phylink_generic_validate, + .mac_select_pcs = enetc_pl_mac_select_pcs, .mac_config = enetc_pl_mac_config, .mac_link_up = enetc_pl_mac_link_up, .mac_link_down = enetc_pl_mac_link_down, diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 796133de527e..11227f51404c 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2797,7 +2797,7 @@ static int fec_enet_eee_mode_set(struct net_device *ndev, bool enable) int ret = 0; if (enable) { - ret = phy_init_eee(ndev->phydev, 0); + ret = phy_init_eee(ndev->phydev, false); if (ret) return ret; diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index af99017a5453..7d49c28215f3 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -101,7 +101,6 @@ static int fec_ptp_enable_pps(struct fec_enet_private *fep, uint enable) u32 val, tempval; struct timespec64 ts; u64 ns; - val = 0; if (fep->pps_enable == enable) return 0; diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c index 266e562bd67a..ef8058a17188 100644 --- a/drivers/net/ethernet/freescale/xgmac_mdio.c +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c @@ -14,6 +14,7 @@ #include <linux/acpi.h> #include <linux/acpi_mdio.h> +#include <linux/clk.h> #include <linux/interrupt.h> #include <linux/kernel.h> #include <linux/mdio.h> @@ -36,9 +37,10 @@ struct tgec_mdio_controller { } __packed; #define MDIO_STAT_ENC BIT(6) -#define MDIO_STAT_CLKDIV(x) (((x>>1) & 0xff) << 8) +#define MDIO_STAT_CLKDIV(x) (((x) & 0x1ff) << 7) #define MDIO_STAT_BSY BIT(0) #define MDIO_STAT_RD_ER BIT(1) +#define MDIO_STAT_PRE_DIS BIT(5) #define MDIO_CTL_DEV_ADDR(x) (x & 0x1f) #define MDIO_CTL_PORT_ADDR(x) ((x & 0x1f) << 5) #define MDIO_CTL_PRE_DIS BIT(10) @@ -50,6 +52,8 @@ struct tgec_mdio_controller { struct mdio_fsl_priv { struct tgec_mdio_controller __iomem *mdio_base; + struct clk *enet_clk; + u32 mdc_freq; bool is_little_endian; bool has_a009885; bool has_a011043; @@ -254,6 +258,50 @@ irq_restore: return ret; } +static int xgmac_mdio_set_mdc_freq(struct mii_bus *bus) +{ + struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv; + struct tgec_mdio_controller __iomem *regs = priv->mdio_base; + struct device *dev = bus->parent; + u32 mdio_stat, div; + + if (device_property_read_u32(dev, "clock-frequency", &priv->mdc_freq)) + return 0; + + priv->enet_clk = devm_clk_get(dev, NULL); + if (IS_ERR(priv->enet_clk)) { + dev_err(dev, "Input clock unknown, not changing MDC frequency"); + return PTR_ERR(priv->enet_clk); + } + + div = ((clk_get_rate(priv->enet_clk) / priv->mdc_freq) - 1) / 2; + if (div < 5 || div > 0x1ff) { + dev_err(dev, "Requested MDC frequency is out of range, ignoring"); + return -EINVAL; + } + + mdio_stat = xgmac_read32(®s->mdio_stat, priv->is_little_endian); + mdio_stat &= ~MDIO_STAT_CLKDIV(0x1ff); + mdio_stat |= MDIO_STAT_CLKDIV(div); + xgmac_write32(mdio_stat, ®s->mdio_stat, priv->is_little_endian); + return 0; +} + +static void xgmac_mdio_set_suppress_preamble(struct mii_bus *bus) +{ + struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv; + struct tgec_mdio_controller __iomem *regs = priv->mdio_base; + struct device *dev = bus->parent; + u32 mdio_stat; + + if (!device_property_read_bool(dev, "suppress-preamble")) + return; + + mdio_stat = xgmac_read32(®s->mdio_stat, priv->is_little_endian); + mdio_stat |= MDIO_STAT_PRE_DIS; + xgmac_write32(mdio_stat, ®s->mdio_stat, priv->is_little_endian); +} + static int xgmac_mdio_probe(struct platform_device *pdev) { struct fwnode_handle *fwnode; @@ -273,7 +321,7 @@ static int xgmac_mdio_probe(struct platform_device *pdev) return -EINVAL; } - bus = mdiobus_alloc_size(sizeof(struct mdio_fsl_priv)); + bus = devm_mdiobus_alloc_size(&pdev->dev, sizeof(struct mdio_fsl_priv)); if (!bus) return -ENOMEM; @@ -284,13 +332,11 @@ static int xgmac_mdio_probe(struct platform_device *pdev) bus->probe_capabilities = MDIOBUS_C22_C45; snprintf(bus->id, MII_BUS_ID_SIZE, "%pa", &res->start); - /* Set the PHY base address */ priv = bus->priv; - priv->mdio_base = ioremap(res->start, resource_size(res)); - if (!priv->mdio_base) { - ret = -ENOMEM; - goto err_ioremap; - } + priv->mdio_base = devm_ioremap(&pdev->dev, res->start, + resource_size(res)); + if (!priv->mdio_base) + return -ENOMEM; /* For both ACPI and DT cases, endianness of MDIO controller * needs to be specified using "little-endian" property. @@ -303,6 +349,12 @@ static int xgmac_mdio_probe(struct platform_device *pdev) priv->has_a011043 = device_property_read_bool(&pdev->dev, "fsl,erratum-a011043"); + xgmac_mdio_set_suppress_preamble(bus); + + ret = xgmac_mdio_set_mdc_freq(bus); + if (ret) + return ret; + fwnode = pdev->dev.fwnode; if (is_of_node(fwnode)) ret = of_mdiobus_register(bus, to_of_node(fwnode)); @@ -312,32 +364,12 @@ static int xgmac_mdio_probe(struct platform_device *pdev) ret = -EINVAL; if (ret) { dev_err(&pdev->dev, "cannot register MDIO bus\n"); - goto err_registration; + return ret; } platform_set_drvdata(pdev, bus); return 0; - -err_registration: - iounmap(priv->mdio_base); - -err_ioremap: - mdiobus_free(bus); - - return ret; -} - -static int xgmac_mdio_remove(struct platform_device *pdev) -{ - struct mii_bus *bus = platform_get_drvdata(pdev); - struct mdio_fsl_priv *priv = bus->priv; - - mdiobus_unregister(bus); - iounmap(priv->mdio_base); - mdiobus_free(bus); - - return 0; } static const struct of_device_id xgmac_mdio_match[] = { @@ -364,7 +396,6 @@ static struct platform_driver xgmac_mdio_driver = { .acpi_match_table = xgmac_acpi_match, }, .probe = xgmac_mdio_probe, - .remove = xgmac_mdio_remove, }; module_platform_driver(xgmac_mdio_driver); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index a42aeb555f34..6fb3437f68e0 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -7388,9 +7388,9 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) resource_size_t flash_start, flash_len; static int cards_found; u16 aspm_disable_flag = 0; - int bars, i, err, pci_using_dac; u16 eeprom_data = 0; u16 eeprom_apme_mask = E1000_EEPROM_APME; + int bars, i, err; s32 ret_val = 0; if (ei->flags2 & FLAG2_DISABLE_ASPM_L0S) @@ -7404,17 +7404,11 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; - pci_using_dac = 0; err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (!err) { - pci_using_dac = 1; - } else { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, - "No usable DMA configuration, aborting\n"); - goto err_dma; - } + if (err) { + dev_err(&pdev->dev, + "No usable DMA configuration, aborting\n"); + goto err_dma; } bars = pci_select_bars(pdev, IORESOURCE_MEM); @@ -7550,10 +7544,8 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->priv_flags |= IFF_UNICAST_FLT; - if (pci_using_dac) { - netdev->features |= NETIF_F_HIGHDMA; - netdev->vlan_features |= NETIF_F_HIGHDMA; - } + netdev->features |= NETIF_F_HIGHDMA; + netdev->vlan_features |= NETIF_F_HIGHDMA; /* MTU range: 68 - max_hw_frame_size */ netdev->min_mtu = ETH_MIN_MTU; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 0c4b7dfb3b35..f531bc1df338 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -15341,12 +15341,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* set up for high or low dma */ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (err) { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, - "DMA configuration failed: 0x%x\n", err); - goto err_dma; - } + dev_err(&pdev->dev, + "DMA configuration failed: 0x%x\n", err); + goto err_dma; } /* set up pci connections */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 945b1bb9c6f4..67e9844e2076 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -241,21 +241,25 @@ bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count) static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp) { + unsigned int totalsize = xdp->data_end - xdp->data_meta; unsigned int metasize = xdp->data - xdp->data_meta; - unsigned int datasize = xdp->data_end - xdp->data; struct sk_buff *skb; + net_prefetch(xdp->data_meta); + /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, - xdp->data_end - xdp->data_hard_start, + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) goto out; - skb_reserve(skb, xdp->data - xdp->data_hard_start); - memcpy(__skb_put(skb, datasize), xdp->data, datasize); - if (metasize) + memcpy(__skb_put(skb, totalsize), xdp->data_meta, + ALIGN(totalsize, sizeof(long))); + + if (metasize) { skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } out: xsk_buff_free(xdp); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 8125b9120615..b0bd95c85480 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -4368,12 +4368,9 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (err) { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, - "DMA configuration failed: 0x%x\n", err); - goto err_dma; - } + dev_err(&pdev->dev, + "DMA configuration failed: 0x%x\n", err); + goto err_dma; } err = pci_request_regions(pdev, iavf_driver_name); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 30814435f779..f46af3b34074 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4459,8 +4459,6 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) /* set up for high or low DMA */ err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); - if (err) - err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); if (err) { dev_err(dev, "DMA configuration failed: 0x%x\n", err); return err; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 3e38695f1c9d..c2258bee8ecb 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -983,15 +983,17 @@ static struct sk_buff * ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, struct xdp_buff *xdp) { + unsigned int metasize = xdp->data - xdp->data_meta; unsigned int size = xdp->data_end - xdp->data; unsigned int headlen; struct sk_buff *skb; /* prefetch first cache line of first page */ - net_prefetch(xdp->data); + net_prefetch(xdp->data_meta); /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE, + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, + ICE_RX_HDR_SIZE + metasize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; @@ -1003,8 +1005,13 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE); /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen, - sizeof(long))); + memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta, + ALIGN(headlen + metasize, sizeof(long))); + + if (metasize) { + skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } /* if we exhaust the linear part then add what is left as a frag */ size -= headlen; diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 2388837d6d6c..feb874bde171 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -428,20 +428,24 @@ static void ice_bump_ntc(struct ice_rx_ring *rx_ring) static struct sk_buff * ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) { - unsigned int datasize_hard = xdp->data_end - xdp->data_hard_start; + unsigned int totalsize = xdp->data_end - xdp->data_meta; unsigned int metasize = xdp->data - xdp->data_meta; - unsigned int datasize = xdp->data_end - xdp->data; struct sk_buff *skb; - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard, + net_prefetch(xdp->data_meta); + + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; - skb_reserve(skb, xdp->data - xdp->data_hard_start); - memcpy(__skb_put(skb, datasize), xdp->data, datasize); - if (metasize) + memcpy(__skb_put(skb, totalsize), xdp->data_meta, + ALIGN(totalsize, sizeof(long))); + + if (metasize) { skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } xsk_buff_free(xdp); return skb; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 38ba92022cd4..bfa321e4003f 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3164,8 +3164,8 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) s32 ret_val; static int global_quad_port_a; /* global quad port a indication */ const struct e1000_info *ei = igb_info_tbl[ent->driver_data]; - int err, pci_using_dac; u8 part_str[E1000_PBANUM_LENGTH]; + int err; /* Catch broken hardware that put the wrong VF device ID in * the PCIe SR-IOV capability. @@ -3180,17 +3180,11 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; - pci_using_dac = 0; err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (!err) { - pci_using_dac = 1; - } else { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, - "No usable DMA configuration, aborting\n"); - goto err_dma; - } + if (err) { + dev_err(&pdev->dev, + "No usable DMA configuration, aborting\n"); + goto err_dma; } err = pci_request_mem_regions(pdev, igb_driver_name); @@ -3306,8 +3300,7 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (hw->mac.type >= e1000_i350) netdev->hw_features |= NETIF_F_NTUPLE; - if (pci_using_dac) - netdev->features |= NETIF_F_HIGHDMA; + netdev->features |= NETIF_F_HIGHDMA; netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; netdev->mpls_features |= NETIF_F_HW_CSUM; diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index b78407289741..43ced78c3a2e 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2684,25 +2684,18 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct igbvf_adapter *adapter; struct e1000_hw *hw; const struct igbvf_info *ei = igbvf_info_tbl[ent->driver_data]; - static int cards_found; - int err, pci_using_dac; + int err; err = pci_enable_device_mem(pdev); if (err) return err; - pci_using_dac = 0; err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (!err) { - pci_using_dac = 1; - } else { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, - "No usable DMA configuration, aborting\n"); - goto err_dma; - } + if (err) { + dev_err(&pdev->dev, + "No usable DMA configuration, aborting\n"); + goto err_dma; } err = pci_request_regions(pdev, igbvf_driver_name); @@ -2783,10 +2776,7 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features |= NETIF_F_GSO_PARTIAL | IGBVF_GSO_PARTIAL_FEATURES; - netdev->features = netdev->hw_features; - - if (pci_using_dac) - netdev->features |= NETIF_F_HIGHDMA; + netdev->features = netdev->hw_features | NETIF_F_HIGHDMA; netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; netdev->mpls_features |= NETIF_F_HW_CSUM; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 2f17f36e94fd..b965fb809d84 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -2446,19 +2446,20 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring, struct xdp_buff *xdp) { + unsigned int totalsize = xdp->data_end - xdp->data_meta; unsigned int metasize = xdp->data - xdp->data_meta; - unsigned int datasize = xdp->data_end - xdp->data; - unsigned int totalsize = metasize + datasize; struct sk_buff *skb; - skb = __napi_alloc_skb(&ring->q_vector->napi, - xdp->data_end - xdp->data_hard_start, + net_prefetch(xdp->data_meta); + + skb = __napi_alloc_skb(&ring->q_vector->napi, totalsize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; - skb_reserve(skb, xdp->data_meta - xdp->data_hard_start); - memcpy(__skb_put(skb, totalsize), xdp->data_meta, totalsize); + memcpy(__skb_put(skb, totalsize), xdp->data_meta, + ALIGN(totalsize, sizeof(long))); + if (metasize) { skb_metadata_set(skb, metasize); __skb_pull(skb, metasize); @@ -6251,23 +6252,17 @@ static int igc_probe(struct pci_dev *pdev, struct net_device *netdev; struct igc_hw *hw; const struct igc_info *ei = igc_info_tbl[ent->driver_data]; - int err, pci_using_dac; + int err; err = pci_enable_device_mem(pdev); if (err) return err; - pci_using_dac = 0; err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (!err) { - pci_using_dac = 1; - } else { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, - "No usable DMA configuration, aborting\n"); - goto err_dma; - } + if (err) { + dev_err(&pdev->dev, + "No usable DMA configuration, aborting\n"); + goto err_dma; } err = pci_request_mem_regions(pdev, igc_driver_name); @@ -6367,8 +6362,7 @@ static int igc_probe(struct pci_dev *pdev, netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; netdev->hw_features |= netdev->features; - if (pci_using_dac) - netdev->features |= NETIF_F_HIGHDMA; + netdev->features |= NETIF_F_HIGHDMA; netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; netdev->mpls_features |= NETIF_F_HW_CSUM; diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index 99d481904ce6..affdefcca7e3 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -361,7 +361,6 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct net_device *netdev = NULL; struct ixgb_adapter *adapter; static int cards_found = 0; - int pci_using_dac; u8 addr[ETH_ALEN]; int i; int err; @@ -370,16 +369,10 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; - pci_using_dac = 0; err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (!err) { - pci_using_dac = 1; - } else { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - pr_err("No usable DMA configuration, aborting\n"); - goto err_dma_mask; - } + if (err) { + pr_err("No usable DMA configuration, aborting\n"); + goto err_dma_mask; } err = pci_request_regions(pdev, ixgb_driver_name); @@ -444,10 +437,8 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_HW_VLAN_CTAG_FILTER; netdev->hw_features |= NETIF_F_RXCSUM; - if (pci_using_dac) { - netdev->features |= NETIF_F_HIGHDMA; - netdev->vlan_features |= NETIF_F_HIGHDMA; - } + netdev->features |= NETIF_F_HIGHDMA; + netdev->vlan_features |= NETIF_F_HIGHDMA; /* MTU range: 68 - 16114 */ netdev->min_mtu = ETH_MIN_MTU; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 89b467006291..2c8a4a06f56a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -10632,9 +10632,9 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct ixgbe_adapter *adapter = NULL; struct ixgbe_hw *hw; const struct ixgbe_info *ii = ixgbe_info_tbl[ent->driver_data]; - int i, err, pci_using_dac, expected_gts; unsigned int indices = MAX_TX_QUEUES; u8 part_str[IXGBE_PBANUM_LENGTH]; + int i, err, expected_gts; bool disable_dev = false; #ifdef IXGBE_FCOE u16 device_caps; @@ -10654,16 +10654,11 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; - if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) { - pci_using_dac = 1; - } else { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, - "No usable DMA configuration, aborting\n"); - goto err_dma; - } - pci_using_dac = 0; + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + dev_err(&pdev->dev, + "No usable DMA configuration, aborting\n"); + goto err_dma; } err = pci_request_mem_regions(pdev, ixgbe_driver_name); @@ -10861,8 +10856,7 @@ skip_sriov: netdev->hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC; - if (pci_using_dac) - netdev->features |= NETIF_F_HIGHDMA; + netdev->features |= NETIF_F_HIGHDMA; netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; netdev->hw_enc_features |= netdev->vlan_features; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index b3fd8e5cd85b..ee28929b9c5f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -207,26 +207,28 @@ bool ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count) } static struct sk_buff *ixgbe_construct_skb_zc(struct ixgbe_ring *rx_ring, - struct ixgbe_rx_buffer *bi) + const struct xdp_buff *xdp) { - unsigned int metasize = bi->xdp->data - bi->xdp->data_meta; - unsigned int datasize = bi->xdp->data_end - bi->xdp->data; + unsigned int totalsize = xdp->data_end - xdp->data_meta; + unsigned int metasize = xdp->data - xdp->data_meta; struct sk_buff *skb; + net_prefetch(xdp->data_meta); + /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, - bi->xdp->data_end - bi->xdp->data_hard_start, + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; - skb_reserve(skb, bi->xdp->data - bi->xdp->data_hard_start); - memcpy(__skb_put(skb, datasize), bi->xdp->data, datasize); - if (metasize) + memcpy(__skb_put(skb, totalsize), xdp->data_meta, + ALIGN(totalsize, sizeof(long))); + + if (metasize) { skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } - xsk_buff_free(bi->xdp); - bi->xdp = NULL; return skb; } @@ -317,12 +319,15 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector, } /* XDP_PASS path */ - skb = ixgbe_construct_skb_zc(rx_ring, bi); + skb = ixgbe_construct_skb_zc(rx_ring, bi->xdp); if (!skb) { rx_ring->rx_stats.alloc_rx_buff_failed++; break; } + xsk_buff_free(bi->xdp); + bi->xdp = NULL; + cleaned_count++; ixgbe_inc_ntc(rx_ring); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 0015fcf1df2b..7c33be9074e9 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4511,22 +4511,17 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct ixgbevf_adapter *adapter = NULL; struct ixgbe_hw *hw = NULL; const struct ixgbevf_info *ii = ixgbevf_info_tbl[ent->driver_data]; - int err, pci_using_dac; bool disable_dev = false; + int err; err = pci_enable_device(pdev); if (err) return err; - if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) { - pci_using_dac = 1; - } else { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, "No usable DMA configuration, aborting\n"); - goto err_dma; - } - pci_using_dac = 0; + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + dev_err(&pdev->dev, "No usable DMA configuration, aborting\n"); + goto err_dma; } err = pci_request_regions(pdev, ixgbevf_driver_name); @@ -4606,10 +4601,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features |= NETIF_F_GSO_PARTIAL | IXGBEVF_GSO_PARTIAL_FEATURES; - netdev->features = netdev->hw_features; - - if (pci_using_dac) - netdev->features |= NETIF_F_HIGHDMA; + netdev->features = netdev->hw_features | NETIF_F_HIGHDMA; netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; netdev->mpls_features |= NETIF_F_SG | diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 83c8908f0cc7..f1335a1ed695 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1884,8 +1884,8 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp, bytes_compl += buf->skb->len; pkts_compl++; dev_kfree_skb_any(buf->skb); - } else if (buf->type == MVNETA_TYPE_XDP_TX || - buf->type == MVNETA_TYPE_XDP_NDO) { + } else if ((buf->type == MVNETA_TYPE_XDP_TX || + buf->type == MVNETA_TYPE_XDP_NDO) && buf->xdpf) { if (napi && buf->type == MVNETA_TYPE_XDP_TX) xdp_return_frame_rx_napi(buf->xdpf); else @@ -2060,61 +2060,104 @@ int mvneta_rx_refill_queue(struct mvneta_port *pp, struct mvneta_rx_queue *rxq) static void mvneta_xdp_put_buff(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, - struct xdp_buff *xdp, struct skb_shared_info *sinfo, - int sync_len) + struct xdp_buff *xdp, int sync_len) { + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); int i; + if (likely(!xdp_buff_has_frags(xdp))) + goto out; + for (i = 0; i < sinfo->nr_frags; i++) page_pool_put_full_page(rxq->page_pool, skb_frag_page(&sinfo->frags[i]), true); + +out: page_pool_put_page(rxq->page_pool, virt_to_head_page(xdp->data), sync_len, true); } static int mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq, - struct xdp_frame *xdpf, bool dma_map) + struct xdp_frame *xdpf, int *nxmit_byte, bool dma_map) { + struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf); + struct device *dev = pp->dev->dev.parent; struct mvneta_tx_desc *tx_desc; - struct mvneta_tx_buf *buf; - dma_addr_t dma_addr; + int i, num_frames = 1; + struct page *page; - if (txq->count >= txq->tx_stop_threshold) + if (unlikely(xdp_frame_has_frags(xdpf))) + num_frames += sinfo->nr_frags; + + if (txq->count + num_frames >= txq->size) return MVNETA_XDP_DROPPED; - tx_desc = mvneta_txq_next_desc_get(txq); + for (i = 0; i < num_frames; i++) { + struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index]; + skb_frag_t *frag = NULL; + int len = xdpf->len; + dma_addr_t dma_addr; - buf = &txq->buf[txq->txq_put_index]; - if (dma_map) { - /* ndo_xdp_xmit */ - dma_addr = dma_map_single(pp->dev->dev.parent, xdpf->data, - xdpf->len, DMA_TO_DEVICE); - if (dma_mapping_error(pp->dev->dev.parent, dma_addr)) { - mvneta_txq_desc_put(txq); - return MVNETA_XDP_DROPPED; + if (unlikely(i)) { /* paged area */ + frag = &sinfo->frags[i - 1]; + len = skb_frag_size(frag); } - buf->type = MVNETA_TYPE_XDP_NDO; - } else { - struct page *page = virt_to_page(xdpf->data); - dma_addr = page_pool_get_dma_addr(page) + - sizeof(*xdpf) + xdpf->headroom; - dma_sync_single_for_device(pp->dev->dev.parent, dma_addr, - xdpf->len, DMA_BIDIRECTIONAL); - buf->type = MVNETA_TYPE_XDP_TX; - } - buf->xdpf = xdpf; + tx_desc = mvneta_txq_next_desc_get(txq); + if (dma_map) { + /* ndo_xdp_xmit */ + void *data; + + data = unlikely(frag) ? skb_frag_address(frag) + : xdpf->data; + dma_addr = dma_map_single(dev, data, len, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_addr)) { + mvneta_txq_desc_put(txq); + goto unmap; + } + + buf->type = MVNETA_TYPE_XDP_NDO; + } else { + page = unlikely(frag) ? skb_frag_page(frag) + : virt_to_page(xdpf->data); + dma_addr = page_pool_get_dma_addr(page); + if (unlikely(frag)) + dma_addr += skb_frag_off(frag); + else + dma_addr += sizeof(*xdpf) + xdpf->headroom; + dma_sync_single_for_device(dev, dma_addr, len, + DMA_BIDIRECTIONAL); + buf->type = MVNETA_TYPE_XDP_TX; + } + buf->xdpf = unlikely(i) ? NULL : xdpf; - tx_desc->command = MVNETA_TXD_FLZ_DESC; - tx_desc->buf_phys_addr = dma_addr; - tx_desc->data_size = xdpf->len; + tx_desc->command = unlikely(i) ? 0 : MVNETA_TXD_F_DESC; + tx_desc->buf_phys_addr = dma_addr; + tx_desc->data_size = len; + *nxmit_byte += len; - mvneta_txq_inc_put(txq); - txq->pending++; - txq->count++; + mvneta_txq_inc_put(txq); + } + /*last descriptor */ + tx_desc->command |= MVNETA_TXD_L_DESC | MVNETA_TXD_Z_PAD; + + txq->pending += num_frames; + txq->count += num_frames; return MVNETA_XDP_TX; + +unmap: + for (i--; i >= 0; i--) { + mvneta_txq_desc_put(txq); + tx_desc = txq->descs + txq->next_desc_to_proc; + dma_unmap_single(dev, tx_desc->buf_phys_addr, + tx_desc->data_size, + DMA_TO_DEVICE); + } + + return MVNETA_XDP_DROPPED; } static int @@ -2123,8 +2166,8 @@ mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp) struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); struct mvneta_tx_queue *txq; struct netdev_queue *nq; + int cpu, nxmit_byte = 0; struct xdp_frame *xdpf; - int cpu; u32 ret; xdpf = xdp_convert_buff_to_frame(xdp); @@ -2136,10 +2179,10 @@ mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp) nq = netdev_get_tx_queue(pp->dev, txq->id); __netif_tx_lock(nq, cpu); - ret = mvneta_xdp_submit_frame(pp, txq, xdpf, false); + ret = mvneta_xdp_submit_frame(pp, txq, xdpf, &nxmit_byte, false); if (ret == MVNETA_XDP_TX) { u64_stats_update_begin(&stats->syncp); - stats->es.ps.tx_bytes += xdpf->len; + stats->es.ps.tx_bytes += nxmit_byte; stats->es.ps.tx_packets++; stats->es.ps.xdp_tx++; u64_stats_update_end(&stats->syncp); @@ -2178,11 +2221,11 @@ mvneta_xdp_xmit(struct net_device *dev, int num_frame, __netif_tx_lock(nq, cpu); for (i = 0; i < num_frame; i++) { - ret = mvneta_xdp_submit_frame(pp, txq, frames[i], true); + ret = mvneta_xdp_submit_frame(pp, txq, frames[i], &nxmit_byte, + true); if (ret != MVNETA_XDP_TX) break; - nxmit_byte += frames[i]->len; nxmit++; } @@ -2205,7 +2248,6 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, struct bpf_prog *prog, struct xdp_buff *xdp, u32 frame_sz, struct mvneta_stats *stats) { - struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); unsigned int len, data_len, sync; u32 ret, act; @@ -2226,7 +2268,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, err = xdp_do_redirect(pp->dev, xdp, prog); if (unlikely(err)) { - mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync); + mvneta_xdp_put_buff(pp, rxq, xdp, sync); ret = MVNETA_XDP_DROPPED; } else { ret = MVNETA_XDP_REDIR; @@ -2237,7 +2279,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, case XDP_TX: ret = mvneta_xdp_xmit_back(pp, xdp); if (ret != MVNETA_XDP_TX) - mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync); + mvneta_xdp_put_buff(pp, rxq, xdp, sync); break; default: bpf_warn_invalid_xdp_action(pp->dev, prog, act); @@ -2246,7 +2288,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, trace_xdp_exception(pp->dev, prog, act); fallthrough; case XDP_DROP: - mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync); + mvneta_xdp_put_buff(pp, rxq, xdp, sync); ret = MVNETA_XDP_DROPPED; stats->xdp_drop++; break; @@ -2269,7 +2311,6 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp, int data_len = -MVNETA_MH_SIZE, len; struct net_device *dev = pp->dev; enum dma_data_direction dma_dir; - struct skb_shared_info *sinfo; if (*size > MVNETA_MAX_RX_BUF_SIZE) { len = MVNETA_MAX_RX_BUF_SIZE; @@ -2289,11 +2330,9 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp, /* Prefetch header */ prefetch(data); + xdp_buff_clear_frags_flag(xdp); xdp_prepare_buff(xdp, data, pp->rx_offset_correction + MVNETA_MH_SIZE, data_len, false); - - sinfo = xdp_get_shared_info_from_buff(xdp); - sinfo->nr_frags = 0; } static void @@ -2301,9 +2340,9 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp, struct mvneta_rx_desc *rx_desc, struct mvneta_rx_queue *rxq, struct xdp_buff *xdp, int *size, - struct skb_shared_info *xdp_sinfo, struct page *page) { + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); struct net_device *dev = pp->dev; enum dma_data_direction dma_dir; int data_len, len; @@ -2321,25 +2360,25 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp, len, dma_dir); rx_desc->buf_phys_addr = 0; - if (data_len > 0 && xdp_sinfo->nr_frags < MAX_SKB_FRAGS) { - skb_frag_t *frag = &xdp_sinfo->frags[xdp_sinfo->nr_frags++]; + if (!xdp_buff_has_frags(xdp)) + sinfo->nr_frags = 0; + + if (data_len > 0 && sinfo->nr_frags < MAX_SKB_FRAGS) { + skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags++]; skb_frag_off_set(frag, pp->rx_offset_correction); skb_frag_size_set(frag, data_len); __skb_frag_set_page(frag, page); + + if (!xdp_buff_has_frags(xdp)) { + sinfo->xdp_frags_size = *size; + xdp_buff_set_frags_flag(xdp); + } + if (page_is_pfmemalloc(page)) + xdp_buff_set_frag_pfmemalloc(xdp); } else { page_pool_put_full_page(rxq->page_pool, page, true); } - - /* last fragment */ - if (len == *size) { - struct skb_shared_info *sinfo; - - sinfo = xdp_get_shared_info_from_buff(xdp); - sinfo->nr_frags = xdp_sinfo->nr_frags; - memcpy(sinfo->frags, xdp_sinfo->frags, - sinfo->nr_frags * sizeof(skb_frag_t)); - } *size -= len; } @@ -2348,8 +2387,11 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool, struct xdp_buff *xdp, u32 desc_status) { struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); - int i, num_frags = sinfo->nr_frags; struct sk_buff *skb; + u8 num_frags; + + if (unlikely(xdp_buff_has_frags(xdp))) + num_frags = sinfo->nr_frags; skb = build_skb(xdp->data_hard_start, PAGE_SIZE); if (!skb) @@ -2361,13 +2403,11 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool, skb_put(skb, xdp->data_end - xdp->data); skb->ip_summed = mvneta_rx_csum(pp, desc_status); - for (i = 0; i < num_frags; i++) { - skb_frag_t *frag = &sinfo->frags[i]; - - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - skb_frag_page(frag), skb_frag_off(frag), - skb_frag_size(frag), PAGE_SIZE); - } + if (unlikely(xdp_buff_has_frags(xdp))) + xdp_update_skb_shared_info(skb, num_frags, + sinfo->xdp_frags_size, + num_frags * xdp->frame_sz, + xdp_buff_is_frag_pfmemalloc(xdp)); return skb; } @@ -2379,7 +2419,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi, { int rx_proc = 0, rx_todo, refill, size = 0; struct net_device *dev = pp->dev; - struct skb_shared_info sinfo; struct mvneta_stats ps = {}; struct bpf_prog *xdp_prog; u32 desc_status, frame_sz; @@ -2388,8 +2427,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi, xdp_init_buff(&xdp_buf, PAGE_SIZE, &rxq->xdp_rxq); xdp_buf.data_hard_start = NULL; - sinfo.nr_frags = 0; - /* Get number of received packets */ rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq); @@ -2431,7 +2468,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi, } mvneta_swbm_add_rx_fragment(pp, rx_desc, rxq, &xdp_buf, - &size, &sinfo, page); + &size, page); } /* Middle or Last descriptor */ if (!(rx_status & MVNETA_RXD_LAST_DESC)) @@ -2439,7 +2476,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi, continue; if (size) { - mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1); + mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1); goto next; } @@ -2451,7 +2488,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi, if (IS_ERR(skb)) { struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); - mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1); + mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1); u64_stats_update_begin(&stats->syncp); stats->es.skb_alloc_error++; @@ -2468,11 +2505,10 @@ static int mvneta_rx_swbm(struct napi_struct *napi, napi_gro_receive(napi, skb); next: xdp_buf.data_hard_start = NULL; - sinfo.nr_frags = 0; } if (xdp_buf.data_hard_start) - mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1); + mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1); if (ps.xdp_redirect) xdp_do_flush_map(); @@ -3260,7 +3296,8 @@ static int mvneta_create_page_pool(struct mvneta_port *pp, return err; } - err = xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0); + err = __xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0, + PAGE_SIZE); if (err < 0) goto err_free_pp; @@ -3740,6 +3777,7 @@ static void mvneta_percpu_disable(void *arg) static int mvneta_change_mtu(struct net_device *dev, int mtu) { struct mvneta_port *pp = netdev_priv(dev); + struct bpf_prog *prog = pp->xdp_prog; int ret; if (!IS_ALIGNED(MVNETA_RX_PKT_SIZE(mtu), 8)) { @@ -3748,8 +3786,11 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu) mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8); } - if (pp->xdp_prog && mtu > MVNETA_MAX_RX_BUF_SIZE) { - netdev_info(dev, "Illegal MTU value %d for XDP mode\n", mtu); + if (prog && !prog->aux->xdp_has_frags && + mtu > MVNETA_MAX_RX_BUF_SIZE) { + netdev_info(dev, "Illegal MTU %d for XDP prog without frags\n", + mtu); + return -EINVAL; } @@ -3969,6 +4010,15 @@ static const struct phylink_pcs_ops mvneta_phylink_pcs_ops = { .pcs_an_restart = mvneta_pcs_an_restart, }; +static struct phylink_pcs *mvneta_mac_select_pcs(struct phylink_config *config, + phy_interface_t interface) +{ + struct net_device *ndev = to_net_dev(config->dev); + struct mvneta_port *pp = netdev_priv(ndev); + + return &pp->phylink_pcs; +} + static int mvneta_mac_prepare(struct phylink_config *config, unsigned int mode, phy_interface_t interface) { @@ -4169,13 +4219,14 @@ static void mvneta_mac_link_up(struct phylink_config *config, mvneta_port_up(pp); if (phy && pp->eee_enabled) { - pp->eee_active = phy_init_eee(phy, 0) >= 0; + pp->eee_active = phy_init_eee(phy, false) >= 0; mvneta_set_eee(pp, pp->eee_active && pp->tx_lpi_enabled); } } static const struct phylink_mac_ops mvneta_phylink_ops = { .validate = phylink_generic_validate, + .mac_select_pcs = mvneta_mac_select_pcs, .mac_prepare = mvneta_mac_prepare, .mac_config = mvneta_mac_config, .mac_finish = mvneta_mac_finish, @@ -4490,8 +4541,9 @@ static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog, struct mvneta_port *pp = netdev_priv(dev); struct bpf_prog *old_prog; - if (prog && dev->mtu > MVNETA_MAX_RX_BUF_SIZE) { - NL_SET_ERR_MSG_MOD(extack, "MTU too large for XDP"); + if (prog && !prog->aux->xdp_has_frags && + dev->mtu > MVNETA_MAX_RX_BUF_SIZE) { + NL_SET_ERR_MSG_MOD(extack, "prog does not support XDP frags"); return -EOPNOTSUPP; } @@ -5321,26 +5373,62 @@ static int mvneta_probe(struct platform_device *pdev) if (!dev) return -ENOMEM; - dev->irq = irq_of_parse_and_map(dn, 0); - if (dev->irq == 0) - return -EINVAL; + dev->tx_queue_len = MVNETA_MAX_TXD; + dev->watchdog_timeo = 5 * HZ; + dev->netdev_ops = &mvneta_netdev_ops; + dev->ethtool_ops = &mvneta_eth_tool_ops; + + pp = netdev_priv(dev); + spin_lock_init(&pp->lock); + pp->dn = dn; + + pp->rxq_def = rxq_def; + pp->indir[0] = rxq_def; err = of_get_phy_mode(dn, &phy_mode); if (err) { dev_err(&pdev->dev, "incorrect phy-mode\n"); - goto err_free_irq; + return err; } + pp->phy_interface = phy_mode; + comphy = devm_of_phy_get(&pdev->dev, dn, NULL); - if (comphy == ERR_PTR(-EPROBE_DEFER)) { - err = -EPROBE_DEFER; - goto err_free_irq; - } else if (IS_ERR(comphy)) { + if (comphy == ERR_PTR(-EPROBE_DEFER)) + return -EPROBE_DEFER; + + if (IS_ERR(comphy)) comphy = NULL; + + pp->comphy = comphy; + + pp->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(pp->base)) + return PTR_ERR(pp->base); + + /* Get special SoC configurations */ + if (of_device_is_compatible(dn, "marvell,armada-3700-neta")) + pp->neta_armada3700 = true; + + dev->irq = irq_of_parse_and_map(dn, 0); + if (dev->irq == 0) + return -EINVAL; + + pp->clk = devm_clk_get(&pdev->dev, "core"); + if (IS_ERR(pp->clk)) + pp->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(pp->clk)) { + err = PTR_ERR(pp->clk); + goto err_free_irq; } - pp = netdev_priv(dev); - spin_lock_init(&pp->lock); + clk_prepare_enable(pp->clk); + + pp->clk_bus = devm_clk_get(&pdev->dev, "bus"); + if (!IS_ERR(pp->clk_bus)) + clk_prepare_enable(pp->clk_bus); + + pp->phylink_pcs.ops = &mvneta_phylink_pcs_ops; pp->phylink_config.dev = &dev->dev; pp->phylink_config.type = PHYLINK_NETDEV; @@ -5377,55 +5465,16 @@ static int mvneta_probe(struct platform_device *pdev) phy_mode, &mvneta_phylink_ops); if (IS_ERR(phylink)) { err = PTR_ERR(phylink); - goto err_free_irq; - } - - dev->tx_queue_len = MVNETA_MAX_TXD; - dev->watchdog_timeo = 5 * HZ; - dev->netdev_ops = &mvneta_netdev_ops; - - dev->ethtool_ops = &mvneta_eth_tool_ops; - - pp->phylink = phylink; - pp->comphy = comphy; - pp->phy_interface = phy_mode; - pp->dn = dn; - - pp->rxq_def = rxq_def; - pp->indir[0] = rxq_def; - - /* Get special SoC configurations */ - if (of_device_is_compatible(dn, "marvell,armada-3700-neta")) - pp->neta_armada3700 = true; - - pp->clk = devm_clk_get(&pdev->dev, "core"); - if (IS_ERR(pp->clk)) - pp->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(pp->clk)) { - err = PTR_ERR(pp->clk); - goto err_free_phylink; - } - - clk_prepare_enable(pp->clk); - - pp->clk_bus = devm_clk_get(&pdev->dev, "bus"); - if (!IS_ERR(pp->clk_bus)) - clk_prepare_enable(pp->clk_bus); - - pp->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(pp->base)) { - err = PTR_ERR(pp->base); goto err_clk; } - pp->phylink_pcs.ops = &mvneta_phylink_pcs_ops; - phylink_set_pcs(phylink, &pp->phylink_pcs); + pp->phylink = phylink; /* Alloc per-cpu port structure */ pp->ports = alloc_percpu(struct mvneta_pcpu_port); if (!pp->ports) { err = -ENOMEM; - goto err_clk; + goto err_free_phylink; } /* Alloc per-cpu stats */ @@ -5569,12 +5618,12 @@ err_netdev: free_percpu(pp->stats); err_free_ports: free_percpu(pp->ports); -err_clk: - clk_disable_unprepare(pp->clk_bus); - clk_disable_unprepare(pp->clk); err_free_phylink: if (pp->phylink) phylink_destroy(pp->phylink); +err_clk: + clk_disable_unprepare(pp->clk_bus); + clk_disable_unprepare(pp->clk); err_free_irq: irq_dispose_mapping(dev->irq); return err; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 66da31f30d3e..92c0ddb602ca 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -222,8 +222,11 @@ EXPORT_SYMBOL(otx2_set_mac_address); int otx2_hw_set_mtu(struct otx2_nic *pfvf, int mtu) { struct nix_frs_cfg *req; + u16 maxlen; int err; + maxlen = otx2_get_max_mtu(pfvf) + OTX2_ETH_HLEN + OTX2_HW_TIMESTAMP_LEN; + mutex_lock(&pfvf->mbox.lock); req = otx2_mbox_alloc_msg_nix_set_hw_frs(&pfvf->mbox); if (!req) { @@ -233,6 +236,10 @@ int otx2_hw_set_mtu(struct otx2_nic *pfvf, int mtu) req->maxlen = pfvf->netdev->mtu + OTX2_ETH_HLEN + OTX2_HW_TIMESTAMP_LEN; + /* Use max receive length supported by hardware for loopback devices */ + if (is_otx2_lbkvf(pfvf->pdev)) + req->maxlen = maxlen; + err = otx2_sync_mbox_msg(&pfvf->mbox); mutex_unlock(&pfvf->mbox.lock); return err; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 14509fc64cce..56be20053931 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -178,6 +178,9 @@ struct otx2_hw { u16 rqpool_cnt; u16 sqpool_cnt; +#define OTX2_DEFAULT_RBUF_LEN 2048 + u16 rbuf_len; + /* NPA */ u32 stack_pg_ptrs; /* No of ptrs per stack page */ u32 stack_pg_bytes; /* Size of stack page */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index d85db90632d6..abe5267210ef 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -371,6 +371,7 @@ static void otx2_get_ringparam(struct net_device *netdev, ring->rx_pending = qs->rqe_cnt ? qs->rqe_cnt : Q_COUNT(Q_SIZE_256); ring->tx_max_pending = Q_COUNT(Q_SIZE_MAX); ring->tx_pending = qs->sqe_cnt ? qs->sqe_cnt : Q_COUNT(Q_SIZE_4K); + kernel_ring->rx_buf_len = pfvf->hw.rbuf_len; } static int otx2_set_ringparam(struct net_device *netdev, @@ -379,6 +380,8 @@ static int otx2_set_ringparam(struct net_device *netdev, struct netlink_ext_ack *extack) { struct otx2_nic *pfvf = netdev_priv(netdev); + u32 rx_buf_len = kernel_ring->rx_buf_len; + u32 old_rx_buf_len = pfvf->hw.rbuf_len; bool if_up = netif_running(netdev); struct otx2_qset *qs = &pfvf->qset; u32 rx_count, tx_count; @@ -386,6 +389,15 @@ static int otx2_set_ringparam(struct net_device *netdev, if (ring->rx_mini_pending || ring->rx_jumbo_pending) return -EINVAL; + /* Hardware supports max size of 32k for a receive buffer + * and 1536 is typical ethernet frame size. + */ + if (rx_buf_len && (rx_buf_len < 1536 || rx_buf_len > 32768)) { + netdev_err(netdev, + "Receive buffer range is 1536 - 32768"); + return -EINVAL; + } + /* Permitted lengths are 16 64 256 1K 4K 16K 64K 256K 1M */ rx_count = ring->rx_pending; /* On some silicon variants a skid or reserved CQEs are @@ -403,7 +415,8 @@ static int otx2_set_ringparam(struct net_device *netdev, Q_COUNT(Q_SIZE_4K), Q_COUNT(Q_SIZE_MAX)); tx_count = Q_COUNT(Q_SIZE(tx_count, 3)); - if (tx_count == qs->sqe_cnt && rx_count == qs->rqe_cnt) + if (tx_count == qs->sqe_cnt && rx_count == qs->rqe_cnt && + rx_buf_len == old_rx_buf_len) return 0; if (if_up) @@ -413,6 +426,8 @@ static int otx2_set_ringparam(struct net_device *netdev, qs->sqe_cnt = tx_count; qs->rqe_cnt = rx_count; + pfvf->hw.rbuf_len = rx_buf_len; + if (if_up) return netdev->netdev_ops->ndo_open(netdev); @@ -1207,6 +1222,7 @@ end: static const struct ethtool_ops otx2_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES, + .supported_ring_params = ETHTOOL_RING_USE_RX_BUF_LEN, .get_link = otx2_get_link, .get_drvinfo = otx2_get_drvinfo, .get_strings = otx2_get_strings, @@ -1326,6 +1342,7 @@ static int otx2vf_get_link_ksettings(struct net_device *netdev, static const struct ethtool_ops otx2vf_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES, + .supported_ring_params = ETHTOOL_RING_USE_RX_BUF_LEN, .get_link = otx2_get_link, .get_drvinfo = otx2vf_get_drvinfo, .get_strings = otx2vf_get_strings, diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index d39341e4ab37..86c1c2f77bd7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1311,6 +1311,9 @@ static int otx2_get_rbuf_size(struct otx2_nic *pf, int mtu) int total_size; int rbuf_size; + if (pf->hw.rbuf_len) + return ALIGN(pf->hw.rbuf_len, OTX2_ALIGN) + OTX2_HEAD_ROOM; + /* The data transferred by NIX to memory consists of actual packet * plus additional data which has timestamp and/or EDSA/HIGIG2 * headers if interface is configured in corresponding modes. @@ -2625,6 +2628,7 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) hw->tx_queues = qcount; hw->tot_tx_queues = qcount; hw->max_queues = qcount; + hw->rbuf_len = OTX2_DEFAULT_RBUF_LEN; num_vec = pci_msix_vec_count(pdev); hw->irq_name = devm_kmalloc_array(&hw->pdev->dev, num_vec, NAME_SIZE, diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 925b74ebb8b0..d96c8903c67e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -586,6 +586,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) hw->tx_queues = qcount; hw->max_queues = qcount; hw->tot_tx_queues = qcount; + hw->rbuf_len = OTX2_DEFAULT_RBUF_LEN; hw->irq_name = devm_kmalloc_array(&hw->pdev->dev, num_vec, NAME_SIZE, GFP_KERNEL); diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index 89ca7960b225..4cd0747edaff 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1556,6 +1556,7 @@ static int mtk_star_probe(struct platform_device *pdev) return devm_register_netdev(dev, ndev); } +#ifdef CONFIG_OF static const struct of_device_id mtk_star_of_match[] = { { .compatible = "mediatek,mt8516-eth", }, { .compatible = "mediatek,mt8518-eth", }, @@ -1563,6 +1564,7 @@ static const struct of_device_id mtk_star_of_match[] = { { } }; MODULE_DEVICE_TABLE(of, mtk_star_of_match); +#endif static SIMPLE_DEV_PM_OPS(mtk_star_pm_ops, mtk_star_suspend, mtk_star_resume); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c index b0de6b999675..2b53738938a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c @@ -7,7 +7,8 @@ static bool tc_act_can_offload_accept(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { return true; } @@ -20,7 +21,7 @@ tc_act_parse_accept(struct mlx5e_tc_act_parse_state *parse_state, { attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; - attr->flags |= MLX5_ESW_ATTR_FLAG_ACCEPT; + attr->flags |= MLX5_ATTR_FLAG_ACCEPT; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h index 26efa33de56f..bfbc91c116a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h @@ -16,12 +16,12 @@ struct mlx5e_tc_act_parse_state { unsigned int num_actions; struct mlx5e_tc_flow *flow; struct netlink_ext_ack *extack; + bool ct; bool encap; bool decap; bool mpls_push; bool ptype_host; const struct ip_tunnel_info *tun_info; - struct pedit_headers_action hdrs[__PEDIT_CMD_MAX]; int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS]; int if_count; struct mlx5_tc_ct_priv *ct_priv; @@ -30,7 +30,8 @@ struct mlx5e_tc_act_parse_state { struct mlx5e_tc_act { bool (*can_offload)(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index); + int act_index, + struct mlx5_flow_attr *attr); int (*parse_action)(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c index 29920ef0180a..c0f08ae6a57f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c @@ -38,11 +38,12 @@ csum_offload_supported(struct mlx5e_priv *priv, static bool tc_act_can_offload_csum(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct mlx5e_tc_flow *flow = parse_state->flow; - return csum_offload_supported(flow->priv, flow->attr->action, + return csum_offload_supported(flow->priv, attr->action, act->csum_flags, parse_state->extack); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c index 06ec30cdb269..85f0cb88127f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c @@ -8,8 +8,10 @@ static bool tc_act_can_offload_ct(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { + bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR; struct netlink_ext_ack *extack = parse_state->extack; if (flow_flag_test(parse_state->flow, SAMPLE)) { @@ -18,6 +20,11 @@ tc_act_can_offload_ct(struct mlx5e_tc_act_parse_state *parse_state, return false; } + if (parse_state->ct && !clear_action) { + NL_SET_ERR_MSG_MOD(extack, "Multiple CT actions are not supoported"); + return false; + } + return true; } @@ -27,6 +34,7 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) { + bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR; int err; err = mlx5_tc_ct_parse_action(parse_state->ct_priv, attr, @@ -35,11 +43,16 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, if (err) return err; - flow_flag_set(parse_state->flow, CT); if (mlx5e_is_eswitch_flow(parse_state->flow)) attr->esw_attr->split_count = attr->esw_attr->out_count; + if (!clear_action) { + attr->flags |= MLX5_ATTR_FLAG_CT; + flow_flag_set(parse_state->flow, CT); + parse_state->ct = true; + } + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c index 2e29a23bed12..3d5f23636a02 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c @@ -7,7 +7,8 @@ static bool tc_act_can_offload_drop(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c index f44515061228..fb1be822ad25 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c @@ -8,6 +8,7 @@ static int validate_goto_chain(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, const struct flow_action_entry *act, struct netlink_ext_ack *extack) { @@ -32,7 +33,7 @@ validate_goto_chain(struct mlx5e_priv *priv, } if (!mlx5_chains_backwards_supported(chains) && - dest_chain <= flow->attr->chain) { + dest_chain <= attr->chain) { NL_SET_ERR_MSG_MOD(extack, "Goto lower numbered chain isn't supported"); return -EOPNOTSUPP; } @@ -43,8 +44,8 @@ validate_goto_chain(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (flow->attr->action & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | - MLX5_FLOW_CONTEXT_ACTION_DECAP) && + if (attr->action & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | + MLX5_FLOW_CONTEXT_ACTION_DECAP) && !reformat_and_fwd) { NL_SET_ERR_MSG_MOD(extack, "Goto chain is not allowed if action has reformat or decap"); @@ -57,12 +58,13 @@ validate_goto_chain(struct mlx5e_priv *priv, static bool tc_act_can_offload_goto(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; struct mlx5e_tc_flow *flow = parse_state->flow; - if (validate_goto_chain(flow->priv, flow, act, extack)) + if (validate_goto_chain(flow->priv, flow, attr, act, extack)) return false; return true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c index d775c3d9edf3..e8d227595b3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c @@ -7,7 +7,8 @@ static bool tc_act_can_offload_mark(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { if (act->mark & ~MLX5E_TC_FLOW_ID_MASK) { NL_SET_ERR_MSG_MOD(parse_state->extack, "Bad flow mark, only 16 bit supported"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c index c614fc7fdc9c..99fb98b3e71b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c @@ -99,7 +99,8 @@ get_fdb_out_dev(struct net_device *uplink_dev, struct net_device *out_dev) static bool tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; struct mlx5e_tc_flow *flow = parse_state->flow; @@ -108,8 +109,8 @@ tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_priv *priv = flow->priv; struct mlx5_esw_flow_attr *esw_attr; - parse_attr = flow->attr->parse_attr; - esw_attr = flow->attr->esw_attr; + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; if (!out_dev) { /* out_dev is NULL when filters with diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c index 2c74567b6d25..16681cf6e93e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c @@ -7,7 +7,8 @@ static bool tc_act_can_offload_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; struct mlx5e_tc_flow *flow = parse_state->flow; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c index 784fc4f68b1e..40332949509a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c @@ -8,7 +8,8 @@ static bool tc_act_can_offload_mpls_push(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; struct mlx5e_priv *priv = parse_state->flow->priv; @@ -36,13 +37,13 @@ tc_act_parse_mpls_push(struct mlx5e_tc_act_parse_state *parse_state, static bool tc_act_can_offload_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; - struct mlx5e_tc_flow *flow = parse_state->flow; struct net_device *filter_dev; - filter_dev = flow->attr->parse_attr->filter_dev; + filter_dev = attr->parse_attr->filter_dev; /* we only support mpls pop if it is the first action * and the filter net device is bareudp. Subsequent diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c index 79addbbef087..39f8f71bed9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c @@ -46,9 +46,9 @@ static int parse_pedit_to_modify_hdr(struct mlx5e_priv *priv, const struct flow_action_entry *act, int namespace, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, struct netlink_ext_ack *extack) { + struct pedit_headers_action *hdrs = parse_attr->hdrs; u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1; u8 htype = act->mangle.htype; int err = -EOPNOTSUPP; @@ -110,20 +110,20 @@ int mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv, const struct flow_action_entry *act, int namespace, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { if (flow && flow_flag_test(flow, L3_TO_L2_DECAP)) return parse_pedit_to_reformat(act, parse_attr, extack); - return parse_pedit_to_modify_hdr(priv, act, namespace, parse_attr, hdrs, extack); + return parse_pedit_to_modify_hdr(priv, act, namespace, parse_attr, extack); } static bool tc_act_can_offload_pedit(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { return true; } @@ -141,8 +141,7 @@ tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state, ns_type = mlx5e_get_flow_namespace(flow); - err = mlx5e_tc_act_pedit_parse_action(flow->priv, act, ns_type, - attr->parse_attr, parse_state->hdrs, + err = mlx5e_tc_act_pedit_parse_action(flow->priv, act, ns_type, attr->parse_attr, flow, parse_state->extack); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h index da8ab03af58f..258f030a2dc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h @@ -25,7 +25,6 @@ int mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv, const struct flow_action_entry *act, int namespace, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c index 0819110193dc..6454b031ff7a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c @@ -7,7 +7,8 @@ static bool tc_act_can_offload_ptype(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c index 1c32e24e528d..9dd244147385 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c @@ -7,16 +7,16 @@ static bool tc_act_can_offload_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; - struct mlx5e_tc_flow *flow = parse_state->flow; struct mlx5e_tc_flow_parse_attr *parse_attr; struct net_device *out_dev = act->dev; struct mlx5_esw_flow_attr *esw_attr; - parse_attr = flow->attr->parse_attr; - esw_attr = flow->attr->esw_attr; + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; if (!out_dev) return false; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c index 6699bdf5cf01..539fea13ce9f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c @@ -8,7 +8,8 @@ static bool tc_act_can_offload_sample(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; @@ -27,11 +28,7 @@ tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) { - struct mlx5e_sample_attr *sample_attr; - - sample_attr = kzalloc(sizeof(*attr->sample_attr), GFP_KERNEL); - if (!sample_attr) - return -ENOMEM; + struct mlx5e_sample_attr *sample_attr = &attr->sample_attr; sample_attr->rate = act->sample.rate; sample_attr->group_num = act->sample.psample_group->group_num; @@ -39,7 +36,7 @@ tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state, if (act->sample.truncate) sample_attr->trunc_size = act->sample.trunc_size; - attr->sample_attr = sample_attr; + attr->flags |= MLX5_ATTR_FLAG_SAMPLE; flow_flag_set(parse_state->flow, SAMPLE); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c index 046b64c2cec4..9ea293fdc434 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c @@ -7,7 +7,8 @@ static bool tc_act_can_offload_trap(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; @@ -27,7 +28,7 @@ tc_act_parse_trap(struct mlx5e_tc_act_parse_state *parse_state, { attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; - attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; + attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c index 6f4a2cf46afd..b4fa2de9711d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c @@ -8,7 +8,8 @@ static bool tc_act_can_offload_tun_encap(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { if (!act->tunnel) { NL_SET_ERR_MSG_MOD(parse_state->extack, @@ -34,7 +35,8 @@ tc_act_parse_tun_encap(struct mlx5e_tc_act_parse_state *parse_state, static bool tc_act_can_offload_tun_decap(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c index 70fc0c2d8813..6378b7558ba2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c @@ -9,7 +9,6 @@ static int add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, u32 *action, struct netlink_ext_ack *extack) { const struct flow_action_entry prio_tag_act = { @@ -26,7 +25,7 @@ add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv, }; return mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, - &prio_tag_act, parse_attr, hdrs, action, + &prio_tag_act, parse_attr, action, extack); } @@ -151,7 +150,8 @@ mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv, static bool tc_act_can_offload_vlan(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { return true; } @@ -170,8 +170,8 @@ tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state, /* Replace vlan pop+push with vlan modify */ attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; err = mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, act, - attr->parse_attr, parse_state->hdrs, - &attr->action, parse_state->extack); + attr->parse_attr, &attr->action, + parse_state->extack); } else { err = parse_tc_vlan_action(priv, act, esw_attr, &attr->action, parse_state->extack); @@ -191,7 +191,6 @@ tc_act_post_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5_flow_attr *attr) { struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; - struct pedit_headers_action *hdrs = parse_state->hdrs; struct netlink_ext_ack *extack = parse_state->extack; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; int err; @@ -202,7 +201,7 @@ tc_act_post_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state, * tag rewrite. */ attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; - err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs, + err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, &attr->action, extack); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h index 3d62f13ab61f..2fa58c6f44eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h @@ -24,7 +24,6 @@ int mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace, const struct flow_action_entry *act, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, u32 *action, struct netlink_ext_ack *extack); #endif /* __MLX5_EN_TC_ACT_VLAN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c index 63e36e7f53e3..28444d4ffd73 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c @@ -12,7 +12,6 @@ int mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace, const struct flow_action_entry *act, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, u32 *action, struct netlink_ext_ack *extack) { u16 mask16 = VLAN_VID_MASK; @@ -44,7 +43,7 @@ mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace, return -EOPNOTSUPP; } - err = mlx5e_tc_act_pedit_parse_action(priv, &pedit_act, namespace, parse_attr, hdrs, + err = mlx5e_tc_act_pedit_parse_action(priv, &pedit_act, namespace, parse_attr, NULL, extack); *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; @@ -54,7 +53,8 @@ mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace, static bool tc_act_can_offload_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { return true; } @@ -69,8 +69,7 @@ tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state, int err; ns_type = mlx5e_get_flow_namespace(parse_state->flow); - err = mlx5e_tc_act_vlan_add_rewrite_action(priv, ns_type, act, - attr->parse_attr, parse_state->hdrs, + err = mlx5e_tc_act_vlan_add_rewrite_action(priv, ns_type, act, attr->parse_attr, &attr->action, parse_state->extack); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c index 31b4e39be2d3..9e0e229cf164 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c @@ -101,6 +101,7 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *at post_attr->inner_match_level = MLX5_MATCH_NONE; post_attr->outer_match_level = MLX5_MATCH_NONE; post_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); + post_attr->flags &= ~MLX5_ATTR_FLAG_SAMPLE; handle->ns_type = post_act->ns_type; /* Splits were handled before post action */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c index ff4b4f8a5a9d..32230e677029 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c @@ -403,7 +403,7 @@ add_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow, post_attr->chain = 0; post_attr->prio = 0; post_attr->ft = default_tbl; - post_attr->flags = MLX5_ESW_ATTR_FLAG_NO_IN_PORT; + post_attr->flags = MLX5_ATTR_FLAG_NO_IN_PORT; /* When offloading sample and encap action, if there is no valid * neigh data struct, a slow path rule is offloaded first. Source @@ -492,8 +492,7 @@ del_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow, struct mlx5_flow_handle * mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *attr, - u32 tunnel_id) + struct mlx5_flow_attr *attr) { struct mlx5e_post_act_handle *post_act_handle = NULL; struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; @@ -502,6 +501,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sample_flow *sample_flow; struct mlx5e_sample_attr *sample_attr; struct mlx5_flow_attr *pre_attr; + u32 tunnel_id = attr->tunnel_id; struct mlx5_eswitch *esw; u32 default_tbl_id; u32 obj_id; @@ -513,7 +513,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL); if (!sample_flow) return ERR_PTR(-ENOMEM); - sample_attr = attr->sample_attr; + sample_attr = &attr->sample_attr; sample_attr->sample_flow = sample_flow; /* For NICs with reg_c_preserve support or decap action, use @@ -546,6 +546,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, err = PTR_ERR(sample_flow->sampler); goto err_sampler; } + sample_attr->sampler_id = sample_flow->sampler->sampler_id; /* Create an id mapping reg_c0 value to sample object. */ restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE; @@ -580,13 +581,12 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, if (tunnel_id) pre_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; pre_attr->modify_hdr = sample_flow->restore->modify_hdr; - pre_attr->flags = MLX5_ESW_ATTR_FLAG_SAMPLE; + pre_attr->flags = MLX5_ATTR_FLAG_SAMPLE; pre_attr->inner_match_level = attr->inner_match_level; pre_attr->outer_match_level = attr->outer_match_level; pre_attr->chain = attr->chain; pre_attr->prio = attr->prio; - pre_attr->sample_attr = attr->sample_attr; - sample_attr->sampler_id = sample_flow->sampler->sampler_id; + pre_attr->sample_attr = *sample_attr; pre_esw_attr = pre_attr->esw_attr; pre_esw_attr->in_mdev = esw_attr->in_mdev; pre_esw_attr->in_rep = esw_attr->in_rep; @@ -633,11 +633,11 @@ mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample, * will hit fw syndromes. */ esw = tc_psample->esw; - sample_flow = attr->sample_attr->sample_flow; + sample_flow = attr->sample_attr.sample_flow; mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, sample_flow->pre_attr); sample_restore_put(tc_psample, sample_flow->restore); - mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr->restore_obj_id); + mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr.restore_obj_id); sampler_put(tc_psample, sample_flow->sampler); if (sample_flow->post_act_handle) mlx5e_tc_post_act_del(tc_psample->post_act, sample_flow->post_act_handle); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h index 9ef8a49d7801..a569367eae4d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h @@ -26,8 +26,7 @@ void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj struct mlx5_flow_handle * mlx5e_tc_sample_offload(struct mlx5e_tc_psample *sample_priv, struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *attr, - u32 tunnel_id); + struct mlx5_flow_attr *attr); void mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *sample_priv, @@ -45,8 +44,7 @@ mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample); static inline struct mlx5_flow_handle * mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *attr, - u32 tunnel_id) + struct mlx5_flow_attr *attr) { return ERR_PTR(-EOPNOTSUPP); } static inline void diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 4a0d38d219ed..0f4d3b9dd979 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -809,7 +809,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct; attr->outer_match_level = MLX5_MATCH_L4; attr->counter = entry->counter->counter; - attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT; + attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT; if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) attr->esw_attr->in_mdev = priv->mdev; @@ -1787,7 +1787,6 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) */ static struct mlx5_flow_handle * __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *orig_spec, struct mlx5_flow_attr *attr) { @@ -1871,12 +1870,10 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, */ if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) && attr->chain == 0) { - u32 tun_id = mlx5e_tc_get_flow_tun_id(flow); - err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type, TUNNEL_TO_REG, - tun_id); + attr->tunnel_id); if (err) { ct_dbg("Failed to set tunnel register mapping"); goto err_mapping; @@ -1926,87 +1923,19 @@ err_ft: return ERR_PTR(err); } -static struct mlx5_flow_handle * -__mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv, - struct mlx5_flow_spec *orig_spec, - struct mlx5_flow_attr *attr, - struct mlx5e_tc_mod_hdr_acts *mod_acts) -{ - struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); - u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type); - struct mlx5_flow_attr *pre_ct_attr; - struct mlx5_modify_hdr *mod_hdr; - struct mlx5_flow_handle *rule; - struct mlx5_ct_flow *ct_flow; - int err; - - ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); - if (!ct_flow) - return ERR_PTR(-ENOMEM); - - /* Base esw attributes on original rule attribute */ - pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); - if (!pre_ct_attr) { - err = -ENOMEM; - goto err_attr; - } - - memcpy(pre_ct_attr, attr, attr_sz); - - mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type, - mod_acts->num_actions, - mod_acts->actions); - if (IS_ERR(mod_hdr)) { - err = PTR_ERR(mod_hdr); - ct_dbg("Failed to add create ct clear mod hdr"); - goto err_mod_hdr; - } - - pre_ct_attr->modify_hdr = mod_hdr; - - rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - ct_dbg("Failed to add ct clear rule"); - goto err_insert; - } - - attr->ct_attr.ct_flow = ct_flow; - ct_flow->pre_ct_attr = pre_ct_attr; - ct_flow->pre_ct_rule = rule; - return rule; - -err_insert: - mlx5_modify_header_dealloc(priv->mdev, mod_hdr); -err_mod_hdr: - netdev_warn(priv->netdev, - "Failed to offload ct clear flow, err %d\n", err); - kfree(pre_ct_attr); -err_attr: - kfree(ct_flow); - - return ERR_PTR(err); -} - struct mlx5_flow_handle * mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) { - bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR; struct mlx5_flow_handle *rule; if (!priv) return ERR_PTR(-EOPNOTSUPP); mutex_lock(&priv->control_lock); - - if (clear_action) - rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts); - else - rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr); + rule = __mlx5_tc_ct_flow_offload(priv, spec, attr); mutex_unlock(&priv->control_lock); return rule; @@ -2014,14 +1943,13 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, static void __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, - struct mlx5e_tc_flow *flow, - struct mlx5_ct_flow *ct_flow) + struct mlx5_ct_flow *ct_flow, + struct mlx5_flow_attr *attr) { struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr; struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); - mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, - pre_ct_attr); + mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr); mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); if (ct_flow->post_act_handle) { @@ -2036,7 +1964,6 @@ __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, void mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr) { struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow; @@ -2048,7 +1975,7 @@ mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, return; mutex_lock(&priv->control_lock); - __mlx5_tc_ct_delete_flow(priv, flow, ct_flow); + __mlx5_tc_ct_delete_flow(priv, ct_flow, attr); mutex_unlock(&priv->control_lock); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 99662af1e41a..2b21c7b97a52 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -116,13 +116,11 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_handle * mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); void mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr); bool @@ -183,7 +181,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, static inline struct mlx5_flow_handle * mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) @@ -193,7 +190,6 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, static inline void mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr) { } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h index f832c26ff2c3..9ffba584b982 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -37,6 +37,7 @@ struct mlx5e_tc_flow_parse_attr { const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; struct net_device *filter_dev; struct mlx5_flow_spec spec; + struct pedit_headers_action hdrs[__PEDIT_CMD_MAX]; struct mlx5e_tc_mod_hdr_acts mod_hdr_acts; int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; struct ethhdr eth; @@ -107,10 +108,19 @@ struct mlx5e_tc_flow { struct rcu_head rcu_head; struct completion init_done; struct completion del_hw_done; - int tunnel_id; /* the mapped tunnel id of this flow */ struct mlx5_flow_attr *attr; }; +struct mlx5_flow_handle * +mlx5e_tc_rule_offload(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); + +void +mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer); struct mlx5_flow_handle * @@ -173,6 +183,7 @@ struct mlx5_flow_handle * mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec); + void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 9918ed8c059b..1f8d339ff0c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -488,12 +488,14 @@ static void mlx5e_detach_encap_route(struct mlx5e_priv *priv, int out_index); void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, int out_index) + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + int out_index) { struct mlx5e_encap_entry *e = flow->encaps[out_index].e; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - if (flow->attr->esw_attr->dests[out_index].flags & + if (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) mlx5e_detach_encap_route(priv, flow, out_index); @@ -733,6 +735,7 @@ static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv) static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, struct mlx5e_encap_entry *e, bool new_encap_entry, unsigned long tbl_time_before, @@ -740,6 +743,7 @@ static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, struct net_device *mirred_dev, int out_index, struct netlink_ext_ack *extack, @@ -748,7 +752,6 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; - struct mlx5_flow_attr *attr = flow->attr; const struct ip_tunnel_info *tun_info; unsigned long tbl_time_before = 0; struct mlx5e_encap_entry *e; @@ -834,8 +837,8 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, e->compl_result = 1; attach_flow: - err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before, - out_index); + err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created, + tbl_time_before, out_index); if (err) goto out_err; @@ -1198,6 +1201,7 @@ out: static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, struct mlx5e_encap_entry *e, bool new_encap_entry, unsigned long tbl_time_before, @@ -1206,7 +1210,6 @@ static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; unsigned long tbl_time_after = tbl_time_before; struct mlx5e_tc_flow_parse_attr *parse_attr; - struct mlx5_flow_attr *attr = flow->attr; const struct ip_tunnel_info *tun_info; struct mlx5_esw_flow_attr *esw_attr; struct mlx5e_route_entry *r; @@ -1377,7 +1380,7 @@ static void mlx5e_reoffload_encap(struct mlx5e_priv *priv, continue; } - err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow); + err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr); if (err) { mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d", err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h index 3391504d9a08..d542b8476491 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h @@ -7,15 +7,19 @@ #include "tc_priv.h" void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, int out_index); + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + int out_index); int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, struct net_device *mirred_dev, int out_index, struct netlink_ext_ack *extack, struct net_device **encap_dev, bool *encap_valid); + int mlx5e_attach_decap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 2022fa4a9598..099d4ce16049 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -295,13 +295,62 @@ mlx5_tc_rule_delete(struct mlx5e_priv *priv, if (is_mdev_switchdev_mode(priv->mdev)) { mlx5_eswitch_del_offloaded_rule(esw, rule, attr); - return; } mlx5e_del_offloaded_nic_rule(priv, rule, attr); } +struct mlx5_flow_handle * +mlx5e_tc_rule_offload(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (attr->flags & MLX5_ATTR_FLAG_CT) { + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = + &attr->parse_attr->mod_hdr_acts; + + return mlx5_tc_ct_flow_offload(get_ct_priv(priv), + spec, attr, + mod_hdr_acts); + } + + if (!is_mdev_switchdev_mode(priv->mdev)) + return mlx5e_add_offloaded_nic_rule(priv, spec, attr); + + if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) + return mlx5e_tc_sample_offload(get_sample_priv(priv), spec, attr); + + return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); +} + +void +mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (attr->flags & MLX5_ATTR_FLAG_CT) { + mlx5_tc_ct_delete_flow(get_ct_priv(priv), attr); + return; + } + + if (!is_mdev_switchdev_mode(priv->mdev)) { + mlx5e_del_offloaded_nic_rule(priv, rule, attr); + return; + } + + if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) { + mlx5e_tc_sample_unoffload(get_sample_priv(priv), rule, attr); + return; + } + + mlx5_eswitch_del_offloaded_rule(esw, rule, attr); +} + int mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, @@ -1039,6 +1088,21 @@ err_ft_get: } static int +alloc_flow_attr_counter(struct mlx5_core_dev *counter_dev, + struct mlx5_flow_attr *attr) + +{ + struct mlx5_fc *counter; + + counter = mlx5_fc_create(counter_dev, true); + if (IS_ERR(counter)) + return PTR_ERR(counter); + + attr->counter = counter; + return 0; +} + +static int mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) @@ -1046,7 +1110,6 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; struct mlx5_core_dev *dev = priv->mdev; - struct mlx5_fc *counter; int err; parse_attr = attr->parse_attr; @@ -1058,11 +1121,9 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - counter = mlx5_fc_create(dev, true); - if (IS_ERR(counter)) - return PTR_ERR(counter); - - attr->counter = counter; + err = alloc_flow_attr_counter(dev, attr); + if (err) + return err; } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { @@ -1072,8 +1133,8 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, return err; } - if (flow_flag_test(flow, CT)) - flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), flow, &parse_attr->spec, + if (attr->flags & MLX5_ATTR_FLAG_CT) + flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), &parse_attr->spec, attr, &parse_attr->mod_hdr_acts); else flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec, @@ -1107,8 +1168,8 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, flow_flag_clear(flow, OFFLOADED); - if (flow_flag_test(flow, CT)) - mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); + if (attr->flags & MLX5_ATTR_FLAG_CT) + mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr); else if (!IS_ERR_OR_NULL(flow->rule[0])) mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr); @@ -1142,40 +1203,27 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr) { - struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; struct mlx5_flow_handle *rule; - if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) + if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); - if (flow_flag_test(flow, CT)) { - mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; - - rule = mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv), - flow, spec, attr, - mod_hdr_acts); - } else if (flow_flag_test(flow, SAMPLE)) { - rule = mlx5e_tc_sample_offload(get_sample_priv(flow->priv), spec, attr, - mlx5e_tc_get_flow_tun_id(flow)); - } else { - rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); - } + rule = mlx5e_tc_rule_offload(flow->priv, spec, attr); if (IS_ERR(rule)) return rule; if (attr->esw_attr->split_count) { flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr); - if (IS_ERR(flow->rule[1])) { - if (flow_flag_test(flow, CT)) - mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); - else - mlx5_eswitch_del_offloaded_rule(esw, rule, attr); - return flow->rule[1]; - } + if (IS_ERR(flow->rule[1])) + goto err_rule1; } return rule; + +err_rule1: + mlx5e_tc_rule_unoffload(flow->priv, rule, attr); + return flow->rule[1]; } void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, @@ -1184,19 +1232,13 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, { flow_flag_clear(flow, OFFLOADED); - if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) - goto offload_rule_0; + if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) + return mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); if (attr->esw_attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); - if (flow_flag_test(flow, CT)) - mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); - else if (flow_flag_test(flow, SAMPLE)) - mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr); - else -offload_rule_0: - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); + mlx5e_tc_rule_unoffload(flow->priv, flow->rule[0], attr); } struct mlx5_flow_handle * @@ -1214,7 +1256,7 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ); slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; slow_attr->esw_attr->split_count = 0; - slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; + slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH; rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); if (!IS_ERR(rule)) @@ -1239,7 +1281,7 @@ void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ); slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; slow_attr->esw_attr->split_count = 0; - slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; + slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH; mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); flow_flag_clear(flow, SLOW); kfree(slow_attr); @@ -1348,10 +1390,10 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro } int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct mlx5e_tc_flow *flow) + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr) { - struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &parse_attr->mod_hdr_acts; + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; struct mlx5_modify_hdr *mod_hdr; mod_hdr = mlx5_modify_header_alloc(priv->mdev, @@ -1361,13 +1403,101 @@ int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv, if (IS_ERR(mod_hdr)) return PTR_ERR(mod_hdr); - WARN_ON(flow->attr->modify_hdr); - flow->attr->modify_hdr = mod_hdr; + WARN_ON(attr->modify_hdr); + attr->modify_hdr = mod_hdr; return 0; } static int +set_encap_dests(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct netlink_ext_ack *extack, + bool *encap_valid, + bool *vf_tun) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_esw_flow_attr *esw_attr; + struct net_device *encap_dev = NULL; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *out_priv; + int out_index; + int err = 0; + + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; + *vf_tun = false; + *encap_valid = true; + + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { + struct net_device *out_dev; + int mirred_ifindex; + + if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) + continue; + + mirred_ifindex = parse_attr->mirred_ifindex[out_index]; + out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex); + if (!out_dev) { + NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found"); + err = -ENODEV; + goto out; + } + err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index, + extack, &encap_dev, encap_valid); + dev_put(out_dev); + if (err) + goto out; + + if (esw_attr->dests[out_index].flags & + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE && + !esw_attr->dest_int_port) + *vf_tun = true; + + out_priv = netdev_priv(encap_dev); + rpriv = out_priv->ppriv; + esw_attr->dests[out_index].rep = rpriv->rep; + esw_attr->dests[out_index].mdev = out_priv->mdev; + } + + if (*vf_tun && esw_attr->out_count > 1) { + NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported"); + err = -EOPNOTSUPP; + goto out; + } + +out: + return err; +} + +static void +clean_encap_dests(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + bool *vf_tun) +{ + struct mlx5_esw_flow_attr *esw_attr; + int out_index; + + esw_attr = attr->esw_attr; + *vf_tun = false; + + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { + if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) + continue; + + if (esw_attr->dests[out_index].flags & + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE && + !esw_attr->dest_int_port) + *vf_tun = true; + + mlx5e_detach_encap(priv, flow, attr, out_index); + kfree(attr->parse_attr->tun_info[out_index]); + } +} + +static int mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) @@ -1375,15 +1505,10 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; - bool vf_tun = false, encap_valid = true; - struct net_device *encap_dev = NULL; struct mlx5_esw_flow_attr *esw_attr; - struct mlx5e_rep_priv *rpriv; - struct mlx5e_priv *out_priv; - struct mlx5_fc *counter; + bool vf_tun, encap_valid; u32 max_prio, max_chain; int err = 0; - int out_index; parse_attr = attr->parse_attr; esw_attr = attr->esw_attr; @@ -1472,50 +1597,17 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, esw_attr->int_port = int_port; } - for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { - struct net_device *out_dev; - int mirred_ifindex; - - if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) - continue; - - mirred_ifindex = parse_attr->mirred_ifindex[out_index]; - out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex); - if (!out_dev) { - NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found"); - err = -ENODEV; - goto err_out; - } - err = mlx5e_attach_encap(priv, flow, out_dev, out_index, - extack, &encap_dev, &encap_valid); - dev_put(out_dev); - if (err) - goto err_out; - - if (esw_attr->dests[out_index].flags & - MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE && - !esw_attr->dest_int_port) - vf_tun = true; - out_priv = netdev_priv(encap_dev); - rpriv = out_priv->ppriv; - esw_attr->dests[out_index].rep = rpriv->rep; - esw_attr->dests[out_index].mdev = out_priv->mdev; - } - - if (vf_tun && esw_attr->out_count > 1) { - NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported"); - err = -EOPNOTSUPP; + err = set_encap_dests(priv, flow, attr, extack, &encap_valid, &vf_tun); + if (err) goto err_out; - } err = mlx5_eswitch_add_vlan_action(esw, attr); if (err) goto err_out; - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && - !(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) { + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { if (vf_tun) { - err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow); + err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr); if (err) goto err_out; } else { @@ -1526,13 +1618,9 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - counter = mlx5_fc_create(esw_attr->counter_dev, true); - if (IS_ERR(counter)) { - err = PTR_ERR(counter); + err = alloc_flow_attr_counter(esw_attr->counter_dev, attr); + if (err) goto err_out; - } - - attr->counter = counter; } /* we get here if one of the following takes place: @@ -1576,8 +1664,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_flow_attr *attr = flow->attr; struct mlx5_esw_flow_attr *esw_attr; - bool vf_tun = false; - int out_index; + bool vf_tun; esw_attr = attr->esw_attr; mlx5e_put_flow_tunnel_id(flow); @@ -1601,16 +1688,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (flow->decap_route) mlx5e_detach_decap_route(priv, flow); - for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { - if (esw_attr->dests[out_index].flags & - MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE && - !esw_attr->dest_int_port) - vf_tun = true; - if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) { - mlx5e_detach_encap(priv, flow, out_index); - kfree(attr->parse_attr->tun_info[out_index]); - } - } + clean_encap_dests(priv, flow, attr, &vf_tun); mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr); @@ -1634,7 +1712,6 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (flow_flag_test(flow, L3_TO_L2_DECAP)) mlx5e_detach_decap(priv, flow); - kfree(attr->sample_attr); kvfree(attr->esw_attr->rx_tun_attr); kvfree(attr->parse_attr); kfree(flow->attr); @@ -1854,7 +1931,7 @@ static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv, attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; } - flow->tunnel_id = value; + flow->attr->tunnel_id = value; return 0; err_set: @@ -1868,8 +1945,8 @@ err_enc_opts: static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow) { - u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK; - u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS; + u32 enc_opts_id = flow->attr->tunnel_id & ENC_OPTS_BITS_MASK; + u32 tun_id = flow->attr->tunnel_id >> ENC_OPTS_BITS; struct mlx5_rep_uplink_priv *uplink_priv; struct mlx5e_rep_priv *uplink_rpriv; struct mlx5_eswitch *esw; @@ -1885,11 +1962,6 @@ static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow) enc_opts_id); } -u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow) -{ - return flow->tunnel_id; -} - void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev, struct flow_match_basic *match, bool outer, void *headers_c, void *headers_v) @@ -2811,14 +2883,15 @@ static unsigned long mask_to_le(unsigned long mask, int size) return mask; } + static int offload_pedit_fields(struct mlx5e_priv *priv, int namespace, - struct pedit_headers_action *hdrs, struct mlx5e_tc_flow_parse_attr *parse_attr, u32 *action_flags, struct netlink_ext_ack *extack) { struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; + struct pedit_headers_action *hdrs = parse_attr->hdrs; void *headers_c, *headers_v, *action, *vals_p; u32 *s_masks_p, *a_masks_p, s_mask, a_mask; struct mlx5e_tc_mod_hdr_acts *mod_acts; @@ -2944,35 +3017,43 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, static const struct pedit_headers zero_masks = {}; -static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, - u32 *action_flags, - struct netlink_ext_ack *extack) +static int verify_offload_pedit_fields(struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct netlink_ext_ack *extack) { struct pedit_headers *cmd_masks; - int err; u8 cmd; - err = offload_pedit_fields(priv, namespace, hdrs, parse_attr, - action_flags, extack); - if (err < 0) - goto out_dealloc_parsed_actions; - for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) { - cmd_masks = &hdrs[cmd].masks; + cmd_masks = &parse_attr->hdrs[cmd].masks; if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) { - NL_SET_ERR_MSG_MOD(extack, - "attempt to offload an unsupported field"); + NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field"); netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd); print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS, 16, 1, cmd_masks, sizeof(zero_masks), true); - err = -EOPNOTSUPP; - goto out_dealloc_parsed_actions; + return -EOPNOTSUPP; } } return 0; +} + +static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr, + u32 *action_flags, + struct netlink_ext_ack *extack) +{ + int err; + + err = offload_pedit_fields(priv, namespace, parse_attr, action_flags, extack); + if (err) + goto out_dealloc_parsed_actions; + + err = verify_offload_pedit_fields(priv, parse_attr, extack); + if (err) + goto out_dealloc_parsed_actions; + + return 0; out_dealloc_parsed_actions: mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); @@ -3257,7 +3338,7 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, return -EOPNOTSUPP; } - if (!tc_act->can_offload(parse_state, act, i)) + if (!tc_act->can_offload(parse_state, act, i, attr)) return -EOPNOTSUPP; err = tc_act->parse_action(parse_state, act, priv, attr); @@ -3268,7 +3349,7 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, flow_action_for_each(i, act, flow_action) { tc_act = mlx5e_tc_act_get(act->id, ns_type); if (!tc_act || !tc_act->post_parse || - !tc_act->can_offload(parse_state, act, i)) + !tc_act->can_offload(parse_state, act, i, attr)) continue; err = tc_act->post_parse(parse_state, priv, attr); @@ -3283,10 +3364,10 @@ static int actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr, - struct pedit_headers_action *hdrs, struct netlink_ext_ack *extack) { struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + struct pedit_headers_action *hdrs = parse_attr->hdrs; enum mlx5_flow_namespace_type ns_type; int err; @@ -3296,8 +3377,7 @@ actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv, ns_type = mlx5e_get_flow_namespace(flow); - err = alloc_tc_pedit_action(priv, ns_type, parse_attr, hdrs, - &attr->action, extack); + err = alloc_tc_pedit_action(priv, ns_type, parse_attr, &attr->action, extack); if (err) return err; @@ -3345,7 +3425,6 @@ parse_tc_nic_actions(struct mlx5e_priv *priv, struct mlx5e_tc_act_parse_state *parse_state; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; - struct pedit_headers_action *hdrs; int err; err = flow_action_supported(flow_action, extack); @@ -3357,13 +3436,12 @@ parse_tc_nic_actions(struct mlx5e_priv *priv, parse_state = &parse_attr->parse_state; mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack); parse_state->ct_priv = get_ct_priv(priv); - hdrs = parse_state->hdrs; err = parse_tc_actions(parse_state, flow_action); if (err) return err; - err = actions_prepare_mod_hdr_actions(priv, flow, attr, hdrs, extack); + err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack); if (err) return err; @@ -3468,7 +3546,6 @@ parse_tc_fdb_actions(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; struct mlx5_esw_flow_attr *esw_attr; - struct pedit_headers_action *hdrs; int err; err = flow_action_supported(flow_action, extack); @@ -3480,7 +3557,6 @@ parse_tc_fdb_actions(struct mlx5e_priv *priv, parse_state = &parse_attr->parse_state; mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack); parse_state->ct_priv = get_ct_priv(priv); - hdrs = parse_state->hdrs; err = parse_tc_actions(parse_state, flow_action); if (err) @@ -3494,7 +3570,7 @@ parse_tc_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - err = actions_prepare_mod_hdr_actions(priv, flow, attr, hdrs, extack); + err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 5ffae9b13066..c6221728b767 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -71,7 +71,7 @@ struct mlx5_flow_attr { struct mlx5_fc *counter; struct mlx5_modify_hdr *modify_hdr; struct mlx5_ct_attr ct_attr; - struct mlx5e_sample_attr *sample_attr; + struct mlx5e_sample_attr sample_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; u32 chain; u16 prio; @@ -82,6 +82,7 @@ struct mlx5_flow_attr { u8 outer_match_level; u8 ip_version; u8 tun_ip_version; + int tunnel_id; /* mapped tunnel id */ u32 flags; union { struct mlx5_esw_flow_attr esw_attr[0]; @@ -89,6 +90,23 @@ struct mlx5_flow_attr { }; }; +enum { + MLX5_ATTR_FLAG_VLAN_HANDLED = BIT(0), + MLX5_ATTR_FLAG_SLOW_PATH = BIT(1), + MLX5_ATTR_FLAG_NO_IN_PORT = BIT(2), + MLX5_ATTR_FLAG_SRC_REWRITE = BIT(3), + MLX5_ATTR_FLAG_SAMPLE = BIT(4), + MLX5_ATTR_FLAG_ACCEPT = BIT(5), + MLX5_ATTR_FLAG_CT = BIT(6), +}; + +/* Returns true if any of the flags that require skipping further TC/NF processing are set. */ +static inline bool +mlx5e_tc_attr_flags_skip(u32 attr_flags) +{ + return attr_flags & (MLX5_ATTR_FLAG_SLOW_PATH | MLX5_ATTR_FLAG_ACCEPT); +} + struct mlx5_rx_tun_attr { u16 decap_vport; union { @@ -243,11 +261,8 @@ int mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev, u32 data); int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct mlx5e_tc_flow *flow); - -struct mlx5e_tc_flow; -u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow); + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr); void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev, struct flow_match_basic *match, bool outer, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c index c275fe028b6d..0abef71cb839 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c @@ -86,7 +86,7 @@ mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw, mlx5_eswitch_is_vf_vport(esw, vport_num) && esw->dev == dest_mdev && attr->ip_version && - attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE; + attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE; } u16 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index ead5e8acc8be..44321cdfe928 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -448,22 +448,6 @@ enum { MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE = BIT(2), }; -enum { - MLX5_ESW_ATTR_FLAG_VLAN_HANDLED = BIT(0), - MLX5_ESW_ATTR_FLAG_SLOW_PATH = BIT(1), - MLX5_ESW_ATTR_FLAG_NO_IN_PORT = BIT(2), - MLX5_ESW_ATTR_FLAG_SRC_REWRITE = BIT(3), - MLX5_ESW_ATTR_FLAG_SAMPLE = BIT(4), - MLX5_ESW_ATTR_FLAG_ACCEPT = BIT(5), -}; - -/* Returns true if any of the flags that require skipping further TC/NF processing are set. */ -static inline bool -mlx5_esw_attr_flags_skip(u32 attr_flags) -{ - return attr_flags & (MLX5_ESW_ATTR_FLAG_SLOW_PATH | MLX5_ESW_ATTR_FLAG_ACCEPT); -} - struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *in_rep; struct mlx5_core_dev *in_mdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 9a7b25692505..2b31d8bbd1b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -180,7 +180,7 @@ esw_setup_decap_indir(struct mlx5_eswitch *esw, { struct mlx5_flow_table *ft; - if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE)) + if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE)) return -EOPNOTSUPP; ft = mlx5_esw_indir_table_get(esw, attr, spec, @@ -201,12 +201,12 @@ esw_cleanup_decap_indir(struct mlx5_eswitch *esw, static int esw_setup_sampler_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, - struct mlx5_flow_attr *attr, + u32 sampler_id, int i) { flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER; - dest[i].sampler_id = attr->sample_attr->sampler_id; + dest[i].sampler_id = sampler_id; return 0; } @@ -297,7 +297,7 @@ esw_setup_chain_src_port_rewrite(struct mlx5_flow_destination *dest, struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; int err; - if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE)) + if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE)) return -EOPNOTSUPP; /* flow steering cannot handle more than one dest with the same ft @@ -364,7 +364,7 @@ esw_setup_indir_table(struct mlx5_flow_destination *dest, struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; int j, err; - if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE)) + if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE)) return -EOPNOTSUPP; for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) { @@ -463,15 +463,16 @@ esw_setup_dests(struct mlx5_flow_destination *dest, if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) && esw_src_port_rewrite_supported(esw)) - attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE; + attr->flags |= MLX5_ATTR_FLAG_SRC_REWRITE; - if (attr->flags & MLX5_ESW_ATTR_FLAG_SAMPLE) { - esw_setup_sampler_dest(dest, flow_act, attr, *i); + if (attr->flags & MLX5_ATTR_FLAG_SAMPLE && + !(attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)) { + esw_setup_sampler_dest(dest, flow_act, attr->sample_attr.sampler_id, *i); (*i)++; } else if (attr->dest_ft) { esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i); (*i)++; - } else if (mlx5_esw_attr_flags_skip(attr->flags)) { + } else if (mlx5e_tc_attr_flags_skip(attr->flags)) { esw_setup_slow_path_dest(dest, flow_act, chains, *i); (*i)++; } else if (attr->dest_chain) { @@ -498,7 +499,7 @@ esw_cleanup_dests(struct mlx5_eswitch *esw, if (attr->dest_ft) { esw_cleanup_decap_indir(esw, attr); - } else if (!mlx5_esw_attr_flags_skip(attr->flags)) { + } else if (!mlx5e_tc_attr_flags_skip(attr->flags)) { if (attr->dest_chain) esw_cleanup_chain_dest(chains, attr->dest_chain, 1, 0); else if (esw_is_indir_table(esw, attr)) @@ -589,7 +590,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, else fdb = attr->ft; - if (!(attr->flags & MLX5_ESW_ATTR_FLAG_NO_IN_PORT)) + if (!(attr->flags & MLX5_ATTR_FLAG_NO_IN_PORT)) mlx5_eswitch_set_rule_source_port(esw, spec, attr, esw_attr->in_mdev->priv.eswitch, esw_attr->in_rep->vport); @@ -721,7 +722,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, mlx5_del_flow_rules(rule); - if (!mlx5_esw_attr_flags_skip(attr->flags)) { + if (!mlx5e_tc_attr_flags_skip(attr->flags)) { /* unref the term table */ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { if (esw_attr->dests[i].termtbl) @@ -863,7 +864,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, if (err) goto unlock; - attr->flags &= ~MLX5_ESW_ATTR_FLAG_VLAN_HANDLED; + attr->flags &= ~MLX5_ATTR_FLAG_VLAN_HANDLED; vport = esw_vlan_action_get_vport(esw_attr, push, pop); @@ -871,7 +872,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, /* tracks VF --> wire rules without vlan push action */ if (esw_attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) { vport->vlan_refcount++; - attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED; + attr->flags |= MLX5_ATTR_FLAG_VLAN_HANDLED; } goto unlock; @@ -902,7 +903,7 @@ skip_set_push: } out: if (!err) - attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED; + attr->flags |= MLX5_ATTR_FLAG_VLAN_HANDLED; unlock: mutex_unlock(&esw->state_lock); return err; @@ -921,7 +922,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) return 0; - if (!(attr->flags & MLX5_ESW_ATTR_FLAG_VLAN_HANDLED)) + if (!(attr->flags & MLX5_ATTR_FLAG_VLAN_HANDLED)) return 0; push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index 182306bbefaa..ee568bf34ae2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -219,12 +219,14 @@ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table) || !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level) || - mlx5_esw_attr_flags_skip(attr->flags) || + mlx5e_tc_attr_flags_skip(attr->flags) || (!mlx5_eswitch_offload_is_uplink_port(esw, spec) && !esw_attr->int_port)) return false; /* push vlan on RX */ - if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) + if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH && + !(mlx5_fs_get_capabilities(esw->dev, MLX5_FLOW_NAMESPACE_FDB) & + MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX)) return true; /* hairpin */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index dafe341358c7..a0ac17c3f12f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -152,6 +152,12 @@ static int mlx5_cmd_stub_destroy_ns(struct mlx5_flow_root_namespace *ns) return 0; } +static u32 mlx5_cmd_stub_get_capabilities(struct mlx5_flow_root_namespace *ns, + enum fs_flow_table_type ft_type) +{ + return 0; +} + static int mlx5_cmd_set_slave_root_fdb(struct mlx5_core_dev *master, struct mlx5_core_dev *slave, bool ft_id_valid, @@ -971,6 +977,12 @@ static int mlx5_cmd_create_match_definer(struct mlx5_flow_root_namespace *ns, return err ? err : MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); } +static u32 mlx5_cmd_get_capabilities(struct mlx5_flow_root_namespace *ns, + enum fs_flow_table_type ft_type) +{ + return 0; +} + static const struct mlx5_flow_cmds mlx5_flow_cmds = { .create_flow_table = mlx5_cmd_create_flow_table, .destroy_flow_table = mlx5_cmd_destroy_flow_table, @@ -990,6 +1002,7 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds = { .set_peer = mlx5_cmd_stub_set_peer, .create_ns = mlx5_cmd_stub_create_ns, .destroy_ns = mlx5_cmd_stub_destroy_ns, + .get_capabilities = mlx5_cmd_get_capabilities, }; static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = { @@ -1011,6 +1024,7 @@ static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = { .set_peer = mlx5_cmd_stub_set_peer, .create_ns = mlx5_cmd_stub_create_ns, .destroy_ns = mlx5_cmd_stub_destroy_ns, + .get_capabilities = mlx5_cmd_stub_get_capabilities, }; const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index 220ec632d35a..274004e80f03 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -101,6 +101,9 @@ struct mlx5_flow_cmds { u16 format_id, u32 *match_mask); int (*destroy_match_definer)(struct mlx5_flow_root_namespace *ns, int definer_id); + + u32 (*get_capabilities)(struct mlx5_flow_root_namespace *ns, + enum fs_flow_table_type ft_type); }; int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index b628917e38e4..42f878e21fea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -3040,6 +3040,22 @@ void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev) steering->esw_ingress_root_ns = NULL; } +u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_namespace *ns; + + ns = mlx5_get_flow_namespace(dev, type); + if (!ns) + return 0; + + root = find_root(&ns->node); + if (!root) + return 0; + + return root->cmds->get_capabilities(root, root->table_type); +} + static int init_egress_root_ns(struct mlx5_flow_steering *steering) { int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 5469b08d635f..c488a7c5b07e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -120,6 +120,11 @@ enum mlx5_flow_steering_mode { MLX5_FLOW_STEERING_MODE_SMFS }; +enum mlx5_flow_steering_capabilty { + MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX = 1UL << 0, + MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX = 1UL << 1, +}; + struct mlx5_flow_steering { struct mlx5_core_dev *dev; enum mlx5_flow_steering_mode mode; @@ -301,6 +306,8 @@ void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev); int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports); void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev); +u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type); + struct mlx5_flow_root_namespace *find_root(struct fs_node *node); #define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index a476da2424f8..033757bfdf64 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -735,6 +735,16 @@ static int mlx5_cmd_dr_destroy_ns(struct mlx5_flow_root_namespace *ns) return mlx5dr_domain_destroy(ns->fs_dr_domain.dr_domain); } +static u32 mlx5_cmd_dr_get_capabilities(struct mlx5_flow_root_namespace *ns, + enum fs_flow_table_type ft_type) +{ + if (ft_type != FS_FT_FDB || + MLX5_CAP_GEN(ns->dev, steering_format_version) != MLX5_STEERING_FORMAT_CONNECTX_6DX) + return 0; + + return MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX | MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX; +} + bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev) { return mlx5dr_is_supported(dev); @@ -759,6 +769,7 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds_dr = { .set_peer = mlx5_cmd_dr_set_peer, .create_ns = mlx5_cmd_dr_create_ns, .destroy_ns = mlx5_cmd_dr_destroy_ns, + .get_capabilities = mlx5_cmd_dr_get_capabilities, }; const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 866b9357939b..f45df5fbdcc0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -212,6 +212,29 @@ struct mlxsw_event_listener_item { void *priv; }; +static const u8 mlxsw_core_trap_groups[] = { + MLXSW_REG_HTGT_TRAP_GROUP_EMAD, + MLXSW_REG_HTGT_TRAP_GROUP_CORE_EVENT, +}; + +static int mlxsw_core_trap_groups_set(struct mlxsw_core *mlxsw_core) +{ + char htgt_pl[MLXSW_REG_HTGT_LEN]; + int err; + int i; + + for (i = 0; i < ARRAY_SIZE(mlxsw_core_trap_groups); i++) { + mlxsw_reg_htgt_pack(htgt_pl, mlxsw_core_trap_groups[i], + MLXSW_REG_HTGT_INVALID_POLICER, + MLXSW_REG_HTGT_DEFAULT_PRIORITY, + MLXSW_REG_HTGT_DEFAULT_TC); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); + if (err) + return err; + } + return 0; +} + /****************** * EMAD processing ******************/ @@ -777,16 +800,10 @@ static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) if (err) goto err_trap_register; - err = mlxsw_core->driver->basic_trap_groups_set(mlxsw_core); - if (err) - goto err_emad_trap_set; mlxsw_core->emad.use_emad = true; return 0; -err_emad_trap_set: - mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_emad_rx_listener, - mlxsw_core); err_trap_register: destroy_workqueue(mlxsw_core->emad_wq); return err; @@ -1706,7 +1723,7 @@ static void mlxsw_core_health_listener_func(const struct mlxsw_reg_info *reg, } static const struct mlxsw_listener mlxsw_core_health_listener = - MLXSW_EVENTL(mlxsw_core_health_listener_func, MFDE, MFDE); + MLXSW_CORE_EVENTL(mlxsw_core_health_listener_func, MFDE); static int mlxsw_core_health_fw_fatal_dump_fatal_cause(const char *mfde_pl, @@ -2122,6 +2139,10 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, } } + err = mlxsw_core_trap_groups_set(mlxsw_core); + if (err) + goto err_trap_groups_set; + err = mlxsw_emad_init(mlxsw_core); if (err) goto err_emad_init; @@ -2181,6 +2202,7 @@ err_fw_rev_validate: err_register_params: mlxsw_emad_fini(mlxsw_core); err_emad_init: +err_trap_groups_set: kfree(mlxsw_core->lag.mapping); err_alloc_lag_mapping: mlxsw_ports_fini(mlxsw_core, reload); @@ -2540,6 +2562,45 @@ void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_trap_unregister); +int mlxsw_core_traps_register(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listeners, + size_t listeners_count, void *priv) +{ + int i, err; + + for (i = 0; i < listeners_count; i++) { + err = mlxsw_core_trap_register(mlxsw_core, + &listeners[i], + priv); + if (err) + goto err_listener_register; + } + return 0; + +err_listener_register: + for (i--; i >= 0; i--) { + mlxsw_core_trap_unregister(mlxsw_core, + &listeners[i], + priv); + } + return err; +} +EXPORT_SYMBOL(mlxsw_core_traps_register); + +void mlxsw_core_traps_unregister(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listeners, + size_t listeners_count, void *priv) +{ + int i; + + for (i = 0; i < listeners_count; i++) { + mlxsw_core_trap_unregister(mlxsw_core, + &listeners[i], + priv); + } +} +EXPORT_SYMBOL(mlxsw_core_traps_unregister); + int mlxsw_core_trap_state_set(struct mlxsw_core *mlxsw_core, const struct mlxsw_listener *listener, bool enabled) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index f30bb8614e69..6d304092f4e7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -163,6 +163,9 @@ struct mlxsw_listener { .enabled_on_register = true, \ } +#define MLXSW_CORE_EVENTL(_func, _trap_id) \ + MLXSW_EVENTL(_func, _trap_id, CORE_EVENT) + int mlxsw_core_rx_listener_register(struct mlxsw_core *mlxsw_core, const struct mlxsw_rx_listener *rxl, void *priv, bool enabled); @@ -181,6 +184,12 @@ int mlxsw_core_trap_register(struct mlxsw_core *mlxsw_core, void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core, const struct mlxsw_listener *listener, void *priv); +int mlxsw_core_traps_register(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listeners, + size_t listeners_count, void *priv); +void mlxsw_core_traps_unregister(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listeners, + size_t listeners_count, void *priv); int mlxsw_core_trap_state_set(struct mlxsw_core *mlxsw_core, const struct mlxsw_listener *listener, bool enabled); @@ -315,7 +324,6 @@ struct mlxsw_driver { const struct mlxsw_bus_info *mlxsw_bus_info, struct netlink_ext_ack *extack); void (*fini)(struct mlxsw_core *mlxsw_core); - int (*basic_trap_groups_set)(struct mlxsw_core *mlxsw_core); int (*port_type_set)(struct mlxsw_core *mlxsw_core, u16 local_port, enum devlink_port_type new_type); int (*port_split)(struct mlxsw_core *mlxsw_core, u16 local_port, diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c index 6dd4ae2f45f4..6ea4bf87be0b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -18,6 +18,7 @@ struct mlxsw_env_module_info { int num_ports_mapped; int num_ports_up; enum ethtool_module_power_mode_policy power_mode_policy; + enum mlxsw_reg_pmtm_module_type type; }; struct mlxsw_env { @@ -27,14 +28,47 @@ struct mlxsw_env { struct mlxsw_env_module_info module_info[]; }; -static int mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id, - bool *qsfp, bool *cmis) +static int __mlxsw_env_validate_module_type(struct mlxsw_core *core, u8 module) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(core); + int err; + + switch (mlxsw_env->module_info[module].type) { + case MLXSW_REG_PMTM_MODULE_TYPE_TWISTED_PAIR: + err = -EINVAL; + break; + default: + err = 0; + } + + return err; +} + +static int mlxsw_env_validate_module_type(struct mlxsw_core *core, u8 module) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(core); + int err; + + mutex_lock(&mlxsw_env->module_info_lock); + err = __mlxsw_env_validate_module_type(core, module); + mutex_unlock(&mlxsw_env->module_info_lock); + + return err; +} + +static int +mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id, bool *qsfp, + bool *cmis) { char mcia_pl[MLXSW_REG_MCIA_LEN]; char *eeprom_tmp; u8 ident; int err; + err = mlxsw_env_validate_module_type(core, id); + if (err) + return err; + mlxsw_reg_mcia_pack(mcia_pl, id, 0, MLXSW_REG_MCIA_PAGE0_LO_OFF, 0, 1, MLXSW_REG_MCIA_I2C_ADDR_LOW); err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl); @@ -206,7 +240,8 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module, return 0; } -int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module, +int mlxsw_env_get_module_info(struct net_device *netdev, + struct mlxsw_core *mlxsw_core, int module, struct ethtool_modinfo *modinfo) { u8 module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE]; @@ -215,6 +250,13 @@ int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module, unsigned int read_size; int err; + err = mlxsw_env_validate_module_type(mlxsw_core, module); + if (err) { + netdev_err(netdev, + "EEPROM is not equipped on port module type"); + return err; + } + err = mlxsw_env_query_module_eeprom(mlxsw_core, module, 0, offset, module_info, false, &read_size); if (err) @@ -356,6 +398,13 @@ mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, u8 module, { u32 bytes_read = 0; u16 device_addr; + int err; + + err = mlxsw_env_validate_module_type(mlxsw_core, module); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "EEPROM is not equipped on port module type"); + return err; + } /* Offset cannot be larger than 2 * ETH_MODULE_EEPROM_PAGE_LEN */ device_addr = page->offset; @@ -364,7 +413,6 @@ mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, u8 module, char mcia_pl[MLXSW_REG_MCIA_LEN]; char *eeprom_tmp; u8 size; - int err; size = min_t(u8, page->length - bytes_read, MLXSW_REG_MCIA_EEPROM_SIZE); @@ -419,6 +467,12 @@ int mlxsw_env_reset_module(struct net_device *netdev, mutex_lock(&mlxsw_env->module_info_lock); + err = __mlxsw_env_validate_module_type(mlxsw_core, module); + if (err) { + netdev_err(netdev, "Reset module is not supported on port module type\n"); + goto out; + } + if (mlxsw_env->module_info[module].num_ports_up) { netdev_err(netdev, "Cannot reset module when ports using it are administratively up\n"); err = -EINVAL; @@ -461,6 +515,12 @@ mlxsw_env_get_module_power_mode(struct mlxsw_core *mlxsw_core, u8 module, mutex_lock(&mlxsw_env->module_info_lock); + err = __mlxsw_env_validate_module_type(mlxsw_core, module); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Power mode is not supported on port module type"); + goto out; + } + params->policy = mlxsw_env->module_info[module].power_mode_policy; mlxsw_reg_mcion_pack(mcion_pl, module); @@ -571,6 +631,13 @@ mlxsw_env_set_module_power_mode(struct mlxsw_core *mlxsw_core, u8 module, mutex_lock(&mlxsw_env->module_info_lock); + err = __mlxsw_env_validate_module_type(mlxsw_core, module); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Power mode set is not supported on port module type"); + goto out; + } + if (mlxsw_env->module_info[module].power_mode_policy == policy) goto out; @@ -661,13 +728,12 @@ static int mlxsw_env_temp_event_set(struct mlxsw_core *mlxsw_core, return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtmp), mtmp_pl); } -static int mlxsw_env_module_temp_event_enable(struct mlxsw_core *mlxsw_core, - u8 module_count) +static int mlxsw_env_module_temp_event_enable(struct mlxsw_core *mlxsw_core) { int i, err, sensor_index; bool has_temp_sensor; - for (i = 0; i < module_count; i++) { + for (i = 0; i < mlxsw_core_env(mlxsw_core)->module_count; i++) { err = mlxsw_env_module_has_temp_sensor(mlxsw_core, i, &has_temp_sensor); if (err) @@ -759,7 +825,7 @@ mlxsw_env_mtwe_listener_func(const struct mlxsw_reg_info *reg, char *mtwe_pl, } static const struct mlxsw_listener mlxsw_env_temp_warn_listener = - MLXSW_EVENTL(mlxsw_env_mtwe_listener_func, MTWE, MTWE); + MLXSW_CORE_EVENTL(mlxsw_env_mtwe_listener_func, MTWE); static int mlxsw_env_temp_warn_event_register(struct mlxsw_core *mlxsw_core) { @@ -849,7 +915,7 @@ mlxsw_env_pmpe_listener_func(const struct mlxsw_reg_info *reg, char *pmpe_pl, } static const struct mlxsw_listener mlxsw_env_module_plug_listener = - MLXSW_EVENTL(mlxsw_env_pmpe_listener_func, PMPE, PMPE); + MLXSW_CORE_EVENTL(mlxsw_env_pmpe_listener_func, PMPE); static int mlxsw_env_module_plug_event_register(struct mlxsw_core *mlxsw_core) @@ -876,12 +942,11 @@ mlxsw_env_module_plug_event_unregister(struct mlxsw_env *mlxsw_env) } static int -mlxsw_env_module_oper_state_event_enable(struct mlxsw_core *mlxsw_core, - u8 module_count) +mlxsw_env_module_oper_state_event_enable(struct mlxsw_core *mlxsw_core) { int i, err; - for (i = 0; i < module_count; i++) { + for (i = 0; i < mlxsw_core_env(mlxsw_core)->module_count; i++) { char pmaos_pl[MLXSW_REG_PMAOS_LEN]; mlxsw_reg_pmaos_pack(pmaos_pl, i); @@ -999,6 +1064,28 @@ out_unlock: } EXPORT_SYMBOL(mlxsw_env_module_port_down); +static int +mlxsw_env_module_type_set(struct mlxsw_core *mlxsw_core) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + int i; + + for (i = 0; i < mlxsw_env->module_count; i++) { + char pmtm_pl[MLXSW_REG_PMTM_LEN]; + int err; + + mlxsw_reg_pmtm_pack(pmtm_pl, 0, i); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(pmtm), pmtm_pl); + if (err) + return err; + + mlxsw_env->module_info[i].type = + mlxsw_reg_pmtm_module_type_get(pmtm_pl); + } + + return 0; +} + int mlxsw_env_init(struct mlxsw_core *mlxsw_core, struct mlxsw_env **p_env) { char mgpir_pl[MLXSW_REG_MGPIR_LEN]; @@ -1037,17 +1124,21 @@ int mlxsw_env_init(struct mlxsw_core *mlxsw_core, struct mlxsw_env **p_env) if (err) goto err_module_plug_event_register; - err = mlxsw_env_module_oper_state_event_enable(mlxsw_core, - env->module_count); + err = mlxsw_env_module_oper_state_event_enable(mlxsw_core); if (err) goto err_oper_state_event_enable; - err = mlxsw_env_module_temp_event_enable(mlxsw_core, env->module_count); + err = mlxsw_env_module_temp_event_enable(mlxsw_core); if (err) goto err_temp_event_enable; + err = mlxsw_env_module_type_set(mlxsw_core); + if (err) + goto err_type_set; + return 0; +err_type_set: err_temp_event_enable: err_oper_state_event_enable: mlxsw_env_module_plug_event_unregister(env); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.h b/drivers/net/ethernet/mellanox/mlxsw/core_env.h index da121b1a84b4..ec6564e5d2ee 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.h @@ -12,7 +12,8 @@ struct ethtool_eeprom; int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module, int off, int *temp); -int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module, +int mlxsw_env_get_module_info(struct net_device *netdev, + struct mlxsw_core *mlxsw_core, int module, struct ethtool_modinfo *modinfo); int mlxsw_env_get_module_eeprom(struct net_device *netdev, diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c index 10d13f5f9c7d..9ac8ce01c061 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -110,7 +110,8 @@ static int mlxsw_m_get_module_info(struct net_device *netdev, struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev); struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core; - return mlxsw_env_get_module_info(core, mlxsw_m_port->module, modinfo); + return mlxsw_env_get_module_info(netdev, core, mlxsw_m_port->module, + modinfo); } static int diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 24cc65018b41..eebd0479b2bc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4482,6 +4482,8 @@ MLXSW_ITEM32(reg, ptys, ext_eth_proto_cap, 0x08, 0, 32); #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 BIT(21) #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 BIT(22) #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4 BIT(23) +#define MLXSW_REG_PTYS_ETH_SPEED_100BASE_T BIT(24) +#define MLXSW_REG_PTYS_ETH_SPEED_1000BASE_T BIT(25) #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR BIT(27) #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR BIT(28) #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR BIT(29) @@ -6062,6 +6064,58 @@ static inline void mlxsw_reg_pllp_unpack(char *payload, u8 *label_port, *slot_index = mlxsw_reg_pllp_slot_index_get(payload); } +/* PMTM - Port Module Type Mapping Register + * ---------------------------------------- + * The PMTM register allows query or configuration of module types. + * The register can only be set when the module is disabled by PMAOS register + */ +#define MLXSW_REG_PMTM_ID 0x5067 +#define MLXSW_REG_PMTM_LEN 0x10 + +MLXSW_REG_DEFINE(pmtm, MLXSW_REG_PMTM_ID, MLXSW_REG_PMTM_LEN); + +/* reg_pmtm_slot_index + * Slot index. + * Access: Index + */ +MLXSW_ITEM32(reg, pmtm, slot_index, 0x00, 24, 4); + +/* reg_pmtm_module + * Module number. + * Access: Index + */ +MLXSW_ITEM32(reg, pmtm, module, 0x00, 16, 8); + +enum mlxsw_reg_pmtm_module_type { + MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_4_LANES = 0, + MLXSW_REG_PMTM_MODULE_TYPE_QSFP = 1, + MLXSW_REG_PMTM_MODULE_TYPE_SFP = 2, + MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_SINGLE_LANE = 4, + MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_2_LANES = 8, + MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP4X = 10, + MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP2X = 11, + MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP1X = 12, + MLXSW_REG_PMTM_MODULE_TYPE_QSFP_DD = 14, + MLXSW_REG_PMTM_MODULE_TYPE_OSFP = 15, + MLXSW_REG_PMTM_MODULE_TYPE_SFP_DD = 16, + MLXSW_REG_PMTM_MODULE_TYPE_DSFP = 17, + MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP8X = 18, + MLXSW_REG_PMTM_MODULE_TYPE_TWISTED_PAIR = 19, +}; + +/* reg_pmtm_module_type + * Module type. + * Access: RW + */ +MLXSW_ITEM32(reg, pmtm, module_type, 0x04, 0, 5); + +static inline void mlxsw_reg_pmtm_pack(char *payload, u8 slot_index, u8 module) +{ + MLXSW_REG_ZERO(pmtm, payload); + mlxsw_reg_pmtm_slot_index_set(payload, slot_index); + mlxsw_reg_pmtm_module_set(payload, module); +} + /* HTGT - Host Trap Group Table * ---------------------------- * Configures the properties for forwarding to CPU. @@ -6087,9 +6141,7 @@ MLXSW_ITEM32(reg, htgt, type, 0x00, 8, 4); enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_EMAD, - MLXSW_REG_HTGT_TRAP_GROUP_MFDE, - MLXSW_REG_HTGT_TRAP_GROUP_MTWE, - MLXSW_REG_HTGT_TRAP_GROUP_PMPE, + MLXSW_REG_HTGT_TRAP_GROUP_CORE_EVENT, MLXSW_REG_HTGT_TRAP_GROUP_SP_STP, MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP, MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP, @@ -12568,6 +12620,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(pddr), MLXSW_REG(pmmp), MLXSW_REG(pllp), + MLXSW_REG(pmtm), MLXSW_REG(htgt), MLXSW_REG(hpkt), MLXSW_REG(rgcr), diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h index c7fc650608eb..daacf6291253 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/resources.h +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -33,6 +33,7 @@ enum mlxsw_res_id { MLXSW_RES_ID_ACL_MAX_REGIONS, MLXSW_RES_ID_ACL_MAX_GROUPS, MLXSW_RES_ID_ACL_MAX_GROUP_SIZE, + MLXSW_RES_ID_ACL_MAX_DEFAULT_ACTIONS, MLXSW_RES_ID_ACL_FLEX_KEYS, MLXSW_RES_ID_ACL_MAX_ACTION_PER_RULE, MLXSW_RES_ID_ACL_ACTIONS_PER_SET, @@ -90,6 +91,7 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_ACL_MAX_REGIONS] = 0x2903, [MLXSW_RES_ID_ACL_MAX_GROUPS] = 0x2904, [MLXSW_RES_ID_ACL_MAX_GROUP_SIZE] = 0x2905, + [MLXSW_RES_ID_ACL_MAX_DEFAULT_ACTIONS] = 0x2908, [MLXSW_RES_ID_ACL_FLEX_KEYS] = 0x2910, [MLXSW_RES_ID_ACL_MAX_ACTION_PER_RULE] = 0x2911, [MLXSW_RES_ID_ACL_ACTIONS_PER_SET] = 0x2912, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index aa411dec62f0..a4b94eecea98 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2148,13 +2148,11 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg, struct mlxsw_sp *mlxsw_sp = priv; struct mlxsw_sp_port *mlxsw_sp_port; enum mlxsw_reg_pude_oper_status status; - unsigned int max_ports; u16 local_port; - max_ports = mlxsw_core_max_ports(mlxsw_sp->core); local_port = mlxsw_reg_pude_local_port_get(pude_pl); - if (WARN_ON_ONCE(!local_port || local_port >= max_ports)) + if (WARN_ON_ONCE(!mlxsw_sp_local_port_is_valid(mlxsw_sp, local_port))) return; mlxsw_sp_port = mlxsw_sp->ports[local_port]; if (!mlxsw_sp_port) @@ -2393,45 +2391,6 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) return 0; } -static int mlxsw_sp_traps_register(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_listener listeners[], - size_t listeners_count) -{ - int i; - int err; - - for (i = 0; i < listeners_count; i++) { - err = mlxsw_core_trap_register(mlxsw_sp->core, - &listeners[i], - mlxsw_sp); - if (err) - goto err_listener_register; - - } - return 0; - -err_listener_register: - for (i--; i >= 0; i--) { - mlxsw_core_trap_unregister(mlxsw_sp->core, - &listeners[i], - mlxsw_sp); - } - return err; -} - -static void mlxsw_sp_traps_unregister(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_listener listeners[], - size_t listeners_count) -{ - int i; - - for (i = 0; i < listeners_count; i++) { - mlxsw_core_trap_unregister(mlxsw_sp->core, - &listeners[i], - mlxsw_sp); - } -} - static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp_trap *trap; @@ -2456,21 +2415,23 @@ static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_trap_groups_set; - err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp_listener, - ARRAY_SIZE(mlxsw_sp_listener)); + err = mlxsw_core_traps_register(mlxsw_sp->core, mlxsw_sp_listener, + ARRAY_SIZE(mlxsw_sp_listener), + mlxsw_sp); if (err) goto err_traps_register; - err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp->listeners, - mlxsw_sp->listeners_count); + err = mlxsw_core_traps_register(mlxsw_sp->core, mlxsw_sp->listeners, + mlxsw_sp->listeners_count, mlxsw_sp); if (err) goto err_extra_traps_init; return 0; err_extra_traps_init: - mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener, - ARRAY_SIZE(mlxsw_sp_listener)); + mlxsw_core_traps_unregister(mlxsw_sp->core, mlxsw_sp_listener, + ARRAY_SIZE(mlxsw_sp_listener), + mlxsw_sp); err_traps_register: err_trap_groups_set: err_cpu_policers_set: @@ -2480,10 +2441,11 @@ err_cpu_policers_set: static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp) { - mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp->listeners, - mlxsw_sp->listeners_count); - mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener, - ARRAY_SIZE(mlxsw_sp_listener)); + mlxsw_core_traps_unregister(mlxsw_sp->core, mlxsw_sp->listeners, + mlxsw_sp->listeners_count, + mlxsw_sp); + mlxsw_core_traps_unregister(mlxsw_sp->core, mlxsw_sp_listener, + ARRAY_SIZE(mlxsw_sp_listener), mlxsw_sp); kfree(mlxsw_sp->trap); } @@ -2528,42 +2490,6 @@ static void mlxsw_sp_lag_fini(struct mlxsw_sp *mlxsw_sp) kfree(mlxsw_sp->lags); } -static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core) -{ - char htgt_pl[MLXSW_REG_HTGT_LEN]; - int err; - - mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_EMAD, - MLXSW_REG_HTGT_INVALID_POLICER, - MLXSW_REG_HTGT_DEFAULT_PRIORITY, - MLXSW_REG_HTGT_DEFAULT_TC); - err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); - if (err) - return err; - - mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_MFDE, - MLXSW_REG_HTGT_INVALID_POLICER, - MLXSW_REG_HTGT_DEFAULT_PRIORITY, - MLXSW_REG_HTGT_DEFAULT_TC); - err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); - if (err) - return err; - - mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_MTWE, - MLXSW_REG_HTGT_INVALID_POLICER, - MLXSW_REG_HTGT_DEFAULT_PRIORITY, - MLXSW_REG_HTGT_DEFAULT_TC); - err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); - if (err) - return err; - - mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_PMPE, - MLXSW_REG_HTGT_INVALID_POLICER, - MLXSW_REG_HTGT_DEFAULT_PRIORITY, - MLXSW_REG_HTGT_DEFAULT_TC); - return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); -} - static const struct mlxsw_sp_ptp_ops mlxsw_sp1_ptp_ops = { .clock_init = mlxsw_sp1_ptp_clock_init, .clock_fini = mlxsw_sp1_ptp_clock_fini, @@ -3677,7 +3603,6 @@ static struct mlxsw_driver mlxsw_sp1_driver = { .fw_filename = MLXSW_SP1_FW_FILENAME, .init = mlxsw_sp1_init, .fini = mlxsw_sp_fini, - .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set, .port_split = mlxsw_sp_port_split, .port_unsplit = mlxsw_sp_port_unsplit, .sb_pool_get = mlxsw_sp_sb_pool_get, @@ -3717,7 +3642,6 @@ static struct mlxsw_driver mlxsw_sp2_driver = { .fw_filename = MLXSW_SP2_FW_FILENAME, .init = mlxsw_sp2_init, .fini = mlxsw_sp_fini, - .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set, .port_split = mlxsw_sp_port_split, .port_unsplit = mlxsw_sp_port_unsplit, .sb_pool_get = mlxsw_sp_sb_pool_get, @@ -3758,7 +3682,6 @@ static struct mlxsw_driver mlxsw_sp3_driver = { .fw_filename = MLXSW_SP3_FW_FILENAME, .init = mlxsw_sp3_init, .fini = mlxsw_sp_fini, - .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set, .port_split = mlxsw_sp_port_split, .port_unsplit = mlxsw_sp_port_unsplit, .sb_pool_get = mlxsw_sp_sb_pool_get, @@ -3797,7 +3720,6 @@ static struct mlxsw_driver mlxsw_sp4_driver = { .priv_size = sizeof(struct mlxsw_sp), .init = mlxsw_sp4_init, .fini = mlxsw_sp_fini, - .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set, .port_split = mlxsw_sp_port_split, .port_unsplit = mlxsw_sp_port_unsplit, .sb_pool_get = mlxsw_sp_sb_pool_get, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index bb2442e1f705..30942b6ffcf9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -481,6 +481,13 @@ int mlxsw_sp_port_vlan_classification_set(struct mlxsw_sp_port *mlxsw_sp_port, bool is_8021ad_tagged, bool is_8021q_tagged); +static inline bool +mlxsw_sp_local_port_is_valid(struct mlxsw_sp *mlxsw_sp, u16 local_port) +{ + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); + + return local_port < max_ports && local_port; +} /* spectrum_buffers.c */ struct mlxsw_sp_hdroom_prio { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c index a9fff8adc75e..d20e794e01ca 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c @@ -213,7 +213,6 @@ mlxsw_sp1_kvdl_part_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp1_kvdl_part *part; bool need_update = true; unsigned int nr_entries; - size_t usage_size; u64 resource_size; int err; @@ -225,8 +224,8 @@ mlxsw_sp1_kvdl_part_init(struct mlxsw_sp *mlxsw_sp, } nr_entries = div_u64(resource_size, info->alloc_size); - usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long); - part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL); + part = kzalloc(struct_size(part, usage, BITS_TO_LONGS(nr_entries)), + GFP_KERNEL); if (!part) return ERR_PTR(-ENOMEM); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c index ad69913f19c1..5b0210862655 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c @@ -77,7 +77,14 @@ static int mlxsw_sp2_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv, int i; int err; + /* Some TCAM regions are not exposed to the host and used internally + * by the device. Allocate KVDL entries for the default actions of + * these regions to avoid the host from overwriting them. + */ tcam->kvdl_count = _tcam->max_regions; + if (MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_DEFAULT_ACTIONS)) + tcam->kvdl_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, + ACL_MAX_DEFAULT_ACTIONS); err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET, tcam->kvdl_count, &tcam->kvdl_index); if (err) @@ -97,7 +104,10 @@ static int mlxsw_sp2_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv, goto err_afa_block_continue; enc_actions = mlxsw_afa_block_cur_set(afa_block); - for (i = 0; i < tcam->kvdl_count; i++) { + /* Only write to KVDL entries used by TCAM regions exposed to the + * host. + */ + for (i = 0; i < _tcam->max_regions; i++) { mlxsw_reg_pefa_pack(pefa_pl, tcam->kvdl_index + i, true, enc_actions); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c index 20530712eadb..8b5d7f83b9b0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c @@ -1034,13 +1034,10 @@ static int mlxsw_sp_get_module_info(struct net_device *netdev, { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - int err; - - err = mlxsw_env_get_module_info(mlxsw_sp->core, - mlxsw_sp_port->mapping.module, - modinfo); - return err; + return mlxsw_env_get_module_info(netdev, mlxsw_sp->core, + mlxsw_sp_port->mapping.module, + modinfo); } static int mlxsw_sp_get_module_eeprom(struct net_device *netdev, @@ -1048,13 +1045,10 @@ static int mlxsw_sp_get_module_eeprom(struct net_device *netdev, { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - int err; - - err = mlxsw_env_get_module_eeprom(netdev, mlxsw_sp->core, - mlxsw_sp_port->mapping.module, ee, - data); - return err; + return mlxsw_env_get_module_eeprom(netdev, mlxsw_sp->core, + mlxsw_sp_port->mapping.module, ee, + data); } static int @@ -1273,12 +1267,22 @@ struct mlxsw_sp1_port_link_mode { static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = { { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_T, + .mask_ethtool = ETHTOOL_LINK_MODE_100baseT_Full_BIT, + .speed = SPEED_100, + }, + { .mask = MLXSW_REG_PTYS_ETH_SPEED_SGMII | MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX, .mask_ethtool = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, .speed = SPEED_1000, }, { + .mask = MLXSW_REG_PTYS_ETH_SPEED_1000BASE_T, + .mask_ethtool = ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + .speed = SPEED_1000, + }, + { .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4, .mask_ethtool = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c index 0ff163fbc775..35422e64d89f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -568,12 +568,11 @@ void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, u8 domain_number, u16 sequence_id, u64 timestamp) { - unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); struct mlxsw_sp_port *mlxsw_sp_port; struct mlxsw_sp1_ptp_key key; u8 types; - if (WARN_ON_ONCE(local_port >= max_ports)) + if (WARN_ON_ONCE(!mlxsw_sp_local_port_is_valid(mlxsw_sp, local_port))) return; mlxsw_sp_port = mlxsw_sp->ports[local_port]; if (!mlxsw_sp_port) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 65c1724c63b0..bffdb41fc4ed 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -2616,7 +2616,6 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, char *sfn_pl, int rec_index, bool adding) { - unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; struct mlxsw_sp_bridge_device *bridge_device; struct mlxsw_sp_bridge_port *bridge_port; @@ -2630,7 +2629,7 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, mlxsw_reg_sfn_mac_unpack(sfn_pl, rec_index, mac, &fid, &local_port); - if (WARN_ON_ONCE(local_port >= max_ports)) + if (WARN_ON_ONCE(!mlxsw_sp_local_port_is_valid(mlxsw_sp, local_port))) return; mlxsw_sp_port = mlxsw_sp->ports[local_port]; if (!mlxsw_sp_port) { diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c b/drivers/net/ethernet/microchip/lan743x_ethtool.c index 91a755efe2e6..5f1e7b8bad4f 100644 --- a/drivers/net/ethernet/microchip/lan743x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c @@ -750,7 +750,7 @@ static int lan743x_ethtool_set_eee(struct net_device *netdev, } if (eee->eee_enabled) { - ret = phy_init_eee(phydev, 0); + ret = phy_init_eee(phydev, false); if (ret) { netif_err(adapter, drv, adapter->netdev, "EEE initialization failed\n"); diff --git a/drivers/net/ethernet/microchip/lan966x/Makefile b/drivers/net/ethernet/microchip/lan966x/Makefile index 040cfff9f577..a9ffc719aa0e 100644 --- a/drivers/net/ethernet/microchip/lan966x/Makefile +++ b/drivers/net/ethernet/microchip/lan966x/Makefile @@ -7,4 +7,5 @@ obj-$(CONFIG_LAN966X_SWITCH) += lan966x-switch.o lan966x-switch-objs := lan966x_main.o lan966x_phylink.o lan966x_port.o \ lan966x_mac.o lan966x_ethtool.o lan966x_switchdev.o \ - lan966x_vlan.o lan966x_fdb.o lan966x_mdb.o + lan966x_vlan.o lan966x_fdb.o lan966x_mdb.o \ + lan966x_ptp.o diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c index 614f12c2fe6a..e58a27fd8b50 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c @@ -545,6 +545,39 @@ static int lan966x_set_pauseparam(struct net_device *dev, return phylink_ethtool_set_pauseparam(port->phylink, pause); } +static int lan966x_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct lan966x_port *port = netdev_priv(dev); + struct lan966x *lan966x = port->lan966x; + struct lan966x_phc *phc; + + if (!lan966x->ptp) + return ethtool_op_get_ts_info(dev, info); + + phc = &lan966x->phc[LAN966X_PHC_PORT]; + + info->phc_index = phc->clock ? ptp_clock_index(phc->clock) : -1; + if (info->phc_index == -1) { + info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + return 0; + } + info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON) | + BIT(HWTSTAMP_TX_ONESTEP_SYNC); + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_ALL); + + return 0; +} + const struct ethtool_ops lan966x_ethtool_ops = { .get_link_ksettings = lan966x_get_link_ksettings, .set_link_ksettings = lan966x_set_link_ksettings, @@ -556,6 +589,7 @@ const struct ethtool_ops lan966x_ethtool_ops = { .get_eth_mac_stats = lan966x_get_eth_mac_stats, .get_rmon_stats = lan966x_get_eth_rmon_stats, .get_link = ethtool_op_get_link, + .get_ts_info = lan966x_get_ts_info, }; static void lan966x_check_stats_work(struct work_struct *work) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 1f60fd125a1d..e62758bcb998 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -44,6 +44,7 @@ static const struct lan966x_main_io_resource lan966x_main_iomap[] = { { TARGET_ORG, 0, 1 }, /* 0xe2000000 */ { TARGET_GCB, 0x4000, 1 }, /* 0xe2004000 */ { TARGET_QS, 0x8000, 1 }, /* 0xe2008000 */ + { TARGET_PTP, 0xc000, 1 }, /* 0xe200c000 */ { TARGET_CHIP_TOP, 0x10000, 1 }, /* 0xe2010000 */ { TARGET_REW, 0x14000, 1 }, /* 0xe2014000 */ { TARGET_SYS, 0x28000, 1 }, /* 0xe2028000 */ @@ -201,7 +202,7 @@ static int lan966x_port_ifh_xmit(struct sk_buff *skb, val = lan_rd(lan966x, QS_INJ_STATUS); if (!(QS_INJ_STATUS_FIFO_RDY_GET(val) & BIT(grp)) || (QS_INJ_STATUS_WMARK_REACHED_GET(val) & BIT(grp))) - return NETDEV_TX_BUSY; + goto err; /* Write start of frame */ lan_wr(QS_INJ_CTRL_GAP_SIZE_SET(1) | @@ -213,7 +214,7 @@ static int lan966x_port_ifh_xmit(struct sk_buff *skb, /* Wait until the fifo is ready */ err = lan966x_port_inj_ready(lan966x, grp); if (err) - return NETDEV_TX_BUSY; + goto err; lan_wr((__force u32)ifh[i], lan966x, QS_INJ_WR(grp)); } @@ -225,7 +226,7 @@ static int lan966x_port_ifh_xmit(struct sk_buff *skb, /* Wait until the fifo is ready */ err = lan966x_port_inj_ready(lan966x, grp); if (err) - return NETDEV_TX_BUSY; + goto err; lan_wr(((u32 *)skb->data)[i], lan966x, QS_INJ_WR(grp)); } @@ -235,7 +236,7 @@ static int lan966x_port_ifh_xmit(struct sk_buff *skb, /* Wait until the fifo is ready */ err = lan966x_port_inj_ready(lan966x, grp); if (err) - return NETDEV_TX_BUSY; + goto err; lan_wr(0, lan966x, QS_INJ_WR(grp)); ++i; @@ -255,8 +256,19 @@ static int lan966x_port_ifh_xmit(struct sk_buff *skb, dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + LAN966X_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) + return NETDEV_TX_OK; + dev_consume_skb_any(skb); return NETDEV_TX_OK; + +err: + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + LAN966X_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) + lan966x_ptp_txtstamp_release(port, skb); + + return NETDEV_TX_BUSY; } static void lan966x_ifh_set_bypass(void *ifh, u64 bypass) @@ -289,10 +301,23 @@ static void lan966x_ifh_set_vid(void *ifh, u64 vid) IFH_POS_TCI, IFH_LEN * 4, PACK, 0); } +static void lan966x_ifh_set_rew_op(void *ifh, u64 rew_op) +{ + packing(ifh, &rew_op, IFH_POS_REW_CMD + IFH_WID_REW_CMD - 1, + IFH_POS_REW_CMD, IFH_LEN * 4, PACK, 0); +} + +static void lan966x_ifh_set_timestamp(void *ifh, u64 timestamp) +{ + packing(ifh, ×tamp, IFH_POS_TIMESTAMP + IFH_WID_TIMESTAMP - 1, + IFH_POS_TIMESTAMP, IFH_LEN * 4, PACK, 0); +} + static int lan966x_port_xmit(struct sk_buff *skb, struct net_device *dev) { struct lan966x_port *port = netdev_priv(dev); __be32 ifh[IFH_LEN]; + int err; memset(ifh, 0x0, sizeof(__be32) * IFH_LEN); @@ -302,6 +327,15 @@ static int lan966x_port_xmit(struct sk_buff *skb, struct net_device *dev) lan966x_ifh_set_ipv(ifh, skb->priority >= 7 ? 0x7 : skb->priority); lan966x_ifh_set_vid(ifh, skb_vlan_tag_get(skb)); + if (port->lan966x->ptp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { + err = lan966x_ptp_txtstamp_request(port, skb); + if (err) + return err; + + lan966x_ifh_set_rew_op(ifh, LAN966X_SKB_CB(skb)->rew_op); + lan966x_ifh_set_timestamp(ifh, LAN966X_SKB_CB(skb)->ts_id); + } + return lan966x_port_ifh_xmit(skb, ifh, dev); } @@ -350,6 +384,23 @@ static int lan966x_port_get_parent_id(struct net_device *dev, return 0; } +static int lan966x_port_ioctl(struct net_device *dev, struct ifreq *ifr, + int cmd) +{ + struct lan966x_port *port = netdev_priv(dev); + + if (!phy_has_hwtstamp(dev->phydev) && port->lan966x->ptp) { + switch (cmd) { + case SIOCSHWTSTAMP: + return lan966x_ptp_hwtstamp_set(port, ifr); + case SIOCGHWTSTAMP: + return lan966x_ptp_hwtstamp_get(port, ifr); + } + } + + return phy_mii_ioctl(dev->phydev, ifr, cmd); +} + static const struct net_device_ops lan966x_port_netdev_ops = { .ndo_open = lan966x_port_open, .ndo_stop = lan966x_port_stop, @@ -360,6 +411,7 @@ static const struct net_device_ops lan966x_port_netdev_ops = { .ndo_get_stats64 = lan966x_stats_get, .ndo_set_mac_address = lan966x_port_set_mac_address, .ndo_get_port_parent_id = lan966x_port_get_parent_id, + .ndo_eth_ioctl = lan966x_port_ioctl, }; bool lan966x_netdevice_check(const struct net_device *dev) @@ -434,6 +486,12 @@ static void lan966x_ifh_get_len(void *ifh, u64 *len) IFH_POS_LEN, IFH_LEN * 4, UNPACK, 0); } +static void lan966x_ifh_get_timestamp(void *ifh, u64 *timestamp) +{ + packing(ifh, timestamp, IFH_POS_TIMESTAMP + IFH_WID_TIMESTAMP - 1, + IFH_POS_TIMESTAMP, IFH_LEN * 4, UNPACK, 0); +} + static irqreturn_t lan966x_xtr_irq_handler(int irq, void *args) { struct lan966x *lan966x = args; @@ -443,10 +501,10 @@ static irqreturn_t lan966x_xtr_irq_handler(int irq, void *args) return IRQ_NONE; do { + u64 src_port, len, timestamp; struct net_device *dev; struct sk_buff *skb; int sz = 0, buf_len; - u64 src_port, len; u32 ifh[IFH_LEN]; u32 *buf; u32 val; @@ -461,6 +519,7 @@ static irqreturn_t lan966x_xtr_irq_handler(int irq, void *args) lan966x_ifh_get_src_port(ifh, &src_port); lan966x_ifh_get_len(ifh, &len); + lan966x_ifh_get_timestamp(ifh, ×tamp); WARN_ON(src_port >= lan966x->num_phys_ports); @@ -501,6 +560,7 @@ static irqreturn_t lan966x_xtr_irq_handler(int irq, void *args) *buf = val; } + lan966x_ptp_rxtstamp(lan966x, skb, timestamp); skb->protocol = eth_type_trans(skb, dev); if (lan966x->bridge_mask & BIT(src_port)) @@ -897,6 +957,17 @@ static int lan966x_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, err, "Unable to use ana irq"); } + lan966x->ptp_irq = platform_get_irq_byname(pdev, "ptp"); + if (lan966x->ptp_irq > 0) { + err = devm_request_threaded_irq(&pdev->dev, lan966x->ptp_irq, NULL, + lan966x_ptp_irq_handler, IRQF_ONESHOT, + "ptp irq", lan966x); + if (err) + return dev_err_probe(&pdev->dev, err, "Unable to use ptp irq"); + + lan966x->ptp = 1; + } + /* init switch */ lan966x_init(lan966x); lan966x_stats_init(lan966x); @@ -931,8 +1002,15 @@ static int lan966x_probe(struct platform_device *pdev) if (err) goto cleanup_ports; + err = lan966x_ptp_init(lan966x); + if (err) + goto cleanup_fdb; + return 0; +cleanup_fdb: + lan966x_fdb_deinit(lan966x); + cleanup_ports: fwnode_handle_put(portnp); @@ -958,6 +1036,7 @@ static int lan966x_remove(struct platform_device *pdev) lan966x_mac_purge_entries(lan966x); lan966x_mdb_deinit(lan966x); lan966x_fdb_deinit(lan966x); + lan966x_ptp_deinit(lan966x); return 0; } diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index 99c6d0a9f946..026474c609ea 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -8,6 +8,7 @@ #include <linux/jiffies.h> #include <linux/phy.h> #include <linux/phylink.h> +#include <linux/ptp_clock_kernel.h> #include <net/switchdev.h> #include "lan966x_regs.h" @@ -50,6 +51,13 @@ #define LAN966X_SPEED_100 2 #define LAN966X_SPEED_10 3 +#define LAN966X_PHC_COUNT 3 +#define LAN966X_PHC_PORT 0 + +#define IFH_REW_OP_NOOP 0x0 +#define IFH_REW_OP_ONE_STEP_PTP 0x3 +#define IFH_REW_OP_TWO_STEP_PTP 0x4 + /* MAC table entry types. * ENTRYTYPE_NORMAL is subject to aging. * ENTRYTYPE_LOCKED is not subject to aging. @@ -70,6 +78,24 @@ struct lan966x_stat_layout { char name[ETH_GSTRING_LEN]; }; +struct lan966x_phc { + struct ptp_clock *clock; + struct ptp_clock_info info; + struct hwtstamp_config hwtstamp_config; + struct lan966x *lan966x; + u8 index; +}; + +struct lan966x_skb_cb { + u8 rew_op; + u16 ts_id; + unsigned long jiffies; +}; + +#define LAN966X_PTP_TIMEOUT msecs_to_jiffies(10) +#define LAN966X_SKB_CB(skb) \ + ((struct lan966x_skb_cb *)((skb)->cb)) + struct lan966x { struct device *dev; @@ -105,6 +131,7 @@ struct lan966x { /* interrupts */ int xtr_irq; int ana_irq; + int ptp_irq; /* worqueue for fdb */ struct workqueue_struct *fdb_work; @@ -113,6 +140,14 @@ struct lan966x { /* mdb */ struct list_head mdb_entries; struct list_head pgid_entries; + + /* ptp */ + bool ptp; + struct lan966x_phc phc[LAN966X_PHC_COUNT]; + spinlock_t ptp_clock_lock; /* lock for phc */ + spinlock_t ptp_ts_id_lock; /* lock for ts_id */ + struct mutex ptp_lock; /* lock for ptp interface state */ + u16 ptp_skbs; }; struct lan966x_port_config { @@ -142,6 +177,10 @@ struct lan966x_port { struct phylink *phylink; struct phy *serdes; struct fwnode_handle *fwnode; + + u8 ptp_cmd; + u16 ts_id; + struct sk_buff_head tx_skbs; }; extern const struct phylink_mac_ops lan966x_phylink_mac_ops; @@ -228,6 +267,18 @@ int lan966x_handle_port_mdb_del(struct lan966x_port *port, void lan966x_mdb_erase_entries(struct lan966x *lan966x, u16 vid); void lan966x_mdb_write_entries(struct lan966x *lan966x, u16 vid); +int lan966x_ptp_init(struct lan966x *lan966x); +void lan966x_ptp_deinit(struct lan966x *lan966x); +int lan966x_ptp_hwtstamp_set(struct lan966x_port *port, struct ifreq *ifr); +int lan966x_ptp_hwtstamp_get(struct lan966x_port *port, struct ifreq *ifr); +void lan966x_ptp_rxtstamp(struct lan966x *lan966x, struct sk_buff *skb, + u64 timestamp); +int lan966x_ptp_txtstamp_request(struct lan966x_port *port, + struct sk_buff *skb); +void lan966x_ptp_txtstamp_release(struct lan966x_port *port, + struct sk_buff *skb); +irqreturn_t lan966x_ptp_irq_handler(int irq, void *args); + static inline void __iomem *lan_addr(void __iomem *base[], int id, int tinst, int tcnt, int gbase, int ginst, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c new file mode 100644 index 000000000000..ae782778d6dd --- /dev/null +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c @@ -0,0 +1,618 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <linux/ptp_classify.h> + +#include "lan966x_main.h" + +#define LAN966X_MAX_PTP_ID 512 + +/* Represents 1ppm adjustment in 2^59 format with 6.037735849ns as reference + * The value is calculated as following: (1/1000000)/((2^-59)/6.037735849) + */ +#define LAN966X_1PPM_FORMAT 3480517749723LL + +/* Represents 1ppb adjustment in 2^29 format with 6.037735849ns as reference + * The value is calculated as following: (1/1000000000)/((2^59)/6.037735849) + */ +#define LAN966X_1PPB_FORMAT 3480517749LL + +#define TOD_ACC_PIN 0x5 + +enum { + PTP_PIN_ACTION_IDLE = 0, + PTP_PIN_ACTION_LOAD, + PTP_PIN_ACTION_SAVE, + PTP_PIN_ACTION_CLOCK, + PTP_PIN_ACTION_DELTA, + PTP_PIN_ACTION_TOD +}; + +static u64 lan966x_ptp_get_nominal_value(void) +{ + u64 res = 0x304d2df1; + + res <<= 32; + return res; +} + +int lan966x_ptp_hwtstamp_set(struct lan966x_port *port, struct ifreq *ifr) +{ + struct lan966x *lan966x = port->lan966x; + struct hwtstamp_config cfg; + struct lan966x_phc *phc; + + /* For now don't allow to run ptp on ports that are part of a bridge, + * because in case of transparent clock the HW will still forward the + * frames, so there would be duplicate frames + */ + if (lan966x->bridge_mask & BIT(port->chip_port)) + return -EINVAL; + + if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) + return -EFAULT; + + switch (cfg.tx_type) { + case HWTSTAMP_TX_ON: + port->ptp_cmd = IFH_REW_OP_TWO_STEP_PTP; + break; + case HWTSTAMP_TX_ONESTEP_SYNC: + port->ptp_cmd = IFH_REW_OP_ONE_STEP_PTP; + break; + case HWTSTAMP_TX_OFF: + port->ptp_cmd = IFH_REW_OP_NOOP; + break; + default: + return -ERANGE; + } + + switch (cfg.rx_filter) { + case HWTSTAMP_FILTER_NONE: + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_NTP_ALL: + cfg.rx_filter = HWTSTAMP_FILTER_ALL; + break; + default: + return -ERANGE; + } + + /* Commit back the result & save it */ + mutex_lock(&lan966x->ptp_lock); + phc = &lan966x->phc[LAN966X_PHC_PORT]; + memcpy(&phc->hwtstamp_config, &cfg, sizeof(cfg)); + mutex_unlock(&lan966x->ptp_lock); + + return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; +} + +int lan966x_ptp_hwtstamp_get(struct lan966x_port *port, struct ifreq *ifr) +{ + struct lan966x *lan966x = port->lan966x; + struct lan966x_phc *phc; + + phc = &lan966x->phc[LAN966X_PHC_PORT]; + return copy_to_user(ifr->ifr_data, &phc->hwtstamp_config, + sizeof(phc->hwtstamp_config)) ? -EFAULT : 0; +} + +static int lan966x_ptp_classify(struct lan966x_port *port, struct sk_buff *skb) +{ + struct ptp_header *header; + u8 msgtype; + int type; + + if (port->ptp_cmd == IFH_REW_OP_NOOP) + return IFH_REW_OP_NOOP; + + type = ptp_classify_raw(skb); + if (type == PTP_CLASS_NONE) + return IFH_REW_OP_NOOP; + + header = ptp_parse_header(skb, type); + if (!header) + return IFH_REW_OP_NOOP; + + if (port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) + return IFH_REW_OP_TWO_STEP_PTP; + + /* If it is sync and run 1 step then set the correct operation, + * otherwise run as 2 step + */ + msgtype = ptp_get_msgtype(header, type); + if ((msgtype & 0xf) == 0) + return IFH_REW_OP_ONE_STEP_PTP; + + return IFH_REW_OP_TWO_STEP_PTP; +} + +static void lan966x_ptp_txtstamp_old_release(struct lan966x_port *port) +{ + struct sk_buff *skb, *skb_tmp; + unsigned long flags; + + spin_lock_irqsave(&port->tx_skbs.lock, flags); + skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) { + if time_after(LAN966X_SKB_CB(skb)->jiffies + LAN966X_PTP_TIMEOUT, + jiffies) + break; + + __skb_unlink(skb, &port->tx_skbs); + dev_kfree_skb_any(skb); + } + spin_unlock_irqrestore(&port->tx_skbs.lock, flags); +} + +int lan966x_ptp_txtstamp_request(struct lan966x_port *port, + struct sk_buff *skb) +{ + struct lan966x *lan966x = port->lan966x; + unsigned long flags; + u8 rew_op; + + rew_op = lan966x_ptp_classify(port, skb); + LAN966X_SKB_CB(skb)->rew_op = rew_op; + + if (rew_op != IFH_REW_OP_TWO_STEP_PTP) + return 0; + + lan966x_ptp_txtstamp_old_release(port); + + spin_lock_irqsave(&lan966x->ptp_ts_id_lock, flags); + if (lan966x->ptp_skbs == LAN966X_MAX_PTP_ID) { + spin_unlock_irqrestore(&lan966x->ptp_ts_id_lock, flags); + return -EBUSY; + } + + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + + skb_queue_tail(&port->tx_skbs, skb); + LAN966X_SKB_CB(skb)->ts_id = port->ts_id; + LAN966X_SKB_CB(skb)->jiffies = jiffies; + + lan966x->ptp_skbs++; + port->ts_id++; + if (port->ts_id == LAN966X_MAX_PTP_ID) + port->ts_id = 0; + + spin_unlock_irqrestore(&lan966x->ptp_ts_id_lock, flags); + + return 0; +} + +void lan966x_ptp_txtstamp_release(struct lan966x_port *port, + struct sk_buff *skb) +{ + struct lan966x *lan966x = port->lan966x; + unsigned long flags; + + spin_lock_irqsave(&lan966x->ptp_ts_id_lock, flags); + port->ts_id--; + lan966x->ptp_skbs--; + skb_unlink(skb, &port->tx_skbs); + spin_unlock_irqrestore(&lan966x->ptp_ts_id_lock, flags); +} + +static void lan966x_get_hwtimestamp(struct lan966x *lan966x, + struct timespec64 *ts, + u32 nsec) +{ + /* Read current PTP time to get seconds */ + unsigned long flags; + u32 curr_nsec; + + spin_lock_irqsave(&lan966x->ptp_clock_lock, flags); + + lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_SAVE) | + PTP_PIN_CFG_PIN_DOM_SET(LAN966X_PHC_PORT) | + PTP_PIN_CFG_PIN_SYNC_SET(0), + PTP_PIN_CFG_PIN_ACTION | + PTP_PIN_CFG_PIN_DOM | + PTP_PIN_CFG_PIN_SYNC, + lan966x, PTP_PIN_CFG(TOD_ACC_PIN)); + + ts->tv_sec = lan_rd(lan966x, PTP_TOD_SEC_LSB(TOD_ACC_PIN)); + curr_nsec = lan_rd(lan966x, PTP_TOD_NSEC(TOD_ACC_PIN)); + + ts->tv_nsec = nsec; + + /* Sec has incremented since the ts was registered */ + if (curr_nsec < nsec) + ts->tv_sec--; + + spin_unlock_irqrestore(&lan966x->ptp_clock_lock, flags); +} + +irqreturn_t lan966x_ptp_irq_handler(int irq, void *args) +{ + int budget = LAN966X_MAX_PTP_ID; + struct lan966x *lan966x = args; + + while (budget--) { + struct sk_buff *skb, *skb_tmp, *skb_match = NULL; + struct skb_shared_hwtstamps shhwtstamps; + struct lan966x_port *port; + struct timespec64 ts; + unsigned long flags; + u32 val, id, txport; + u32 delay; + + val = lan_rd(lan966x, PTP_TWOSTEP_CTRL); + + /* Check if a timestamp can be retrieved */ + if (!(val & PTP_TWOSTEP_CTRL_VLD)) + break; + + WARN_ON(val & PTP_TWOSTEP_CTRL_OVFL); + + if (!(val & PTP_TWOSTEP_CTRL_STAMP_TX)) + continue; + + /* Retrieve the ts Tx port */ + txport = PTP_TWOSTEP_CTRL_STAMP_PORT_GET(val); + + /* Retrieve its associated skb */ + port = lan966x->ports[txport]; + + /* Retrieve the delay */ + delay = lan_rd(lan966x, PTP_TWOSTEP_STAMP); + delay = PTP_TWOSTEP_STAMP_STAMP_NSEC_GET(delay); + + /* Get next timestamp from fifo, which needs to be the + * rx timestamp which represents the id of the frame + */ + lan_rmw(PTP_TWOSTEP_CTRL_NXT_SET(1), + PTP_TWOSTEP_CTRL_NXT, + lan966x, PTP_TWOSTEP_CTRL); + + val = lan_rd(lan966x, PTP_TWOSTEP_CTRL); + + /* Check if a timestamp can be retried */ + if (!(val & PTP_TWOSTEP_CTRL_VLD)) + break; + + /* Read RX timestamping to get the ID */ + id = lan_rd(lan966x, PTP_TWOSTEP_STAMP); + + spin_lock_irqsave(&port->tx_skbs.lock, flags); + skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) { + if (LAN966X_SKB_CB(skb)->ts_id != id) + continue; + + __skb_unlink(skb, &port->tx_skbs); + skb_match = skb; + break; + } + spin_unlock_irqrestore(&port->tx_skbs.lock, flags); + + /* Next ts */ + lan_rmw(PTP_TWOSTEP_CTRL_NXT_SET(1), + PTP_TWOSTEP_CTRL_NXT, + lan966x, PTP_TWOSTEP_CTRL); + + if (WARN_ON(!skb_match)) + continue; + + spin_lock(&lan966x->ptp_ts_id_lock); + lan966x->ptp_skbs--; + spin_unlock(&lan966x->ptp_ts_id_lock); + + /* Get the h/w timestamp */ + lan966x_get_hwtimestamp(lan966x, &ts, delay); + + /* Set the timestamp into the skb */ + shhwtstamps.hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec); + skb_tstamp_tx(skb_match, &shhwtstamps); + + dev_kfree_skb_any(skb_match); + } + + return IRQ_HANDLED; +} + +static int lan966x_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) +{ + struct lan966x_phc *phc = container_of(ptp, struct lan966x_phc, info); + struct lan966x *lan966x = phc->lan966x; + unsigned long flags; + bool neg_adj = 0; + u64 tod_inc; + u64 ref; + + if (!scaled_ppm) + return 0; + + if (scaled_ppm < 0) { + neg_adj = 1; + scaled_ppm = -scaled_ppm; + } + + tod_inc = lan966x_ptp_get_nominal_value(); + + /* The multiplication is split in 2 separate additions because of + * overflow issues. If scaled_ppm with 16bit fractional part was bigger + * than 20ppm then we got overflow. + */ + ref = LAN966X_1PPM_FORMAT * (scaled_ppm >> 16); + ref += (LAN966X_1PPM_FORMAT * (0xffff & scaled_ppm)) >> 16; + tod_inc = neg_adj ? tod_inc - ref : tod_inc + ref; + + spin_lock_irqsave(&lan966x->ptp_clock_lock, flags); + + lan_rmw(PTP_DOM_CFG_CLKCFG_DIS_SET(1 << BIT(phc->index)), + PTP_DOM_CFG_CLKCFG_DIS, + lan966x, PTP_DOM_CFG); + + lan_wr((u32)tod_inc & 0xFFFFFFFF, lan966x, + PTP_CLK_PER_CFG(phc->index, 0)); + lan_wr((u32)(tod_inc >> 32), lan966x, + PTP_CLK_PER_CFG(phc->index, 1)); + + lan_rmw(PTP_DOM_CFG_CLKCFG_DIS_SET(0), + PTP_DOM_CFG_CLKCFG_DIS, + lan966x, PTP_DOM_CFG); + + spin_unlock_irqrestore(&lan966x->ptp_clock_lock, flags); + + return 0; +} + +static int lan966x_ptp_settime64(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct lan966x_phc *phc = container_of(ptp, struct lan966x_phc, info); + struct lan966x *lan966x = phc->lan966x; + unsigned long flags; + + spin_lock_irqsave(&lan966x->ptp_clock_lock, flags); + + /* Must be in IDLE mode before the time can be loaded */ + lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_IDLE) | + PTP_PIN_CFG_PIN_DOM_SET(phc->index) | + PTP_PIN_CFG_PIN_SYNC_SET(0), + PTP_PIN_CFG_PIN_ACTION | + PTP_PIN_CFG_PIN_DOM | + PTP_PIN_CFG_PIN_SYNC, + lan966x, PTP_PIN_CFG(TOD_ACC_PIN)); + + /* Set new value */ + lan_wr(PTP_TOD_SEC_MSB_TOD_SEC_MSB_SET(upper_32_bits(ts->tv_sec)), + lan966x, PTP_TOD_SEC_MSB(TOD_ACC_PIN)); + lan_wr(lower_32_bits(ts->tv_sec), + lan966x, PTP_TOD_SEC_LSB(TOD_ACC_PIN)); + lan_wr(ts->tv_nsec, lan966x, PTP_TOD_NSEC(TOD_ACC_PIN)); + + /* Apply new values */ + lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_LOAD) | + PTP_PIN_CFG_PIN_DOM_SET(phc->index) | + PTP_PIN_CFG_PIN_SYNC_SET(0), + PTP_PIN_CFG_PIN_ACTION | + PTP_PIN_CFG_PIN_DOM | + PTP_PIN_CFG_PIN_SYNC, + lan966x, PTP_PIN_CFG(TOD_ACC_PIN)); + + spin_unlock_irqrestore(&lan966x->ptp_clock_lock, flags); + + return 0; +} + +static int lan966x_ptp_gettime64(struct ptp_clock_info *ptp, + struct timespec64 *ts) +{ + struct lan966x_phc *phc = container_of(ptp, struct lan966x_phc, info); + struct lan966x *lan966x = phc->lan966x; + unsigned long flags; + time64_t s; + s64 ns; + + spin_lock_irqsave(&lan966x->ptp_clock_lock, flags); + + lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_SAVE) | + PTP_PIN_CFG_PIN_DOM_SET(phc->index) | + PTP_PIN_CFG_PIN_SYNC_SET(0), + PTP_PIN_CFG_PIN_ACTION | + PTP_PIN_CFG_PIN_DOM | + PTP_PIN_CFG_PIN_SYNC, + lan966x, PTP_PIN_CFG(TOD_ACC_PIN)); + + s = lan_rd(lan966x, PTP_TOD_SEC_MSB(TOD_ACC_PIN)); + s <<= 32; + s |= lan_rd(lan966x, PTP_TOD_SEC_LSB(TOD_ACC_PIN)); + ns = lan_rd(lan966x, PTP_TOD_NSEC(TOD_ACC_PIN)); + ns &= PTP_TOD_NSEC_TOD_NSEC; + + spin_unlock_irqrestore(&lan966x->ptp_clock_lock, flags); + + /* Deal with negative values */ + if ((ns & 0xFFFFFFF0) == 0x3FFFFFF0) { + s--; + ns &= 0xf; + ns += 999999984; + } + + set_normalized_timespec64(ts, s, ns); + return 0; +} + +static int lan966x_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct lan966x_phc *phc = container_of(ptp, struct lan966x_phc, info); + struct lan966x *lan966x = phc->lan966x; + + if (delta > -(NSEC_PER_SEC / 2) && delta < (NSEC_PER_SEC / 2)) { + unsigned long flags; + + spin_lock_irqsave(&lan966x->ptp_clock_lock, flags); + + /* Must be in IDLE mode before the time can be loaded */ + lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_IDLE) | + PTP_PIN_CFG_PIN_DOM_SET(phc->index) | + PTP_PIN_CFG_PIN_SYNC_SET(0), + PTP_PIN_CFG_PIN_ACTION | + PTP_PIN_CFG_PIN_DOM | + PTP_PIN_CFG_PIN_SYNC, + lan966x, PTP_PIN_CFG(TOD_ACC_PIN)); + + lan_wr(PTP_TOD_NSEC_TOD_NSEC_SET(delta), + lan966x, PTP_TOD_NSEC(TOD_ACC_PIN)); + + /* Adjust time with the value of PTP_TOD_NSEC */ + lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_DELTA) | + PTP_PIN_CFG_PIN_DOM_SET(phc->index) | + PTP_PIN_CFG_PIN_SYNC_SET(0), + PTP_PIN_CFG_PIN_ACTION | + PTP_PIN_CFG_PIN_DOM | + PTP_PIN_CFG_PIN_SYNC, + lan966x, PTP_PIN_CFG(TOD_ACC_PIN)); + + spin_unlock_irqrestore(&lan966x->ptp_clock_lock, flags); + } else { + /* Fall back using lan966x_ptp_settime64 which is not exact */ + struct timespec64 ts; + u64 now; + + lan966x_ptp_gettime64(ptp, &ts); + + now = ktime_to_ns(timespec64_to_ktime(ts)); + ts = ns_to_timespec64(now + delta); + + lan966x_ptp_settime64(ptp, &ts); + } + + return 0; +} + +static struct ptp_clock_info lan966x_ptp_clock_info = { + .owner = THIS_MODULE, + .name = "lan966x ptp", + .max_adj = 200000, + .gettime64 = lan966x_ptp_gettime64, + .settime64 = lan966x_ptp_settime64, + .adjtime = lan966x_ptp_adjtime, + .adjfine = lan966x_ptp_adjfine, +}; + +static int lan966x_ptp_phc_init(struct lan966x *lan966x, + int index, + struct ptp_clock_info *clock_info) +{ + struct lan966x_phc *phc = &lan966x->phc[index]; + + phc->info = *clock_info; + phc->clock = ptp_clock_register(&phc->info, lan966x->dev); + if (IS_ERR(phc->clock)) + return PTR_ERR(phc->clock); + + phc->index = index; + phc->lan966x = lan966x; + + /* PTP Rx stamping is always enabled. */ + phc->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + + return 0; +} + +int lan966x_ptp_init(struct lan966x *lan966x) +{ + u64 tod_adj = lan966x_ptp_get_nominal_value(); + struct lan966x_port *port; + int err, i; + + if (!lan966x->ptp) + return 0; + + for (i = 0; i < LAN966X_PHC_COUNT; ++i) { + err = lan966x_ptp_phc_init(lan966x, i, &lan966x_ptp_clock_info); + if (err) + return err; + } + + spin_lock_init(&lan966x->ptp_clock_lock); + spin_lock_init(&lan966x->ptp_ts_id_lock); + mutex_init(&lan966x->ptp_lock); + + /* Disable master counters */ + lan_wr(PTP_DOM_CFG_ENA_SET(0), lan966x, PTP_DOM_CFG); + + /* Configure the nominal TOD increment per clock cycle */ + lan_rmw(PTP_DOM_CFG_CLKCFG_DIS_SET(0x7), + PTP_DOM_CFG_CLKCFG_DIS, + lan966x, PTP_DOM_CFG); + + for (i = 0; i < LAN966X_PHC_COUNT; ++i) { + lan_wr((u32)tod_adj & 0xFFFFFFFF, lan966x, + PTP_CLK_PER_CFG(i, 0)); + lan_wr((u32)(tod_adj >> 32), lan966x, + PTP_CLK_PER_CFG(i, 1)); + } + + lan_rmw(PTP_DOM_CFG_CLKCFG_DIS_SET(0), + PTP_DOM_CFG_CLKCFG_DIS, + lan966x, PTP_DOM_CFG); + + /* Enable master counters */ + lan_wr(PTP_DOM_CFG_ENA_SET(0x7), lan966x, PTP_DOM_CFG); + + for (i = 0; i < lan966x->num_phys_ports; i++) { + port = lan966x->ports[i]; + if (!port) + continue; + + skb_queue_head_init(&port->tx_skbs); + } + + return 0; +} + +void lan966x_ptp_deinit(struct lan966x *lan966x) +{ + struct lan966x_port *port; + int i; + + for (i = 0; i < lan966x->num_phys_ports; i++) { + port = lan966x->ports[i]; + if (!port) + continue; + + skb_queue_purge(&port->tx_skbs); + } + + for (i = 0; i < LAN966X_PHC_COUNT; ++i) + ptp_clock_unregister(lan966x->phc[i].clock); +} + +void lan966x_ptp_rxtstamp(struct lan966x *lan966x, struct sk_buff *skb, + u64 timestamp) +{ + struct skb_shared_hwtstamps *shhwtstamps; + struct lan966x_phc *phc; + struct timespec64 ts; + u64 full_ts_in_ns; + + if (!lan966x->ptp) + return; + + phc = &lan966x->phc[LAN966X_PHC_PORT]; + lan966x_ptp_gettime64(&phc->info, &ts); + + /* Drop the sub-ns precision */ + timestamp = timestamp >> 2; + if (ts.tv_nsec < timestamp) + ts.tv_sec--; + ts.tv_nsec = timestamp; + full_ts_in_ns = ktime_set(ts.tv_sec, ts.tv_nsec); + + shhwtstamps = skb_hwtstamps(skb); + shhwtstamps->hwtstamp = full_ts_in_ns; +} diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h index 797560172aca..37a5d7e63cb6 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h @@ -19,6 +19,7 @@ enum lan966x_target { TARGET_DEV = 13, TARGET_GCB = 27, TARGET_ORG = 36, + TARGET_PTP = 41, TARGET_QS = 42, TARGET_QSYS = 46, TARGET_REW = 47, @@ -559,6 +560,108 @@ enum lan966x_target { #define DEV_PCS1G_STICKY_LINK_DOWN_STICKY_GET(x)\ FIELD_GET(DEV_PCS1G_STICKY_LINK_DOWN_STICKY, x) +/* PTP:PTP_CFG:PTP_DOM_CFG */ +#define PTP_DOM_CFG __REG(TARGET_PTP, 0, 1, 512, 0, 1, 16, 12, 0, 1, 4) + +#define PTP_DOM_CFG_ENA GENMASK(11, 9) +#define PTP_DOM_CFG_ENA_SET(x)\ + FIELD_PREP(PTP_DOM_CFG_ENA, x) +#define PTP_DOM_CFG_ENA_GET(x)\ + FIELD_GET(PTP_DOM_CFG_ENA, x) + +#define PTP_DOM_CFG_CLKCFG_DIS GENMASK(2, 0) +#define PTP_DOM_CFG_CLKCFG_DIS_SET(x)\ + FIELD_PREP(PTP_DOM_CFG_CLKCFG_DIS, x) +#define PTP_DOM_CFG_CLKCFG_DIS_GET(x)\ + FIELD_GET(PTP_DOM_CFG_CLKCFG_DIS, x) + +/* PTP:PTP_TOD_DOMAINS:CLK_PER_CFG */ +#define PTP_CLK_PER_CFG(g, r) __REG(TARGET_PTP, 0, 1, 528, g, 3, 28, 0, r, 2, 4) + +/* PTP:PTP_PINS:PTP_PIN_CFG */ +#define PTP_PIN_CFG(g) __REG(TARGET_PTP, 0, 1, 0, g, 8, 64, 0, 0, 1, 4) + +#define PTP_PIN_CFG_PIN_ACTION GENMASK(29, 27) +#define PTP_PIN_CFG_PIN_ACTION_SET(x)\ + FIELD_PREP(PTP_PIN_CFG_PIN_ACTION, x) +#define PTP_PIN_CFG_PIN_ACTION_GET(x)\ + FIELD_GET(PTP_PIN_CFG_PIN_ACTION, x) + +#define PTP_PIN_CFG_PIN_SYNC GENMASK(26, 25) +#define PTP_PIN_CFG_PIN_SYNC_SET(x)\ + FIELD_PREP(PTP_PIN_CFG_PIN_SYNC, x) +#define PTP_PIN_CFG_PIN_SYNC_GET(x)\ + FIELD_GET(PTP_PIN_CFG_PIN_SYNC, x) + +#define PTP_PIN_CFG_PIN_DOM GENMASK(17, 16) +#define PTP_PIN_CFG_PIN_DOM_SET(x)\ + FIELD_PREP(PTP_PIN_CFG_PIN_DOM, x) +#define PTP_PIN_CFG_PIN_DOM_GET(x)\ + FIELD_GET(PTP_PIN_CFG_PIN_DOM, x) + +/* PTP:PTP_PINS:PTP_TOD_SEC_MSB */ +#define PTP_TOD_SEC_MSB(g) __REG(TARGET_PTP, 0, 1, 0, g, 8, 64, 4, 0, 1, 4) + +#define PTP_TOD_SEC_MSB_TOD_SEC_MSB GENMASK(15, 0) +#define PTP_TOD_SEC_MSB_TOD_SEC_MSB_SET(x)\ + FIELD_PREP(PTP_TOD_SEC_MSB_TOD_SEC_MSB, x) +#define PTP_TOD_SEC_MSB_TOD_SEC_MSB_GET(x)\ + FIELD_GET(PTP_TOD_SEC_MSB_TOD_SEC_MSB, x) + +/* PTP:PTP_PINS:PTP_TOD_SEC_LSB */ +#define PTP_TOD_SEC_LSB(g) __REG(TARGET_PTP, 0, 1, 0, g, 8, 64, 8, 0, 1, 4) + +/* PTP:PTP_PINS:PTP_TOD_NSEC */ +#define PTP_TOD_NSEC(g) __REG(TARGET_PTP, 0, 1, 0, g, 8, 64, 12, 0, 1, 4) + +#define PTP_TOD_NSEC_TOD_NSEC GENMASK(29, 0) +#define PTP_TOD_NSEC_TOD_NSEC_SET(x)\ + FIELD_PREP(PTP_TOD_NSEC_TOD_NSEC, x) +#define PTP_TOD_NSEC_TOD_NSEC_GET(x)\ + FIELD_GET(PTP_TOD_NSEC_TOD_NSEC, x) + +/* PTP:PTP_TS_FIFO:PTP_TWOSTEP_CTRL */ +#define PTP_TWOSTEP_CTRL __REG(TARGET_PTP, 0, 1, 612, 0, 1, 12, 0, 0, 1, 4) + +#define PTP_TWOSTEP_CTRL_NXT BIT(11) +#define PTP_TWOSTEP_CTRL_NXT_SET(x)\ + FIELD_PREP(PTP_TWOSTEP_CTRL_NXT, x) +#define PTP_TWOSTEP_CTRL_NXT_GET(x)\ + FIELD_GET(PTP_TWOSTEP_CTRL_NXT, x) + +#define PTP_TWOSTEP_CTRL_VLD BIT(10) +#define PTP_TWOSTEP_CTRL_VLD_SET(x)\ + FIELD_PREP(PTP_TWOSTEP_CTRL_VLD, x) +#define PTP_TWOSTEP_CTRL_VLD_GET(x)\ + FIELD_GET(PTP_TWOSTEP_CTRL_VLD, x) + +#define PTP_TWOSTEP_CTRL_STAMP_TX BIT(9) +#define PTP_TWOSTEP_CTRL_STAMP_TX_SET(x)\ + FIELD_PREP(PTP_TWOSTEP_CTRL_STAMP_TX, x) +#define PTP_TWOSTEP_CTRL_STAMP_TX_GET(x)\ + FIELD_GET(PTP_TWOSTEP_CTRL_STAMP_TX, x) + +#define PTP_TWOSTEP_CTRL_STAMP_PORT GENMASK(8, 1) +#define PTP_TWOSTEP_CTRL_STAMP_PORT_SET(x)\ + FIELD_PREP(PTP_TWOSTEP_CTRL_STAMP_PORT, x) +#define PTP_TWOSTEP_CTRL_STAMP_PORT_GET(x)\ + FIELD_GET(PTP_TWOSTEP_CTRL_STAMP_PORT, x) + +#define PTP_TWOSTEP_CTRL_OVFL BIT(0) +#define PTP_TWOSTEP_CTRL_OVFL_SET(x)\ + FIELD_PREP(PTP_TWOSTEP_CTRL_OVFL, x) +#define PTP_TWOSTEP_CTRL_OVFL_GET(x)\ + FIELD_GET(PTP_TWOSTEP_CTRL_OVFL, x) + +/* PTP:PTP_TS_FIFO:PTP_TWOSTEP_STAMP */ +#define PTP_TWOSTEP_STAMP __REG(TARGET_PTP, 0, 1, 612, 0, 1, 12, 4, 0, 1, 4) + +#define PTP_TWOSTEP_STAMP_STAMP_NSEC GENMASK(31, 2) +#define PTP_TWOSTEP_STAMP_STAMP_NSEC_SET(x)\ + FIELD_PREP(PTP_TWOSTEP_STAMP_STAMP_NSEC, x) +#define PTP_TWOSTEP_STAMP_STAMP_NSEC_GET(x)\ + FIELD_GET(PTP_TWOSTEP_STAMP_STAMP_NSEC, x) + /* DEVCPU_QS:XTR:XTR_GRP_CFG */ #define QS_XTR_GRP_CFG(r) __REG(TARGET_QS, 0, 1, 0, 0, 1, 36, 0, r, 2, 4) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index 16266275dd36..35689b5e212c 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -328,7 +328,6 @@ static int sparx5_create_port(struct sparx5 *sparx5, return PTR_ERR(phylink); spx5_port->phylink = phylink; - phylink_set_pcs(phylink, &spx5_port->phylink_pcs); return 0; } diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c b/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c index 8ba33bc1a001..830da0e5ff27 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c @@ -26,6 +26,15 @@ static bool port_conf_has_changed(struct sparx5_port_config *a, struct sparx5_po return false; } +static struct phylink_pcs * +sparx5_phylink_mac_select_pcs(struct phylink_config *config, + phy_interface_t interface) +{ + struct sparx5_port *port = netdev_priv(to_net_dev(config->dev)); + + return &port->phylink_pcs; +} + static void sparx5_phylink_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) @@ -130,6 +139,7 @@ const struct phylink_pcs_ops sparx5_phylink_pcs_ops = { const struct phylink_mac_ops sparx5_phylink_mac_ops = { .validate = phylink_generic_validate, + .mac_select_pcs = sparx5_phylink_mac_select_pcs, .mac_config = sparx5_phylink_mac_config, .mac_link_down = sparx5_phylink_mac_link_down, .mac_link_up = sparx5_phylink_mac_link_up, diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 636dfef24a6c..49b85ca578b0 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -663,7 +663,7 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd, struct gdma_context *gc = gd->gdma_context; struct hw_channel_context *hwc; u32 length = gmi->length; - u32 req_msg_size; + size_t req_msg_size; int err; int i; @@ -674,7 +674,7 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd, return -EINVAL; hwc = gc->hwc.driver_data; - req_msg_size = sizeof(*req) + num_page * sizeof(u64); + req_msg_size = struct_size(req, page_addr_list, num_page); if (req_msg_size > hwc->max_req_msg_size) return -EINVAL; diff --git a/drivers/net/ethernet/microsoft/mana/mana.h b/drivers/net/ethernet/microsoft/mana/mana.h index 9a12607fb511..d36405af9432 100644 --- a/drivers/net/ethernet/microsoft/mana/mana.h +++ b/drivers/net/ethernet/microsoft/mana/mana.h @@ -48,7 +48,15 @@ enum TRI_STATE { #define MAX_PORTS_IN_MANA_DEV 256 -struct mana_stats { +struct mana_stats_rx { + u64 packets; + u64 bytes; + u64 xdp_drop; + u64 xdp_tx; + struct u64_stats_sync syncp; +}; + +struct mana_stats_tx { u64 packets; u64 bytes; struct u64_stats_sync syncp; @@ -76,7 +84,7 @@ struct mana_txq { atomic_t pending_sends; - struct mana_stats stats; + struct mana_stats_tx stats; }; /* skb data and frags dma mappings */ @@ -298,10 +306,11 @@ struct mana_rxq { u32 buf_index; - struct mana_stats stats; + struct mana_stats_rx stats; struct bpf_prog __rcu *bpf_prog; struct xdp_rxq_info xdp_rxq; + struct page *xdp_save_page; /* MUST BE THE LAST MEMBER: * Each receive buffer has an associated mana_recv_buf_oob. diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 498d0f999275..69e791e6abc4 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -136,7 +136,7 @@ int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) bool ipv4 = false, ipv6 = false; struct mana_tx_package pkg = {}; struct netdev_queue *net_txq; - struct mana_stats *tx_stats; + struct mana_stats_tx *tx_stats; struct gdma_queue *gdma_sq; unsigned int csum_type; struct mana_txq *txq; @@ -299,7 +299,8 @@ static void mana_get_stats64(struct net_device *ndev, { struct mana_port_context *apc = netdev_priv(ndev); unsigned int num_queues = apc->num_queues; - struct mana_stats *stats; + struct mana_stats_rx *rx_stats; + struct mana_stats_tx *tx_stats; unsigned int start; u64 packets, bytes; int q; @@ -310,26 +311,26 @@ static void mana_get_stats64(struct net_device *ndev, netdev_stats_to_stats64(st, &ndev->stats); for (q = 0; q < num_queues; q++) { - stats = &apc->rxqs[q]->stats; + rx_stats = &apc->rxqs[q]->stats; do { - start = u64_stats_fetch_begin_irq(&stats->syncp); - packets = stats->packets; - bytes = stats->bytes; - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + start = u64_stats_fetch_begin_irq(&rx_stats->syncp); + packets = rx_stats->packets; + bytes = rx_stats->bytes; + } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); st->rx_packets += packets; st->rx_bytes += bytes; } for (q = 0; q < num_queues; q++) { - stats = &apc->tx_qp[q].txq.stats; + tx_stats = &apc->tx_qp[q].txq.stats; do { - start = u64_stats_fetch_begin_irq(&stats->syncp); - packets = stats->packets; - bytes = stats->bytes; - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + start = u64_stats_fetch_begin_irq(&tx_stats->syncp); + packets = tx_stats->packets; + bytes = tx_stats->bytes; + } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start)); st->tx_packets += packets; st->tx_bytes += bytes; @@ -986,7 +987,7 @@ static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len, static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe, struct mana_rxq *rxq) { - struct mana_stats *rx_stats = &rxq->stats; + struct mana_stats_rx *rx_stats = &rxq->stats; struct net_device *ndev = rxq->ndev; uint pkt_len = cqe->ppi[0].pkt_len; u16 rxq_idx = rxq->rxq_idx; @@ -1007,7 +1008,7 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe, act = mana_run_xdp(ndev, rxq, &xdp, buf_va, pkt_len); if (act != XDP_PASS && act != XDP_TX) - goto drop; + goto drop_xdp; skb = mana_build_skb(buf_va, pkt_len, &xdp); @@ -1034,6 +1035,14 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe, skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3); } + u64_stats_update_begin(&rx_stats->syncp); + rx_stats->packets++; + rx_stats->bytes += pkt_len; + + if (act == XDP_TX) + rx_stats->xdp_tx++; + u64_stats_update_end(&rx_stats->syncp); + if (act == XDP_TX) { skb_set_queue_mapping(skb, rxq_idx); mana_xdp_tx(skb, ndev); @@ -1042,15 +1051,19 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe, napi_gro_receive(napi, skb); + return; + +drop_xdp: u64_stats_update_begin(&rx_stats->syncp); - rx_stats->packets++; - rx_stats->bytes += pkt_len; + rx_stats->xdp_drop++; u64_stats_update_end(&rx_stats->syncp); - return; drop: - free_page((unsigned long)buf_va); + WARN_ON_ONCE(rxq->xdp_save_page); + rxq->xdp_save_page = virt_to_page(buf_va); + ++ndev->stats.rx_dropped; + return; } @@ -1105,7 +1118,13 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, rxbuf_oob = &rxq->rx_oobs[curr]; WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1); - new_page = alloc_page(GFP_ATOMIC); + /* Reuse XDP dropped page if available */ + if (rxq->xdp_save_page) { + new_page = rxq->xdp_save_page; + rxq->xdp_save_page = NULL; + } else { + new_page = alloc_page(GFP_ATOMIC); + } if (new_page) { da = dma_map_page(dev, new_page, XDP_PACKET_HEADROOM, rxq->datasize, @@ -1392,6 +1411,9 @@ static void mana_destroy_rxq(struct mana_port_context *apc, mana_deinit_cq(apc, &rxq->rx_cq); + if (rxq->xdp_save_page) + __free_page(rxq->xdp_save_page); + for (i = 0; i < rxq->num_rx_buf; i++) { rx_oob = &rxq->rx_oobs[i]; diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c index c3c81ae3fafd..e13f2453eabb 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c @@ -23,7 +23,7 @@ static int mana_get_sset_count(struct net_device *ndev, int stringset) if (stringset != ETH_SS_STATS) return -EINVAL; - return ARRAY_SIZE(mana_eth_stats) + num_queues * 4; + return ARRAY_SIZE(mana_eth_stats) + num_queues * 6; } static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) @@ -46,6 +46,10 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) p += ETH_GSTRING_LEN; sprintf(p, "rx_%d_bytes", i); p += ETH_GSTRING_LEN; + sprintf(p, "rx_%d_xdp_drop", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_%d_xdp_tx", i); + p += ETH_GSTRING_LEN; } for (i = 0; i < num_queues; i++) { @@ -62,9 +66,12 @@ static void mana_get_ethtool_stats(struct net_device *ndev, struct mana_port_context *apc = netdev_priv(ndev); unsigned int num_queues = apc->num_queues; void *eth_stats = &apc->eth_stats; - struct mana_stats *stats; + struct mana_stats_rx *rx_stats; + struct mana_stats_tx *tx_stats; unsigned int start; u64 packets, bytes; + u64 xdp_drop; + u64 xdp_tx; int q, i = 0; if (!apc->port_is_up) @@ -74,26 +81,30 @@ static void mana_get_ethtool_stats(struct net_device *ndev, data[i++] = *(u64 *)(eth_stats + mana_eth_stats[q].offset); for (q = 0; q < num_queues; q++) { - stats = &apc->rxqs[q]->stats; + rx_stats = &apc->rxqs[q]->stats; do { - start = u64_stats_fetch_begin_irq(&stats->syncp); - packets = stats->packets; - bytes = stats->bytes; - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + start = u64_stats_fetch_begin_irq(&rx_stats->syncp); + packets = rx_stats->packets; + bytes = rx_stats->bytes; + xdp_drop = rx_stats->xdp_drop; + xdp_tx = rx_stats->xdp_tx; + } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); data[i++] = packets; data[i++] = bytes; + data[i++] = xdp_drop; + data[i++] = xdp_tx; } for (q = 0; q < num_queues; q++) { - stats = &apc->tx_qp[q].txq.stats; + tx_stats = &apc->tx_qp[q].txq.stats; do { - start = u64_stats_fetch_begin_irq(&stats->syncp); - packets = stats->packets; - bytes = stats->bytes; - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + start = u64_stats_fetch_begin_irq(&tx_stats->syncp); + packets = tx_stats->packets; + bytes = tx_stats->bytes; + } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start)); data[i++] = packets; data[i++] = bytes; diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index dfb4468fe287..ce865e619568 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -356,7 +356,7 @@ __nfp_tun_add_route_to_cache(struct list_head *route_list, return 0; } - entry = kmalloc(sizeof(*entry) + add_len, GFP_ATOMIC); + entry = kmalloc(struct_size(entry, ip_add, add_len), GFP_ATOMIC); if (!entry) { spin_unlock_bh(list_lock); return -ENOMEM; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index 3d61a8cb60b0..50007cc5b580 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -1,8 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ /* Copyright (C) 2015-2018 Netronome Systems, Inc. */ -/* - * nfp_net_ctrl.h +/* nfp_net_ctrl.h * Netronome network device driver: Control BAR layout * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> * Jason McMullan <jason.mcmullan@netronome.com> @@ -15,30 +14,24 @@ #include <linux/types.h> -/** - * Configuration BAR size. +/* Configuration BAR size. * * The configuration BAR is 8K in size, but due to * THB-350, 32k needs to be reserved. */ #define NFP_NET_CFG_BAR_SZ (32 * 1024) -/** - * Offset in Freelist buffer where packet starts on RX - */ +/* Offset in Freelist buffer where packet starts on RX */ #define NFP_NET_RX_OFFSET 32 -/** - * LSO parameters +/* LSO parameters * %NFP_NET_LSO_MAX_HDR_SZ: Maximum header size supported for LSO frames * %NFP_NET_LSO_MAX_SEGS: Maximum number of segments LSO frame can produce */ #define NFP_NET_LSO_MAX_HDR_SZ 255 #define NFP_NET_LSO_MAX_SEGS 64 -/** - * Prepend field types - */ +/* Prepend field types */ #define NFP_NET_META_FIELD_SIZE 4 #define NFP_NET_META_HASH 1 /* next field carries hash type */ #define NFP_NET_META_MARK 2 @@ -49,9 +42,7 @@ #define NFP_META_PORT_ID_CTRL ~0U -/** - * Hash type pre-pended when a RSS hash was computed - */ +/* Hash type pre-pended when a RSS hash was computed */ #define NFP_NET_RSS_NONE 0 #define NFP_NET_RSS_IPV4 1 #define NFP_NET_RSS_IPV6 2 @@ -63,16 +54,14 @@ #define NFP_NET_RSS_IPV6_UDP 8 #define NFP_NET_RSS_IPV6_EX_UDP 9 -/** - * Ring counts +/* Ring counts * %NFP_NET_TXR_MAX: Maximum number of TX rings * %NFP_NET_RXR_MAX: Maximum number of RX rings */ #define NFP_NET_TXR_MAX 64 #define NFP_NET_RXR_MAX 64 -/** - * Read/Write config words (0x0000 - 0x002c) +/* Read/Write config words (0x0000 - 0x002c) * %NFP_NET_CFG_CTRL: Global control * %NFP_NET_CFG_UPDATE: Indicate which fields are updated * %NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings @@ -147,8 +136,7 @@ #define NFP_NET_CFG_LSC 0x0020 #define NFP_NET_CFG_MACADDR 0x0024 -/** - * Read-only words (0x0030 - 0x0050): +/* Read-only words (0x0030 - 0x0050): * %NFP_NET_CFG_VERSION: Firmware version number * %NFP_NET_CFG_STS: Status * %NFP_NET_CFG_CAP: Capabilities (same bits as %NFP_NET_CFG_CTRL) @@ -193,36 +181,31 @@ #define NFP_NET_CFG_START_TXQ 0x0048 #define NFP_NET_CFG_START_RXQ 0x004c -/** - * Prepend configuration +/* Prepend configuration */ #define NFP_NET_CFG_RX_OFFSET 0x0050 #define NFP_NET_CFG_RX_OFFSET_DYNAMIC 0 /* Prepend mode */ -/** - * RSS capabilities +/* RSS capabilities * %NFP_NET_CFG_RSS_CAP_HFUNC: supported hash functions (same bits as * %NFP_NET_CFG_RSS_HFUNC) */ #define NFP_NET_CFG_RSS_CAP 0x0054 #define NFP_NET_CFG_RSS_CAP_HFUNC 0xff000000 -/** - * TLV area start +/* TLV area start * %NFP_NET_CFG_TLV_BASE: start anchor of the TLV area */ #define NFP_NET_CFG_TLV_BASE 0x0058 -/** - * VXLAN/UDP encap configuration +/* VXLAN/UDP encap configuration * %NFP_NET_CFG_VXLAN_PORT: Base address of table of tunnels' UDP dst ports * %NFP_NET_CFG_VXLAN_SZ: Size of the UDP port table in bytes */ #define NFP_NET_CFG_VXLAN_PORT 0x0060 #define NFP_NET_CFG_VXLAN_SZ 0x0008 -/** - * BPF section +/* BPF section * %NFP_NET_CFG_BPF_ABI: BPF ABI version * %NFP_NET_CFG_BPF_CAP: BPF capabilities * %NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes @@ -247,14 +230,12 @@ #define NFP_NET_CFG_BPF_CFG_MASK 7ULL #define NFP_NET_CFG_BPF_ADDR_MASK (~NFP_NET_CFG_BPF_CFG_MASK) -/** - * 40B reserved for future use (0x0098 - 0x00c0) +/* 40B reserved for future use (0x0098 - 0x00c0) */ #define NFP_NET_CFG_RESERVED 0x0098 #define NFP_NET_CFG_RESERVED_SZ 0x0028 -/** - * RSS configuration (0x0100 - 0x01ac): +/* RSS configuration (0x0100 - 0x01ac): * Used only when NFP_NET_CFG_CTRL_RSS is enabled * %NFP_NET_CFG_RSS_CFG: RSS configuration word * %NFP_NET_CFG_RSS_KEY: RSS "secret" key @@ -281,8 +262,7 @@ NFP_NET_CFG_RSS_KEY_SZ) #define NFP_NET_CFG_RSS_ITBL_SZ 0x80 -/** - * TX ring configuration (0x200 - 0x800) +/* TX ring configuration (0x200 - 0x800) * %NFP_NET_CFG_TXR_BASE: Base offset for TX ring configuration * %NFP_NET_CFG_TXR_ADDR: Per TX ring DMA address (8B entries) * %NFP_NET_CFG_TXR_WB_ADDR: Per TX ring write back DMA address (8B entries) @@ -301,8 +281,7 @@ #define NFP_NET_CFG_TXR_IRQ_MOD(_x) (NFP_NET_CFG_TXR_BASE + 0x500 + \ ((_x) * 0x4)) -/** - * RX ring configuration (0x0800 - 0x0c00) +/* RX ring configuration (0x0800 - 0x0c00) * %NFP_NET_CFG_RXR_BASE: Base offset for RX ring configuration * %NFP_NET_CFG_RXR_ADDR: Per RX ring DMA address (8B entries) * %NFP_NET_CFG_RXR_SZ: Per RX ring ring size (1B entries) @@ -318,8 +297,7 @@ #define NFP_NET_CFG_RXR_IRQ_MOD(_x) (NFP_NET_CFG_RXR_BASE + 0x300 + \ ((_x) * 0x4)) -/** - * Interrupt Control/Cause registers (0x0c00 - 0x0d00) +/* Interrupt Control/Cause registers (0x0c00 - 0x0d00) * These registers are only used when MSI-X auto-masking is not * enabled (%NFP_NET_CFG_CTRL_MSIXAUTO not set). The array is index * by MSI-X entry and are 1B in size. If an entry is zero, the @@ -334,8 +312,7 @@ #define NFP_NET_CFG_ICR_RXTX 0x1 #define NFP_NET_CFG_ICR_LSC 0x2 -/** - * General device stats (0x0d00 - 0x0d90) +/* General device stats (0x0d00 - 0x0d90) * all counters are 64bit. */ #define NFP_NET_CFG_STATS_BASE 0x0d00 @@ -368,8 +345,7 @@ #define NFP_NET_CFG_STATS_APP3_FRAMES (NFP_NET_CFG_STATS_BASE + 0xc0) #define NFP_NET_CFG_STATS_APP3_BYTES (NFP_NET_CFG_STATS_BASE + 0xc8) -/** - * Per ring stats (0x1000 - 0x1800) +/* Per ring stats (0x1000 - 0x1800) * options, 64bit per entry * %NFP_NET_CFG_TXR_STATS: TX ring statistics (Packet and Byte count) * %NFP_NET_CFG_RXR_STATS: RX ring statistics (Packet and Byte count) @@ -381,8 +357,7 @@ #define NFP_NET_CFG_RXR_STATS(_x) (NFP_NET_CFG_RXR_STATS_BASE + \ ((_x) * 0x10)) -/** - * General use mailbox area (0x1800 - 0x19ff) +/* General use mailbox area (0x1800 - 0x19ff) * 4B used for update command and 4B return code * followed by a max of 504B of variable length value */ @@ -399,8 +374,7 @@ #define NFP_NET_CFG_MBOX_CMD_PCI_DSCP_PRIOMAP_SET 5 #define NFP_NET_CFG_MBOX_CMD_TLV_CMSG 6 -/** - * VLAN filtering using general use mailbox +/* VLAN filtering using general use mailbox * %NFP_NET_CFG_VLAN_FILTER: Base address of VLAN filter mailbox * %NFP_NET_CFG_VLAN_FILTER_VID: VLAN ID to filter * %NFP_NET_CFG_VLAN_FILTER_PROTO: VLAN proto to filter @@ -411,8 +385,7 @@ #define NFP_NET_CFG_VLAN_FILTER_PROTO (NFP_NET_CFG_VLAN_FILTER + 2) #define NFP_NET_CFG_VLAN_FILTER_SZ 0x0004 -/** - * TLV capabilities +/* TLV capabilities * %NFP_NET_CFG_TLV_TYPE: Offset of type within the TLV * %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV * %NFP_NET_CFG_TLV_LENGTH: Offset of length within the TLV @@ -438,8 +411,7 @@ #define NFP_NET_CFG_TLV_HEADER_TYPE 0x7fff0000 #define NFP_NET_CFG_TLV_HEADER_LENGTH 0x0000ffff -/** - * Capability TLV types +/* Capability TLV types * * %NFP_NET_CFG_TLV_TYPE_UNKNOWN: * Special TLV type to catch bugs, should never be encountered. Drivers should @@ -512,8 +484,7 @@ struct device; -/** - * struct nfp_net_tlv_caps - parsed control BAR TLV capabilities +/* struct nfp_net_tlv_caps - parsed control BAR TLV capabilities * @me_freq_mhz: ME clock_freq (MHz) * @mbox_off: vNIC mailbox area offset * @mbox_len: vNIC mailbox area length diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h index a3db0cbf6425..786be58a907e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h @@ -4,8 +4,7 @@ #ifndef _NFP_NET_SRIOV_H_ #define _NFP_NET_SRIOV_H_ -/** - * SRIOV VF configuration. +/* SRIOV VF configuration. * The configuration memory begins with a mailbox region for communication with * the firmware followed by individual VF entries. */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h index ae4da189d955..df316b9e891d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_port.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h @@ -132,8 +132,7 @@ void nfp_devlink_port_unregister(struct nfp_port *port); void nfp_devlink_port_type_eth_set(struct nfp_port *port); void nfp_devlink_port_type_clear(struct nfp_port *port); -/** - * Mac stats (0x0000 - 0x0200) +/* Mac stats (0x0000 - 0x0200) * all counters are 64bit. */ #define NFP_MAC_STATS_BASE 0x0000 diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c index 10e7d8b21c46..730fea214b8a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c @@ -513,7 +513,7 @@ nfp_nsp_command_buf_dma_sg(struct nfp_nsp *nsp, dma_size = BIT_ULL(dma_order); nseg = DIV_ROUND_UP(max_size, chunk_size); - chunks = kzalloc(array_size(sizeof(*chunks), nseg), GFP_KERNEL); + chunks = kcalloc(nseg, sizeof(*chunks), GFP_KERNEL); if (!chunks) return -ENOMEM; diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h index 5e25411ff02f..602f4d45d529 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic.h +++ b/drivers/net/ethernet/pensando/ionic/ionic.h @@ -18,7 +18,7 @@ struct ionic_lif; #define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_PF 0x1002 #define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_VF 0x1003 -#define DEVCMD_TIMEOUT 10 +#define DEVCMD_TIMEOUT 5 #define IONIC_ADMINQ_TIME_SLICE msecs_to_jiffies(100) #define IONIC_PHC_UPDATE_NS 10000000000 /* 10s in nanoseconds */ @@ -78,6 +78,9 @@ void ionic_adminq_netdev_err_print(struct ionic_lif *lif, u8 opcode, u8 status, int err); int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_wait); +int ionic_dev_cmd_wait_nomsg(struct ionic *ionic, unsigned long max_wait); +void ionic_dev_cmd_dev_err_print(struct ionic *ionic, u8 opcode, u8 status, + int err); int ionic_set_dma_mask(struct ionic *ionic); int ionic_setup(struct ionic *ionic); @@ -89,4 +92,6 @@ int ionic_port_identify(struct ionic *ionic); int ionic_port_init(struct ionic *ionic); int ionic_port_reset(struct ionic *ionic); +const char *ionic_vf_attr_to_str(enum ionic_vf_attr attr); + #endif /* _IONIC_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index 7e296fa71b36..6ffc62c41165 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -109,8 +109,8 @@ void ionic_bus_unmap_dbpage(struct ionic *ionic, void __iomem *page) static void ionic_vf_dealloc_locked(struct ionic *ionic) { + struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_STATSADDR }; struct ionic_vf *v; - dma_addr_t dma = 0; int i; if (!ionic->vfs) @@ -120,9 +120,8 @@ static void ionic_vf_dealloc_locked(struct ionic *ionic) v = &ionic->vfs[i]; if (v->stats_pa) { - (void)ionic_set_vf_config(ionic, i, - IONIC_VF_ATTR_STATSADDR, - (u8 *)&dma); + vfc.stats_pa = 0; + (void)ionic_set_vf_config(ionic, i, &vfc); dma_unmap_single(ionic->dev, v->stats_pa, sizeof(v->stats), DMA_FROM_DEVICE); v->stats_pa = 0; @@ -143,6 +142,7 @@ static void ionic_vf_dealloc(struct ionic *ionic) static int ionic_vf_alloc(struct ionic *ionic, int num_vfs) { + struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_STATSADDR }; struct ionic_vf *v; int err = 0; int i; @@ -166,9 +166,10 @@ static int ionic_vf_alloc(struct ionic *ionic, int num_vfs) } ionic->num_vfs++; + /* ignore failures from older FW, we just won't get stats */ - (void)ionic_set_vf_config(ionic, i, IONIC_VF_ATTR_STATSADDR, - (u8 *)&v->stats_pa); + vfc.stats_pa = cpu_to_le64(v->stats_pa); + (void)ionic_set_vf_config(ionic, i, &vfc); } out: @@ -331,6 +332,9 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_deregister_lifs; } + mod_timer(&ionic->watchdog_timer, + round_jiffies(jiffies + ionic->watchdog_period)); + return 0; err_out_deregister_lifs: @@ -348,7 +352,6 @@ err_out_port_reset: err_out_reset: ionic_reset(ionic); err_out_teardown: - del_timer_sync(&ionic->watchdog_timer); pci_clear_master(pdev); /* Don't fail the probe for these errors, keep * the hw interface around for inspection diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c index d57e80d44c9d..52a1b5cfd8e7 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -33,7 +33,8 @@ static void ionic_watchdog_cb(struct timer_list *t) !test_bit(IONIC_LIF_F_FW_RESET, lif->state)) ionic_link_status_check_request(lif, CAN_NOT_SLEEP); - if (test_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state)) { + if (test_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state) && + !test_bit(IONIC_LIF_F_FW_RESET, lif->state)) { work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) { netdev_err(lif->netdev, "rxmode change dropped\n"); @@ -46,6 +47,24 @@ static void ionic_watchdog_cb(struct timer_list *t) } } +static void ionic_watchdog_init(struct ionic *ionic) +{ + struct ionic_dev *idev = &ionic->idev; + + timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0); + ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ; + + /* set times to ensure the first check will proceed */ + atomic_long_set(&idev->last_check_time, jiffies - 2 * HZ); + idev->last_hb_time = jiffies - 2 * ionic->watchdog_period; + /* init as ready, so no transition if the first check succeeds */ + idev->last_fw_hb = 0; + idev->fw_hb_ready = true; + idev->fw_status_ready = true; + idev->fw_generation = IONIC_FW_STS_F_GENERATION & + ioread8(&idev->dev_info_regs->fw_status); +} + void ionic_init_devinfo(struct ionic *ionic) { struct ionic_dev *idev = &ionic->idev; @@ -109,21 +128,7 @@ int ionic_dev_setup(struct ionic *ionic) return -EFAULT; } - timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0); - ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ; - - /* set times to ensure the first check will proceed */ - atomic_long_set(&idev->last_check_time, jiffies - 2 * HZ); - idev->last_hb_time = jiffies - 2 * ionic->watchdog_period; - /* init as ready, so no transition if the first check succeeds */ - idev->last_fw_hb = 0; - idev->fw_hb_ready = true; - idev->fw_status_ready = true; - idev->fw_generation = IONIC_FW_STS_F_GENERATION & - ioread8(&idev->dev_info_regs->fw_status); - - mod_timer(&ionic->watchdog_timer, - round_jiffies(jiffies + ionic->watchdog_period)); + ionic_watchdog_init(ionic); idev->db_pages = bar->vaddr; idev->phy_db_pages = bar->bus_addr; @@ -132,10 +137,21 @@ int ionic_dev_setup(struct ionic *ionic) } /* Devcmd Interface */ +bool ionic_is_fw_running(struct ionic_dev *idev) +{ + u8 fw_status = ioread8(&idev->dev_info_regs->fw_status); + + /* firmware is useful only if the running bit is set and + * fw_status != 0xff (bad PCI read) + */ + return (fw_status != 0xff) && (fw_status & IONIC_FW_STS_F_RUNNING); +} + int ionic_heartbeat_check(struct ionic *ionic) { - struct ionic_dev *idev = &ionic->idev; unsigned long check_time, last_check_time; + struct ionic_dev *idev = &ionic->idev; + struct ionic_lif *lif = ionic->lif; bool fw_status_ready = true; bool fw_hb_ready; u8 fw_generation; @@ -155,13 +171,10 @@ do_check_time: goto do_check_time; } - /* firmware is useful only if the running bit is set and - * fw_status != 0xff (bad PCI read) - * If fw_status is not ready don't bother with the generation. - */ fw_status = ioread8(&idev->dev_info_regs->fw_status); - if (fw_status == 0xff || !(fw_status & IONIC_FW_STS_F_RUNNING)) { + /* If fw_status is not ready don't bother with the generation */ + if (!ionic_is_fw_running(idev)) { fw_status_ready = false; } else { fw_generation = fw_status & IONIC_FW_STS_F_GENERATION; @@ -176,31 +189,41 @@ do_check_time: * the down, the next watchdog will see the fw is up * and the generation value stable, so will trigger * the fw-up activity. + * + * If we had already moved to FW_RESET from a RESET event, + * it is possible that we never saw the fw_status go to 0, + * so we fake the current idev->fw_status_ready here to + * force the transition and get FW up again. */ - fw_status_ready = false; + if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) + idev->fw_status_ready = false; /* go to running */ + else + fw_status_ready = false; /* go to down */ } } /* is this a transition? */ if (fw_status_ready != idev->fw_status_ready) { - struct ionic_lif *lif = ionic->lif; bool trigger = false; - idev->fw_status_ready = fw_status_ready; - - if (!fw_status_ready) { - dev_info(ionic->dev, "FW stopped %u\n", fw_status); - if (lif && !test_bit(IONIC_LIF_F_FW_RESET, lif->state)) - trigger = true; - } else { - dev_info(ionic->dev, "FW running %u\n", fw_status); - if (lif && test_bit(IONIC_LIF_F_FW_RESET, lif->state)) - trigger = true; + if (!fw_status_ready && lif && + !test_bit(IONIC_LIF_F_FW_RESET, lif->state) && + !test_and_set_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) { + dev_info(ionic->dev, "FW stopped 0x%02x\n", fw_status); + trigger = true; + + } else if (fw_status_ready && lif && + test_bit(IONIC_LIF_F_FW_RESET, lif->state) && + !test_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) { + dev_info(ionic->dev, "FW running 0x%02x\n", fw_status); + trigger = true; } if (trigger) { struct ionic_deferred_work *work; + idev->fw_status_ready = fw_status_ready; + work = kzalloc(sizeof(*work), GFP_ATOMIC); if (work) { work->type = IONIC_DW_TYPE_LIF_RESET; @@ -210,12 +233,14 @@ do_check_time: } } - if (!fw_status_ready) + if (!idev->fw_status_ready) return -ENXIO; - /* wait at least one watchdog period since the last heartbeat */ + /* Because of some variability in the actual FW heartbeat, we + * wait longer than the DEVCMD_TIMEOUT before checking again. + */ last_check_time = idev->last_hb_time; - if (time_before(check_time, last_check_time + ionic->watchdog_period)) + if (time_before(check_time, last_check_time + DEVCMD_TIMEOUT * 2 * HZ)) return 0; fw_hb = ioread32(&idev->dev_info_regs->fw_heartbeat); @@ -392,60 +417,63 @@ void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type) } /* VF commands */ -int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data) +int ionic_set_vf_config(struct ionic *ionic, int vf, + struct ionic_vf_setattr_cmd *vfc) { union ionic_dev_cmd cmd = { .vf_setattr.opcode = IONIC_CMD_VF_SETATTR, - .vf_setattr.attr = attr, + .vf_setattr.attr = vfc->attr, .vf_setattr.vf_index = cpu_to_le16(vf), }; int err; + memcpy(cmd.vf_setattr.pad, vfc->pad, sizeof(vfc->pad)); + + mutex_lock(&ionic->dev_cmd_lock); + ionic_dev_cmd_go(&ionic->idev, &cmd); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + mutex_unlock(&ionic->dev_cmd_lock); + + return err; +} + +int ionic_dev_cmd_vf_getattr(struct ionic *ionic, int vf, u8 attr, + struct ionic_vf_getattr_comp *comp) +{ + union ionic_dev_cmd cmd = { + .vf_getattr.opcode = IONIC_CMD_VF_GETATTR, + .vf_getattr.attr = attr, + .vf_getattr.vf_index = cpu_to_le16(vf), + }; + int err; + + if (vf >= ionic->num_vfs) + return -EINVAL; + switch (attr) { case IONIC_VF_ATTR_SPOOFCHK: - cmd.vf_setattr.spoofchk = *data; - dev_dbg(ionic->dev, "%s: vf %d spoof %d\n", - __func__, vf, *data); - break; case IONIC_VF_ATTR_TRUST: - cmd.vf_setattr.trust = *data; - dev_dbg(ionic->dev, "%s: vf %d trust %d\n", - __func__, vf, *data); - break; case IONIC_VF_ATTR_LINKSTATE: - cmd.vf_setattr.linkstate = *data; - dev_dbg(ionic->dev, "%s: vf %d linkstate %d\n", - __func__, vf, *data); - break; case IONIC_VF_ATTR_MAC: - ether_addr_copy(cmd.vf_setattr.macaddr, data); - dev_dbg(ionic->dev, "%s: vf %d macaddr %pM\n", - __func__, vf, data); - break; case IONIC_VF_ATTR_VLAN: - cmd.vf_setattr.vlanid = cpu_to_le16(*(u16 *)data); - dev_dbg(ionic->dev, "%s: vf %d vlan %d\n", - __func__, vf, *(u16 *)data); - break; case IONIC_VF_ATTR_RATE: - cmd.vf_setattr.maxrate = cpu_to_le32(*(u32 *)data); - dev_dbg(ionic->dev, "%s: vf %d maxrate %d\n", - __func__, vf, *(u32 *)data); break; case IONIC_VF_ATTR_STATSADDR: - cmd.vf_setattr.stats_pa = cpu_to_le64(*(u64 *)data); - dev_dbg(ionic->dev, "%s: vf %d stats_pa 0x%08llx\n", - __func__, vf, *(u64 *)data); - break; default: return -EINVAL; } mutex_lock(&ionic->dev_cmd_lock); ionic_dev_cmd_go(&ionic->idev, &cmd); - err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + err = ionic_dev_cmd_wait_nomsg(ionic, DEVCMD_TIMEOUT); + memcpy_fromio(comp, &ionic->idev.dev_cmd_regs->comp.vf_getattr, + sizeof(*comp)); mutex_unlock(&ionic->dev_cmd_lock); + if (err && comp->status != IONIC_RC_ENOSUPP) + ionic_dev_cmd_dev_err_print(ionic, cmd.vf_getattr.opcode, + comp->status, err); + return err; } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index e5acf3bd62b2..563c302eb033 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -318,7 +318,10 @@ void ionic_dev_cmd_port_autoneg(struct ionic_dev *idev, u8 an_enable); void ionic_dev_cmd_port_fec(struct ionic_dev *idev, u8 fec_type); void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type); -int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data); +int ionic_set_vf_config(struct ionic *ionic, int vf, + struct ionic_vf_setattr_cmd *vfc); +int ionic_dev_cmd_vf_getattr(struct ionic *ionic, int vf, u8 attr, + struct ionic_vf_getattr_comp *comp); void ionic_dev_cmd_queue_identify(struct ionic_dev *idev, u16 lif_type, u8 qtype, u8 qver); void ionic_dev_cmd_lif_identify(struct ionic_dev *idev, u8 type, u8 ver); @@ -353,5 +356,6 @@ void ionic_q_rewind(struct ionic_queue *q, struct ionic_desc_info *start); void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info, unsigned int stop_index); int ionic_heartbeat_check(struct ionic *ionic); +bool ionic_is_fw_running(struct ionic_dev *idev); #endif /* _IONIC_DEV_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 2ff7be17e5af..542e395fb037 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1112,12 +1112,17 @@ static bool ionic_notifyq_service(struct ionic_cq *cq, ionic_link_status_check_request(lif, CAN_NOT_SLEEP); break; case IONIC_EVENT_RESET: - work = kzalloc(sizeof(*work), GFP_ATOMIC); - if (!work) { - netdev_err(lif->netdev, "Reset event dropped\n"); - } else { - work->type = IONIC_DW_TYPE_LIF_RESET; - ionic_lif_deferred_enqueue(&lif->deferred, work); + if (lif->ionic->idev.fw_status_ready && + !test_bit(IONIC_LIF_F_FW_RESET, lif->state) && + !test_and_set_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) { + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) { + netdev_err(lif->netdev, "Reset event dropped\n"); + clear_bit(IONIC_LIF_F_FW_STOPPING, lif->state); + } else { + work->type = IONIC_DW_TYPE_LIF_RESET; + ionic_lif_deferred_enqueue(&lif->deferred, work); + } } break; default: @@ -1782,7 +1787,7 @@ static void ionic_lif_quiesce(struct ionic_lif *lif) err = ionic_adminq_post_wait(lif, &ctx); if (err) - netdev_err(lif->netdev, "lif quiesce failed %d\n", err); + netdev_dbg(lif->netdev, "lif quiesce failed %d\n", err); } static void ionic_txrx_disable(struct ionic_lif *lif) @@ -2152,6 +2157,76 @@ static int ionic_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd } } +static int ionic_update_cached_vf_config(struct ionic *ionic, int vf) +{ + struct ionic_vf_getattr_comp comp = { 0 }; + int err; + u8 attr; + + attr = IONIC_VF_ATTR_VLAN; + err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp); + if (err && comp.status != IONIC_RC_ENOSUPP) + goto err_out; + if (!err) + ionic->vfs[vf].vlanid = comp.vlanid; + + attr = IONIC_VF_ATTR_SPOOFCHK; + err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp); + if (err && comp.status != IONIC_RC_ENOSUPP) + goto err_out; + if (!err) + ionic->vfs[vf].spoofchk = comp.spoofchk; + + attr = IONIC_VF_ATTR_LINKSTATE; + err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp); + if (err && comp.status != IONIC_RC_ENOSUPP) + goto err_out; + if (!err) { + switch (comp.linkstate) { + case IONIC_VF_LINK_STATUS_UP: + ionic->vfs[vf].linkstate = IFLA_VF_LINK_STATE_ENABLE; + break; + case IONIC_VF_LINK_STATUS_DOWN: + ionic->vfs[vf].linkstate = IFLA_VF_LINK_STATE_DISABLE; + break; + case IONIC_VF_LINK_STATUS_AUTO: + ionic->vfs[vf].linkstate = IFLA_VF_LINK_STATE_AUTO; + break; + default: + dev_warn(ionic->dev, "Unexpected link state %u\n", comp.linkstate); + break; + } + } + + attr = IONIC_VF_ATTR_RATE; + err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp); + if (err && comp.status != IONIC_RC_ENOSUPP) + goto err_out; + if (!err) + ionic->vfs[vf].maxrate = comp.maxrate; + + attr = IONIC_VF_ATTR_TRUST; + err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp); + if (err && comp.status != IONIC_RC_ENOSUPP) + goto err_out; + if (!err) + ionic->vfs[vf].trusted = comp.trust; + + attr = IONIC_VF_ATTR_MAC; + err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp); + if (err && comp.status != IONIC_RC_ENOSUPP) + goto err_out; + if (!err) + ether_addr_copy(ionic->vfs[vf].macaddr, comp.macaddr); + +err_out: + if (err) + dev_err(ionic->dev, "Failed to get %s for VF %d\n", + ionic_vf_attr_to_str(attr), vf); + + return err; +} + static int ionic_get_vf_config(struct net_device *netdev, int vf, struct ifla_vf_info *ivf) { @@ -2167,14 +2242,18 @@ static int ionic_get_vf_config(struct net_device *netdev, if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { ret = -EINVAL; } else { - ivf->vf = vf; - ivf->vlan = le16_to_cpu(ionic->vfs[vf].vlanid); - ivf->qos = 0; - ivf->spoofchk = ionic->vfs[vf].spoofchk; - ivf->linkstate = ionic->vfs[vf].linkstate; - ivf->max_tx_rate = le32_to_cpu(ionic->vfs[vf].maxrate); - ivf->trusted = ionic->vfs[vf].trusted; - ether_addr_copy(ivf->mac, ionic->vfs[vf].macaddr); + ivf->vf = vf; + ivf->qos = 0; + + ret = ionic_update_cached_vf_config(ionic, vf); + if (!ret) { + ivf->vlan = le16_to_cpu(ionic->vfs[vf].vlanid); + ivf->spoofchk = ionic->vfs[vf].spoofchk; + ivf->linkstate = ionic->vfs[vf].linkstate; + ivf->max_tx_rate = le32_to_cpu(ionic->vfs[vf].maxrate); + ivf->trusted = ionic->vfs[vf].trusted; + ether_addr_copy(ivf->mac, ionic->vfs[vf].macaddr); + } } up_read(&ionic->vf_op_lock); @@ -2220,6 +2299,7 @@ static int ionic_get_vf_stats(struct net_device *netdev, int vf, static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) { + struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_MAC }; struct ionic_lif *lif = netdev_priv(netdev); struct ionic *ionic = lif->ionic; int ret; @@ -2235,7 +2315,11 @@ static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { ret = -EINVAL; } else { - ret = ionic_set_vf_config(ionic, vf, IONIC_VF_ATTR_MAC, mac); + ether_addr_copy(vfc.macaddr, mac); + dev_dbg(ionic->dev, "%s: vf %d macaddr %pM\n", + __func__, vf, vfc.macaddr); + + ret = ionic_set_vf_config(ionic, vf, &vfc); if (!ret) ether_addr_copy(ionic->vfs[vf].macaddr, mac); } @@ -2247,6 +2331,7 @@ static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos, __be16 proto) { + struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_VLAN }; struct ionic_lif *lif = netdev_priv(netdev); struct ionic *ionic = lif->ionic; int ret; @@ -2269,8 +2354,11 @@ static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { ret = -EINVAL; } else { - ret = ionic_set_vf_config(ionic, vf, - IONIC_VF_ATTR_VLAN, (u8 *)&vlan); + vfc.vlanid = cpu_to_le16(vlan); + dev_dbg(ionic->dev, "%s: vf %d vlan %d\n", + __func__, vf, le16_to_cpu(vfc.vlanid)); + + ret = ionic_set_vf_config(ionic, vf, &vfc); if (!ret) ionic->vfs[vf].vlanid = cpu_to_le16(vlan); } @@ -2282,6 +2370,7 @@ static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, static int ionic_set_vf_rate(struct net_device *netdev, int vf, int tx_min, int tx_max) { + struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_RATE }; struct ionic_lif *lif = netdev_priv(netdev); struct ionic *ionic = lif->ionic; int ret; @@ -2298,8 +2387,11 @@ static int ionic_set_vf_rate(struct net_device *netdev, int vf, if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { ret = -EINVAL; } else { - ret = ionic_set_vf_config(ionic, vf, - IONIC_VF_ATTR_RATE, (u8 *)&tx_max); + vfc.maxrate = cpu_to_le32(tx_max); + dev_dbg(ionic->dev, "%s: vf %d maxrate %d\n", + __func__, vf, le32_to_cpu(vfc.maxrate)); + + ret = ionic_set_vf_config(ionic, vf, &vfc); if (!ret) lif->ionic->vfs[vf].maxrate = cpu_to_le32(tx_max); } @@ -2310,9 +2402,9 @@ static int ionic_set_vf_rate(struct net_device *netdev, int vf, static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set) { + struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_SPOOFCHK }; struct ionic_lif *lif = netdev_priv(netdev); struct ionic *ionic = lif->ionic; - u8 data = set; /* convert to u8 for config */ int ret; if (!netif_device_present(netdev)) @@ -2323,10 +2415,13 @@ static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set) if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { ret = -EINVAL; } else { - ret = ionic_set_vf_config(ionic, vf, - IONIC_VF_ATTR_SPOOFCHK, &data); + vfc.spoofchk = set; + dev_dbg(ionic->dev, "%s: vf %d spoof %d\n", + __func__, vf, vfc.spoofchk); + + ret = ionic_set_vf_config(ionic, vf, &vfc); if (!ret) - ionic->vfs[vf].spoofchk = data; + ionic->vfs[vf].spoofchk = set; } up_write(&ionic->vf_op_lock); @@ -2335,9 +2430,9 @@ static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set) static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set) { + struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_TRUST }; struct ionic_lif *lif = netdev_priv(netdev); struct ionic *ionic = lif->ionic; - u8 data = set; /* convert to u8 for config */ int ret; if (!netif_device_present(netdev)) @@ -2348,10 +2443,13 @@ static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set) if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { ret = -EINVAL; } else { - ret = ionic_set_vf_config(ionic, vf, - IONIC_VF_ATTR_TRUST, &data); + vfc.trust = set; + dev_dbg(ionic->dev, "%s: vf %d trust %d\n", + __func__, vf, vfc.trust); + + ret = ionic_set_vf_config(ionic, vf, &vfc); if (!ret) - ionic->vfs[vf].trusted = data; + ionic->vfs[vf].trusted = set; } up_write(&ionic->vf_op_lock); @@ -2360,20 +2458,21 @@ static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set) static int ionic_set_vf_link_state(struct net_device *netdev, int vf, int set) { + struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_LINKSTATE }; struct ionic_lif *lif = netdev_priv(netdev); struct ionic *ionic = lif->ionic; - u8 data; + u8 vfls; int ret; switch (set) { case IFLA_VF_LINK_STATE_ENABLE: - data = IONIC_VF_LINK_STATUS_UP; + vfls = IONIC_VF_LINK_STATUS_UP; break; case IFLA_VF_LINK_STATE_DISABLE: - data = IONIC_VF_LINK_STATUS_DOWN; + vfls = IONIC_VF_LINK_STATUS_DOWN; break; case IFLA_VF_LINK_STATE_AUTO: - data = IONIC_VF_LINK_STATUS_AUTO; + vfls = IONIC_VF_LINK_STATUS_AUTO; break; default: return -EINVAL; @@ -2387,8 +2486,11 @@ static int ionic_set_vf_link_state(struct net_device *netdev, int vf, int set) if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { ret = -EINVAL; } else { - ret = ionic_set_vf_config(ionic, vf, - IONIC_VF_ATTR_LINKSTATE, &data); + vfc.linkstate = vfls; + dev_dbg(ionic->dev, "%s: vf %d linkstate %d\n", + __func__, vf, vfc.linkstate); + + ret = ionic_set_vf_config(ionic, vf, &vfc); if (!ret) ionic->vfs[vf].linkstate = set; } @@ -2835,6 +2937,7 @@ static void ionic_lif_handle_fw_down(struct ionic_lif *lif) mutex_unlock(&lif->queue_lock); + clear_bit(IONIC_LIF_F_FW_STOPPING, lif->state); dev_info(ionic->dev, "FW Down: LIFs stopped\n"); } @@ -2934,8 +3037,6 @@ void ionic_lif_free(struct ionic_lif *lif) /* unmap doorbell page */ ionic_bus_unmap_dbpage(lif->ionic, lif->kern_dbpage); lif->kern_dbpage = NULL; - kfree(lif->dbid_inuse); - lif->dbid_inuse = NULL; mutex_destroy(&lif->config_lock); mutex_destroy(&lif->queue_lock); @@ -3135,22 +3236,12 @@ int ionic_lif_init(struct ionic_lif *lif) return -EINVAL; } - lif->dbid_inuse = bitmap_zalloc(lif->dbid_count, GFP_KERNEL); - if (!lif->dbid_inuse) { - dev_err(dev, "Failed alloc doorbell id bitmap, aborting\n"); - return -ENOMEM; - } - - /* first doorbell id reserved for kernel (dbid aka pid == zero) */ - set_bit(0, lif->dbid_inuse); lif->kern_pid = 0; - dbpage_num = ionic_db_page_num(lif, lif->kern_pid); lif->kern_dbpage = ionic_bus_map_dbpage(lif->ionic, dbpage_num); if (!lif->kern_dbpage) { dev_err(dev, "Cannot map dbpage, aborting\n"); - err = -ENOMEM; - goto err_out_free_dbid; + return -ENOMEM; } err = ionic_lif_adminq_init(lif); @@ -3186,15 +3277,13 @@ int ionic_lif_init(struct ionic_lif *lif) return 0; err_out_notifyq_deinit: + napi_disable(&lif->adminqcq->napi); ionic_lif_qcq_deinit(lif, lif->notifyqcq); err_out_adminq_deinit: ionic_lif_qcq_deinit(lif, lif->adminqcq); ionic_lif_reset(lif); ionic_bus_unmap_dbpage(lif->ionic, lif->kern_dbpage); lif->kern_dbpage = NULL; -err_out_free_dbid: - kfree(lif->dbid_inuse); - lif->dbid_inuse = NULL; return err; } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index 9f7ab2f17f93..a53984bf3544 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -135,6 +135,7 @@ enum ionic_lif_state_flags { IONIC_LIF_F_LINK_CHECK_REQUESTED, IONIC_LIF_F_FILTER_SYNC_NEEDED, IONIC_LIF_F_FW_RESET, + IONIC_LIF_F_FW_STOPPING, IONIC_LIF_F_SPLIT_INTR, IONIC_LIF_F_BROKEN, IONIC_LIF_F_TX_DIM_INTR, @@ -213,7 +214,6 @@ struct ionic_lif { u32 rx_coalesce_hw; /* what the hw is using */ u32 tx_coalesce_usecs; /* what the user asked for */ u32 tx_coalesce_hw; /* what the hw is using */ - unsigned long *dbid_inuse; unsigned int dbid_count; struct ionic_phc *phc; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index 875f4ec42efe..4029b4e021f8 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -188,6 +188,28 @@ static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode) } } +const char *ionic_vf_attr_to_str(enum ionic_vf_attr attr) +{ + switch (attr) { + case IONIC_VF_ATTR_SPOOFCHK: + return "IONIC_VF_ATTR_SPOOFCHK"; + case IONIC_VF_ATTR_TRUST: + return "IONIC_VF_ATTR_TRUST"; + case IONIC_VF_ATTR_LINKSTATE: + return "IONIC_VF_ATTR_LINKSTATE"; + case IONIC_VF_ATTR_MAC: + return "IONIC_VF_ATTR_MAC"; + case IONIC_VF_ATTR_VLAN: + return "IONIC_VF_ATTR_VLAN"; + case IONIC_VF_ATTR_RATE: + return "IONIC_VF_ATTR_RATE"; + case IONIC_VF_ATTR_STATSADDR: + return "IONIC_VF_ATTR_STATSADDR"; + default: + return "IONIC_VF_ATTR_UNKNOWN"; + } +} + static void ionic_adminq_flush(struct ionic_lif *lif) { struct ionic_desc_info *desc_info; @@ -215,9 +237,13 @@ static void ionic_adminq_flush(struct ionic_lif *lif) void ionic_adminq_netdev_err_print(struct ionic_lif *lif, u8 opcode, u8 status, int err) { + const char *stat_str; + + stat_str = (err == -ETIMEDOUT) ? "TIMEOUT" : + ionic_error_to_str(status); + netdev_err(lif->netdev, "%s (%d) failed: %s (%d)\n", - ionic_opcode_to_str(opcode), opcode, - ionic_error_to_str(status), err); + ionic_opcode_to_str(opcode), opcode, stat_str, err); } static int ionic_adminq_check_err(struct ionic_lif *lif, @@ -318,6 +344,7 @@ int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx, if (do_msg && !test_bit(IONIC_LIF_F_FW_RESET, lif->state)) netdev_err(netdev, "Posting of %s (%d) failed: %d\n", name, ctx->cmd.cmd.opcode, err); + ctx->comp.comp.status = IONIC_RC_ERROR; return err; } @@ -331,11 +358,15 @@ int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx, if (remaining) break; - /* interrupt the wait if FW stopped */ - if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) { + /* force a check of FW status and break out if FW reset */ + (void)ionic_heartbeat_check(lif->ionic); + if ((test_bit(IONIC_LIF_F_FW_RESET, lif->state) && + !lif->ionic->idev.fw_status_ready) || + test_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) { if (do_msg) - netdev_err(netdev, "%s (%d) interrupted, FW in reset\n", - name, ctx->cmd.cmd.opcode); + netdev_warn(netdev, "%s (%d) interrupted, FW in reset\n", + name, ctx->cmd.cmd.opcode); + ctx->comp.comp.status = IONIC_RC_ERROR; return -ENXIO; } @@ -370,21 +401,34 @@ int ionic_adminq_post_wait_nomsg(struct ionic_lif *lif, struct ionic_admin_ctx * static void ionic_dev_cmd_clean(struct ionic *ionic) { - union __iomem ionic_dev_cmd_regs *regs = ionic->idev.dev_cmd_regs; + struct ionic_dev *idev = &ionic->idev; - iowrite32(0, ®s->doorbell); - memset_io(®s->cmd, 0, sizeof(regs->cmd)); + iowrite32(0, &idev->dev_cmd_regs->doorbell); + memset_io(&idev->dev_cmd_regs->cmd, 0, sizeof(idev->dev_cmd_regs->cmd)); } -int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds) +void ionic_dev_cmd_dev_err_print(struct ionic *ionic, u8 opcode, u8 status, + int err) +{ + const char *stat_str; + + stat_str = (err == -ETIMEDOUT) ? "TIMEOUT" : + ionic_error_to_str(status); + + dev_err(ionic->dev, "DEV_CMD %s (%d) error, %s (%d) failed\n", + ionic_opcode_to_str(opcode), opcode, stat_str, err); +} + +static int __ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds, + const bool do_msg) { struct ionic_dev *idev = &ionic->idev; unsigned long start_time; unsigned long max_wait; unsigned long duration; + int done = 0; + bool fw_up; int opcode; - int hb = 0; - int done; int err; /* Wait for dev cmd to complete, retrying if we get EAGAIN, @@ -394,31 +438,24 @@ int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds) try_again: opcode = readb(&idev->dev_cmd_regs->cmd.cmd.opcode); start_time = jiffies; - do { + for (fw_up = ionic_is_fw_running(idev); + !done && fw_up && time_before(jiffies, max_wait); + fw_up = ionic_is_fw_running(idev)) { done = ionic_dev_cmd_done(idev); if (done) break; usleep_range(100, 200); - - /* Don't check the heartbeat on FW_CONTROL commands as they are - * notorious for interrupting the firmware's heartbeat update. - */ - if (opcode != IONIC_CMD_FW_CONTROL) - hb = ionic_heartbeat_check(ionic); - } while (!done && !hb && time_before(jiffies, max_wait)); + } duration = jiffies - start_time; dev_dbg(ionic->dev, "DEVCMD %s (%d) done=%d took %ld secs (%ld jiffies)\n", ionic_opcode_to_str(opcode), opcode, done, duration / HZ, duration); - if (!done && hb) { - /* It is possible (but unlikely) that FW was busy and missed a - * heartbeat check but is still alive and will process this - * request, so don't clean the dev_cmd in this case. - */ - dev_dbg(ionic->dev, "DEVCMD %s (%d) failed - FW halted\n", - ionic_opcode_to_str(opcode), opcode); + if (!done && !fw_up) { + ionic_dev_cmd_clean(ionic); + dev_warn(ionic->dev, "DEVCMD %s (%d) interrupted - FW is down\n", + ionic_opcode_to_str(opcode), opcode); return -ENXIO; } @@ -444,9 +481,9 @@ try_again: } if (!(opcode == IONIC_CMD_FW_CONTROL && err == IONIC_RC_EAGAIN)) - dev_err(ionic->dev, "DEV_CMD %s (%d) error, %s (%d) failed\n", - ionic_opcode_to_str(opcode), opcode, - ionic_error_to_str(err), err); + if (do_msg) + ionic_dev_cmd_dev_err_print(ionic, opcode, err, + ionic_error_to_errno(err)); return ionic_error_to_errno(err); } @@ -454,6 +491,16 @@ try_again: return 0; } +int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds) +{ + return __ionic_dev_cmd_wait(ionic, max_seconds, true); +} + +int ionic_dev_cmd_wait_nomsg(struct ionic *ionic, unsigned long max_seconds) +{ + return __ionic_dev_cmd_wait(ionic, max_seconds, false); +} + int ionic_setup(struct ionic *ionic) { int err; @@ -540,6 +587,9 @@ int ionic_reset(struct ionic *ionic) struct ionic_dev *idev = &ionic->idev; int err; + if (!ionic_is_fw_running(idev)) + return 0; + mutex_lock(&ionic->dev_cmd_lock); ionic_dev_cmd_reset(idev); err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); @@ -612,15 +662,17 @@ int ionic_port_init(struct ionic *ionic) int ionic_port_reset(struct ionic *ionic) { struct ionic_dev *idev = &ionic->idev; - int err; + int err = 0; if (!idev->port_info) return 0; - mutex_lock(&ionic->dev_cmd_lock); - ionic_dev_cmd_port_reset(idev); - err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); - mutex_unlock(&ionic->dev_cmd_lock); + if (ionic_is_fw_running(idev)) { + mutex_lock(&ionic->dev_cmd_lock); + ionic_dev_cmd_port_reset(idev); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + mutex_unlock(&ionic->dev_cmd_lock); + } dma_free_coherent(ionic->dev, idev->port_info_sz, idev->port_info, idev->port_info_pa); @@ -628,9 +680,6 @@ int ionic_port_reset(struct ionic *ionic) idev->port_info = NULL; idev->port_info_pa = 0; - if (err) - dev_err(ionic->dev, "Failed to reset port\n"); - return err; } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c index f6e785f949f9..b7363376dfc8 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c @@ -376,10 +376,24 @@ static int ionic_lif_filter_add(struct ionic_lif *lif, spin_unlock_bh(&lif->rx_filters.lock); - if (err == -ENOSPC) { - if (le16_to_cpu(ctx.cmd.rx_filter_add.match) == IONIC_RX_FILTER_MATCH_VLAN) - lif->max_vlans = lif->nvlans; + /* store the max_vlans limit that we found */ + if (err == -ENOSPC && + le16_to_cpu(ctx.cmd.rx_filter_add.match) == IONIC_RX_FILTER_MATCH_VLAN) + lif->max_vlans = lif->nvlans; + + /* Prevent unnecessary error messages on recoverable + * errors as the filter will get retried on the next + * sync attempt. + */ + switch (err) { + case -ENOSPC: + case -ENXIO: + case -ETIMEDOUT: + case -EAGAIN: + case -EBUSY: return 0; + default: + break; } ionic_adminq_netdev_err_print(lif, ctx.cmd.cmd.opcode, @@ -494,9 +508,22 @@ static int ionic_lif_filter_del(struct ionic_lif *lif, spin_unlock_bh(&lif->rx_filters.lock); if (state != IONIC_FILTER_STATE_NEW) { - err = ionic_adminq_post_wait(lif, &ctx); - if (err && err != -EEXIST) + err = ionic_adminq_post_wait_nomsg(lif, &ctx); + + switch (err) { + /* ignore these errors */ + case -EEXIST: + case -ENXIO: + case -ETIMEDOUT: + case -EAGAIN: + case -EBUSY: + case 0: + break; + default: + ionic_adminq_netdev_err_print(lif, ctx.cmd.cmd.opcode, + ctx.comp.comp.status, err); return err; + } } return 0; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 94384f5d2a22..d197a70a49c9 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -669,27 +669,37 @@ dma_fail: return -EIO; } +static void ionic_tx_desc_unmap_bufs(struct ionic_queue *q, + struct ionic_desc_info *desc_info) +{ + struct ionic_buf_info *buf_info = desc_info->bufs; + struct device *dev = q->dev; + unsigned int i; + + if (!desc_info->nbufs) + return; + + dma_unmap_single(dev, (dma_addr_t)buf_info->dma_addr, + buf_info->len, DMA_TO_DEVICE); + buf_info++; + for (i = 1; i < desc_info->nbufs; i++, buf_info++) + dma_unmap_page(dev, (dma_addr_t)buf_info->dma_addr, + buf_info->len, DMA_TO_DEVICE); + + desc_info->nbufs = 0; +} + static void ionic_tx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_info, struct ionic_cq_info *cq_info, void *cb_arg) { - struct ionic_buf_info *buf_info = desc_info->bufs; struct ionic_tx_stats *stats = q_to_tx_stats(q); struct ionic_qcq *qcq = q_to_qcq(q); struct sk_buff *skb = cb_arg; - struct device *dev = q->dev; - unsigned int i; u16 qi; - if (desc_info->nbufs) { - dma_unmap_single(dev, (dma_addr_t)buf_info->dma_addr, - buf_info->len, DMA_TO_DEVICE); - buf_info++; - for (i = 1; i < desc_info->nbufs; i++, buf_info++) - dma_unmap_page(dev, (dma_addr_t)buf_info->dma_addr, - buf_info->len, DMA_TO_DEVICE); - } + ionic_tx_desc_unmap_bufs(q, desc_info); if (!skb) return; @@ -931,8 +941,11 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb) err = ionic_tx_tcp_inner_pseudo_csum(skb); else err = ionic_tx_tcp_pseudo_csum(skb); - if (err) + if (err) { + /* clean up mapping from ionic_tx_map_skb */ + ionic_tx_desc_unmap_bufs(q, desc_info); return err; + } if (encap) hdrlen = skb_inner_transport_header(skb) - skb->data + @@ -1003,8 +1016,8 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb) return 0; } -static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb, - struct ionic_desc_info *desc_info) +static void ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb, + struct ionic_desc_info *desc_info) { struct ionic_txq_desc *desc = desc_info->txq_desc; struct ionic_buf_info *buf_info = desc_info->bufs; @@ -1038,12 +1051,10 @@ static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb, stats->crc32_csum++; else stats->csum++; - - return 0; } -static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb, - struct ionic_desc_info *desc_info) +static void ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb, + struct ionic_desc_info *desc_info) { struct ionic_txq_desc *desc = desc_info->txq_desc; struct ionic_buf_info *buf_info = desc_info->bufs; @@ -1074,12 +1085,10 @@ static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb, desc->csum_offset = 0; stats->csum_none++; - - return 0; } -static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb, - struct ionic_desc_info *desc_info) +static void ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb, + struct ionic_desc_info *desc_info) { struct ionic_txq_sg_desc *sg_desc = desc_info->txq_sg_desc; struct ionic_buf_info *buf_info = &desc_info->bufs[1]; @@ -1093,31 +1102,24 @@ static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb, } stats->frags += skb_shinfo(skb)->nr_frags; - - return 0; } static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb) { struct ionic_desc_info *desc_info = &q->info[q->head_idx]; struct ionic_tx_stats *stats = q_to_tx_stats(q); - int err; if (unlikely(ionic_tx_map_skb(q, skb, desc_info))) return -EIO; /* set up the initial descriptor */ if (skb->ip_summed == CHECKSUM_PARTIAL) - err = ionic_tx_calc_csum(q, skb, desc_info); + ionic_tx_calc_csum(q, skb, desc_info); else - err = ionic_tx_calc_no_csum(q, skb, desc_info); - if (err) - return err; + ionic_tx_calc_no_csum(q, skb, desc_info); /* add frags */ - err = ionic_tx_skb_frags(q, skb, desc_info); - if (err) - return err; + ionic_tx_skb_frags(q, skb, desc_info); skb_tx_timestamp(skb); stats->pkts++; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index da1eadabcb41..b3811ad4bcf0 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -614,12 +614,13 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, usecs); } -int qed_mcp_cmd(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u32 cmd, - u32 param, - u32 *o_mcp_resp, - u32 *o_mcp_param) +static int _qed_mcp_cmd(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 cmd, + u32 param, + u32 *o_mcp_resp, + u32 *o_mcp_param, + bool can_sleep) { struct qed_mcp_mb_params mb_params; int rc; @@ -627,6 +628,7 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn, memset(&mb_params, 0, sizeof(mb_params)); mb_params.cmd = cmd; mb_params.param = param; + mb_params.flags = can_sleep ? QED_MB_FLAG_CAN_SLEEP : 0; rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); if (rc) @@ -638,6 +640,28 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn, return 0; } +int qed_mcp_cmd(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 cmd, + u32 param, + u32 *o_mcp_resp, + u32 *o_mcp_param) +{ + return (_qed_mcp_cmd(p_hwfn, p_ptt, cmd, param, + o_mcp_resp, o_mcp_param, true)); +} + +int qed_mcp_cmd_nosleep(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 cmd, + u32 param, + u32 *o_mcp_resp, + u32 *o_mcp_param) +{ + return (_qed_mcp_cmd(p_hwfn, p_ptt, cmd, param, + o_mcp_resp, o_mcp_param, false)); +} + static int qed_mcp_nvm_wr_cmd(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, @@ -1728,8 +1752,8 @@ static void qed_mcp_update_bw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) qed_configure_pf_max_bandwidth(p_hwfn->cdev, p_info->bandwidth_max); /* Acknowledge the MFW */ - qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_BW_UPDATE_ACK, 0, &resp, - ¶m); + qed_mcp_cmd_nosleep(p_hwfn, p_ptt, DRV_MSG_CODE_BW_UPDATE_ACK, 0, &resp, + ¶m); } static void qed_mcp_update_stag(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) @@ -1766,8 +1790,8 @@ static void qed_mcp_update_stag(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) p_hwfn->mcp_info->func_info.ovlan, p_hwfn->hw_info.hw_mode); /* Acknowledge the MFW */ - qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_S_TAG_UPDATE_ACK, 0, - &resp, ¶m); + qed_mcp_cmd_nosleep(p_hwfn, p_ptt, DRV_MSG_CODE_S_TAG_UPDATE_ACK, 0, + &resp, ¶m); } static void qed_mcp_handle_fan_failure(struct qed_hwfn *p_hwfn, @@ -3675,8 +3699,8 @@ static int qed_mcp_resource_cmd(struct qed_hwfn *p_hwfn, { int rc; - rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_RESOURCE_CMD, param, - p_mcp_resp, p_mcp_param); + rc = qed_mcp_cmd_nosleep(p_hwfn, p_ptt, DRV_MSG_CODE_RESOURCE_CMD, + param, p_mcp_resp, p_mcp_param); if (rc) return rc; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 369e1892450a..2f26bee54e6c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -393,11 +393,12 @@ int qed_mcp_get_board_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 *p_board_config); /** - * qed_mcp_cmd(): General function for sending commands to the MCP + * qed_mcp_cmd(): Sleepable function for sending commands to the MCP * mailbox. It acquire mutex lock for the entire * operation, from sending the request until the MCP * response. Waiting for MCP response will be checked up - * to 5 seconds every 5ms. + * to 5 seconds every 10ms. Should not be called from atomic + * context. * * @p_hwfn: HW device data. * @p_ptt: PTT required for register access. @@ -417,6 +418,31 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn, u32 *o_mcp_param); /** + * qed_mcp_cmd_nosleep(): Function for sending commands to the MCP + * mailbox. It acquire mutex lock for the entire + * operation, from sending the request until the MCP + * response. Waiting for MCP response will be checked up + * to 5 seconds every 10us. Should be called when sleep + * is not allowed. + * + * @p_hwfn: HW device data. + * @p_ptt: PTT required for register access. + * @cmd: command to be sent to the MCP. + * @param: Optional param + * @o_mcp_resp: The MCP response code (exclude sequence). + * @o_mcp_param: Optional parameter provided by the MCP + * response + * + * Return: Int - 0 - Operation was successul. + */ +int qed_mcp_cmd_nosleep(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 cmd, + u32 param, + u32 *o_mcp_resp, + u32 *o_mcp_param); + +/** * qed_mcp_drain(): drains the nig, allowing completion to pass in * case of pauses. * (Should be called only from sleepable context) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 19e2621e0645..67014eb76969 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2667,10 +2667,7 @@ static void rtl_enable_exit_l1(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_37 ... RTL_GIGA_MAC_VER_38: rtl_eri_set_bits(tp, 0xd4, 0x0c00); break; - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_53: - rtl_eri_set_bits(tp, 0xd4, 0x1f80); - break; - case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_63: r8168_mac_ocp_modify(tp, 0xc0ac, 0, 0x1f80); break; default: @@ -2678,13 +2675,48 @@ static void rtl_enable_exit_l1(struct rtl8169_private *tp) } } +static void rtl_disable_exit_l1(struct rtl8169_private *tp) +{ + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38: + rtl_eri_clear_bits(tp, 0xd4, 0x1f00); + break; + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_63: + r8168_mac_ocp_modify(tp, 0xc0ac, 0x1f80, 0); + break; + default: + break; + } +} + static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) { /* Don't enable ASPM in the chip if OS can't control ASPM */ if (enable && tp->aspm_manageable) { RTL_W8(tp, Config5, RTL_R8(tp, Config5) | ASPM_en); RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn); + + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_45 ... RTL_GIGA_MAC_VER_48: + case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63: + /* reset ephy tx/rx disable timer */ + r8168_mac_ocp_modify(tp, 0xe094, 0xff00, 0); + /* chip can trigger L1.2 */ + r8168_mac_ocp_modify(tp, 0xe092, 0x00ff, BIT(2)); + break; + default: + break; + } } else { + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_45 ... RTL_GIGA_MAC_VER_48: + case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63: + r8168_mac_ocp_modify(tp, 0xe092, 0x00ff, 0); + break; + default: + break; + } + RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn); RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en); } @@ -4683,7 +4715,7 @@ static void rtl8169_down(struct rtl8169_private *tp) rtl_pci_commit(tp); rtl8169_cleanup(tp, true); - + rtl_disable_exit_l1(tp); rtl_prepare_power_down(tp); } @@ -4843,8 +4875,6 @@ static void rtl8169_net_suspend(struct rtl8169_private *tp) rtl8169_down(tp); } -#ifdef CONFIG_PM - static int rtl8169_runtime_resume(struct device *dev) { struct rtl8169_private *tp = dev_get_drvdata(dev); @@ -4860,7 +4890,7 @@ static int rtl8169_runtime_resume(struct device *dev) return 0; } -static int __maybe_unused rtl8169_suspend(struct device *device) +static int rtl8169_suspend(struct device *device) { struct rtl8169_private *tp = dev_get_drvdata(device); @@ -4873,7 +4903,7 @@ static int __maybe_unused rtl8169_suspend(struct device *device) return 0; } -static int __maybe_unused rtl8169_resume(struct device *device) +static int rtl8169_resume(struct device *device) { struct rtl8169_private *tp = dev_get_drvdata(device); @@ -4915,13 +4945,11 @@ static int rtl8169_runtime_idle(struct device *device) } static const struct dev_pm_ops rtl8169_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(rtl8169_suspend, rtl8169_resume) - SET_RUNTIME_PM_OPS(rtl8169_runtime_suspend, rtl8169_runtime_resume, - rtl8169_runtime_idle) + SYSTEM_SLEEP_PM_OPS(rtl8169_suspend, rtl8169_resume) + RUNTIME_PM_OPS(rtl8169_runtime_suspend, rtl8169_runtime_resume, + rtl8169_runtime_idle) }; -#endif /* CONFIG_PM */ - static void rtl_wol_shutdown_quirk(struct rtl8169_private *tp) { /* WoL fails with 8168b when the receiver is disabled. */ @@ -5255,6 +5283,16 @@ done: rtl_rar_set(tp, mac_addr); } +/* register is set if system vendor successfully tested ASPM 1.2 */ +static bool rtl_aspm_is_safe(struct rtl8169_private *tp) +{ + if (tp->mac_version >= RTL_GIGA_MAC_VER_60 && + r8168_mac_ocp_read(tp, 0xc0b2) & 0xf) + return true; + + return false; +} + static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { struct rtl8169_private *tp; @@ -5333,7 +5371,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) * Chips from RTL8168h partially have issues with L1.2, but seem * to work fine with L1 and L1.1. */ - if (tp->mac_version >= RTL_GIGA_MAC_VER_45) + if (rtl_aspm_is_safe(tp)) + rc = 0; + else if (tp->mac_version >= RTL_GIGA_MAC_VER_45) rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2); else rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1); @@ -5460,9 +5500,7 @@ static struct pci_driver rtl8169_pci_driver = { .probe = rtl_init_one, .remove = rtl_remove_one, .shutdown = rtl_shutdown, -#ifdef CONFIG_PM - .driver.pm = &rtl8169_pm_ops, -#endif + .driver.pm = pm_ptr(&rtl8169_pm_ops), }; module_pci_driver(rtl8169_pci_driver); diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index b215cde68e10..24e2635c4c80 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1432,11 +1432,7 @@ static int ravb_phy_init(struct net_device *ndev) * at this time. */ if (soc_device_match(r8a7795es10)) { - err = phy_set_max_speed(phydev, SPEED_100); - if (err) { - netdev_err(ndev, "failed to limit PHY to 100Mbit/s\n"); - goto err_phy_disconnect; - } + phy_set_max_speed(phydev, SPEED_100); netdev_info(ndev, "limited PHY to 100Mbit/s\n"); } @@ -1457,8 +1453,6 @@ static int ravb_phy_init(struct net_device *ndev) return 0; -err_phy_disconnect: - phy_disconnect(phydev); err_deregister_fixed_link: if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); @@ -2854,7 +2848,6 @@ static int ravb_wol_restore(struct net_device *ndev) { struct ravb_private *priv = netdev_priv(ndev); const struct ravb_hw_info *info = priv->info; - int ret; if (info->nc_queues) napi_enable(&priv->napi[RAVB_NC]); @@ -2863,9 +2856,7 @@ static int ravb_wol_restore(struct net_device *ndev) /* Disable MagicPacket */ ravb_modify(ndev, ECMR, ECMR_MPDE, 0); - ret = ravb_close(ndev); - if (ret < 0) - return ret; + ravb_close(ndev); return disable_irq_wake(priv->emac_irq); } diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index d947a628e166..67ade78fb767 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2026,14 +2026,8 @@ static int sh_eth_phy_init(struct net_device *ndev) } /* mask with MAC supported features */ - if (mdp->cd->register_type != SH_ETH_REG_GIGABIT) { - int err = phy_set_max_speed(phydev, SPEED_100); - if (err) { - netdev_err(ndev, "failed to limit PHY to 100 Mbit/s\n"); - phy_disconnect(phydev); - return err; - } - } + if (mdp->cd->register_type != SH_ETH_REG_GIGABIT) + phy_set_max_speed(phydev, SPEED_100); phy_attached_info(phydev); @@ -3450,9 +3444,7 @@ static int sh_eth_wol_restore(struct net_device *ndev) * both be reset and all registers restored. This is what * happens during suspend and resume without WoL enabled. */ - ret = sh_eth_close(ndev); - if (ret < 0) - return ret; + sh_eth_close(ndev); ret = sh_eth_open(ndev); if (ret < 0) return ret; @@ -3464,7 +3456,7 @@ static int sh_eth_suspend(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct sh_eth_private *mdp = netdev_priv(ndev); - int ret = 0; + int ret; if (!netif_running(ndev)) return 0; @@ -3483,7 +3475,7 @@ static int sh_eth_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct sh_eth_private *mdp = netdev_priv(ndev); - int ret = 0; + int ret; if (!netif_running(ndev)) return 0; diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index 32161a56726c..77a0d9d7e65a 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -127,7 +127,7 @@ bool sxgbe_eee_init(struct sxgbe_priv_data * const priv) /* MAC core supports the EEE feature. */ if (priv->hw_cap.eee) { /* Check if the PHY supports EEE */ - if (phy_init_eee(ndev->phydev, 1)) + if (phy_init_eee(ndev->phydev, true)) return false; priv->eee_active = 1; diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index cf366ed2557c..50d535981a35 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -3990,6 +3990,30 @@ static unsigned int ef10_check_caps(const struct efx_nic *efx, } } +static unsigned int efx_ef10_recycle_ring_size(const struct efx_nic *efx) +{ + unsigned int ret = EFX_RECYCLE_RING_SIZE_10G; + + /* There is no difference between PFs and VFs. The side is based on + * the maximum link speed of a given NIC. + */ + switch (efx->pci_dev->device & 0xfff) { + case 0x0903: /* Farmingdale can do up to 10G */ + break; + case 0x0923: /* Greenport can do up to 40G */ + case 0x0a03: /* Medford can do up to 40G */ + ret *= 4; + break; + default: /* Medford2 can do up to 100G */ + ret *= 10; + } + + if (IS_ENABLED(CONFIG_PPC64)) + ret *= 4; + + return ret; +} + #define EF10_OFFLOAD_FEATURES \ (NETIF_F_IP_CSUM | \ NETIF_F_HW_VLAN_CTAG_FILTER | \ @@ -4106,6 +4130,7 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = { .check_caps = ef10_check_caps, .print_additional_fwver = efx_ef10_print_additional_fwver, .sensor_event = efx_mcdi_sensor_event, + .rx_recycle_ring_size = efx_ef10_recycle_ring_size, }; const struct efx_nic_type efx_hunt_a0_nic_type = { @@ -4243,4 +4268,5 @@ const struct efx_nic_type efx_hunt_a0_nic_type = { .check_caps = ef10_check_caps, .print_additional_fwver = efx_ef10_print_additional_fwver, .sensor_event = efx_mcdi_sensor_event, + .rx_recycle_ring_size = efx_ef10_recycle_ring_size, }; diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c index f79b14a119ae..a07cbf45a326 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.c +++ b/drivers/net/ethernet/sfc/ef100_nic.c @@ -23,6 +23,7 @@ #include "ef100_rx.h" #include "ef100_tx.h" #include "ef100_netdev.h" +#include "rx_common.h" #define EF100_MAX_VIS 4096 #define EF100_NUM_MCDI_BUFFERS 1 @@ -696,6 +697,12 @@ static unsigned int ef100_check_caps(const struct efx_nic *efx, } } +static unsigned int efx_ef100_recycle_ring_size(const struct efx_nic *efx) +{ + /* Maximum link speed for Riverhead is 100G */ + return 10 * EFX_RECYCLE_RING_SIZE_10G; +} + /* NIC level access functions */ #define EF100_OFFLOAD_FEATURES (NETIF_F_HW_CSUM | NETIF_F_RXCSUM | \ @@ -770,6 +777,7 @@ const struct efx_nic_type ef100_pf_nic_type = { .rx_push_rss_context_config = efx_mcdi_rx_push_rss_context_config, .rx_pull_rss_context_config = efx_mcdi_rx_pull_rss_context_config, .rx_restore_rss_contexts = efx_mcdi_rx_restore_rss_contexts, + .rx_recycle_ring_size = efx_ef100_recycle_ring_size, .reconfigure_mac = ef100_reconfigure_mac, .reconfigure_port = efx_mcdi_port_reconfigure, @@ -849,6 +857,7 @@ const struct efx_nic_type ef100_vf_nic_type = { .rx_pull_rss_config = efx_mcdi_rx_pull_rss_config, .rx_push_rss_config = efx_mcdi_pf_rx_push_rss_config, .rx_restore_rss_contexts = efx_mcdi_rx_restore_rss_contexts, + .rx_recycle_ring_size = efx_ef100_recycle_ring_size, .reconfigure_mac = ef100_reconfigure_mac, .test_nvram = efx_new_mcdi_nvram_test_all, diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index cc15ee8812d9..c75dc75e2857 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -1282,6 +1282,7 @@ struct efx_udp_tunnel { * @udp_tnl_has_port: Check if a port has been added as UDP tunnel * @print_additional_fwver: Dump NIC-specific additional FW version info * @sensor_event: Handle a sensor event from MCDI + * @rx_recycle_ring_size: Size of the RX recycle ring * @revision: Hardware architecture revision * @txd_ptr_tbl_base: TX descriptor ring base address * @rxd_ptr_tbl_base: RX descriptor ring base address @@ -1460,6 +1461,7 @@ struct efx_nic_type { size_t (*print_additional_fwver)(struct efx_nic *efx, char *buf, size_t len); void (*sensor_event)(struct efx_nic *efx, efx_qword_t *ev); + unsigned int (*rx_recycle_ring_size)(const struct efx_nic *efx); int revision; unsigned int txd_ptr_tbl_base; diff --git a/drivers/net/ethernet/sfc/nic_common.h b/drivers/net/ethernet/sfc/nic_common.h index b9cafe9cd568..0cef35c0c559 100644 --- a/drivers/net/ethernet/sfc/nic_common.h +++ b/drivers/net/ethernet/sfc/nic_common.h @@ -195,6 +195,11 @@ static inline void efx_sensor_event(struct efx_nic *efx, efx_qword_t *ev) efx->type->sensor_event(efx, ev); } +static inline unsigned int efx_rx_recycle_ring_size(const struct efx_nic *efx) +{ + return efx->type->rx_recycle_ring_size(efx); +} + /* Some statistics are computed as A - B where A and B each increase * linearly with some hardware counter(s) and the counters are read * asynchronously. If the counters contributing to B are always read diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c index 633ca77a26fd..1b22c7be0088 100644 --- a/drivers/net/ethernet/sfc/rx_common.c +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -23,13 +23,6 @@ module_param(rx_refill_threshold, uint, 0444); MODULE_PARM_DESC(rx_refill_threshold, "RX descriptor ring refill threshold (%)"); -/* Number of RX buffers to recycle pages for. When creating the RX page recycle - * ring, this number is divided by the number of buffers per page to calculate - * the number of pages to store in the RX page recycle ring. - */ -#define EFX_RECYCLE_RING_SIZE_IOMMU 4096 -#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH) - /* RX maximum head room required. * * This must be at least 1 to prevent overflow, plus one packet-worth @@ -141,16 +134,7 @@ static void efx_init_rx_recycle_ring(struct efx_rx_queue *rx_queue) unsigned int bufs_in_recycle_ring, page_ring_size; struct efx_nic *efx = rx_queue->efx; - /* Set the RX recycle ring size */ -#ifdef CONFIG_PPC64 - bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; -#else - if (iommu_present(&pci_bus_type)) - bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; - else - bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU; -#endif /* CONFIG_PPC64 */ - + bufs_in_recycle_ring = efx_rx_recycle_ring_size(efx); page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring / efx->rx_bufs_per_page); rx_queue->page_ring = kcalloc(page_ring_size, diff --git a/drivers/net/ethernet/sfc/rx_common.h b/drivers/net/ethernet/sfc/rx_common.h index 207ccd8ba062..fbd2769307f9 100644 --- a/drivers/net/ethernet/sfc/rx_common.h +++ b/drivers/net/ethernet/sfc/rx_common.h @@ -18,6 +18,12 @@ #define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \ EFX_RX_USR_BUF_SIZE) +/* Number of RX buffers to recycle pages for. When creating the RX page recycle + * ring, this number is divided by the number of buffers per page to calculate + * the number of pages to store in the RX page recycle ring. + */ +#define EFX_RECYCLE_RING_SIZE_10G 256 + static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf) { return page_address(buf->page) + buf->page_offset; diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index 16347a6d0c47..ce3060e15b54 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c @@ -25,6 +25,7 @@ #include "mcdi_port_common.h" #include "selftest.h" #include "siena_sriov.h" +#include "rx_common.h" /* Hardware control for SFC9000 family including SFL9021 (aka Siena). */ @@ -958,6 +959,12 @@ static unsigned int siena_check_caps(const struct efx_nic *efx, return 0; } +static unsigned int efx_siena_recycle_ring_size(const struct efx_nic *efx) +{ + /* Maximum link speed is 10G */ + return EFX_RECYCLE_RING_SIZE_10G; +} + /************************************************************************** * * Revision-dependent attributes used by efx.c and nic.c @@ -1098,4 +1105,5 @@ const struct efx_nic_type siena_a0_nic_type = { .rx_hash_key_size = 16, .check_caps = siena_check_caps, .sensor_event = efx_mcdi_sensor_event, + .rx_recycle_ring_size = efx_siena_recycle_ring_size, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 09644ab0d87a..f86cc83003f2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -16,6 +16,7 @@ #include <linux/of_net.h> #include <linux/phy.h> #include <linux/platform_device.h> +#include <linux/pm_runtime.h> #include <linux/regulator/consumer.h> #include <linux/regmap.h> #include <linux/stmmac.h> @@ -57,7 +58,6 @@ struct emac_variant { }; /* struct sunxi_priv_data - hold all sunxi private data - * @tx_clk: reference to MAC TX clock * @ephy_clk: reference to the optional EPHY clock for the internal PHY * @regulator: reference to the optional regulator * @rst_ephy: reference to the optional EPHY reset for the internal PHY @@ -68,7 +68,6 @@ struct emac_variant { * @mux_handle: Internal pointer used by mdio-mux lib */ struct sunxi_priv_data { - struct clk *tx_clk; struct clk *ephy_clk; struct regulator *regulator; struct reset_control *rst_ephy; @@ -579,22 +578,14 @@ static int sun8i_dwmac_init(struct platform_device *pdev, void *priv) } } - ret = clk_prepare_enable(gmac->tx_clk); - if (ret) { - dev_err(&pdev->dev, "Could not enable AHB clock\n"); - goto err_disable_regulator; - } - if (gmac->use_internal_phy) { ret = sun8i_dwmac_power_internal_phy(netdev_priv(ndev)); if (ret) - goto err_disable_clk; + goto err_disable_regulator; } return 0; -err_disable_clk: - clk_disable_unprepare(gmac->tx_clk); err_disable_regulator: if (gmac->regulator) regulator_disable(gmac->regulator); @@ -1043,8 +1034,6 @@ static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv) if (gmac->variant->soc_has_internal_phy) sun8i_dwmac_unpower_internal_phy(gmac); - clk_disable_unprepare(gmac->tx_clk); - if (gmac->regulator) regulator_disable(gmac->regulator); } @@ -1167,12 +1156,6 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) return -EINVAL; } - gmac->tx_clk = devm_clk_get(dev, "stmmaceth"); - if (IS_ERR(gmac->tx_clk)) { - dev_err(dev, "Could not get TX clock\n"); - return PTR_ERR(gmac->tx_clk); - } - /* Optional regulator for PHY */ gmac->regulator = devm_regulator_get_optional(dev, "phy"); if (IS_ERR(gmac->regulator)) { @@ -1254,6 +1237,12 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) ndev = dev_get_drvdata(&pdev->dev); priv = netdev_priv(ndev); + /* the MAC is runtime suspended after stmmac_dvr_probe(), so we + * need to ensure the MAC resume back before other operations such + * as reset. + */ + pm_runtime_get_sync(&pdev->dev); + /* The mux must be registered after parent MDIO * so after stmmac_dvr_probe() */ @@ -1272,12 +1261,15 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) goto dwmac_remove; } + pm_runtime_put(&pdev->dev); + return 0; dwmac_mux: reset_control_put(gmac->rst_ephy); clk_put(gmac->ephy_clk); dwmac_remove: + pm_runtime_put_noidle(&pdev->dev); stmmac_dvr_remove(&pdev->dev); dwmac_exit: sun8i_dwmac_exit(pdev, gmac); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index bde76ea2deec..b745d624b2cb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -938,105 +938,15 @@ static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex) priv->pause, tx_cnt); } -static void stmmac_validate(struct phylink_config *config, - unsigned long *supported, - struct phylink_link_state *state) +static struct phylink_pcs *stmmac_mac_select_pcs(struct phylink_config *config, + phy_interface_t interface) { struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); - __ETHTOOL_DECLARE_LINK_MODE_MASK(mac_supported) = { 0, }; - __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; - int tx_cnt = priv->plat->tx_queues_to_use; - int max_speed = priv->plat->max_speed; - - phylink_set(mac_supported, 10baseT_Half); - phylink_set(mac_supported, 10baseT_Full); - phylink_set(mac_supported, 100baseT_Half); - phylink_set(mac_supported, 100baseT_Full); - phylink_set(mac_supported, 1000baseT_Half); - phylink_set(mac_supported, 1000baseT_Full); - phylink_set(mac_supported, 1000baseKX_Full); - - phylink_set(mac_supported, Autoneg); - phylink_set(mac_supported, Pause); - phylink_set(mac_supported, Asym_Pause); - phylink_set_port_modes(mac_supported); - - /* Cut down 1G if asked to */ - if ((max_speed > 0) && (max_speed < 1000)) { - phylink_set(mask, 1000baseT_Full); - phylink_set(mask, 1000baseX_Full); - } else if (priv->plat->has_gmac4) { - if (!max_speed || max_speed >= 2500) { - phylink_set(mac_supported, 2500baseT_Full); - phylink_set(mac_supported, 2500baseX_Full); - } - } else if (priv->plat->has_xgmac) { - if (!max_speed || (max_speed >= 2500)) { - phylink_set(mac_supported, 2500baseT_Full); - phylink_set(mac_supported, 2500baseX_Full); - } - if (!max_speed || (max_speed >= 5000)) { - phylink_set(mac_supported, 5000baseT_Full); - } - if (!max_speed || (max_speed >= 10000)) { - phylink_set(mac_supported, 10000baseSR_Full); - phylink_set(mac_supported, 10000baseLR_Full); - phylink_set(mac_supported, 10000baseER_Full); - phylink_set(mac_supported, 10000baseLRM_Full); - phylink_set(mac_supported, 10000baseT_Full); - phylink_set(mac_supported, 10000baseKX4_Full); - phylink_set(mac_supported, 10000baseKR_Full); - } - if (!max_speed || (max_speed >= 25000)) { - phylink_set(mac_supported, 25000baseCR_Full); - phylink_set(mac_supported, 25000baseKR_Full); - phylink_set(mac_supported, 25000baseSR_Full); - } - if (!max_speed || (max_speed >= 40000)) { - phylink_set(mac_supported, 40000baseKR4_Full); - phylink_set(mac_supported, 40000baseCR4_Full); - phylink_set(mac_supported, 40000baseSR4_Full); - phylink_set(mac_supported, 40000baseLR4_Full); - } - if (!max_speed || (max_speed >= 50000)) { - phylink_set(mac_supported, 50000baseCR2_Full); - phylink_set(mac_supported, 50000baseKR2_Full); - phylink_set(mac_supported, 50000baseSR2_Full); - phylink_set(mac_supported, 50000baseKR_Full); - phylink_set(mac_supported, 50000baseSR_Full); - phylink_set(mac_supported, 50000baseCR_Full); - phylink_set(mac_supported, 50000baseLR_ER_FR_Full); - phylink_set(mac_supported, 50000baseDR_Full); - } - if (!max_speed || (max_speed >= 100000)) { - phylink_set(mac_supported, 100000baseKR4_Full); - phylink_set(mac_supported, 100000baseSR4_Full); - phylink_set(mac_supported, 100000baseCR4_Full); - phylink_set(mac_supported, 100000baseLR4_ER4_Full); - phylink_set(mac_supported, 100000baseKR2_Full); - phylink_set(mac_supported, 100000baseSR2_Full); - phylink_set(mac_supported, 100000baseCR2_Full); - phylink_set(mac_supported, 100000baseLR2_ER2_FR2_Full); - phylink_set(mac_supported, 100000baseDR2_Full); - } - } - - /* Half-Duplex can only work with single queue */ - if (tx_cnt > 1) { - phylink_set(mask, 10baseT_Half); - phylink_set(mask, 100baseT_Half); - phylink_set(mask, 1000baseT_Half); - } - - linkmode_and(supported, supported, mac_supported); - linkmode_andnot(supported, supported, mask); - linkmode_and(state->advertising, state->advertising, mac_supported); - linkmode_andnot(state->advertising, state->advertising, mask); + if (!priv->hw->xpcs) + return NULL; - /* If PCS is supported, check which modes it supports. */ - if (priv->hw->xpcs) - xpcs_validate(priv->hw->xpcs, supported, state); + return &priv->hw->xpcs->pcs; } static void stmmac_mac_config(struct phylink_config *config, unsigned int mode, @@ -1175,7 +1085,8 @@ static void stmmac_mac_link_up(struct phylink_config *config, } static const struct phylink_mac_ops stmmac_phylink_mac_ops = { - .validate = stmmac_validate, + .validate = phylink_generic_validate, + .mac_select_pcs = stmmac_mac_select_pcs, .mac_config = stmmac_mac_config, .mac_link_down = stmmac_mac_link_down, .mac_link_up = stmmac_mac_link_up, @@ -1255,12 +1166,12 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) { struct stmmac_mdio_bus_data *mdio_bus_data = priv->plat->mdio_bus_data; struct fwnode_handle *fwnode = of_fwnode_handle(priv->plat->phylink_node); + int max_speed = priv->plat->max_speed; int mode = priv->plat->phy_interface; struct phylink *phylink; priv->phylink_config.dev = &priv->dev->dev; priv->phylink_config.type = PHYLINK_NETDEV; - priv->phylink_config.pcs_poll = true; if (priv->plat->mdio_bus_data) priv->phylink_config.ovr_an_inband = mdio_bus_data->xpcs_an_inband; @@ -1268,14 +1179,50 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) if (!fwnode) fwnode = dev_fwnode(priv->device); + /* Set the platform/firmware specified interface mode */ + __set_bit(mode, priv->phylink_config.supported_interfaces); + + /* If we have an xpcs, it defines which PHY interfaces are supported. */ + if (priv->hw->xpcs) + xpcs_get_interfaces(priv->hw->xpcs, + priv->phylink_config.supported_interfaces); + + priv->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_10 | MAC_100; + + if (!max_speed || max_speed >= 1000) + priv->phylink_config.mac_capabilities |= MAC_1000; + + if (priv->plat->has_gmac4) { + if (!max_speed || max_speed >= 2500) + priv->phylink_config.mac_capabilities |= MAC_2500FD; + } else if (priv->plat->has_xgmac) { + if (!max_speed || max_speed >= 2500) + priv->phylink_config.mac_capabilities |= MAC_2500FD; + if (!max_speed || max_speed >= 5000) + priv->phylink_config.mac_capabilities |= MAC_5000FD; + if (!max_speed || max_speed >= 10000) + priv->phylink_config.mac_capabilities |= MAC_10000FD; + if (!max_speed || max_speed >= 25000) + priv->phylink_config.mac_capabilities |= MAC_25000FD; + if (!max_speed || max_speed >= 40000) + priv->phylink_config.mac_capabilities |= MAC_40000FD; + if (!max_speed || max_speed >= 50000) + priv->phylink_config.mac_capabilities |= MAC_50000FD; + if (!max_speed || max_speed >= 100000) + priv->phylink_config.mac_capabilities |= MAC_100000FD; + } + + /* Half-Duplex can only work with single queue */ + if (priv->plat->tx_queues_to_use > 1) + priv->phylink_config.mac_capabilities &= + ~(MAC_10HD | MAC_100HD | MAC_1000HD); + phylink = phylink_create(&priv->phylink_config, fwnode, mode, &stmmac_phylink_mac_ops); if (IS_ERR(phylink)) return PTR_ERR(phylink); - if (priv->hw->xpcs) - phylink_set_pcs(phylink, &priv->hw->xpcs->pcs); - priv->phylink = phylink; return 0; } diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index 5b4d153b1492..40108968b350 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -386,6 +386,7 @@ struct axidma_bd { * @phylink: Pointer to phylink instance * @phylink_config: phylink configuration settings * @pcs_phy: Reference to PCS/PMA PHY if used + * @pcs: phylink pcs structure for PCS PHY * @switch_x_sgmii: Whether switchable 1000BaseX/SGMII mode is enabled in the core * @axi_clk: AXI4-Lite bus clock * @misc_clks: Misc ethernet clocks (AXI4-Stream, Ref, MGT clocks) @@ -434,6 +435,7 @@ struct axienet_local { struct phylink_config phylink_config; struct mdio_device *pcs_phy; + struct phylink_pcs pcs; bool switch_x_sgmii; diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 377c94ec2486..de0a6372ae0e 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1537,78 +1537,78 @@ static const struct ethtool_ops axienet_ethtool_ops = { .nway_reset = axienet_ethtools_nway_reset, }; -static void axienet_mac_pcs_get_state(struct phylink_config *config, - struct phylink_link_state *state) +static struct axienet_local *pcs_to_axienet_local(struct phylink_pcs *pcs) { - struct net_device *ndev = to_net_dev(config->dev); - struct axienet_local *lp = netdev_priv(ndev); + return container_of(pcs, struct axienet_local, pcs); +} - switch (state->interface) { - case PHY_INTERFACE_MODE_SGMII: - case PHY_INTERFACE_MODE_1000BASEX: - phylink_mii_c22_pcs_get_state(lp->pcs_phy, state); - break; - default: - break; - } +static void axienet_pcs_get_state(struct phylink_pcs *pcs, + struct phylink_link_state *state) +{ + struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy; + + phylink_mii_c22_pcs_get_state(pcs_phy, state); } -static void axienet_mac_an_restart(struct phylink_config *config) +static void axienet_pcs_an_restart(struct phylink_pcs *pcs) { - struct net_device *ndev = to_net_dev(config->dev); - struct axienet_local *lp = netdev_priv(ndev); + struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy; - phylink_mii_c22_pcs_an_restart(lp->pcs_phy); + phylink_mii_c22_pcs_an_restart(pcs_phy); } -static int axienet_mac_prepare(struct phylink_config *config, unsigned int mode, - phy_interface_t iface) +static int axienet_pcs_config(struct phylink_pcs *pcs, unsigned int mode, + phy_interface_t interface, + const unsigned long *advertising, + bool permit_pause_to_mac) { - struct net_device *ndev = to_net_dev(config->dev); + struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy; + struct net_device *ndev = pcs_to_axienet_local(pcs)->ndev; struct axienet_local *lp = netdev_priv(ndev); int ret; - switch (iface) { - case PHY_INTERFACE_MODE_SGMII: - case PHY_INTERFACE_MODE_1000BASEX: - if (!lp->switch_x_sgmii) - return 0; - - ret = mdiobus_write(lp->pcs_phy->bus, - lp->pcs_phy->addr, - XLNX_MII_STD_SELECT_REG, - iface == PHY_INTERFACE_MODE_SGMII ? + if (lp->switch_x_sgmii) { + ret = mdiodev_write(pcs_phy, XLNX_MII_STD_SELECT_REG, + interface == PHY_INTERFACE_MODE_SGMII ? XLNX_MII_STD_SELECT_SGMII : 0); - if (ret < 0) - netdev_warn(ndev, "Failed to switch PHY interface: %d\n", + if (ret < 0) { + netdev_warn(ndev, + "Failed to switch PHY interface: %d\n", ret); - return ret; - default: - return 0; + return ret; + } } + + ret = phylink_mii_c22_pcs_config(pcs_phy, mode, interface, advertising); + if (ret < 0) + netdev_warn(ndev, "Failed to configure PCS: %d\n", ret); + + return ret; } -static void axienet_mac_config(struct phylink_config *config, unsigned int mode, - const struct phylink_link_state *state) +static const struct phylink_pcs_ops axienet_pcs_ops = { + .pcs_get_state = axienet_pcs_get_state, + .pcs_config = axienet_pcs_config, + .pcs_an_restart = axienet_pcs_an_restart, +}; + +static struct phylink_pcs *axienet_mac_select_pcs(struct phylink_config *config, + phy_interface_t interface) { struct net_device *ndev = to_net_dev(config->dev); struct axienet_local *lp = netdev_priv(ndev); - int ret; - switch (state->interface) { - case PHY_INTERFACE_MODE_SGMII: - case PHY_INTERFACE_MODE_1000BASEX: - ret = phylink_mii_c22_pcs_config(lp->pcs_phy, mode, - state->interface, - state->advertising); - if (ret < 0) - netdev_warn(ndev, "Failed to configure PCS: %d\n", - ret); - break; + if (interface == PHY_INTERFACE_MODE_1000BASEX || + interface == PHY_INTERFACE_MODE_SGMII) + return &lp->pcs; - default: - break; - } + return NULL; +} + +static void axienet_mac_config(struct phylink_config *config, unsigned int mode, + const struct phylink_link_state *state) +{ + /* nothing meaningful to do */ } static void axienet_mac_link_down(struct phylink_config *config, @@ -1663,9 +1663,7 @@ static void axienet_mac_link_up(struct phylink_config *config, static const struct phylink_mac_ops axienet_phylink_ops = { .validate = phylink_generic_validate, - .mac_pcs_get_state = axienet_mac_pcs_get_state, - .mac_an_restart = axienet_mac_an_restart, - .mac_prepare = axienet_mac_prepare, + .mac_select_pcs = axienet_mac_select_pcs, .mac_config = axienet_mac_config, .mac_link_down = axienet_mac_link_down, .mac_link_up = axienet_mac_link_up, @@ -2079,12 +2077,12 @@ static int axienet_probe(struct platform_device *pdev) ret = -EPROBE_DEFER; goto cleanup_mdio; } - lp->phylink_config.pcs_poll = true; + lp->pcs.ops = &axienet_pcs_ops; + lp->pcs.poll = true; } lp->phylink_config.dev = &ndev->dev; lp->phylink_config.type = PHYLINK_NETDEV; - lp->phylink_config.legacy_pre_march2020 = true; lp->phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE | MAC_10FD | MAC_100FD | MAC_1000FD; diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index ebd287039a54..5805e4a56385 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -1514,10 +1514,9 @@ acpi_find_extended_socket_device(acpi_handle obj_handle, u32 level, { struct acpi_device *device; bool *found = context; - int result; - result = acpi_bus_get_device(obj_handle, &device); - if (result) + device = acpi_fetch_acpi_dev(obj_handle); + if (!device) return AE_OK; if (strcmp(acpi_device_hid(device), ACPI_MOTHERBOARD_RESOURCE_HID)) diff --git a/drivers/net/ipa/ipa_data-v3.1.c b/drivers/net/ipa/ipa_data-v3.1.c index 06ddb85f39b2..8ff351aefd23 100644 --- a/drivers/net/ipa/ipa_data-v3.1.c +++ b/drivers/net/ipa/ipa_data-v3.1.c @@ -101,6 +101,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .aggregation = true, .status_enable = true, .rx = { + .buffer_size = 8192, .pad_align = ilog2(sizeof(u32)), }, }, @@ -148,6 +149,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .qmap = true, .aggregation = true, .rx = { + .buffer_size = 8192, .aggr_close_eof = true, }, }, diff --git a/drivers/net/ipa/ipa_data-v3.5.1.c b/drivers/net/ipa/ipa_data-v3.5.1.c index 760c22bbdf70..d1c466abddb2 100644 --- a/drivers/net/ipa/ipa_data-v3.5.1.c +++ b/drivers/net/ipa/ipa_data-v3.5.1.c @@ -92,6 +92,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .aggregation = true, .status_enable = true, .rx = { + .buffer_size = 8192, .pad_align = ilog2(sizeof(u32)), }, }, @@ -140,6 +141,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .qmap = true, .aggregation = true, .rx = { + .buffer_size = 8192, .aggr_close_eof = true, }, }, diff --git a/drivers/net/ipa/ipa_data-v4.11.c b/drivers/net/ipa/ipa_data-v4.11.c index fea91451a0c3..b1991cc6f0ca 100644 --- a/drivers/net/ipa/ipa_data-v4.11.c +++ b/drivers/net/ipa/ipa_data-v4.11.c @@ -86,6 +86,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .aggregation = true, .status_enable = true, .rx = { + .buffer_size = 8192, .pad_align = ilog2(sizeof(u32)), }, }, @@ -133,6 +134,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .qmap = true, .aggregation = true, .rx = { + .buffer_size = 32768, .aggr_close_eof = true, }, }, diff --git a/drivers/net/ipa/ipa_data-v4.2.c b/drivers/net/ipa/ipa_data-v4.2.c index 2a231e79d5e1..1190a43e8743 100644 --- a/drivers/net/ipa/ipa_data-v4.2.c +++ b/drivers/net/ipa/ipa_data-v4.2.c @@ -82,6 +82,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .aggregation = true, .status_enable = true, .rx = { + .buffer_size = 8192, .pad_align = ilog2(sizeof(u32)), }, }, @@ -130,6 +131,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .qmap = true, .aggregation = true, .rx = { + .buffer_size = 8192, .aggr_close_eof = true, }, }, diff --git a/drivers/net/ipa/ipa_data-v4.5.c b/drivers/net/ipa/ipa_data-v4.5.c index 2da2c4194f2e..944f72b0f285 100644 --- a/drivers/net/ipa/ipa_data-v4.5.c +++ b/drivers/net/ipa/ipa_data-v4.5.c @@ -95,6 +95,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .aggregation = true, .status_enable = true, .rx = { + .buffer_size = 8192, .pad_align = ilog2(sizeof(u32)), }, }, @@ -142,6 +143,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .qmap = true, .aggregation = true, .rx = { + .buffer_size = 8192, .aggr_close_eof = true, }, }, diff --git a/drivers/net/ipa/ipa_data-v4.9.c b/drivers/net/ipa/ipa_data-v4.9.c index 2421b5abb5d4..16786bff7ef8 100644 --- a/drivers/net/ipa/ipa_data-v4.9.c +++ b/drivers/net/ipa/ipa_data-v4.9.c @@ -87,6 +87,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .aggregation = true, .status_enable = true, .rx = { + .buffer_size = 8192, .pad_align = ilog2(sizeof(u32)), }, }, @@ -134,6 +135,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .qmap = true, .aggregation = true, .rx = { + .buffer_size = 8192, .aggr_close_eof = true, }, }, diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h index 6d329e9ce5d2..dbbeecf6df29 100644 --- a/drivers/net/ipa/ipa_data.h +++ b/drivers/net/ipa/ipa_data.h @@ -112,6 +112,7 @@ struct ipa_endpoint_tx_data { /** * struct ipa_endpoint_rx_data - configuration data for RX endpoints + * @buffer_size: requested receive buffer size (bytes) * @pad_align: power-of-2 boundary to which packet payload is aligned * @aggr_close_eof: whether aggregation closes on end-of-frame * @@ -125,6 +126,7 @@ struct ipa_endpoint_tx_data { * a "frame" consisting of several transfers has ended. */ struct ipa_endpoint_rx_data { + u32 buffer_size; u32 pad_align; bool aggr_close_eof; }; diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 68291a3efd04..fffd0a784ef2 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -27,9 +27,6 @@ #define IPA_REPLENISH_BATCH 16 -/* RX buffer is 1 page (or a power-of-2 contiguous pages) */ -#define IPA_RX_BUFFER_SIZE 8192 /* PAGE_SIZE > 4096 wastes a LOT */ - /* The amount of RX buffer space consumed by standard skb overhead */ #define IPA_RX_BUFFER_OVERHEAD (PAGE_SIZE - SKB_MAX_ORDER(NET_SKB_PAD, 0)) @@ -75,6 +72,14 @@ struct ipa_status { #define IPA_STATUS_FLAGS1_RT_RULE_ID_FMASK GENMASK(31, 22) #define IPA_STATUS_FLAGS2_TAG_FMASK GENMASK_ULL(63, 16) +static u32 aggr_byte_limit_max(enum ipa_version version) +{ + if (version < IPA_VERSION_4_5) + return field_max(aggr_byte_limit_fmask(true)); + + return field_max(aggr_byte_limit_fmask(false)); +} + static bool ipa_endpoint_data_valid_one(struct ipa *ipa, u32 count, const struct ipa_gsi_endpoint_data *all_data, const struct ipa_gsi_endpoint_data *data) @@ -87,6 +92,9 @@ static bool ipa_endpoint_data_valid_one(struct ipa *ipa, u32 count, return true; if (!data->toward_ipa) { + u32 buffer_size; + u32 limit; + if (data->endpoint.filter_support) { dev_err(dev, "filtering not supported for " "RX endpoint %u\n", @@ -94,6 +102,41 @@ static bool ipa_endpoint_data_valid_one(struct ipa *ipa, u32 count, return false; } + /* Nothing more to check for non-AP RX */ + if (data->ee_id != GSI_EE_AP) + return true; + + buffer_size = data->endpoint.config.rx.buffer_size; + /* The buffer size must hold an MTU plus overhead */ + limit = IPA_MTU + IPA_RX_BUFFER_OVERHEAD; + if (buffer_size < limit) { + dev_err(dev, "RX buffer size too small for RX endpoint %u (%u < %u)\n", + data->endpoint_id, buffer_size, limit); + return false; + } + + /* For an endpoint supporting receive aggregation, the + * aggregation byte limit defines the point at which an + * aggregation window will close. It is programmed into the + * IPA hardware as a number of KB. We don't use "hard byte + * limit" aggregation, so we need to supply enough space in + * a receive buffer to hold a complete MTU plus normal skb + * overhead *after* that aggregation byte limit has been + * crossed. + * + * This check just ensures the receive buffer size doesn't + * exceed what's representable in the aggregation limit field. + */ + if (data->endpoint.config.aggregation) { + limit += SZ_1K * aggr_byte_limit_max(ipa->version); + if (buffer_size > limit) { + dev_err(dev, "RX buffer size too large for aggregated RX endpoint %u (%u > %u)\n", + data->endpoint_id, buffer_size, limit); + + return false; + } + } + return true; /* Nothing more to check for RX */ } @@ -156,21 +199,12 @@ static bool ipa_endpoint_data_valid_one(struct ipa *ipa, u32 count, return true; } -static u32 aggr_byte_limit_max(enum ipa_version version) -{ - if (version < IPA_VERSION_4_5) - return field_max(aggr_byte_limit_fmask(true)); - - return field_max(aggr_byte_limit_fmask(false)); -} - static bool ipa_endpoint_data_valid(struct ipa *ipa, u32 count, const struct ipa_gsi_endpoint_data *data) { const struct ipa_gsi_endpoint_data *dp = data; struct device *dev = &ipa->pdev->dev; enum ipa_endpoint_name name; - u32 limit; if (count > IPA_ENDPOINT_COUNT) { dev_err(dev, "too many endpoints specified (%u > %u)\n", @@ -178,26 +212,6 @@ static bool ipa_endpoint_data_valid(struct ipa *ipa, u32 count, return false; } - /* The aggregation byte limit defines the point at which an - * aggregation window will close. It is programmed into the - * IPA hardware as a number of KB. We don't use "hard byte - * limit" aggregation, which means that we need to supply - * enough space in a receive buffer to hold a complete MTU - * plus normal skb overhead *after* that aggregation byte - * limit has been crossed. - * - * This check ensures we don't define a receive buffer size - * that would exceed what we can represent in the field that - * is used to program its size. - */ - limit = aggr_byte_limit_max(ipa->version) * SZ_1K; - limit += IPA_MTU + IPA_RX_BUFFER_OVERHEAD; - if (limit < IPA_RX_BUFFER_SIZE) { - dev_err(dev, "buffer size too big for aggregation (%u > %u)\n", - IPA_RX_BUFFER_SIZE, limit); - return false; - } - /* Make sure needed endpoints have defined data */ if (ipa_gsi_endpoint_data_empty(&data[IPA_ENDPOINT_AP_COMMAND_TX])) { dev_err(dev, "command TX endpoint not defined\n"); @@ -723,13 +737,15 @@ static void ipa_endpoint_init_aggr(struct ipa_endpoint *endpoint) if (endpoint->data->aggregation) { if (!endpoint->toward_ipa) { + const struct ipa_endpoint_rx_data *rx_data; bool close_eof; u32 limit; + rx_data = &endpoint->data->rx; val |= u32_encode_bits(IPA_ENABLE_AGGR, AGGR_EN_FMASK); val |= u32_encode_bits(IPA_GENERIC, AGGR_TYPE_FMASK); - limit = ipa_aggr_size_kb(IPA_RX_BUFFER_SIZE); + limit = ipa_aggr_size_kb(rx_data->buffer_size); val |= aggr_byte_limit_encoded(version, limit); limit = IPA_AGGR_TIME_LIMIT; @@ -737,7 +753,7 @@ static void ipa_endpoint_init_aggr(struct ipa_endpoint *endpoint) /* AGGR_PKT_LIMIT is 0 (unlimited) */ - close_eof = endpoint->data->rx.aggr_close_eof; + close_eof = rx_data->aggr_close_eof; val |= aggr_sw_eof_active_encoded(version, close_eof); /* AGGR_HARD_BYTE_LIMIT_ENABLE is 0 */ @@ -1025,11 +1041,13 @@ static int ipa_endpoint_replenish_one(struct ipa_endpoint *endpoint) struct gsi_trans *trans; bool doorbell = false; struct page *page; + u32 buffer_size; u32 offset; u32 len; int ret; - page = dev_alloc_pages(get_order(IPA_RX_BUFFER_SIZE)); + buffer_size = endpoint->data->rx.buffer_size; + page = dev_alloc_pages(get_order(buffer_size)); if (!page) return -ENOMEM; @@ -1039,7 +1057,7 @@ static int ipa_endpoint_replenish_one(struct ipa_endpoint *endpoint) /* Offset the buffer to make space for skb headroom */ offset = NET_SKB_PAD; - len = IPA_RX_BUFFER_SIZE - offset; + len = buffer_size - offset; ret = gsi_trans_page_add(trans, page, len, offset); if (ret) @@ -1058,7 +1076,7 @@ static int ipa_endpoint_replenish_one(struct ipa_endpoint *endpoint) err_trans_free: gsi_trans_free(trans); err_free_pages: - __free_pages(page, get_order(IPA_RX_BUFFER_SIZE)); + __free_pages(page, get_order(buffer_size)); return -ENOMEM; } @@ -1183,15 +1201,16 @@ static void ipa_endpoint_skb_copy(struct ipa_endpoint *endpoint, static bool ipa_endpoint_skb_build(struct ipa_endpoint *endpoint, struct page *page, u32 len) { + u32 buffer_size = endpoint->data->rx.buffer_size; struct sk_buff *skb; /* Nothing to do if there's no netdev */ if (!endpoint->netdev) return false; - WARN_ON(len > SKB_WITH_OVERHEAD(IPA_RX_BUFFER_SIZE - NET_SKB_PAD)); + WARN_ON(len > SKB_WITH_OVERHEAD(buffer_size - NET_SKB_PAD)); - skb = build_skb(page_address(page), IPA_RX_BUFFER_SIZE); + skb = build_skb(page_address(page), buffer_size); if (skb) { /* Reserve the headroom and account for the data */ skb_reserve(skb, NET_SKB_PAD); @@ -1289,8 +1308,9 @@ static bool ipa_endpoint_status_drop(struct ipa_endpoint *endpoint, static void ipa_endpoint_status_parse(struct ipa_endpoint *endpoint, struct page *page, u32 total_len) { + u32 buffer_size = endpoint->data->rx.buffer_size; void *data = page_address(page) + NET_SKB_PAD; - u32 unused = IPA_RX_BUFFER_SIZE - total_len; + u32 unused = buffer_size - total_len; u32 resid = total_len; while (resid) { @@ -1398,8 +1418,11 @@ void ipa_endpoint_trans_release(struct ipa_endpoint *endpoint, } else { struct page *page = trans->data; - if (page) - __free_pages(page, get_order(IPA_RX_BUFFER_SIZE)); + if (page) { + u32 buffer_size = endpoint->data->rx.buffer_size; + + __free_pages(page, get_order(buffer_size)); + } } } diff --git a/drivers/net/mdio/mdio-xgene.c b/drivers/net/mdio/mdio-xgene.c index 7ab4e26db08c..7aafc221b5cf 100644 --- a/drivers/net/mdio/mdio-xgene.c +++ b/drivers/net/mdio/mdio-xgene.c @@ -285,7 +285,8 @@ static acpi_status acpi_register_phy(acpi_handle handle, u32 lvl, const union acpi_object *obj; u32 phy_addr; - if (acpi_bus_get_device(handle, &adev)) + adev = acpi_fetch_acpi_dev(handle); + if (!adev) return AE_OK; if (acpi_dev_get_property(adev, "phy-channel", ACPI_TYPE_INTEGER, &obj)) diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index cd6742e6ba8b..61418d4dc0cd 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -632,35 +632,43 @@ static void xpcs_resolve_pma(struct dw_xpcs *xpcs, } } -void xpcs_validate(struct dw_xpcs *xpcs, unsigned long *supported, - struct phylink_link_state *state) +static int xpcs_validate(struct phylink_pcs *pcs, unsigned long *supported, + const struct phylink_link_state *state) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(xpcs_supported); + __ETHTOOL_DECLARE_LINK_MODE_MASK(xpcs_supported) = { 0, }; const struct xpcs_compat *compat; + struct dw_xpcs *xpcs; int i; - /* phylink expects us to report all supported modes with - * PHY_INTERFACE_MODE_NA, just don't limit the supported and - * advertising masks and exit. - */ - if (state->interface == PHY_INTERFACE_MODE_NA) - return; - - linkmode_zero(xpcs_supported); - + xpcs = phylink_pcs_to_xpcs(pcs); compat = xpcs_find_compat(xpcs->id, state->interface); - /* Populate the supported link modes for this - * PHY interface type + /* Populate the supported link modes for this PHY interface type. + * FIXME: what about the port modes and autoneg bit? This masks + * all those away. */ if (compat) for (i = 0; compat->supported[i] != __ETHTOOL_LINK_MODE_MASK_NBITS; i++) set_bit(compat->supported[i], xpcs_supported); linkmode_and(supported, supported, xpcs_supported); - linkmode_and(state->advertising, state->advertising, xpcs_supported); + + return 0; +} + +void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces) +{ + int i, j; + + for (i = 0; i < DW_XPCS_INTERFACE_MAX; i++) { + const struct xpcs_compat *compat = &xpcs->id->compat[i]; + + for (j = 0; j < compat->num_interfaces; j++) + if (compat->interface[j] < PHY_INTERFACE_MODE_MAX) + __set_bit(compat->interface[j], interfaces); + } } -EXPORT_SYMBOL_GPL(xpcs_validate); +EXPORT_SYMBOL_GPL(xpcs_get_interfaces); int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable) { @@ -1106,6 +1114,7 @@ static const struct xpcs_id xpcs_id_list[] = { }; static const struct phylink_pcs_ops xpcs_phylink_ops = { + .pcs_validate = xpcs_validate, .pcs_config = xpcs_config, .pcs_get_state = xpcs_get_state, .pcs_link_up = xpcs_link_up, diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia_main.c index 968dd43a2b1e..a8db1a19011b 100644 --- a/drivers/net/phy/aquantia_main.c +++ b/drivers/net/phy/aquantia_main.c @@ -533,9 +533,7 @@ static int aqcs109_config_init(struct phy_device *phydev) * PMA speed ability bits are the same for all members of the family, * AQCS109 however supports speeds up to 2.5G only. */ - ret = phy_set_max_speed(phydev, SPEED_2500); - if (ret) - return ret; + phy_set_max_speed(phydev, SPEED_2500); return aqr107_set_downshift(phydev, MDIO_AN_VEND_PROV_DOWNSHIFT_DFLT); } diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 29aa811af430..73926006d319 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -19,6 +19,8 @@ #include <linux/regulator/of_regulator.h> #include <linux/regulator/driver.h> #include <linux/regulator/consumer.h> +#include <linux/phylink.h> +#include <linux/sfp.h> #include <dt-bindings/net/qca-ar803x.h> #define AT803X_SPECIFIC_FUNCTION_CONTROL 0x10 @@ -51,6 +53,8 @@ #define AT803X_INTR_ENABLE_PAGE_RECEIVED BIT(12) #define AT803X_INTR_ENABLE_LINK_FAIL BIT(11) #define AT803X_INTR_ENABLE_LINK_SUCCESS BIT(10) +#define AT803X_INTR_ENABLE_LINK_FAIL_BX BIT(8) +#define AT803X_INTR_ENABLE_LINK_SUCCESS_BX BIT(7) #define AT803X_INTR_ENABLE_WIRESPEED_DOWNGRADE BIT(5) #define AT803X_INTR_ENABLE_POLARITY_CHANGED BIT(1) #define AT803X_INTR_ENABLE_WOL BIT(0) @@ -85,7 +89,17 @@ #define AT803X_DEBUG_DATA 0x1E #define AT803X_MODE_CFG_MASK 0x0F -#define AT803X_MODE_CFG_SGMII 0x01 +#define AT803X_MODE_CFG_BASET_RGMII 0x00 +#define AT803X_MODE_CFG_BASET_SGMII 0x01 +#define AT803X_MODE_CFG_BX1000_RGMII_50OHM 0x02 +#define AT803X_MODE_CFG_BX1000_RGMII_75OHM 0x03 +#define AT803X_MODE_CFG_BX1000_CONV_50OHM 0x04 +#define AT803X_MODE_CFG_BX1000_CONV_75OHM 0x05 +#define AT803X_MODE_CFG_FX100_RGMII_50OHM 0x06 +#define AT803X_MODE_CFG_FX100_CONV_50OHM 0x07 +#define AT803X_MODE_CFG_RGMII_AUTO_MDET 0x0B +#define AT803X_MODE_CFG_FX100_RGMII_75OHM 0x0E +#define AT803X_MODE_CFG_FX100_CONV_75OHM 0x0F #define AT803X_PSSR 0x11 /*PHY-Specific Status Register*/ #define AT803X_PSSR_MR_AN_COMPLETE 0x0200 @@ -283,6 +297,8 @@ struct at803x_priv { u16 clk_25m_mask; u8 smarteee_lpi_tw_1g; u8 smarteee_lpi_tw_100m; + bool is_fiber; + bool is_1000basex; struct regulator_dev *vddio_rdev; struct regulator_dev *vddh_rdev; struct regulator *vddio; @@ -650,6 +666,55 @@ static int at8031_register_regulators(struct phy_device *phydev) return 0; } +static int at803x_sfp_insert(void *upstream, const struct sfp_eeprom_id *id) +{ + struct phy_device *phydev = upstream; + __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_support); + __ETHTOOL_DECLARE_LINK_MODE_MASK(sfp_support); + phy_interface_t iface; + + linkmode_zero(phy_support); + phylink_set(phy_support, 1000baseX_Full); + phylink_set(phy_support, 1000baseT_Full); + phylink_set(phy_support, Autoneg); + phylink_set(phy_support, Pause); + phylink_set(phy_support, Asym_Pause); + + linkmode_zero(sfp_support); + sfp_parse_support(phydev->sfp_bus, id, sfp_support); + /* Some modules support 10G modes as well as others we support. + * Mask out non-supported modes so the correct interface is picked. + */ + linkmode_and(sfp_support, phy_support, sfp_support); + + if (linkmode_empty(sfp_support)) { + dev_err(&phydev->mdio.dev, "incompatible SFP module inserted\n"); + return -EINVAL; + } + + iface = sfp_select_interface(phydev->sfp_bus, sfp_support); + + /* Only 1000Base-X is supported by AR8031/8033 as the downstream SerDes + * interface for use with SFP modules. + * However, some copper modules detected as having a preferred SGMII + * interface do default to and function in 1000Base-X mode, so just + * print a warning and allow such modules, as they may have some chance + * of working. + */ + if (iface == PHY_INTERFACE_MODE_SGMII) + dev_warn(&phydev->mdio.dev, "module may not function if 1000Base-X not supported\n"); + else if (iface != PHY_INTERFACE_MODE_1000BASEX) + return -EINVAL; + + return 0; +} + +static const struct sfp_upstream_ops at803x_sfp_ops = { + .attach = phy_sfp_attach, + .detach = phy_sfp_detach, + .module_insert = at803x_sfp_insert, +}; + static int at803x_parse_dt(struct phy_device *phydev) { struct device_node *node = phydev->mdio.dev.of_node; @@ -757,6 +822,11 @@ static int at803x_parse_dt(struct phy_device *phydev) phydev_err(phydev, "failed to get VDDIO regulator\n"); return PTR_ERR(priv->vddio); } + + /* Only AR8031/8033 support 1000Base-X for SFP modules */ + ret = phy_sfp_probe(phydev, &at803x_sfp_ops); + if (ret < 0) + return ret; } return 0; @@ -784,16 +854,24 @@ static int at803x_probe(struct phy_device *phydev) return ret; } - /* Some bootloaders leave the fiber page selected. - * Switch to the copper page, as otherwise we read - * the PHY capabilities from the fiber side. - */ if (phydev->drv->phy_id == ATH8031_PHY_ID) { - phy_lock_mdio_bus(phydev); - ret = at803x_write_page(phydev, AT803X_PAGE_COPPER); - phy_unlock_mdio_bus(phydev); - if (ret) + int ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG); + int mode_cfg; + + if (ccr < 0) goto err; + mode_cfg = ccr & AT803X_MODE_CFG_MASK; + + switch (mode_cfg) { + case AT803X_MODE_CFG_BX1000_RGMII_50OHM: + case AT803X_MODE_CFG_BX1000_RGMII_75OHM: + priv->is_1000basex = true; + fallthrough; + case AT803X_MODE_CFG_FX100_RGMII_50OHM: + case AT803X_MODE_CFG_FX100_RGMII_75OHM: + priv->is_fiber = true; + break; + } } return 0; @@ -815,6 +893,7 @@ static void at803x_remove(struct phy_device *phydev) static int at803x_get_features(struct phy_device *phydev) { + struct at803x_priv *priv = phydev->priv; int err; err = genphy_read_abilities(phydev); @@ -841,12 +920,13 @@ static int at803x_get_features(struct phy_device *phydev) * As a result of that, ESTATUS_1000_XFULL is set * to 1 even when operating in copper TP mode. * - * Remove this mode from the supported link modes, - * as this driver currently only supports copper - * operation. + * Remove this mode from the supported link modes + * when not operating in 1000BaseX mode. */ - linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT, - phydev->supported); + if (!priv->is_1000basex) + linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT, + phydev->supported); + return 0; } @@ -910,8 +990,27 @@ static int at8031_pll_config(struct phy_device *phydev) static int at803x_config_init(struct phy_device *phydev) { + struct at803x_priv *priv = phydev->priv; int ret; + if (phydev->drv->phy_id == ATH8031_PHY_ID) { + /* Some bootloaders leave the fiber page selected. + * Switch to the appropriate page (fiber or copper), as otherwise we + * read the PHY capabilities from the wrong page. + */ + phy_lock_mdio_bus(phydev); + ret = at803x_write_page(phydev, + priv->is_fiber ? AT803X_PAGE_FIBER : + AT803X_PAGE_COPPER); + phy_unlock_mdio_bus(phydev); + if (ret) + return ret; + + ret = at8031_pll_config(phydev); + if (ret < 0) + return ret; + } + /* The RX and TX delay default is: * after HW reset: RX delay enabled and TX delay disabled * after SW reset: RX delay enabled, while TX delay retains the @@ -941,12 +1040,6 @@ static int at803x_config_init(struct phy_device *phydev) if (ret < 0) return ret; - if (phydev->drv->phy_id == ATH8031_PHY_ID) { - ret = at8031_pll_config(phydev); - if (ret < 0) - return ret; - } - /* Ar803x extended next page bit is enabled by default. Cisco * multigig switches read this bit and attempt to negotiate 10Gbps * rates even if the next page bit is disabled. This is incorrect @@ -967,6 +1060,7 @@ static int at803x_ack_interrupt(struct phy_device *phydev) static int at803x_config_intr(struct phy_device *phydev) { + struct at803x_priv *priv = phydev->priv; int err; int value; @@ -983,6 +1077,10 @@ static int at803x_config_intr(struct phy_device *phydev) value |= AT803X_INTR_ENABLE_DUPLEX_CHANGED; value |= AT803X_INTR_ENABLE_LINK_FAIL; value |= AT803X_INTR_ENABLE_LINK_SUCCESS; + if (priv->is_fiber) { + value |= AT803X_INTR_ENABLE_LINK_FAIL_BX; + value |= AT803X_INTR_ENABLE_LINK_SUCCESS_BX; + } err = phy_write(phydev, AT803X_INTR_ENABLE, value); } else { @@ -1115,8 +1213,12 @@ static int at803x_read_specific_status(struct phy_device *phydev) static int at803x_read_status(struct phy_device *phydev) { + struct at803x_priv *priv = phydev->priv; int err, old_link = phydev->link; + if (priv->is_1000basex) + return genphy_c37_read_status(phydev); + /* Update the link, but return if there was an error */ err = genphy_update_link(phydev); if (err) @@ -1170,6 +1272,7 @@ static int at803x_config_mdix(struct phy_device *phydev, u8 ctrl) static int at803x_config_aneg(struct phy_device *phydev) { + struct at803x_priv *priv = phydev->priv; int ret; ret = at803x_config_mdix(phydev, phydev->mdix_ctrl); @@ -1186,6 +1289,9 @@ static int at803x_config_aneg(struct phy_device *phydev) return ret; } + if (priv->is_1000basex) + return genphy_c37_config_aneg(phydev); + /* Do not restart auto-negotiation by setting ret to 0 defautly, * when calling __genphy_config_aneg later. */ diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 271fc01f7f7f..2001f3329133 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -243,7 +243,7 @@ size_t phy_speeds(unsigned int *speeds, size_t size, return count; } -static int __set_linkmode_max_speed(u32 max_speed, unsigned long *addr) +static void __set_linkmode_max_speed(u32 max_speed, unsigned long *addr) { const struct phy_setting *p; int i; @@ -254,13 +254,11 @@ static int __set_linkmode_max_speed(u32 max_speed, unsigned long *addr) else break; } - - return 0; } -static int __set_phy_supported(struct phy_device *phydev, u32 max_speed) +static void __set_phy_supported(struct phy_device *phydev, u32 max_speed) { - return __set_linkmode_max_speed(max_speed, phydev->supported); + __set_linkmode_max_speed(max_speed, phydev->supported); } /** @@ -273,17 +271,11 @@ static int __set_phy_supported(struct phy_device *phydev, u32 max_speed) * is connected to a 1G PHY. This function allows the MAC to indicate its * maximum speed, and so limit what the PHY will advertise. */ -int phy_set_max_speed(struct phy_device *phydev, u32 max_speed) +void phy_set_max_speed(struct phy_device *phydev, u32 max_speed) { - int err; - - err = __set_phy_supported(phydev, max_speed); - if (err) - return err; + __set_phy_supported(phydev, max_speed); phy_advertise_supported(phydev); - - return 0; } EXPORT_SYMBOL(phy_set_max_speed); @@ -440,7 +432,9 @@ int phy_speed_down_core(struct phy_device *phydev) if (min_common_speed == SPEED_UNKNOWN) return -EINVAL; - return __set_linkmode_max_speed(min_common_speed, phydev->advertising); + __set_linkmode_max_speed(min_common_speed, phydev->advertising); + + return 0; } static void mmd_phy_indirect(struct mii_bus *bus, int phy_addr, int devad, diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 4514d35ef4c4..9b72334aabb6 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -858,7 +858,6 @@ static int marvell_phy_init(struct usbnet *dev) reg = asix_mdio_read(dev->net, dev->mii.phy_id, MII_MARVELL_LED_CTRL); netdev_dbg(dev->net, "MII_MARVELL_LED_CTRL (2) = 0x%04x\n", reg); - reg &= 0xfc0f; } return 0; diff --git a/drivers/nfc/st-nci/vendor_cmds.c b/drivers/nfc/st-nci/vendor_cmds.c index 30d2912d1a05..6335d7afca24 100644 --- a/drivers/nfc/st-nci/vendor_cmds.c +++ b/drivers/nfc/st-nci/vendor_cmds.c @@ -456,7 +456,7 @@ static const struct nfc_vendor_cmd st_nci_vendor_cmds[] = { int st_nci_vendor_cmds_init(struct nci_dev *ndev) { - return nfc_set_vendor_cmds(ndev->nfc_dev, st_nci_vendor_cmds, + return nci_set_vendor_cmds(ndev, st_nci_vendor_cmds, sizeof(st_nci_vendor_cmds)); } EXPORT_SYMBOL(st_nci_vendor_cmds_init); diff --git a/drivers/nfc/st21nfca/vendor_cmds.c b/drivers/nfc/st21nfca/vendor_cmds.c index 74882866dbaf..bfa418d4c6b0 100644 --- a/drivers/nfc/st21nfca/vendor_cmds.c +++ b/drivers/nfc/st21nfca/vendor_cmds.c @@ -358,7 +358,7 @@ int st21nfca_vendor_cmds_init(struct nfc_hci_dev *hdev) struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev); init_completion(&info->vendor_info.req_completion); - return nfc_set_vendor_cmds(hdev->ndev, st21nfca_vendor_cmds, - sizeof(st21nfca_vendor_cmds)); + return nfc_hci_set_vendor_cmds(hdev, st21nfca_vendor_cmds, + sizeof(st21nfca_vendor_cmds)); } EXPORT_SYMBOL(st21nfca_vendor_cmds_init); diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 0e4bc8b9329d..b6f2cfd15dd2 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -317,11 +317,18 @@ no_memory: } EXPORT_SYMBOL(ptp_clock_register); +static int unregister_vclock(struct device *dev, void *data) +{ + struct ptp_clock *ptp = dev_get_drvdata(dev); + + ptp_vclock_unregister(info_to_vclock(ptp->info)); + return 0; +} + int ptp_clock_unregister(struct ptp_clock *ptp) { if (ptp_vclock_in_use(ptp)) { - pr_err("ptp: virtual clock in use\n"); - return -EBUSY; + device_for_each_child(&ptp->dev, NULL, unregister_vclock); } ptp->defunct = 1; diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c index 41b92dc2f011..9233bfedeb17 100644 --- a/drivers/ptp/ptp_sysfs.c +++ b/drivers/ptp/ptp_sysfs.c @@ -14,7 +14,7 @@ static ssize_t clock_name_show(struct device *dev, struct device_attribute *attr, char *page) { struct ptp_clock *ptp = dev_get_drvdata(dev); - return snprintf(page, PAGE_SIZE-1, "%s\n", ptp->info->name); + return sysfs_emit(page, "%s\n", ptp->info->name); } static DEVICE_ATTR_RO(clock_name); @@ -387,7 +387,7 @@ static ssize_t ptp_pin_show(struct device *dev, struct device_attribute *attr, mutex_unlock(&ptp->pincfg_mux); - return snprintf(page, PAGE_SIZE, "%u %u\n", func, chan); + return sysfs_emit(page, "%u %u\n", func, chan); } static ssize_t ptp_pin_store(struct device *dev, struct device_attribute *attr, diff --git a/drivers/ptp/ptp_vclock.c b/drivers/ptp/ptp_vclock.c index ab1d233173e1..cb179a3ea508 100644 --- a/drivers/ptp/ptp_vclock.c +++ b/drivers/ptp/ptp_vclock.c @@ -57,6 +57,30 @@ static int ptp_vclock_gettime(struct ptp_clock_info *ptp, return 0; } +static int ptp_vclock_gettimex(struct ptp_clock_info *ptp, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + struct ptp_vclock *vclock = info_to_vclock(ptp); + struct ptp_clock *pptp = vclock->pclock; + struct timespec64 pts; + unsigned long flags; + int err; + u64 ns; + + err = pptp->info->gettimex64(pptp->info, &pts, sts); + if (err) + return err; + + spin_lock_irqsave(&vclock->lock, flags); + ns = timecounter_cyc2time(&vclock->tc, timespec64_to_ns(&pts)); + spin_unlock_irqrestore(&vclock->lock, flags); + + *ts = ns_to_timespec64(ns); + + return 0; +} + static int ptp_vclock_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts) { @@ -71,6 +95,28 @@ static int ptp_vclock_settime(struct ptp_clock_info *ptp, return 0; } +static int ptp_vclock_getcrosststamp(struct ptp_clock_info *ptp, + struct system_device_crosststamp *xtstamp) +{ + struct ptp_vclock *vclock = info_to_vclock(ptp); + struct ptp_clock *pptp = vclock->pclock; + unsigned long flags; + int err; + u64 ns; + + err = pptp->info->getcrosststamp(pptp->info, xtstamp); + if (err) + return err; + + spin_lock_irqsave(&vclock->lock, flags); + ns = timecounter_cyc2time(&vclock->tc, ktime_to_ns(xtstamp->device)); + spin_unlock_irqrestore(&vclock->lock, flags); + + xtstamp->device = ns_to_ktime(ns); + + return 0; +} + static long ptp_vclock_refresh(struct ptp_clock_info *ptp) { struct ptp_vclock *vclock = info_to_vclock(ptp); @@ -84,11 +130,9 @@ static long ptp_vclock_refresh(struct ptp_clock_info *ptp) static const struct ptp_clock_info ptp_vclock_info = { .owner = THIS_MODULE, .name = "ptp virtual clock", - /* The maximum ppb value that long scaled_ppm can support */ - .max_adj = 32767999, + .max_adj = 500000000, .adjfine = ptp_vclock_adjfine, .adjtime = ptp_vclock_adjtime, - .gettime64 = ptp_vclock_gettime, .settime64 = ptp_vclock_settime, .do_aux_work = ptp_vclock_refresh, }; @@ -124,6 +168,12 @@ struct ptp_vclock *ptp_vclock_register(struct ptp_clock *pclock) vclock->pclock = pclock; vclock->info = ptp_vclock_info; + if (pclock->info->gettimex64) + vclock->info.gettimex64 = ptp_vclock_gettimex; + else + vclock->info.gettime64 = ptp_vclock_gettime; + if (pclock->info->getcrosststamp) + vclock->info.getcrosststamp = ptp_vclock_getcrosststamp; vclock->cc = ptp_vclock_cc; snprintf(vclock->info.name, PTP_CLOCK_NAME_LEN, "ptp%d_virt", diff --git a/include/linux/bpf.h b/include/linux/bpf.h index fa517ae604ad..8c92c974bd12 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -194,6 +194,17 @@ struct bpf_map { struct work_struct work; struct mutex freeze_mutex; atomic64_t writecnt; + /* 'Ownership' of program-containing map is claimed by the first program + * that is going to use this map or by the first program which FD is + * stored in the map to make sure that all callers and callees have the + * same prog type, JITed flag and xdp_has_frags flag. + */ + struct { + spinlock_t lock; + enum bpf_prog_type type; + bool jited; + bool xdp_has_frags; + } owner; }; static inline bool map_value_has_spin_lock(const struct bpf_map *map) @@ -578,7 +589,6 @@ struct bpf_verifier_ops { const struct btf_type *t, int off, int size, enum bpf_access_type atype, u32 *next_btf_id); - bool (*check_kfunc_call)(u32 kfunc_btf_id, struct module *owner); }; struct bpf_prog_offload_ops { @@ -939,6 +949,7 @@ struct bpf_prog_aux { bool func_proto_unreliable; bool sleepable; bool tail_call_reachable; + bool xdp_has_frags; struct hlist_node tramp_hlist; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; @@ -999,16 +1010,6 @@ struct bpf_prog_aux { }; struct bpf_array_aux { - /* 'Ownership' of prog array is claimed by the first program that - * is going to use this map or by the first program which FD is - * stored in the map to make sure that all callers and callees have - * the same prog type and JITed flag. - */ - struct { - spinlock_t lock; - enum bpf_prog_type type; - bool jited; - } owner; /* Programs with direct jumps into programs part of this array. */ struct list_head poke_progs; struct bpf_map *map; @@ -1183,7 +1184,14 @@ struct bpf_event_entry { struct rcu_head rcu; }; -bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); +static inline bool map_type_contains_progs(struct bpf_map *map) +{ + return map->map_type == BPF_MAP_TYPE_PROG_ARRAY || + map->map_type == BPF_MAP_TYPE_DEVMAP || + map->map_type == BPF_MAP_TYPE_CPUMAP; +} + +bool bpf_prog_map_compatible(struct bpf_map *map, const struct bpf_prog *fp); int bpf_prog_calc_tag(struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); @@ -1251,6 +1259,7 @@ struct bpf_run_ctx {}; struct bpf_cg_run_ctx { struct bpf_run_ctx run_ctx; const struct bpf_prog_array_item *prog_item; + int retval; }; struct bpf_trace_run_ctx { @@ -1283,19 +1292,19 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx) typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx); -static __always_inline u32 +static __always_inline int BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu, const void *ctx, bpf_prog_run_fn run_prog, - u32 *ret_flags) + int retval, u32 *ret_flags) { const struct bpf_prog_array_item *item; const struct bpf_prog *prog; const struct bpf_prog_array *array; struct bpf_run_ctx *old_run_ctx; struct bpf_cg_run_ctx run_ctx; - u32 ret = 1; u32 func_ret; + run_ctx.retval = retval; migrate_disable(); rcu_read_lock(); array = rcu_dereference(array_rcu); @@ -1304,27 +1313,29 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu, while ((prog = READ_ONCE(item->prog))) { run_ctx.prog_item = item; func_ret = run_prog(prog, ctx); - ret &= (func_ret & 1); + if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval)) + run_ctx.retval = -EPERM; *(ret_flags) |= (func_ret >> 1); item++; } bpf_reset_run_ctx(old_run_ctx); rcu_read_unlock(); migrate_enable(); - return ret; + return run_ctx.retval; } -static __always_inline u32 +static __always_inline int BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu, - const void *ctx, bpf_prog_run_fn run_prog) + const void *ctx, bpf_prog_run_fn run_prog, + int retval) { const struct bpf_prog_array_item *item; const struct bpf_prog *prog; const struct bpf_prog_array *array; struct bpf_run_ctx *old_run_ctx; struct bpf_cg_run_ctx run_ctx; - u32 ret = 1; + run_ctx.retval = retval; migrate_disable(); rcu_read_lock(); array = rcu_dereference(array_rcu); @@ -1332,13 +1343,14 @@ BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu, old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); while ((prog = READ_ONCE(item->prog))) { run_ctx.prog_item = item; - ret &= run_prog(prog, ctx); + if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval)) + run_ctx.retval = -EPERM; item++; } bpf_reset_run_ctx(old_run_ctx); rcu_read_unlock(); migrate_enable(); - return ret; + return run_ctx.retval; } static __always_inline u32 @@ -1391,19 +1403,21 @@ out: * 0: NET_XMIT_SUCCESS skb should be transmitted * 1: NET_XMIT_DROP skb should be dropped and cn * 2: NET_XMIT_CN skb should be transmitted and cn - * 3: -EPERM skb should be dropped + * 3: -err skb should be dropped */ #define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \ ({ \ u32 _flags = 0; \ bool _cn; \ u32 _ret; \ - _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \ + _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \ _cn = _flags & BPF_RET_SET_CN; \ - if (_ret) \ + if (_ret && !IS_ERR_VALUE((long)_ret)) \ + _ret = -EFAULT; \ + if (!_ret) \ _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \ else \ - _ret = (_cn ? NET_XMIT_DROP : -EPERM); \ + _ret = (_cn ? NET_XMIT_DROP : _ret); \ _ret; \ }) @@ -1724,7 +1738,6 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); -bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner); bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); @@ -1976,12 +1989,6 @@ static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, return -ENOTSUPP; } -static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, - struct module *owner) -{ - return false; -} - static inline void bpf_map_put(struct bpf_map *map) { } @@ -2076,6 +2083,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog, int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); +int sock_map_bpf_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr); + void sock_map_unhash(struct sock *sk); void sock_map_close(struct sock *sk, long timeout); #else @@ -2129,6 +2139,12 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void { return -EOPNOTSUPP; } + +static inline int sock_map_bpf_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + return -EINVAL; +} #endif /* CONFIG_BPF_SYSCALL */ #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e9993172f892..7a7be8c057f2 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -521,6 +521,8 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); int check_ptr_off_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno); +int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + u32 regno); int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, u32 mem_size); @@ -564,4 +566,9 @@ static inline u32 type_flag(u32 type) return type & ~BPF_BASE_TYPE_MASK; } +static inline enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog) +{ + return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type; +} + #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/btf.h b/include/linux/btf.h index 0c74348cbc9d..b12cfe3b12bb 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -12,11 +12,33 @@ #define BTF_TYPE_EMIT(type) ((void)(type *)0) #define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val) +enum btf_kfunc_type { + BTF_KFUNC_TYPE_CHECK, + BTF_KFUNC_TYPE_ACQUIRE, + BTF_KFUNC_TYPE_RELEASE, + BTF_KFUNC_TYPE_RET_NULL, + BTF_KFUNC_TYPE_MAX, +}; + struct btf; struct btf_member; struct btf_type; union bpf_attr; struct btf_show; +struct btf_id_set; + +struct btf_kfunc_id_set { + struct module *owner; + union { + struct { + struct btf_id_set *check_set; + struct btf_id_set *acquire_set; + struct btf_id_set *release_set; + struct btf_id_set *ret_null_set; + }; + struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX]; + }; +}; extern const struct file_operations btf_fops; @@ -307,6 +329,11 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); struct btf *btf_parse_vmlinux(void); struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog); +bool btf_kfunc_id_set_contains(const struct btf *btf, + enum bpf_prog_type prog_type, + enum btf_kfunc_type type, u32 kfunc_btf_id); +int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, + const struct btf_kfunc_id_set *s); #else static inline const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) @@ -318,50 +345,18 @@ static inline const char *btf_name_by_offset(const struct btf *btf, { return NULL; } -#endif - -struct kfunc_btf_id_set { - struct list_head list; - struct btf_id_set *set; - struct module *owner; -}; - -struct kfunc_btf_id_list { - struct list_head list; - struct mutex mutex; -}; - -#ifdef CONFIG_DEBUG_INFO_BTF_MODULES -void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s); -void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s); -bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, - struct module *owner); - -extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list; -extern struct kfunc_btf_id_list prog_test_kfunc_list; -#else -static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s) -{ -} -static inline void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s) +static inline bool btf_kfunc_id_set_contains(const struct btf *btf, + enum bpf_prog_type prog_type, + enum btf_kfunc_type type, + u32 kfunc_btf_id) { + return false; } -static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, - u32 kfunc_id, struct module *owner) +static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, + const struct btf_kfunc_id_set *s) { - return false; + return 0; } - -static struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list __maybe_unused; -static struct kfunc_btf_id_list prog_test_kfunc_list __maybe_unused; #endif -#define DEFINE_KFUNC_BTF_ID_SET(set, name) \ - struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set), \ - THIS_MODULE } - #endif diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 919c0fde1c51..bc5d9cc34e4c 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -11,6 +11,7 @@ struct btf_id_set { #ifdef CONFIG_DEBUG_INFO_BTF #include <linux/compiler.h> /* for __PASTE */ +#include <linux/compiler_attributes.h> /* for __maybe_unused */ /* * Following macros help to define lists of BTF IDs placed @@ -146,14 +147,14 @@ extern struct btf_id_set name; #else -#define BTF_ID_LIST(name) static u32 name[5]; +#define BTF_ID_LIST(name) static u32 __maybe_unused name[5]; #define BTF_ID(prefix, name) #define BTF_ID_UNUSED -#define BTF_ID_LIST_GLOBAL(name, n) u32 name[n]; -#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1]; -#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 name[1]; -#define BTF_SET_START(name) static struct btf_id_set name = { 0 }; -#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 }; +#define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n]; +#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1]; +#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 __maybe_unused name[1]; +#define BTF_SET_START(name) static struct btf_id_set __maybe_unused name = { 0 }; +#define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 }; #define BTF_SET_END(name) #endif /* CONFIG_DEBUG_INFO_BTF */ diff --git a/include/linux/dsa/tag_qca.h b/include/linux/dsa/tag_qca.h new file mode 100644 index 000000000000..4359fb0221cf --- /dev/null +++ b/include/linux/dsa/tag_qca.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __TAG_QCA_H +#define __TAG_QCA_H + +#define QCA_HDR_LEN 2 +#define QCA_HDR_VERSION 0x2 + +#define QCA_HDR_RECV_VERSION GENMASK(15, 14) +#define QCA_HDR_RECV_PRIORITY GENMASK(13, 11) +#define QCA_HDR_RECV_TYPE GENMASK(10, 6) +#define QCA_HDR_RECV_FRAME_IS_TAGGED BIT(3) +#define QCA_HDR_RECV_SOURCE_PORT GENMASK(2, 0) + +/* Packet type for recv */ +#define QCA_HDR_RECV_TYPE_NORMAL 0x0 +#define QCA_HDR_RECV_TYPE_MIB 0x1 +#define QCA_HDR_RECV_TYPE_RW_REG_ACK 0x2 + +#define QCA_HDR_XMIT_VERSION GENMASK(15, 14) +#define QCA_HDR_XMIT_PRIORITY GENMASK(13, 11) +#define QCA_HDR_XMIT_CONTROL GENMASK(10, 8) +#define QCA_HDR_XMIT_FROM_CPU BIT(7) +#define QCA_HDR_XMIT_DP_BIT GENMASK(6, 0) + +/* Packet type for xmit */ +#define QCA_HDR_XMIT_TYPE_NORMAL 0x0 +#define QCA_HDR_XMIT_TYPE_RW_REG 0x1 + +/* Check code for a valid mgmt packet. Switch will ignore the packet + * with this wrong. + */ +#define QCA_HDR_MGMT_CHECK_CODE_VAL 0x5 + +/* Specific define for in-band MDIO read/write with Ethernet packet */ +#define QCA_HDR_MGMT_SEQ_LEN 4 /* 4 byte for the seq */ +#define QCA_HDR_MGMT_COMMAND_LEN 4 /* 4 byte for the command */ +#define QCA_HDR_MGMT_DATA1_LEN 4 /* First 4 byte for the mdio data */ +#define QCA_HDR_MGMT_HEADER_LEN (QCA_HDR_MGMT_SEQ_LEN + \ + QCA_HDR_MGMT_COMMAND_LEN + \ + QCA_HDR_MGMT_DATA1_LEN) + +#define QCA_HDR_MGMT_DATA2_LEN 12 /* Other 12 byte for the mdio data */ +#define QCA_HDR_MGMT_PADDING_LEN 34 /* Padding to reach the min Ethernet packet */ + +#define QCA_HDR_MGMT_PKT_LEN (QCA_HDR_MGMT_HEADER_LEN + \ + QCA_HDR_LEN + \ + QCA_HDR_MGMT_DATA2_LEN + \ + QCA_HDR_MGMT_PADDING_LEN) + +#define QCA_HDR_MGMT_SEQ_NUM GENMASK(31, 0) /* 63, 32 */ +#define QCA_HDR_MGMT_CHECK_CODE GENMASK(31, 29) /* 31, 29 */ +#define QCA_HDR_MGMT_CMD BIT(28) /* 28 */ +#define QCA_HDR_MGMT_LENGTH GENMASK(23, 20) /* 23, 20 */ +#define QCA_HDR_MGMT_ADDR GENMASK(18, 0) /* 18, 0 */ + +/* Special struct emulating a Ethernet header */ +struct qca_mgmt_ethhdr { + u32 command; /* command bit 31:0 */ + u32 seq; /* seq 63:32 */ + u32 mdio_data; /* first 4byte mdio */ + __be16 hdr; /* qca hdr */ +} __packed; + +enum mdio_cmd { + MDIO_WRITE = 0x0, + MDIO_READ +}; + +struct mib_ethhdr { + u32 data[3]; /* first 3 mib counter */ + __be16 hdr; /* qca hdr */ +} __packed; + +struct qca_tagger_data { + void (*rw_reg_ack_handler)(struct dsa_switch *ds, + struct sk_buff *skb); + void (*mib_autocast_handler)(struct dsa_switch *ds, + struct sk_buff *skb); +}; + +#endif /* __TAG_QCA_H */ diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 11efc45de66a..e0853f48b75e 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -70,9 +70,11 @@ enum { /** * struct kernel_ethtool_ringparam - RX/TX ring configuration * @rx_buf_len: Current length of buffers on the rx ring. + * @tcp_data_split: Scatter packet headers and data to separate buffers */ struct kernel_ethtool_ringparam { u32 rx_buf_len; + u8 tcp_data_split; }; /** diff --git a/include/linux/filter.h b/include/linux/filter.h index 71fa57b88bfc..d23e999dc032 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1356,7 +1356,10 @@ struct bpf_sockopt_kern { s32 level; s32 optname; s32 optlen; - s32 retval; + /* for retval in struct bpf_cg_run_ctx */ + struct task_struct *current_task; + /* Temporary "register" for indirect stores to ppos. */ + u64 tmp_reg; }; int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len); diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index a59d25f19385..1e0f8a31f3de 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -371,19 +371,12 @@ static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) return NULL; } -static inline struct inet6_request_sock * - inet6_rsk(const struct request_sock *rsk) -{ - return NULL; -} - static inline struct raw6_sock *raw6_sk(const struct sock *sk) { return NULL; } #define inet6_rcv_saddr(__sk) NULL -#define tcp_twsk_ipv6only(__sk) 0 #define inet_v6_ipv6only(__sk) 0 #endif /* IS_ENABLED(CONFIG_IPV6) */ #endif /* _IPV6_H */ diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index f8397f300fcd..15e0e0209da4 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -66,11 +66,6 @@ static inline void linkmode_mod_bit(int nr, volatile unsigned long *addr, linkmode_clear_bit(nr, addr); } -static inline void linkmode_change_bit(int nr, volatile unsigned long *addr) -{ - __change_bit(nr, addr); -} - static inline int linkmode_test_bit(int nr, const volatile unsigned long *addr) { return test_bit(nr, addr); diff --git a/include/linux/mii.h b/include/linux/mii.h index 12ea29e04293..5ee13083cec7 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -355,56 +355,6 @@ static inline u32 mii_adv_to_ethtool_adv_x(u32 adv) } /** - * mii_lpa_mod_linkmode_adv_sgmii - * @lp_advertising: pointer to destination link mode. - * @lpa: value of the MII_LPA register - * - * A small helper function that translates MII_LPA bits to - * linkmode advertisement settings for SGMII. - * Leaves other bits unchanged. - */ -static inline void -mii_lpa_mod_linkmode_lpa_sgmii(unsigned long *lp_advertising, u32 lpa) -{ - u32 speed_duplex = lpa & LPA_SGMII_DPX_SPD_MASK; - - linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, lp_advertising, - speed_duplex == LPA_SGMII_1000HALF); - - linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, lp_advertising, - speed_duplex == LPA_SGMII_1000FULL); - - linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, lp_advertising, - speed_duplex == LPA_SGMII_100HALF); - - linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, lp_advertising, - speed_duplex == LPA_SGMII_100FULL); - - linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, lp_advertising, - speed_duplex == LPA_SGMII_10HALF); - - linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, lp_advertising, - speed_duplex == LPA_SGMII_10FULL); -} - -/** - * mii_lpa_to_linkmode_adv_sgmii - * @advertising: pointer to destination link mode. - * @lpa: value of the MII_LPA register - * - * A small helper function that translates MII_ADVERTISE bits - * to linkmode advertisement settings when in SGMII mode. - * Clears the old value of advertising. - */ -static inline void mii_lpa_to_linkmode_lpa_sgmii(unsigned long *lp_advertising, - u32 lpa) -{ - linkmode_zero(lp_advertising); - - mii_lpa_mod_linkmode_lpa_sgmii(lp_advertising, lpa); -} - -/** * mii_adv_mod_linkmode_adv_t * @advertising:pointer to destination link mode. * @adv: value of the MII_ADVERTISE register diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 598ac3bcc901..27145c4d6820 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -64,13 +64,6 @@ enum { }; enum { - MLX5_MODIFY_TIR_BITMASK_LRO = 0x0, - MLX5_MODIFY_TIR_BITMASK_INDIRECT_TABLE = 0x1, - MLX5_MODIFY_TIR_BITMASK_HASH = 0x2, - MLX5_MODIFY_TIR_BITMASK_TUNNELED_OFFLOAD_EN = 0x3 -}; - -enum { MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0, MLX5_SET_HCA_CAP_OP_MOD_ODP = 0x2, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3, diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 1ec631838af9..bda1c385cffb 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -135,15 +135,6 @@ static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack, extack->cookie_len = sizeof(cookie); } -static inline void nl_set_extack_cookie_u32(struct netlink_ext_ack *extack, - u32 cookie) -{ - if (!extack) - return; - memcpy(extack->cookie, &cookie, sizeof(cookie)); - extack->cookie_len = sizeof(cookie); -} - void netlink_kernel_release(struct sock *sk); int __netlink_change_ngroups(struct sock *sk, unsigned int groups); int netlink_change_ngroups(struct sock *sk, unsigned int groups); diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index add077a81b21..266eb26fb029 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -31,8 +31,7 @@ void xpcs_link_up(struct phylink_pcs *pcs, unsigned int mode, phy_interface_t interface, int speed, int duplex); int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, unsigned int mode); -void xpcs_validate(struct dw_xpcs *xpcs, unsigned long *supported, - struct phylink_link_state *state); +void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces); int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable); struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev, diff --git a/include/linux/phy.h b/include/linux/phy.h index 6de8d7a90d78..cd08cf1a8b0d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1661,7 +1661,7 @@ int phy_disable_interrupts(struct phy_device *phydev); void phy_request_interrupt(struct phy_device *phydev); void phy_free_interrupt(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); -int phy_set_max_speed(struct phy_device *phydev, u32 max_speed); +void phy_set_max_speed(struct phy_device *phydev, u32 max_speed); void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode); void phy_advertise_supported(struct phy_device *phydev); void phy_support_sym_pause(struct phy_device *phydev); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8a636e678902..a27bcc4f7e9a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -557,6 +557,7 @@ struct skb_shared_info { * Warning : all fields before dataref are cleared in __alloc_skb() */ atomic_t dataref; + unsigned int xdp_frags_size; /* Intermediate layers must ensure that destructor_arg * remains valid until skb destructor */ @@ -3898,11 +3899,6 @@ static inline ktime_t net_timedelta(ktime_t t) return ktime_sub(ktime_get_real(), t); } -static inline ktime_t net_invalid_timestamp(void) -{ - return 0; -} - static inline u8 skb_metadata_len(const struct sk_buff *skb) { return skb_shinfo(skb)->meta_len; diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 571f605bc91e..382af90320ac 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -88,6 +88,7 @@ struct svc_xprt { struct list_head xpt_users; /* callbacks on free */ struct net *xpt_net; + netns_tracker ns_tracker; const struct cred *xpt_cred; struct rpc_xprt *xpt_bc_xprt; /* NFSv4.1 backchannel */ struct rpc_xprt_switch *xpt_bc_xps; /* NFSv4.1 backchannel */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 955ea4d7af0b..3cdc8d878d81 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -284,6 +284,7 @@ struct rpc_xprt { } stat; struct net *xprt_net; + netns_tracker ns_tracker; const char *servername; const char *address_strings[RPC_DISPLAY_MAX]; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) diff --git a/include/linux/udp.h b/include/linux/udp.h index ae66dadd8543..254a2654400f 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -23,11 +23,6 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb) return (struct udphdr *)skb_transport_header(skb); } -static inline struct udphdr *inner_udp_hdr(const struct sk_buff *skb) -{ - return (struct udphdr *)skb_inner_transport_header(skb); -} - #define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256) static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) diff --git a/include/net/ax25.h b/include/net/ax25.h index 8221af1811df..0f9790c455bb 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -187,18 +187,12 @@ typedef struct { typedef struct ax25_route { struct ax25_route *next; - refcount_t refcount; ax25_address callsign; struct net_device *dev; ax25_digi *digipeat; char ip_mode; } ax25_route; -static inline void ax25_hold_route(ax25_route *ax25_rt) -{ - refcount_inc(&ax25_rt->refcount); -} - void __ax25_put_route(ax25_route *ax25_rt); extern rwlock_t ax25_route_lock; @@ -213,12 +207,6 @@ static inline void ax25_route_lock_unuse(void) read_unlock(&ax25_route_lock); } -static inline void ax25_put_route(ax25_route *ax25_rt) -{ - if (refcount_dec_and_test(&ax25_rt->refcount)) - __ax25_put_route(ax25_rt); -} - typedef struct { char slave; /* slave_mode? */ struct timer_list slave_timer; /* timeout timer */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 586f69d084a2..f5caff1ddb29 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -258,6 +258,15 @@ struct adv_info { #define HCI_ADV_TX_POWER_NO_PREFERENCE 0x7F +struct monitored_device { + struct list_head list; + + bdaddr_t bdaddr; + __u8 addr_type; + __u16 handle; + bool notified; +}; + struct adv_pattern { struct list_head list; __u8 ad_type; @@ -294,6 +303,9 @@ struct adv_monitor { #define HCI_MAX_SHORT_NAME_LENGTH 10 +#define HCI_CONN_HANDLE_UNSET 0xffff +#define HCI_CONN_HANDLE_MAX 0x0eff + /* Min encryption key size to match with SMP */ #define HCI_MIN_ENC_KEY_SIZE 7 @@ -591,6 +603,9 @@ struct hci_dev { struct delayed_work interleave_scan; + struct list_head monitored_devices; + bool advmon_pend_notify; + #if IS_ENABLED(CONFIG_BT_LEDS) struct led_trigger *power_led; #endif @@ -1847,6 +1862,8 @@ void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle); int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip); int mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status); int mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status); +void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle, + bdaddr_t *bdaddr, u8 addr_type); u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier); diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 107b25deae68..99266f7aebdc 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -1104,3 +1104,19 @@ struct mgmt_ev_controller_resume { #define MGMT_WAKE_REASON_NON_BT_WAKE 0x0 #define MGMT_WAKE_REASON_UNEXPECTED 0x1 #define MGMT_WAKE_REASON_REMOTE_WAKE 0x2 + +#define MGMT_EV_ADV_MONITOR_DEVICE_FOUND 0x002f +struct mgmt_ev_adv_monitor_device_found { + __le16 monitor_handle; + struct mgmt_addr_info addr; + __s8 rssi; + __le32 flags; + __le16 eir_len; + __u8 eir[0]; +} __packed; + +#define MGMT_EV_ADV_MONITOR_DEVICE_LOST 0x0030 +struct mgmt_ev_adv_monitor_device_lost { + __le16 monitor_handle; + struct mgmt_addr_info addr; +} __packed; diff --git a/include/net/bonding.h b/include/net/bonding.h index 83cfd2d70247..7dead855a72d 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -699,20 +699,6 @@ static inline struct slave *bond_slave_has_mac(struct bonding *bond, } /* Caller must hold rcu_read_lock() for read */ -static inline struct slave *bond_slave_has_mac_rcu(struct bonding *bond, - const u8 *mac) -{ - struct list_head *iter; - struct slave *tmp; - - bond_for_each_slave_rcu(bond, tmp, iter) - if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr)) - return tmp; - - return NULL; -} - -/* Caller must hold rcu_read_lock() for read */ static inline bool bond_slave_has_mac_rx(struct bonding *bond, const u8 *mac) { struct list_head *iter; diff --git a/include/net/dsa.h b/include/net/dsa.h index 57b3e4e7413b..ca8c14b547b4 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -278,6 +278,10 @@ struct dsa_port { u8 devlink_port_setup:1; + /* Master state bits, valid only on CPU ports */ + u8 master_admin_up:1; + u8 master_oper_up:1; + u8 setup:1; struct device_node *dn; @@ -478,6 +482,12 @@ static inline bool dsa_port_is_unused(struct dsa_port *dp) return dp->type == DSA_PORT_TYPE_UNUSED; } +static inline bool dsa_port_master_is_operational(struct dsa_port *dp) +{ + return dsa_port_is_cpu(dp) && dp->master_admin_up && + dp->master_oper_up; +} + static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p) { return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_UNUSED; @@ -581,6 +591,24 @@ static inline bool dsa_is_upstream_port(struct dsa_switch *ds, int port) return port == dsa_upstream_port(ds, port); } +/* Return true if this is a DSA port leading away from the CPU */ +static inline bool dsa_is_downstream_port(struct dsa_switch *ds, int port) +{ + return dsa_is_dsa_port(ds, port) && !dsa_is_upstream_port(ds, port); +} + +/* Return the local port used to reach the CPU port */ +static inline unsigned int dsa_switch_upstream_port(struct dsa_switch *ds) +{ + struct dsa_port *dp; + + dsa_switch_for_each_available_port(dp, ds) { + return dsa_upstream_port(ds, dp->index); + } + + return ds->num_ports; +} + /* Return true if @upstream_ds is an upstream switch of @downstream_ds, meaning * that the routing port from @downstream_ds to @upstream_ds is also the port * which @downstream_ds uses to reach its dedicated CPU. @@ -1036,6 +1064,13 @@ struct dsa_switch_ops { int (*tag_8021q_vlan_add)(struct dsa_switch *ds, int port, u16 vid, u16 flags); int (*tag_8021q_vlan_del)(struct dsa_switch *ds, int port, u16 vid); + + /* + * DSA master tracking operations + */ + void (*master_state_change)(struct dsa_switch *ds, + const struct net_device *master, + bool operational); }; #define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 4ad47d9f9d27..3908296d103f 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -285,6 +285,14 @@ static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req); void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req); +static inline unsigned long +reqsk_timeout(struct request_sock *req, unsigned long max_timeout) +{ + u64 timeout = (u64)req->timeout << req->num_timeout; + + return (unsigned long)min_t(u64, timeout, max_timeout); +} + static inline void inet_csk_prepare_for_destroy_sock(struct sock *sk) { /* The below has to be done to allow calling inet_csk_destroy_sock */ diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index dfd919b3119e..463ae5d33eb0 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -65,13 +65,13 @@ struct inet_timewait_sock { /* these three are in inet_sock */ __be16 tw_sport; /* And these are ours. */ - unsigned int tw_kill : 1, - tw_transparent : 1, + unsigned int tw_transparent : 1, tw_flowlabel : 20, - tw_pad : 2, /* 2 bits hole */ + tw_pad : 3, /* 3 bits hole */ tw_tos : 8; u32 tw_txhash; u32 tw_priority; + u32 tw_bslot; /* bind bucket slot */ struct timer_list tw_timer; struct inet_bind_bucket *tw_tb; }; @@ -110,8 +110,6 @@ static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo void inet_twsk_deschedule_put(struct inet_timewait_sock *tw); -void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family); - static inline struct net *twsk_net(const struct inet_timewait_sock *twsk) { diff --git a/include/net/ip.h b/include/net/ip.h index b51bae43b0dd..3984f2c39c4b 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -517,7 +517,6 @@ void ip_dst_metrics_put(struct dst_entry *dst) kfree(p); } -u32 ip_idents_reserve(u32 hash, int segs); void __ip_select_ident(struct net *net, struct iphdr *iph, int segs); static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb, @@ -712,7 +711,7 @@ int ip_forward(struct sk_buff *skb); */ void ip_options_build(struct sk_buff *skb, struct ip_options *opt, - __be32 daddr, struct rtable *rt, int is_frag); + __be32 daddr, struct rtable *rt); int __ip_options_echo(struct net *net, struct ip_options *dopt, struct sk_buff *skb, const struct ip_options *sopt); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 3afcb128e064..082f30256f59 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -437,8 +437,16 @@ struct ipv6_txoptions *ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, int newtype, struct ipv6_opt_hdr *newopt); -struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, - struct ipv6_txoptions *opt); +struct ipv6_txoptions *__ipv6_fixup_options(struct ipv6_txoptions *opt_space, + struct ipv6_txoptions *opt); + +static inline struct ipv6_txoptions * +ipv6_fixup_options(struct ipv6_txoptions *opt_space, struct ipv6_txoptions *opt) +{ + if (!opt) + return NULL; + return __ipv6_fixup_options(opt_space, opt); +} bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb, const struct inet6_skb_parm *opt); @@ -1020,7 +1028,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, - struct ipcm6_cookie *ipc6, struct flowi6 *fl6, + struct ipcm6_cookie *ipc6, struct rt6_info *rt, unsigned int flags, struct inet_cork_full *cork); diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h new file mode 100644 index 000000000000..a473b56842c5 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_bpf.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _NF_CONNTRACK_BPF_H +#define _NF_CONNTRACK_BPF_H + +#include <linux/btf.h> +#include <linux/kconfig.h> + +#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ + (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) + +extern int register_nf_conntrack_bpf(void); + +#else + +static inline int register_nf_conntrack_bpf(void) +{ + return 0; +} + +#endif + +#endif /* _NF_CONNTRACK_BPF_H */ diff --git a/include/net/netns/core.h b/include/net/netns/core.h index 552bc25b1933..388244e315e7 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -10,6 +10,7 @@ struct netns_core { struct ctl_table_header *sysctl_hdr; int sysctl_somaxconn; + u8 sysctl_txrehash; #ifdef CONFIG_PROC_FS struct prot_inuse __percpu *prot_inuse; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 78557643526e..f0687867b5cd 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -31,18 +31,16 @@ struct ping_group_range { struct inet_hashinfo; struct inet_timewait_death_row { - atomic_t tw_count; - char tw_pad[L1_CACHE_BYTES - sizeof(atomic_t)]; + refcount_t tw_refcount; - struct inet_hashinfo *hashinfo; + struct inet_hashinfo *hashinfo ____cacheline_aligned_in_smp; int sysctl_max_tw_buckets; }; struct tcp_fastopen_context; struct netns_ipv4 { - /* Please keep tcp_death_row at first field in netns_ipv4 */ - struct inet_timewait_death_row tcp_death_row ____cacheline_aligned_in_smp; + struct inet_timewait_death_row *tcp_death_row; #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; @@ -70,11 +68,9 @@ struct netns_ipv4 { struct hlist_head *fib_table_hash; struct sock *fibnl; - struct sock * __percpu *icmp_sk; struct sock *mc_autojoin_sk; struct inet_peer_base *peers; - struct sock * __percpu *tcp_sk; struct fqdir *fqdir; u8 sysctl_icmp_echo_ignore_all; @@ -87,6 +83,7 @@ struct netns_ipv4 { u32 ip_rt_min_pmtu; int ip_rt_mtu_expires; + int ip_rt_min_advmss; struct local_ports ip_local_ports; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index a4b550380316..30cdfc4e1615 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -88,7 +88,6 @@ struct netns_ipv6 { struct fib6_table *fib6_local_tbl; struct fib_rules_ops *fib6_rules_ops; #endif - struct sock * __percpu *icmp_sk; struct sock *ndisc_sk; struct sock *tcp_sk; struct sock *igmp_sk; diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 79a805542d0f..97c3c19872ff 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -201,21 +201,67 @@ static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, } #endif -void page_pool_put_page(struct page_pool *pool, struct page *page, - unsigned int dma_sync_size, bool allow_direct); +void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, + unsigned int dma_sync_size, + bool allow_direct); -/* Same as above but will try to sync the entire area pool->max_len */ -static inline void page_pool_put_full_page(struct page_pool *pool, - struct page *page, bool allow_direct) +static inline void page_pool_fragment_page(struct page *page, long nr) +{ + atomic_long_set(&page->pp_frag_count, nr); +} + +static inline long page_pool_defrag_page(struct page *page, long nr) +{ + long ret; + + /* If nr == pp_frag_count then we have cleared all remaining + * references to the page. No need to actually overwrite it, instead + * we can leave this to be overwritten by the calling function. + * + * The main advantage to doing this is that an atomic_read is + * generally a much cheaper operation than an atomic update, + * especially when dealing with a page that may be partitioned + * into only 2 or 3 pieces. + */ + if (atomic_long_read(&page->pp_frag_count) == nr) + return 0; + + ret = atomic_long_sub_return(nr, &page->pp_frag_count); + WARN_ON(ret < 0); + return ret; +} + +static inline bool page_pool_is_last_frag(struct page_pool *pool, + struct page *page) +{ + /* If fragments aren't enabled or count is 0 we were the last user */ + return !(pool->p.flags & PP_FLAG_PAGE_FRAG) || + (page_pool_defrag_page(page, 1) == 0); +} + +static inline void page_pool_put_page(struct page_pool *pool, + struct page *page, + unsigned int dma_sync_size, + bool allow_direct) { /* When page_pool isn't compiled-in, net/core/xdp.c doesn't * allow registering MEM_TYPE_PAGE_POOL, but shield linker. */ #ifdef CONFIG_PAGE_POOL - page_pool_put_page(pool, page, -1, allow_direct); + if (!page_pool_is_last_frag(pool, page)) + return; + + page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct); #endif } +/* Same as above but will try to sync the entire area pool->max_len */ +static inline void page_pool_put_full_page(struct page_pool *pool, + struct page *page, bool allow_direct) +{ + page_pool_put_page(pool, page, -1, allow_direct); +} + /* Same as above but the caller must guarantee safe context. e.g NAPI */ static inline void page_pool_recycle_direct(struct page_pool *pool, struct page *page) @@ -243,30 +289,6 @@ static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) page->dma_addr_upper = upper_32_bits(addr); } -static inline void page_pool_set_frag_count(struct page *page, long nr) -{ - atomic_long_set(&page->pp_frag_count, nr); -} - -static inline long page_pool_atomic_sub_frag_count_return(struct page *page, - long nr) -{ - long ret; - - /* As suggested by Alexander, atomic_long_read() may cover up the - * reference count errors, so avoid calling atomic_long_read() in - * the cases of freeing or draining the page_frags, where we would - * not expect it to match or that are slowpath anyway. - */ - if (__builtin_constant_p(nr) && - atomic_long_read(&page->pp_frag_count) == nr) - return 0; - - ret = atomic_long_sub_return(nr, &page->pp_frag_count); - WARN_ON(ret < 0); - return ret; -} - static inline bool is_page_pool_compiled_in(void) { #ifdef CONFIG_PAGE_POOL diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 9e7b21c0b3a6..44a35531952e 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -63,12 +63,6 @@ static inline psched_time_t psched_get_time(void) return PSCHED_NS2TICKS(ktime_get_ns()); } -static inline psched_tdiff_t -psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2, psched_time_t bound) -{ - return min(tv1 - tv2, bound); -} - struct qdisc_watchdog { u64 last_expires; struct hrtimer timer; diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 29e41ff3ec93..144c39db9898 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -70,6 +70,7 @@ struct request_sock { struct saved_syn *saved_syn; u32 secid; u32 peer_secid; + u32 timeout; }; static inline struct request_sock *inet_reqsk(const struct sock *sk) @@ -104,6 +105,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, sk_node_init(&req_to_sk(req)->sk_node); sk_tx_queue_clear(req_to_sk(req)); req->saved_syn = NULL; + req->timeout = 0; req->num_timeout = 0; req->num_retrans = 0; req->sk = NULL; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 472843eedbae..9bab396c1f3b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -518,11 +518,6 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) BUILD_BUG_ON(sizeof(qcb->data) < sz); } -static inline int qdisc_qlen_cpu(const struct Qdisc *q) -{ - return this_cpu_ptr(q->cpu_qstats)->qlen; -} - static inline int qdisc_qlen(const struct Qdisc *q) { return q->q.qlen; diff --git a/include/net/sock.h b/include/net/sock.h index ff9b508d9c5f..d6c13f0fba40 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -316,6 +316,7 @@ struct sk_filter; * @sk_rcvtimeo: %SO_RCVTIMEO setting * @sk_sndtimeo: %SO_SNDTIMEO setting * @sk_txhash: computed flow hash for use on transmit + * @sk_txrehash: enable TX hash rethink * @sk_filter: socket filtering instructions * @sk_timer: sock cleanup timer * @sk_stamp: time stamp of last packet received @@ -491,6 +492,7 @@ struct sock { u32 sk_ack_backlog; u32 sk_max_ack_backlog; kuid_t sk_uid; + u8 sk_txrehash; #ifdef CONFIG_NET_RX_BUSY_POLL u8 sk_prefer_busy_poll; u16 sk_busy_poll_budget; @@ -587,6 +589,18 @@ static inline bool sk_user_data_is_nocopy(const struct sock *sk) __tmp | SK_USER_DATA_NOCOPY); \ }) +static inline +struct net *sock_net(const struct sock *sk) +{ + return read_pnet(&sk->sk_net); +} + +static inline +void sock_net_set(struct sock *sk, struct net *net) +{ + write_pnet(&sk->sk_net, net); +} + /* * SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK * or not whether his port will be reused by someone else. SK_FORCE_REUSE @@ -2054,7 +2068,7 @@ static inline void sk_set_txhash(struct sock *sk) static inline bool sk_rethink_txhash(struct sock *sk) { - if (sk->sk_txhash) { + if (sk->sk_txhash && sk->sk_txrehash == SOCK_TXREHASH_ENABLED) { sk_set_txhash(sk); return true; } @@ -2704,18 +2718,6 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb) __kfree_skb(skb); } -static inline -struct net *sock_net(const struct sock *sk) -{ - return read_pnet(&sk->sk_net); -} - -static inline -void sock_net_set(struct sock *sk, struct net *net) -{ - write_pnet(&sk->sk_net, net); -} - static inline bool skb_sk_is_prefetched(struct sk_buff *skb) { diff --git a/include/net/tcp.h b/include/net/tcp.h index b9fc978fb2ca..eff2487d972d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2358,7 +2358,7 @@ static inline u32 tcp_timeout_init(struct sock *sk) if (timeout <= 0) timeout = TCP_TIMEOUT_INIT; - return timeout; + return min_t(int, timeout, TCP_RTO_MAX); } static inline u32 tcp_rwnd_init_bpf(struct sock *sk) diff --git a/include/net/udplite.h b/include/net/udplite.h index 9185e45b997f..a3c53110d30b 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -70,49 +70,6 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) return 0; } -/* Slow-path computation of checksum. Socket is locked. */ -static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb) -{ - const struct udp_sock *up = udp_sk(skb->sk); - int cscov = up->len; - __wsum csum = 0; - - if (up->pcflag & UDPLITE_SEND_CC) { - /* - * Sender has set `partial coverage' option on UDP-Lite socket. - * The special case "up->pcslen == 0" signifies full coverage. - */ - if (up->pcslen < up->len) { - if (0 < up->pcslen) - cscov = up->pcslen; - udp_hdr(skb)->len = htons(up->pcslen); - } - /* - * NOTE: Causes for the error case `up->pcslen > up->len': - * (i) Application error (will not be penalized). - * (ii) Payload too big for send buffer: data is split - * into several packets, each with its own header. - * In this case (e.g. last segment), coverage may - * exceed packet length. - * Since packets with coverage length > packet length are - * illegal, we fall back to the defaults here. - */ - } - - skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */ - - skb_queue_walk(&sk->sk_write_queue, skb) { - const int off = skb_transport_offset(skb); - const int len = skb->len - off; - - csum = skb_checksum(skb, off, (cscov > len)? len : cscov, csum); - - if ((cscov -= len) <= 0) - break; - } - return csum; -} - /* Fast-path computation of checksum. Socket may not be locked. */ static inline __wsum udplite_csum(struct sk_buff *skb) { diff --git a/include/net/xdp.h b/include/net/xdp.h index 8f0812e4996d..b7721c3e4d1f 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -60,12 +60,20 @@ struct xdp_rxq_info { u32 reg_state; struct xdp_mem_info mem; unsigned int napi_id; + u32 frag_size; } ____cacheline_aligned; /* perf critical, avoid false-sharing */ struct xdp_txq_info { struct net_device *dev; }; +enum xdp_buff_flags { + XDP_FLAGS_HAS_FRAGS = BIT(0), /* non-linear xdp buff */ + XDP_FLAGS_FRAGS_PF_MEMALLOC = BIT(1), /* xdp paged memory is under + * pressure + */ +}; + struct xdp_buff { void *data; void *data_end; @@ -74,13 +82,40 @@ struct xdp_buff { struct xdp_rxq_info *rxq; struct xdp_txq_info *txq; u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/ + u32 flags; /* supported values defined in xdp_buff_flags */ }; +static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp) +{ + return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS); +} + +static __always_inline void xdp_buff_set_frags_flag(struct xdp_buff *xdp) +{ + xdp->flags |= XDP_FLAGS_HAS_FRAGS; +} + +static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp) +{ + xdp->flags &= ~XDP_FLAGS_HAS_FRAGS; +} + +static __always_inline bool xdp_buff_is_frag_pfmemalloc(struct xdp_buff *xdp) +{ + return !!(xdp->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC); +} + +static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp) +{ + xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC; +} + static __always_inline void xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq) { xdp->frame_sz = frame_sz; xdp->rxq = rxq; + xdp->flags = 0; } static __always_inline void @@ -111,6 +146,20 @@ xdp_get_shared_info_from_buff(struct xdp_buff *xdp) return (struct skb_shared_info *)xdp_data_hard_end(xdp); } +static __always_inline unsigned int xdp_get_buff_len(struct xdp_buff *xdp) +{ + unsigned int len = xdp->data_end - xdp->data; + struct skb_shared_info *sinfo; + + if (likely(!xdp_buff_has_frags(xdp))) + goto out; + + sinfo = xdp_get_shared_info_from_buff(xdp); + len += sinfo->xdp_frags_size; +out: + return len; +} + struct xdp_frame { void *data; u16 len; @@ -122,8 +171,19 @@ struct xdp_frame { */ struct xdp_mem_info mem; struct net_device *dev_rx; /* used by cpumap */ + u32 flags; /* supported values defined in xdp_buff_flags */ }; +static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame) +{ + return !!(frame->flags & XDP_FLAGS_HAS_FRAGS); +} + +static __always_inline bool xdp_frame_is_frag_pfmemalloc(struct xdp_frame *frame) +{ + return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC); +} + #define XDP_BULK_QUEUE_SIZE 16 struct xdp_frame_bulk { int count; @@ -159,6 +219,19 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame) frame->dev_rx = NULL; } +static inline void +xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags, + unsigned int size, unsigned int truesize, + bool pfmemalloc) +{ + skb_shinfo(skb)->nr_frags = nr_frags; + + skb->len += size; + skb->data_len += size; + skb->truesize += truesize; + skb->pfmemalloc |= pfmemalloc; +} + /* Avoids inlining WARN macro in fast-path */ void xdp_warn(const char *msg, const char *func, const int line); #define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__) @@ -180,6 +253,7 @@ void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp) xdp->data_end = frame->data + frame->len; xdp->data_meta = frame->data - frame->metasize; xdp->frame_sz = frame->frame_sz; + xdp->flags = frame->flags; } static inline @@ -206,6 +280,7 @@ int xdp_update_frame_from_buff(struct xdp_buff *xdp, xdp_frame->headroom = headroom - sizeof(*xdp_frame); xdp_frame->metasize = metasize; xdp_frame->frame_sz = xdp->frame_sz; + xdp_frame->flags = xdp->flags; return 0; } @@ -230,6 +305,8 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp) return xdp_frame; } +void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, + struct xdp_buff *xdp); void xdp_return_frame(struct xdp_frame *xdpf); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); void xdp_return_buff(struct xdp_buff *xdp); @@ -246,14 +323,37 @@ void __xdp_release_frame(void *data, struct xdp_mem_info *mem); static inline void xdp_release_frame(struct xdp_frame *xdpf) { struct xdp_mem_info *mem = &xdpf->mem; + struct skb_shared_info *sinfo; + int i; /* Curr only page_pool needs this */ - if (mem->type == MEM_TYPE_PAGE_POOL) - __xdp_release_frame(xdpf->data, mem); + if (mem->type != MEM_TYPE_PAGE_POOL) + return; + + if (likely(!xdp_frame_has_frags(xdpf))) + goto out; + + sinfo = xdp_get_shared_info_from_frame(xdpf); + for (i = 0; i < sinfo->nr_frags; i++) { + struct page *page = skb_frag_page(&sinfo->frags[i]); + + __xdp_release_frame(page_address(page), mem); + } +out: + __xdp_release_frame(xdpf->data, mem); +} + +int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, + struct net_device *dev, u32 queue_index, + unsigned int napi_id, u32 frag_size); +static inline int +xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, + struct net_device *dev, u32 queue_index, + unsigned int napi_id) +{ + return __xdp_rxq_info_reg(xdp_rxq, dev, queue_index, napi_id, 0); } -int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, - struct net_device *dev, u32 queue_index, unsigned int napi_id); void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq); void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq); bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq); diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index c77a1313b3b0..467ca2f28760 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -128,6 +128,8 @@ #define SO_RESERVE_MEM 73 +#define SO_TXREHASH 74 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b0383d371b9a..16a7574292a5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -330,6 +330,8 @@ union bpf_iter_link_info { * *ctx_out*, *data_in* and *data_out* must be NULL. * *repeat* must be zero. * + * BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN. + * * Return * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. @@ -1111,6 +1113,11 @@ enum bpf_link_type { */ #define BPF_F_SLEEPABLE (1U << 4) +/* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program + * fully support xdp frags. + */ +#define BPF_F_XDP_HAS_FRAGS (1U << 5) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: * @@ -1775,6 +1782,8 @@ union bpf_attr { * 0 on success, or a negative error in case of failure. * * u64 bpf_get_current_pid_tgid(void) + * Description + * Get the current pid and tgid. * Return * A 64-bit integer containing the current tgid and pid, and * created as such: @@ -1782,6 +1791,8 @@ union bpf_attr { * *current_task*\ **->pid**. * * u64 bpf_get_current_uid_gid(void) + * Description + * Get the current uid and gid. * Return * A 64-bit integer containing the current GID and UID, and * created as such: *current_gid* **<< 32 \|** *current_uid*. @@ -2256,6 +2267,8 @@ union bpf_attr { * The 32-bit hash. * * u64 bpf_get_current_task(void) + * Description + * Get the current task. * Return * A pointer to the current task struct. * @@ -2369,6 +2382,8 @@ union bpf_attr { * indicate that the hash is outdated and to trigger a * recalculation the next time the kernel tries to access this * hash or when the **bpf_get_hash_recalc**\ () helper is called. + * Return + * void. * * long bpf_get_numa_node_id(void) * Description @@ -2466,6 +2481,8 @@ union bpf_attr { * A 8-byte long unique number or 0 if *sk* is NULL. * * u32 bpf_get_socket_uid(struct sk_buff *skb) + * Description + * Get the owner UID of the socked associated to *skb*. * Return * The owner UID of the socket associated to *skb*. If the socket * is **NULL**, or if it is not a full socket (i.e. if it is a @@ -3240,6 +3257,9 @@ union bpf_attr { * The id is returned or 0 in case the id could not be retrieved. * * u64 bpf_get_current_cgroup_id(void) + * Description + * Get the current cgroup id based on the cgroup within which + * the current task is running. * Return * A 64-bit integer containing the current cgroup id based * on the cgroup within which the current task is running. @@ -5018,6 +5038,44 @@ union bpf_attr { * * Return * The number of arguments of the traced function. + * + * int bpf_get_retval(void) + * Description + * Get the syscall's return value that will be returned to userspace. + * + * This helper is currently supported by cgroup programs only. + * Return + * The syscall's return value. + * + * int bpf_set_retval(int retval) + * Description + * Set the syscall's return value that will be returned to userspace. + * + * This helper is currently supported by cgroup programs only. + * Return + * 0 on success, or a negative error in case of failure. + * + * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md) + * Description + * Get the total size of a given xdp buff (linear and paged area) + * Return + * The total size of a given xdp buffer. + * + * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) + * Description + * This helper is provided as an easy way to load data from a + * xdp buffer. It can be used to load *len* bytes from *offset* from + * the frame associated to *xdp_md*, into the buffer pointed by + * *buf*. + * Return + * 0 on success, or a negative error in case of failure. + * + * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) + * Description + * Store *len* bytes from buffer *buf* into the frame + * associated to *xdp_md*, at *offset*. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5206,6 +5264,11 @@ union bpf_attr { FN(get_func_arg), \ FN(get_func_ret), \ FN(get_func_arg_cnt), \ + FN(get_retval), \ + FN(set_retval), \ + FN(xdp_get_buff_len), \ + FN(xdp_load_bytes), \ + FN(xdp_store_bytes), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index cca6e474a085..417d4280d7b5 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -319,6 +319,12 @@ enum { /* RINGS */ enum { + ETHTOOL_TCP_DATA_SPLIT_UNKNOWN = 0, + ETHTOOL_TCP_DATA_SPLIT_DISABLED, + ETHTOOL_TCP_DATA_SPLIT_ENABLED, +}; + +enum { ETHTOOL_A_RINGS_UNSPEC, ETHTOOL_A_RINGS_HEADER, /* nest - _A_HEADER_* */ ETHTOOL_A_RINGS_RX_MAX, /* u32 */ @@ -330,6 +336,7 @@ enum { ETHTOOL_A_RINGS_RX_JUMBO, /* u32 */ ETHTOOL_A_RINGS_TX, /* u32 */ ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ + ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */ /* add new constants above here */ __ETHTOOL_A_RINGS_CNT, diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h index eb0a9a5b6e71..51d6bb2f6765 100644 --- a/include/uapi/linux/socket.h +++ b/include/uapi/linux/socket.h @@ -31,4 +31,8 @@ struct __kernel_sockaddr_storage { #define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK) +#define SOCK_TXREHASH_DEFAULT ((u8)-1) +#define SOCK_TXREHASH_DISABLED 0 +#define SOCK_TXREHASH_ENABLED 1 + #endif /* _UAPI_LINUX_SOCKET_H */ diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index c7a5be3bf8be..7f145aefbff8 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -837,13 +837,12 @@ static int fd_array_map_delete_elem(struct bpf_map *map, void *key) static void *prog_fd_array_get_ptr(struct bpf_map *map, struct file *map_file, int fd) { - struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_prog *prog = bpf_prog_get(fd); if (IS_ERR(prog)) return prog; - if (!bpf_prog_array_compatible(array, prog)) { + if (!bpf_prog_map_compatible(map, prog)) { bpf_prog_put(prog); return ERR_PTR(-EINVAL); } @@ -1071,7 +1070,6 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr) INIT_WORK(&aux->work, prog_array_map_clear_deferred); INIT_LIST_HEAD(&aux->poke_progs); mutex_init(&aux->poke_mutex); - spin_lock_init(&aux->owner.lock); map = array_map_alloc(attr); if (IS_ERR(map)) { diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index e16dafeb2450..a1c44c17ea9c 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -198,6 +198,21 @@ DEFINE_IDR(btf_idr); DEFINE_SPINLOCK(btf_idr_lock); +enum btf_kfunc_hook { + BTF_KFUNC_HOOK_XDP, + BTF_KFUNC_HOOK_TC, + BTF_KFUNC_HOOK_STRUCT_OPS, + BTF_KFUNC_HOOK_MAX, +}; + +enum { + BTF_KFUNC_SET_MAX_CNT = 32, +}; + +struct btf_kfunc_set_tab { + struct btf_id_set *sets[BTF_KFUNC_HOOK_MAX][BTF_KFUNC_TYPE_MAX]; +}; + struct btf { void *data; struct btf_type **types; @@ -212,6 +227,7 @@ struct btf { refcount_t refcnt; u32 id; struct rcu_head rcu; + struct btf_kfunc_set_tab *kfunc_set_tab; /* split BTF support */ struct btf *base_btf; @@ -1531,8 +1547,30 @@ static void btf_free_id(struct btf *btf) spin_unlock_irqrestore(&btf_idr_lock, flags); } +static void btf_free_kfunc_set_tab(struct btf *btf) +{ + struct btf_kfunc_set_tab *tab = btf->kfunc_set_tab; + int hook, type; + + if (!tab) + return; + /* For module BTF, we directly assign the sets being registered, so + * there is nothing to free except kfunc_set_tab. + */ + if (btf_is_module(btf)) + goto free_tab; + for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) { + for (type = 0; type < ARRAY_SIZE(tab->sets[0]); type++) + kfree(tab->sets[hook][type]); + } +free_tab: + kfree(tab); + btf->kfunc_set_tab = NULL; +} + static void btf_free(struct btf *btf) { + btf_free_kfunc_set_tab(btf); kvfree(btf->types); kvfree(btf->resolved_sizes); kvfree(btf->resolved_ids); @@ -5616,17 +5654,45 @@ static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log, return true; } +static bool is_kfunc_arg_mem_size(const struct btf *btf, + const struct btf_param *arg, + const struct bpf_reg_state *reg) +{ + int len, sfx_len = sizeof("__sz") - 1; + const struct btf_type *t; + const char *param_name; + + t = btf_type_skip_modifiers(btf, arg->type, NULL); + if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE) + return false; + + /* In the future, this can be ported to use BTF tagging */ + param_name = btf_name_by_offset(btf, arg->name_off); + if (str_is_empty(param_name)) + return false; + len = strlen(param_name); + if (len < sfx_len) + return false; + param_name += len - sfx_len; + if (strncmp(param_name, "__sz", sfx_len)) + return false; + + return true; +} + static int btf_check_func_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, struct bpf_reg_state *regs, bool ptr_to_mem_ok) { struct bpf_verifier_log *log = &env->log; + u32 i, nargs, ref_id, ref_obj_id = 0; bool is_kfunc = btf_is_kernel(btf); const char *func_name, *ref_tname; const struct btf_type *t, *ref_t; const struct btf_param *args; - u32 i, nargs, ref_id; + int ref_regno = 0; + bool rel = false; t = btf_type_by_id(btf, func_id); if (!t || !btf_type_is_func(t)) { @@ -5704,6 +5770,16 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, if (reg->type == PTR_TO_BTF_ID) { reg_btf = reg->btf; reg_ref_id = reg->btf_id; + /* Ensure only one argument is referenced PTR_TO_BTF_ID */ + if (reg->ref_obj_id) { + if (ref_obj_id) { + bpf_log(log, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", + regno, reg->ref_obj_id, ref_obj_id); + return -EFAULT; + } + ref_regno = regno; + ref_obj_id = reg->ref_obj_id; + } } else { reg_btf = btf_vmlinux; reg_ref_id = *reg2btf_ids[reg->type]; @@ -5727,17 +5803,33 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, u32 type_size; if (is_kfunc) { + bool arg_mem_size = i + 1 < nargs && is_kfunc_arg_mem_size(btf, &args[i + 1], ®s[regno + 1]); + /* Permit pointer to mem, but only when argument * type is pointer to scalar, or struct composed * (recursively) of scalars. + * When arg_mem_size is true, the pointer can be + * void *. */ if (!btf_type_is_scalar(ref_t) && - !__btf_type_is_scalar_struct(log, btf, ref_t, 0)) { + !__btf_type_is_scalar_struct(log, btf, ref_t, 0) && + (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) { bpf_log(log, - "arg#%d pointer type %s %s must point to scalar or struct with scalar\n", - i, btf_type_str(ref_t), ref_tname); + "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n", + i, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : ""); return -EINVAL; } + + /* Check for mem, len pair */ + if (arg_mem_size) { + if (check_kfunc_mem_size_reg(env, ®s[regno + 1], regno + 1)) { + bpf_log(log, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", + i, i + 1); + return -EINVAL; + } + i++; + continue; + } } resolve_ret = btf_resolve_size(btf, ref_t, &type_size); @@ -5758,7 +5850,23 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, } } - return 0; + /* Either both are set, or neither */ + WARN_ON_ONCE((ref_obj_id && !ref_regno) || (!ref_obj_id && ref_regno)); + if (is_kfunc) { + rel = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog), + BTF_KFUNC_TYPE_RELEASE, func_id); + /* We already made sure ref_obj_id is set only for one argument */ + if (rel && !ref_obj_id) { + bpf_log(log, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n", + func_name); + return -EINVAL; + } + /* Allow (!rel && ref_obj_id), so that passing such referenced PTR_TO_BTF_ID to + * other kfuncs works + */ + } + /* returns argument register number > 0 in case of reference release kfunc */ + return rel ? ref_regno : 0; } /* Compare BTF of a function with given bpf_reg_state. @@ -6200,12 +6308,17 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id) return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL; } +enum { + BTF_MODULE_F_LIVE = (1 << 0), +}; + #ifdef CONFIG_DEBUG_INFO_BTF_MODULES struct btf_module { struct list_head list; struct module *module; struct btf *btf; struct bin_attribute *sysfs_attr; + int flags; }; static LIST_HEAD(btf_modules); @@ -6233,7 +6346,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, int err = 0; if (mod->btf_data_size == 0 || - (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING)) + (op != MODULE_STATE_COMING && op != MODULE_STATE_LIVE && + op != MODULE_STATE_GOING)) goto out; switch (op) { @@ -6292,6 +6406,17 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, } break; + case MODULE_STATE_LIVE: + mutex_lock(&btf_module_mutex); + list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { + if (btf_mod->module != module) + continue; + + btf_mod->flags |= BTF_MODULE_F_LIVE; + break; + } + mutex_unlock(&btf_module_mutex); + break; case MODULE_STATE_GOING: mutex_lock(&btf_module_mutex); list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { @@ -6338,7 +6463,12 @@ struct module *btf_try_get_module(const struct btf *btf) if (btf_mod->btf != btf) continue; - if (try_module_get(btf_mod->module)) + /* We must only consider module whose __init routine has + * finished, hence we must check for BTF_MODULE_F_LIVE flag, + * which is set from the notifier callback for + * MODULE_STATE_LIVE. + */ + if ((btf_mod->flags & BTF_MODULE_F_LIVE) && try_module_get(btf_mod->module)) res = btf_mod->module; break; @@ -6349,6 +6479,36 @@ struct module *btf_try_get_module(const struct btf *btf) return res; } +/* Returns struct btf corresponding to the struct module + * + * This function can return NULL or ERR_PTR. Note that caller must + * release reference for struct btf iff btf_is_module is true. + */ +static struct btf *btf_get_module_btf(const struct module *module) +{ + struct btf *btf = NULL; +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES + struct btf_module *btf_mod, *tmp; +#endif + + if (!module) + return bpf_get_btf_vmlinux(); +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES + mutex_lock(&btf_module_mutex); + list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { + if (btf_mod->module != module) + continue; + + btf_get(btf_mod->btf); + btf = btf_mod->btf; + break; + } + mutex_unlock(&btf_module_mutex); +#endif + + return btf; +} + BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int, flags) { struct btf *btf; @@ -6416,53 +6576,181 @@ BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE) BTF_TRACING_TYPE_xxx #undef BTF_TRACING_TYPE -/* BTF ID set registration API for modules */ - -#ifdef CONFIG_DEBUG_INFO_BTF_MODULES +/* Kernel Function (kfunc) BTF ID set registration API */ -void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s) +static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, + enum btf_kfunc_type type, + struct btf_id_set *add_set, bool vmlinux_set) { - mutex_lock(&l->mutex); - list_add(&s->list, &l->list); - mutex_unlock(&l->mutex); + struct btf_kfunc_set_tab *tab; + struct btf_id_set *set; + u32 set_cnt; + int ret; + + if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) { + ret = -EINVAL; + goto end; + } + + if (!add_set->cnt) + return 0; + + tab = btf->kfunc_set_tab; + if (!tab) { + tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN); + if (!tab) + return -ENOMEM; + btf->kfunc_set_tab = tab; + } + + set = tab->sets[hook][type]; + /* Warn when register_btf_kfunc_id_set is called twice for the same hook + * for module sets. + */ + if (WARN_ON_ONCE(set && !vmlinux_set)) { + ret = -EINVAL; + goto end; + } + + /* We don't need to allocate, concatenate, and sort module sets, because + * only one is allowed per hook. Hence, we can directly assign the + * pointer and return. + */ + if (!vmlinux_set) { + tab->sets[hook][type] = add_set; + return 0; + } + + /* In case of vmlinux sets, there may be more than one set being + * registered per hook. To create a unified set, we allocate a new set + * and concatenate all individual sets being registered. While each set + * is individually sorted, they may become unsorted when concatenated, + * hence re-sorting the final set again is required to make binary + * searching the set using btf_id_set_contains function work. + */ + set_cnt = set ? set->cnt : 0; + + if (set_cnt > U32_MAX - add_set->cnt) { + ret = -EOVERFLOW; + goto end; + } + + if (set_cnt + add_set->cnt > BTF_KFUNC_SET_MAX_CNT) { + ret = -E2BIG; + goto end; + } + + /* Grow set */ + set = krealloc(tab->sets[hook][type], + offsetof(struct btf_id_set, ids[set_cnt + add_set->cnt]), + GFP_KERNEL | __GFP_NOWARN); + if (!set) { + ret = -ENOMEM; + goto end; + } + + /* For newly allocated set, initialize set->cnt to 0 */ + if (!tab->sets[hook][type]) + set->cnt = 0; + tab->sets[hook][type] = set; + + /* Concatenate the two sets */ + memcpy(set->ids + set->cnt, add_set->ids, add_set->cnt * sizeof(set->ids[0])); + set->cnt += add_set->cnt; + + sort(set->ids, set->cnt, sizeof(set->ids[0]), btf_id_cmp_func, NULL); + + return 0; +end: + btf_free_kfunc_set_tab(btf); + return ret; } -EXPORT_SYMBOL_GPL(register_kfunc_btf_id_set); -void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s) +static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, + const struct btf_kfunc_id_set *kset) { - mutex_lock(&l->mutex); - list_del_init(&s->list); - mutex_unlock(&l->mutex); + bool vmlinux_set = !btf_is_module(btf); + int type, ret; + + for (type = 0; type < ARRAY_SIZE(kset->sets); type++) { + if (!kset->sets[type]) + continue; + + ret = __btf_populate_kfunc_set(btf, hook, type, kset->sets[type], vmlinux_set); + if (ret) + break; + } + return ret; } -EXPORT_SYMBOL_GPL(unregister_kfunc_btf_id_set); -bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, - struct module *owner) +static bool __btf_kfunc_id_set_contains(const struct btf *btf, + enum btf_kfunc_hook hook, + enum btf_kfunc_type type, + u32 kfunc_btf_id) { - struct kfunc_btf_id_set *s; + struct btf_id_set *set; - mutex_lock(&klist->mutex); - list_for_each_entry(s, &klist->list, list) { - if (s->owner == owner && btf_id_set_contains(s->set, kfunc_id)) { - mutex_unlock(&klist->mutex); - return true; - } + if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) + return false; + if (!btf->kfunc_set_tab) + return false; + set = btf->kfunc_set_tab->sets[hook][type]; + if (!set) + return false; + return btf_id_set_contains(set, kfunc_btf_id); +} + +static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) +{ + switch (prog_type) { + case BPF_PROG_TYPE_XDP: + return BTF_KFUNC_HOOK_XDP; + case BPF_PROG_TYPE_SCHED_CLS: + return BTF_KFUNC_HOOK_TC; + case BPF_PROG_TYPE_STRUCT_OPS: + return BTF_KFUNC_HOOK_STRUCT_OPS; + default: + return BTF_KFUNC_HOOK_MAX; } - mutex_unlock(&klist->mutex); - return false; } -#define DEFINE_KFUNC_BTF_ID_LIST(name) \ - struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list), \ - __MUTEX_INITIALIZER(name.mutex) }; \ - EXPORT_SYMBOL_GPL(name) +/* Caution: + * Reference to the module (obtained using btf_try_get_module) corresponding to + * the struct btf *MUST* be held when calling this function from verifier + * context. This is usually true as we stash references in prog's kfunc_btf_tab; + * keeping the reference for the duration of the call provides the necessary + * protection for looking up a well-formed btf->kfunc_set_tab. + */ +bool btf_kfunc_id_set_contains(const struct btf *btf, + enum bpf_prog_type prog_type, + enum btf_kfunc_type type, u32 kfunc_btf_id) +{ + enum btf_kfunc_hook hook; -DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list); -DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list); + hook = bpf_prog_type_to_kfunc_hook(prog_type); + return __btf_kfunc_id_set_contains(btf, hook, type, kfunc_btf_id); +} -#endif +/* This function must be invoked only from initcalls/module init functions */ +int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, + const struct btf_kfunc_id_set *kset) +{ + enum btf_kfunc_hook hook; + struct btf *btf; + int ret; + + btf = btf_get_module_btf(kset->owner); + if (IS_ERR_OR_NULL(btf)) + return btf ? PTR_ERR(btf) : -ENOENT; + + hook = bpf_prog_type_to_kfunc_hook(prog_type); + ret = btf_populate_kfunc_set(btf, hook, kset); + /* reference is only taken for module BTF */ + if (btf_is_module(btf)) + btf_put(btf); + return ret; +} +EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set); int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, __u32 targ_id) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 514b4681a90a..279ebbed75a5 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1044,7 +1044,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, * NET_XMIT_DROP (1) - drop packet and notify TCP to call cwr * NET_XMIT_CN (2) - continue with packet output and notify TCP * to call cwr - * -EPERM - drop packet + * -err - drop packet * * For ingress packets, this function will return -EPERM if any * attached program was found and if it returned != 1 during execution. @@ -1079,8 +1079,9 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb); } else { ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb, - __bpf_prog_run_save_cb); - ret = (ret == 1 ? 0 : -EPERM); + __bpf_prog_run_save_cb, 0); + if (ret && !IS_ERR_VALUE((long)ret)) + ret = -EFAULT; } bpf_restore_data_end(skb, saved_data_end); __skb_pull(skb, offset); @@ -1107,10 +1108,9 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, enum cgroup_bpf_attach_type atype) { struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - int ret; - ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk, bpf_prog_run); - return ret == 1 ? 0 : -EPERM; + return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk, + bpf_prog_run, 0); } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); @@ -1142,7 +1142,6 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, }; struct sockaddr_storage unspec; struct cgroup *cgrp; - int ret; /* Check socket family since not all sockets represent network * endpoint (e.g. AF_UNIX). @@ -1156,10 +1155,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, } cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx, - bpf_prog_run, flags); - - return ret == 1 ? 0 : -EPERM; + return BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx, + bpf_prog_run, 0, flags); } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr); @@ -1184,11 +1181,9 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, enum cgroup_bpf_attach_type atype) { struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - int ret; - ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops, - bpf_prog_run); - return ret == 1 ? 0 : -EPERM; + return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops, + bpf_prog_run, 0); } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); @@ -1201,17 +1196,47 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, .major = major, .minor = minor, }; - int allow; + int ret; rcu_read_lock(); cgrp = task_dfl_cgroup(current); - allow = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, - bpf_prog_run); + ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, + bpf_prog_run, 0); rcu_read_unlock(); - return !allow; + return ret; } +BPF_CALL_0(bpf_get_retval) +{ + struct bpf_cg_run_ctx *ctx = + container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx); + + return ctx->retval; +} + +static const struct bpf_func_proto bpf_get_retval_proto = { + .func = bpf_get_retval, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + +BPF_CALL_1(bpf_set_retval, int, retval) +{ + struct bpf_cg_run_ctx *ctx = + container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx); + + ctx->retval = retval; + return 0; +} + +static const struct bpf_func_proto bpf_set_retval_proto = { + .func = bpf_set_retval, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto * cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -1224,6 +1249,10 @@ cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_current_cgroup_id_proto; case BPF_FUNC_perf_event_output: return &bpf_event_output_data_proto; + case BPF_FUNC_get_retval: + return &bpf_get_retval_proto; + case BPF_FUNC_set_retval: + return &bpf_set_retval_proto; default: return bpf_base_func_proto(func_id); } @@ -1337,7 +1366,8 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, rcu_read_lock(); cgrp = task_dfl_cgroup(current); - ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, bpf_prog_run); + ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, + bpf_prog_run, 0); rcu_read_unlock(); kfree(ctx.cur_val); @@ -1350,7 +1380,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, kfree(ctx.new_val); } - return ret == 1 ? 0 : -EPERM; + return ret; } #ifdef CONFIG_NET @@ -1452,13 +1482,11 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, lock_sock(sk); ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT], - &ctx, bpf_prog_run); + &ctx, bpf_prog_run, 0); release_sock(sk); - if (!ret) { - ret = -EPERM; + if (ret) goto out; - } if (ctx.optlen == -1) { /* optlen set to -1, bypass kernel */ @@ -1518,7 +1546,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, .sk = sk, .level = level, .optname = optname, - .retval = retval, + .current_task = current, }; int ret; @@ -1562,27 +1590,17 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, lock_sock(sk); ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT], - &ctx, bpf_prog_run); + &ctx, bpf_prog_run, retval); release_sock(sk); - if (!ret) { - ret = -EPERM; + if (ret < 0) goto out; - } if (ctx.optlen > max_optlen || ctx.optlen < 0) { ret = -EFAULT; goto out; } - /* BPF programs only allowed to set retval to 0, not some - * arbitrary value. - */ - if (ctx.retval != 0 && ctx.retval != retval) { - ret = -EFAULT; - goto out; - } - if (ctx.optlen != 0) { if (copy_to_user(optval, ctx.optval, ctx.optlen) || put_user(ctx.optlen, optlen)) { @@ -1591,8 +1609,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, } } - ret = ctx.retval; - out: sockopt_free_buf(&ctx, &buf); return ret; @@ -1607,10 +1623,10 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, .sk = sk, .level = level, .optname = optname, - .retval = retval, .optlen = *optlen, .optval = optval, .optval_end = optval + *optlen, + .current_task = current, }; int ret; @@ -1623,25 +1639,19 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, */ ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT], - &ctx, bpf_prog_run); - if (!ret) - return -EPERM; + &ctx, bpf_prog_run, retval); + if (ret < 0) + return ret; if (ctx.optlen > *optlen) return -EFAULT; - /* BPF programs only allowed to set retval to 0, not some - * arbitrary value. - */ - if (ctx.retval != 0 && ctx.retval != retval) - return -EFAULT; - /* BPF programs can shrink the buffer, export the modifications. */ if (ctx.optlen != 0) *optlen = ctx.optlen; - return ctx.retval; + return ret; } #endif @@ -2057,10 +2067,39 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type, *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen); break; case offsetof(struct bpf_sockopt, retval): - if (type == BPF_WRITE) - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval); - else - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval); + BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0); + + if (type == BPF_WRITE) { + int treg = BPF_REG_9; + + if (si->src_reg == treg || si->dst_reg == treg) + --treg; + if (si->src_reg == treg || si->dst_reg == treg) + --treg; + *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, treg, + offsetof(struct bpf_sockopt_kern, tmp_reg)); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task), + treg, si->dst_reg, + offsetof(struct bpf_sockopt_kern, current_task)); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx), + treg, treg, + offsetof(struct task_struct, bpf_ctx)); + *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval), + treg, si->src_reg, + offsetof(struct bpf_cg_run_ctx, retval)); + *insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg, + offsetof(struct bpf_sockopt_kern, tmp_reg)); + } else { + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sockopt_kern, current_task)); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx), + si->dst_reg, si->dst_reg, + offsetof(struct task_struct, bpf_ctx)); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval), + si->dst_reg, si->dst_reg, + offsetof(struct bpf_cg_run_ctx, retval)); + } break; case offsetof(struct bpf_sockopt, optval): *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index de3e5bc6781f..0a1cfd8544b9 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1829,28 +1829,30 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx, } #endif -bool bpf_prog_array_compatible(struct bpf_array *array, - const struct bpf_prog *fp) +bool bpf_prog_map_compatible(struct bpf_map *map, + const struct bpf_prog *fp) { bool ret; if (fp->kprobe_override) return false; - spin_lock(&array->aux->owner.lock); - - if (!array->aux->owner.type) { + spin_lock(&map->owner.lock); + if (!map->owner.type) { /* There's no owner yet where we could check for * compatibility. */ - array->aux->owner.type = fp->type; - array->aux->owner.jited = fp->jited; + map->owner.type = fp->type; + map->owner.jited = fp->jited; + map->owner.xdp_has_frags = fp->aux->xdp_has_frags; ret = true; } else { - ret = array->aux->owner.type == fp->type && - array->aux->owner.jited == fp->jited; + ret = map->owner.type == fp->type && + map->owner.jited == fp->jited && + map->owner.xdp_has_frags == fp->aux->xdp_has_frags; } - spin_unlock(&array->aux->owner.lock); + spin_unlock(&map->owner.lock); + return ret; } @@ -1862,13 +1864,11 @@ static int bpf_check_tail_call(const struct bpf_prog *fp) mutex_lock(&aux->used_maps_mutex); for (i = 0; i < aux->used_map_cnt; i++) { struct bpf_map *map = aux->used_maps[i]; - struct bpf_array *array; - if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) + if (!map_type_contains_progs(map)) continue; - array = container_of(map, struct bpf_array, map); - if (!bpf_prog_array_compatible(array, fp)) { + if (!bpf_prog_map_compatible(map, fp)) { ret = -EINVAL; goto out; } diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index b3e6b9422238..650e5d21f90d 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -397,7 +397,8 @@ static int cpu_map_kthread_run(void *data) return 0; } -static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd) +static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, + struct bpf_map *map, int fd) { struct bpf_prog *prog; @@ -405,7 +406,8 @@ static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd) if (IS_ERR(prog)) return PTR_ERR(prog); - if (prog->expected_attach_type != BPF_XDP_CPUMAP) { + if (prog->expected_attach_type != BPF_XDP_CPUMAP || + !bpf_prog_map_compatible(map, prog)) { bpf_prog_put(prog); return -EINVAL; } @@ -457,7 +459,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value, rcpu->map_id = map->id; rcpu->value.qsize = value->qsize; - if (fd > 0 && __cpu_map_load_bpf_program(rcpu, fd)) + if (fd > 0 && __cpu_map_load_bpf_program(rcpu, map, fd)) goto free_ptr_ring; /* Setup kthread */ diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index fe019dbdb3f0..038f6d7a83e4 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -858,7 +858,8 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net, BPF_PROG_TYPE_XDP, false); if (IS_ERR(prog)) goto err_put_dev; - if (prog->expected_attach_type != BPF_XDP_DEVMAP) + if (prog->expected_attach_type != BPF_XDP_DEVMAP || + !bpf_prog_map_compatible(&dtab->map, prog)) goto err_put_prog; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index fa4505f9b611..72ce1edde950 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -556,16 +556,14 @@ static unsigned long bpf_map_memory_footprint(const struct bpf_map *map) static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) { - const struct bpf_map *map = filp->private_data; - const struct bpf_array *array; + struct bpf_map *map = filp->private_data; u32 type = 0, jited = 0; - if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { - array = container_of(map, struct bpf_array, map); - spin_lock(&array->aux->owner.lock); - type = array->aux->owner.type; - jited = array->aux->owner.jited; - spin_unlock(&array->aux->owner.lock); + if (map_type_contains_progs(map)) { + spin_lock(&map->owner.lock); + type = map->owner.type; + jited = map->owner.jited; + spin_unlock(&map->owner.lock); } seq_printf(m, @@ -874,6 +872,7 @@ static int map_create(union bpf_attr *attr) atomic64_set(&map->refcnt, 1); atomic64_set(&map->usercnt, 1); mutex_init(&map->freeze_mutex); + spin_lock_init(&map->owner.lock); map->spin_lock_off = -EINVAL; map->timer_off = -EINVAL; @@ -2217,7 +2216,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr) BPF_F_ANY_ALIGNMENT | BPF_F_TEST_STATE_FREQ | BPF_F_SLEEPABLE | - BPF_F_TEST_RND_HI32)) + BPF_F_TEST_RND_HI32 | + BPF_F_XDP_HAS_FRAGS)) return -EINVAL; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && @@ -2303,6 +2303,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr) prog->aux->dst_prog = dst_prog; prog->aux->offload_requested = !!attr->prog_ifindex; prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE; + prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS; err = security_bpf_prog_alloc(prog->aux); if (err) @@ -3318,6 +3319,11 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_FLOW_DISSECTOR: case BPF_SK_LOOKUP: return netns_bpf_prog_query(attr, uattr); + case BPF_SK_SKB_STREAM_PARSER: + case BPF_SK_SKB_STREAM_VERDICT: + case BPF_SK_MSG_VERDICT: + case BPF_SK_SKB_VERDICT: + return sock_map_bpf_prog_query(attr, uattr); default: return -EINVAL; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a39eedecc93a..dcf065ec2774 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -452,7 +452,8 @@ static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type) { return base_type(type) == PTR_TO_SOCKET || base_type(type) == PTR_TO_TCP_SOCK || - base_type(type) == PTR_TO_MEM; + base_type(type) == PTR_TO_MEM || + base_type(type) == PTR_TO_BTF_ID; } static bool type_is_rdonly_mem(u32 type) @@ -1743,7 +1744,7 @@ find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset) } static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env, - s16 offset, struct module **btf_modp) + s16 offset) { struct bpf_kfunc_btf kf_btf = { .offset = offset }; struct bpf_kfunc_btf_tab *tab; @@ -1797,8 +1798,6 @@ static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env, sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), kfunc_btf_cmp_by_off, NULL); } - if (btf_modp) - *btf_modp = b->module; return b->btf; } @@ -1815,8 +1814,7 @@ void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab) } static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, - u32 func_id, s16 offset, - struct module **btf_modp) + u32 func_id, s16 offset) { if (offset) { if (offset < 0) { @@ -1827,7 +1825,7 @@ static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, return ERR_PTR(-EINVAL); } - return __find_kfunc_desc_btf(env, offset, btf_modp); + return __find_kfunc_desc_btf(env, offset); } return btf_vmlinux ?: ERR_PTR(-ENOENT); } @@ -1890,7 +1888,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) prog_aux->kfunc_btf_tab = btf_tab; } - desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL); + desc_btf = find_kfunc_desc_btf(env, func_id, offset); if (IS_ERR(desc_btf)) { verbose(env, "failed to find BTF for kernel function\n"); return PTR_ERR(desc_btf); @@ -2351,7 +2349,7 @@ static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn) if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL) return NULL; - desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL); + desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off); if (IS_ERR(desc_btf)) return "<error>"; @@ -3498,11 +3496,6 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, #define MAX_PACKET_OFF 0xffff -static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog) -{ - return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type; -} - static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, const struct bpf_call_arg_meta *meta, enum bpf_access_type t) @@ -4877,6 +4870,62 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, } } +static int check_mem_size_reg(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, u32 regno, + bool zero_size_allowed, + struct bpf_call_arg_meta *meta) +{ + int err; + + /* This is used to refine r0 return value bounds for helpers + * that enforce this value as an upper bound on return values. + * See do_refine_retval_range() for helpers that can refine + * the return value. C type of helper is u32 so we pull register + * bound from umax_value however, if negative verifier errors + * out. Only upper bounds can be learned because retval is an + * int type and negative retvals are allowed. + */ + if (meta) + meta->msize_max_value = reg->umax_value; + + /* The register is SCALAR_VALUE; the access check + * happens using its boundaries. + */ + if (!tnum_is_const(reg->var_off)) + /* For unprivileged variable accesses, disable raw + * mode so that the program is required to + * initialize all the memory that the helper could + * just partially fill up. + */ + meta = NULL; + + if (reg->smin_value < 0) { + verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n", + regno); + return -EACCES; + } + + if (reg->umin_value == 0) { + err = check_helper_mem_access(env, regno - 1, 0, + zero_size_allowed, + meta); + if (err) + return err; + } + + if (reg->umax_value >= BPF_MAX_VAR_SIZ) { + verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n", + regno); + return -EACCES; + } + err = check_helper_mem_access(env, regno - 1, + reg->umax_value, + zero_size_allowed, meta); + if (!err) + err = mark_chain_precision(env, regno); + return err; +} + int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, u32 mem_size) { @@ -4900,6 +4949,28 @@ int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, return check_helper_mem_access(env, regno, mem_size, true, NULL); } +int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + u32 regno) +{ + struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1]; + bool may_be_null = type_may_be_null(mem_reg->type); + struct bpf_reg_state saved_reg; + int err; + + WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5); + + if (may_be_null) { + saved_reg = *mem_reg; + mark_ptr_not_null_reg(mem_reg); + } + + err = check_mem_size_reg(env, reg, regno, true, NULL); + + if (may_be_null) + *mem_reg = saved_reg; + return err; +} + /* Implementation details: * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL * Two bpf_map_lookups (even with the same key) will have different reg->id. @@ -5439,51 +5510,7 @@ skip_type_check: } else if (arg_type_is_mem_size(arg_type)) { bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); - /* This is used to refine r0 return value bounds for helpers - * that enforce this value as an upper bound on return values. - * See do_refine_retval_range() for helpers that can refine - * the return value. C type of helper is u32 so we pull register - * bound from umax_value however, if negative verifier errors - * out. Only upper bounds can be learned because retval is an - * int type and negative retvals are allowed. - */ - meta->msize_max_value = reg->umax_value; - - /* The register is SCALAR_VALUE; the access check - * happens using its boundaries. - */ - if (!tnum_is_const(reg->var_off)) - /* For unprivileged variable accesses, disable raw - * mode so that the program is required to - * initialize all the memory that the helper could - * just partially fill up. - */ - meta = NULL; - - if (reg->smin_value < 0) { - verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n", - regno); - return -EACCES; - } - - if (reg->umin_value == 0) { - err = check_helper_mem_access(env, regno - 1, 0, - zero_size_allowed, - meta); - if (err) - return err; - } - - if (reg->umax_value >= BPF_MAX_VAR_SIZ) { - verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n", - regno); - return -EACCES; - } - err = check_helper_mem_access(env, regno - 1, - reg->umax_value, - zero_size_allowed, meta); - if (!err) - err = mark_chain_precision(env, regno); + err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta); } else if (arg_type_is_alloc_size(arg_type)) { if (!tnum_is_const(reg->var_off)) { verbose(env, "R%d is not a known constant'\n", @@ -6842,22 +6869,23 @@ static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno, } } -static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) +static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx_p) { const struct btf_type *t, *func, *func_proto, *ptr_type; struct bpf_reg_state *regs = cur_regs(env); const char *func_name, *ptr_type_name; u32 i, nargs, func_id, ptr_type_id; - struct module *btf_mod = NULL; + int err, insn_idx = *insn_idx_p; const struct btf_param *args; struct btf *desc_btf; - int err; + bool acq; /* skip for now, but return error when we find this in fixup_kfunc_call */ if (!insn->imm) return 0; - desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod); + desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off); if (IS_ERR(desc_btf)) return PTR_ERR(desc_btf); @@ -6866,23 +6894,43 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) func_name = btf_name_by_offset(desc_btf, func->name_off); func_proto = btf_type_by_id(desc_btf, func->type); - if (!env->ops->check_kfunc_call || - !env->ops->check_kfunc_call(func_id, btf_mod)) { + if (!btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), + BTF_KFUNC_TYPE_CHECK, func_id)) { verbose(env, "calling kernel function %s is not allowed\n", func_name); return -EACCES; } + acq = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), + BTF_KFUNC_TYPE_ACQUIRE, func_id); + /* Check the arguments */ err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs); - if (err) + if (err < 0) return err; + /* In case of release function, we get register number of refcounted + * PTR_TO_BTF_ID back from btf_check_kfunc_arg_match, do the release now + */ + if (err) { + err = release_reference(env, regs[err].ref_obj_id); + if (err) { + verbose(env, "kfunc %s#%d reference has not been acquired before\n", + func_name, func_id); + return err; + } + } for (i = 0; i < CALLER_SAVED_REGS; i++) mark_reg_not_init(env, regs, caller_saved[i]); /* Check return type */ t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL); + + if (acq && !btf_type_is_ptr(t)) { + verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n"); + return -EINVAL; + } + if (btf_type_is_scalar(t)) { mark_reg_unknown(env, regs, BPF_REG_0); mark_btf_func_reg_size(env, BPF_REG_0, t->size); @@ -6901,7 +6949,21 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) regs[BPF_REG_0].btf = desc_btf; regs[BPF_REG_0].type = PTR_TO_BTF_ID; regs[BPF_REG_0].btf_id = ptr_type_id; + if (btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), + BTF_KFUNC_TYPE_RET_NULL, func_id)) { + regs[BPF_REG_0].type |= PTR_MAYBE_NULL; + /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */ + regs[BPF_REG_0].id = ++env->id_gen; + } mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *)); + if (acq) { + int id = acquire_reference_state(env, insn_idx); + + if (id < 0) + return id; + regs[BPF_REG_0].id = id; + regs[BPF_REG_0].ref_obj_id = id; + } } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */ nargs = btf_type_vlen(func_proto); @@ -11549,7 +11611,7 @@ static int do_check(struct bpf_verifier_env *env) if (insn->src_reg == BPF_PSEUDO_CALL) err = check_func_call(env, insn, &env->insn_idx); else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) - err = check_kfunc_call(env, insn); + err = check_kfunc_call(env, insn, &env->insn_idx); else err = check_helper_call(env, insn, &env->insn_idx); if (err) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 21aa30644219..06a9e220069e 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1562,6 +1562,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { extern const struct bpf_func_proto bpf_skb_output_proto; extern const struct bpf_func_proto bpf_xdp_output_proto; +extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto; BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args, struct bpf_map *, map, u64, flags) @@ -1661,6 +1662,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sock_from_file_proto; case BPF_FUNC_get_socket_cookie: return &bpf_get_socket_ptr_cookie_proto; + case BPF_FUNC_xdp_get_buff_len: + return &bpf_xdp_get_buff_len_trace_proto; #endif case BPF_FUNC_seq_printf: return prog->expected_attach_type == BPF_TRACE_ITER ? diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index 9751207f7757..b7c4d656a94b 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -116,7 +116,6 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) return -ENOMEM; } - refcount_set(&ax25_rt->refcount, 1); ax25_rt->callsign = route->dest_addr; ax25_rt->dev = ax25_dev->dev; ax25_rt->digipeat = NULL; @@ -167,12 +166,12 @@ static int ax25_rt_del(struct ax25_routes_struct *route) ax25cmp(&route->dest_addr, &s->callsign) == 0) { if (ax25_route_list == s) { ax25_route_list = s->next; - ax25_put_route(s); + __ax25_put_route(s); } else { for (t = ax25_route_list; t != NULL; t = t->next) { if (t->next == s) { t->next = s->next; - ax25_put_route(s); + __ax25_put_route(s); break; } } diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 04ebe901e86f..d10651108033 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -689,6 +689,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, bacpy(&conn->dst, dst); bacpy(&conn->src, &hdev->bdaddr); + conn->handle = HCI_CONN_HANDLE_UNSET; conn->hdev = hdev; conn->type = type; conn->role = role; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2b7bd3655b07..5bde0ec41177 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2503,6 +2503,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv) INIT_LIST_HEAD(&hdev->conn_hash.list); INIT_LIST_HEAD(&hdev->adv_instances); INIT_LIST_HEAD(&hdev->blocked_keys); + INIT_LIST_HEAD(&hdev->monitored_devices); INIT_LIST_HEAD(&hdev->local_codecs); INIT_WORK(&hdev->rx_work, hci_rx_work); @@ -3666,8 +3667,8 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb) sco_recv_scodata(conn, skb); return; } else { - bt_dev_err(hdev, "SCO packet for unknown connection handle %d", - handle); + bt_dev_err_ratelimited(hdev, "SCO packet for unknown connection handle %d", + handle); } kfree_skb(skb); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index fc30f4c03d29..63b925921c87 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3068,6 +3068,11 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, struct hci_ev_conn_complete *ev = data; struct hci_conn *conn; + if (__le16_to_cpu(ev->handle) > HCI_CONN_HANDLE_MAX) { + bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for invalid handle"); + return; + } + bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); hci_dev_lock(hdev); @@ -3106,6 +3111,17 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, } } + /* The HCI_Connection_Complete event is only sent once per connection. + * Processing it more than once per connection can corrupt kernel memory. + * + * As the connection handle is set here for the first time, it indicates + * whether the connection is already set up. + */ + if (conn->handle != HCI_CONN_HANDLE_UNSET) { + bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for existing connection"); + goto unlock; + } + if (!ev->status) { conn->handle = __le16_to_cpu(ev->handle); @@ -4534,7 +4550,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata, if (!info) { bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x", HCI_EV_INQUIRY_RESULT_WITH_RSSI); - return; + goto unlock; } bacpy(&data.bdaddr, &info->bdaddr); @@ -4565,7 +4581,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata, if (!info) { bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x", HCI_EV_INQUIRY_RESULT_WITH_RSSI); - return; + goto unlock; } bacpy(&data.bdaddr, &info->bdaddr); @@ -4587,7 +4603,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata, bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x", HCI_EV_INQUIRY_RESULT_WITH_RSSI); } - +unlock: hci_dev_unlock(hdev); } @@ -4661,6 +4677,24 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data, struct hci_ev_sync_conn_complete *ev = data; struct hci_conn *conn; + switch (ev->link_type) { + case SCO_LINK: + case ESCO_LINK: + break; + default: + /* As per Core 5.3 Vol 4 Part E 7.7.35 (p.2219), Link_Type + * for HCI_Synchronous_Connection_Complete is limited to + * either SCO or eSCO + */ + bt_dev_err(hdev, "Ignoring connect complete event for invalid link type"); + return; + } + + if (__le16_to_cpu(ev->handle) > HCI_CONN_HANDLE_MAX) { + bt_dev_err(hdev, "Ignoring HCI_Sync_Conn_Complete for invalid handle"); + return; + } + bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); hci_dev_lock(hdev); @@ -4684,23 +4718,19 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data, goto unlock; } + /* The HCI_Synchronous_Connection_Complete event is only sent once per connection. + * Processing it more than once per connection can corrupt kernel memory. + * + * As the connection handle is set here for the first time, it indicates + * whether the connection is already set up. + */ + if (conn->handle != HCI_CONN_HANDLE_UNSET) { + bt_dev_err(hdev, "Ignoring HCI_Sync_Conn_Complete event for existing connection"); + goto unlock; + } + switch (ev->status) { case 0x00: - /* The synchronous connection complete event should only be - * sent once per new connection. Receiving a successful - * complete event when the connection status is already - * BT_CONNECTED means that the device is misbehaving and sent - * multiple complete event packets for the same new connection. - * - * Registering the device more than once can corrupt kernel - * memory, hence upon detecting this invalid event, we report - * an error and ignore the packet. - */ - if (conn->state == BT_CONNECTED) { - bt_dev_err(hdev, "Ignoring connect complete event for existing connection"); - goto unlock; - } - conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; conn->type = ev->link_type; @@ -5496,6 +5526,11 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, struct smp_irk *irk; u8 addr_type; + if (handle > HCI_CONN_HANDLE_MAX) { + bt_dev_err(hdev, "Ignoring HCI_LE_Connection_Complete for invalid handle"); + return; + } + hci_dev_lock(hdev); /* All controllers implicitly stop advertising in the event of a @@ -5537,6 +5572,17 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, cancel_delayed_work(&conn->le_conn_timeout); } + /* The HCI_LE_Connection_Complete event is only sent once per connection. + * Processing it more than once per connection can corrupt kernel memory. + * + * As the connection handle is set here for the first time, it indicates + * whether the connection is already set up. + */ + if (conn->handle != HCI_CONN_HANDLE_UNSET) { + bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for existing connection"); + goto unlock; + } + le_conn_update_addr(conn, bdaddr, bdaddr_type, local_rpa); /* Lookup the identity address from the stored connection @@ -6798,7 +6844,7 @@ static const struct hci_ev { HCI_EV(HCI_EV_NUM_COMP_BLOCKS, hci_num_comp_blocks_evt, sizeof(struct hci_ev_num_comp_blocks)), /* [0xff = HCI_EV_VENDOR] */ - HCI_EV(HCI_EV_VENDOR, msft_vendor_evt, 0), + HCI_EV_VL(HCI_EV_VENDOR, msft_vendor_evt, 0, HCI_MAX_EVENT_SIZE), }; static void hci_event_func(struct hci_dev *hdev, u8 event, struct sk_buff *skb, @@ -6823,8 +6869,9 @@ static void hci_event_func(struct hci_dev *hdev, u8 event, struct sk_buff *skb, * decide if that is acceptable. */ if (skb->len > ev->max_len) - bt_dev_warn(hdev, "unexpected event 0x%2.2x length: %u > %u", - event, skb->len, ev->max_len); + bt_dev_warn_ratelimited(hdev, + "unexpected event 0x%2.2x length: %u > %u", + event, skb->len, ev->max_len); data = hci_ev_skb_pull(hdev, skb, event, ev->min_len); if (!data) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 0feb68f12545..6e71aa6b6fea 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -382,6 +382,9 @@ int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, { struct hci_cmd_sync_work_entry *entry; + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) + return -ENODEV; + entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; @@ -5140,8 +5143,8 @@ static void set_ext_conn_params(struct hci_conn *conn, p->max_ce_len = cpu_to_le16(0x0000); } -int hci_le_ext_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, - u8 own_addr_type) +static int hci_le_ext_create_conn_sync(struct hci_dev *hdev, + struct hci_conn *conn, u8 own_addr_type) { struct hci_cp_le_ext_create_conn *cp; struct hci_cp_le_ext_conn_param *p; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 37087cf7dc5a..5dd684e0b259 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -42,7 +42,7 @@ #include "aosp.h" #define MGMT_VERSION 1 -#define MGMT_REVISION 21 +#define MGMT_REVISION 22 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, @@ -174,6 +174,8 @@ static const u16 mgmt_events[] = { MGMT_EV_ADV_MONITOR_REMOVED, MGMT_EV_CONTROLLER_SUSPEND, MGMT_EV_CONTROLLER_RESUME, + MGMT_EV_ADV_MONITOR_DEVICE_FOUND, + MGMT_EV_ADV_MONITOR_DEVICE_LOST, }; static const u16 mgmt_untrusted_commands[] = { @@ -9589,12 +9591,116 @@ static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir, return true; } +void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle, + bdaddr_t *bdaddr, u8 addr_type) +{ + struct mgmt_ev_adv_monitor_device_lost ev; + + ev.monitor_handle = cpu_to_le16(handle); + bacpy(&ev.addr.bdaddr, bdaddr); + ev.addr.type = addr_type; + + mgmt_event(MGMT_EV_ADV_MONITOR_DEVICE_LOST, hdev, &ev, sizeof(ev), + NULL); +} + +static void mgmt_adv_monitor_device_found(struct hci_dev *hdev, + bdaddr_t *bdaddr, bool report_device, + struct sk_buff *skb, + struct sock *skip_sk) +{ + struct sk_buff *advmon_skb; + size_t advmon_skb_len; + __le16 *monitor_handle; + struct monitored_device *dev, *tmp; + bool matched = false; + bool notify = false; + + /* We have received the Advertisement Report because: + * 1. the kernel has initiated active discovery + * 2. if not, we have pend_le_reports > 0 in which case we are doing + * passive scanning + * 3. if none of the above is true, we have one or more active + * Advertisement Monitor + * + * For case 1 and 2, report all advertisements via MGMT_EV_DEVICE_FOUND + * and report ONLY one advertisement per device for the matched Monitor + * via MGMT_EV_ADV_MONITOR_DEVICE_FOUND event. + * + * For case 3, since we are not active scanning and all advertisements + * received are due to a matched Advertisement Monitor, report all + * advertisements ONLY via MGMT_EV_ADV_MONITOR_DEVICE_FOUND event. + */ + if (report_device && !hdev->advmon_pend_notify) { + mgmt_event_skb(skb, skip_sk); + return; + } + + advmon_skb_len = (sizeof(struct mgmt_ev_adv_monitor_device_found) - + sizeof(struct mgmt_ev_device_found)) + skb->len; + advmon_skb = mgmt_alloc_skb(hdev, MGMT_EV_ADV_MONITOR_DEVICE_FOUND, + advmon_skb_len); + if (!advmon_skb) { + if (report_device) + mgmt_event_skb(skb, skip_sk); + else + kfree_skb(skb); + return; + } + + /* ADV_MONITOR_DEVICE_FOUND is similar to DEVICE_FOUND event except + * that it also has 'monitor_handle'. Make a copy of DEVICE_FOUND and + * store monitor_handle of the matched monitor. + */ + monitor_handle = skb_put(advmon_skb, sizeof(*monitor_handle)); + skb_put_data(advmon_skb, skb->data, skb->len); + + hdev->advmon_pend_notify = false; + + list_for_each_entry_safe(dev, tmp, &hdev->monitored_devices, list) { + if (!bacmp(&dev->bdaddr, bdaddr)) { + matched = true; + + if (!dev->notified) { + *monitor_handle = cpu_to_le16(dev->handle); + notify = true; + dev->notified = true; + } + } + + if (!dev->notified) + hdev->advmon_pend_notify = true; + } + + if (!report_device && + ((matched && !notify) || !msft_monitor_supported(hdev))) { + /* Handle 0 indicates that we are not active scanning and this + * is a subsequent advertisement report for an already matched + * Advertisement Monitor or the controller offloading support + * is not available. + */ + *monitor_handle = 0; + notify = true; + } + + if (report_device) + mgmt_event_skb(skb, skip_sk); + else + kfree_skb(skb); + + if (notify) + mgmt_event_skb(advmon_skb, skip_sk); + else + kfree_skb(advmon_skb); +} + void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u32 flags, u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len) { struct sk_buff *skb; struct mgmt_ev_device_found *ev; + bool report_device = hci_discovery_active(hdev); /* Don't send events for a non-kernel initiated discovery. With * LE one exception is if we have pend_le_reports > 0 in which @@ -9603,11 +9709,10 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, if (!hci_discovery_active(hdev)) { if (link_type == ACL_LINK) return; - if (link_type == LE_LINK && - list_empty(&hdev->pend_le_reports) && - !hci_is_adv_monitoring(hdev)) { + if (link_type == LE_LINK && !list_empty(&hdev->pend_le_reports)) + report_device = true; + else if (!hci_is_adv_monitoring(hdev)) return; - } } if (hdev->discovery.result_filtering) { @@ -9672,7 +9777,7 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, ev->eir_len = cpu_to_le16(eir_len + scan_rsp_len); - mgmt_event_skb(skb, NULL); + mgmt_adv_monitor_device_found(hdev, bdaddr, report_device, skb, NULL); } void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c index 6a943634b31a..9a3d77d3ca86 100644 --- a/net/bluetooth/msft.c +++ b/net/bluetooth/msft.c @@ -80,6 +80,14 @@ struct msft_rp_le_set_advertisement_filter_enable { __u8 sub_opcode; } __packed; +#define MSFT_EV_LE_MONITOR_DEVICE 0x02 +struct msft_ev_le_monitor_device { + __u8 addr_type; + bdaddr_t bdaddr; + __u8 monitor_handle; + __u8 monitor_state; +} __packed; + struct msft_monitor_advertisement_handle_data { __u8 msft_handle; __u16 mgmt_handle; @@ -204,6 +212,37 @@ static struct msft_monitor_advertisement_handle_data *msft_find_handle_data return NULL; } +/* This function requires the caller holds hdev->lock */ +static int msft_monitor_device_del(struct hci_dev *hdev, __u16 mgmt_handle, + bdaddr_t *bdaddr, __u8 addr_type, + bool notify) +{ + struct monitored_device *dev, *tmp; + int count = 0; + + list_for_each_entry_safe(dev, tmp, &hdev->monitored_devices, list) { + /* mgmt_handle == 0 indicates remove all devices, whereas, + * bdaddr == NULL indicates remove all devices matching the + * mgmt_handle. + */ + if ((!mgmt_handle || dev->handle == mgmt_handle) && + (!bdaddr || (!bacmp(bdaddr, &dev->bdaddr) && + addr_type == dev->addr_type))) { + if (notify && dev->notified) { + mgmt_adv_monitor_device_lost(hdev, dev->handle, + &dev->bdaddr, + dev->addr_type); + } + + list_del(&dev->list); + kfree(dev); + count++; + } + } + + return count; +} + static void msft_le_monitor_advertisement_cb(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb) @@ -294,6 +333,10 @@ static void msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev, if (monitor && !msft->suspending) hci_free_adv_monitor(hdev, monitor); + /* Clear any monitored devices by this Adv Monitor */ + msft_monitor_device_del(hdev, handle_data->mgmt_handle, NULL, + 0, false); + list_del(&handle_data->list); kfree(handle_data); } @@ -557,6 +600,14 @@ void msft_do_close(struct hci_dev *hdev) list_del(&handle_data->list); kfree(handle_data); } + + hci_dev_lock(hdev); + + /* Clear any devices that are being monitored and notify device lost */ + hdev->advmon_pend_notify = false; + msft_monitor_device_del(hdev, 0, NULL, 0, true); + + hci_dev_unlock(hdev); } void msft_register(struct hci_dev *hdev) @@ -590,10 +641,101 @@ void msft_unregister(struct hci_dev *hdev) kfree(msft); } +/* This function requires the caller holds hdev->lock */ +static void msft_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, + __u8 addr_type, __u16 mgmt_handle) +{ + struct monitored_device *dev; + + dev = kmalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) { + bt_dev_err(hdev, "MSFT vendor event %u: no memory", + MSFT_EV_LE_MONITOR_DEVICE); + return; + } + + bacpy(&dev->bdaddr, bdaddr); + dev->addr_type = addr_type; + dev->handle = mgmt_handle; + dev->notified = false; + + INIT_LIST_HEAD(&dev->list); + list_add(&dev->list, &hdev->monitored_devices); + hdev->advmon_pend_notify = true; +} + +/* This function requires the caller holds hdev->lock */ +static void msft_device_lost(struct hci_dev *hdev, bdaddr_t *bdaddr, + __u8 addr_type, __u16 mgmt_handle) +{ + if (!msft_monitor_device_del(hdev, mgmt_handle, bdaddr, addr_type, + true)) { + bt_dev_err(hdev, "MSFT vendor event %u: dev %pMR not in list", + MSFT_EV_LE_MONITOR_DEVICE, bdaddr); + } +} + +static void *msft_skb_pull(struct hci_dev *hdev, struct sk_buff *skb, + u8 ev, size_t len) +{ + void *data; + + data = skb_pull_data(skb, len); + if (!data) + bt_dev_err(hdev, "Malformed MSFT vendor event: 0x%02x", ev); + + return data; +} + +/* This function requires the caller holds hdev->lock */ +static void msft_monitor_device_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct msft_ev_le_monitor_device *ev; + struct msft_monitor_advertisement_handle_data *handle_data; + u8 addr_type; + + ev = msft_skb_pull(hdev, skb, MSFT_EV_LE_MONITOR_DEVICE, sizeof(*ev)); + if (!ev) + return; + + bt_dev_dbg(hdev, + "MSFT vendor event 0x%02x: handle 0x%04x state %d addr %pMR", + MSFT_EV_LE_MONITOR_DEVICE, ev->monitor_handle, + ev->monitor_state, &ev->bdaddr); + + handle_data = msft_find_handle_data(hdev, ev->monitor_handle, false); + if (!handle_data) + return; + + switch (ev->addr_type) { + case ADDR_LE_DEV_PUBLIC: + addr_type = BDADDR_LE_PUBLIC; + break; + + case ADDR_LE_DEV_RANDOM: + addr_type = BDADDR_LE_RANDOM; + break; + + default: + bt_dev_err(hdev, + "MSFT vendor event 0x%02x: unknown addr type 0x%02x", + MSFT_EV_LE_MONITOR_DEVICE, ev->addr_type); + return; + } + + if (ev->monitor_state) + msft_device_found(hdev, &ev->bdaddr, addr_type, + handle_data->mgmt_handle); + else + msft_device_lost(hdev, &ev->bdaddr, addr_type, + handle_data->mgmt_handle); +} + void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { struct msft_data *msft = hdev->msft_data; - u8 event; + u8 *evt_prefix; + u8 *evt; if (!msft) return; @@ -602,13 +744,12 @@ void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) * matches, and otherwise just return. */ if (msft->evt_prefix_len > 0) { - if (skb->len < msft->evt_prefix_len) + evt_prefix = msft_skb_pull(hdev, skb, 0, msft->evt_prefix_len); + if (!evt_prefix) return; - if (memcmp(skb->data, msft->evt_prefix, msft->evt_prefix_len)) + if (memcmp(evt_prefix, msft->evt_prefix, msft->evt_prefix_len)) return; - - skb_pull(skb, msft->evt_prefix_len); } /* Every event starts at least with an event code and the rest of @@ -617,10 +758,23 @@ void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) if (skb->len < 1) return; - event = *skb->data; - skb_pull(skb, 1); + evt = msft_skb_pull(hdev, skb, 0, sizeof(*evt)); + if (!evt) + return; + + hci_dev_lock(hdev); + + switch (*evt) { + case MSFT_EV_LE_MONITOR_DEVICE: + msft_monitor_device_evt(hdev, skb); + break; - bt_dev_dbg(hdev, "MSFT vendor event %u", event); + default: + bt_dev_dbg(hdev, "MSFT vendor event 0x%02x", *evt); + break; + } + + hci_dev_unlock(hdev); } __u64 msft_get_features(struct hci_dev *hdev) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 46dd95755967..65b52b4bd6e1 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -5,6 +5,7 @@ #include <linux/btf.h> #include <linux/btf_ids.h> #include <linux/slab.h> +#include <linux/init.h> #include <linux/vmalloc.h> #include <linux/etherdevice.h> #include <linux/filter.h> @@ -130,7 +131,8 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, static int bpf_test_finish(const union bpf_attr *kattr, union bpf_attr __user *uattr, const void *data, - u32 size, u32 retval, u32 duration) + struct skb_shared_info *sinfo, u32 size, + u32 retval, u32 duration) { void __user *data_out = u64_to_user_ptr(kattr->test.data_out); int err = -EFAULT; @@ -145,8 +147,36 @@ static int bpf_test_finish(const union bpf_attr *kattr, err = -ENOSPC; } - if (data_out && copy_to_user(data_out, data, copy_size)) - goto out; + if (data_out) { + int len = sinfo ? copy_size - sinfo->xdp_frags_size : copy_size; + + if (copy_to_user(data_out, data, len)) + goto out; + + if (sinfo) { + int i, offset = len, data_len; + + for (i = 0; i < sinfo->nr_frags; i++) { + skb_frag_t *frag = &sinfo->frags[i]; + + if (offset >= copy_size) { + err = -ENOSPC; + break; + } + + data_len = min_t(int, copy_size - offset, + skb_frag_size(frag)); + + if (copy_to_user(data_out + offset, + skb_frag_address(frag), + data_len)) + goto out; + + offset += data_len; + } + } + } + if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size))) goto out; if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval))) @@ -171,6 +201,8 @@ int noinline bpf_fentry_test1(int a) { return a + 1; } +EXPORT_SYMBOL_GPL(bpf_fentry_test1); +ALLOW_ERROR_INJECTION(bpf_fentry_test1, ERRNO); int noinline bpf_fentry_test2(int a, u64 b) { @@ -232,28 +264,142 @@ struct sock * noinline bpf_kfunc_call_test3(struct sock *sk) return sk; } +struct prog_test_ref_kfunc { + int a; + int b; + struct prog_test_ref_kfunc *next; +}; + +static struct prog_test_ref_kfunc prog_test_struct = { + .a = 42, + .b = 108, + .next = &prog_test_struct, +}; + +noinline struct prog_test_ref_kfunc * +bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) +{ + /* randomly return NULL */ + if (get_jiffies_64() % 2) + return NULL; + return &prog_test_struct; +} + +noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) +{ +} + +struct prog_test_pass1 { + int x0; + struct { + int x1; + struct { + int x2; + struct { + int x3; + }; + }; + }; +}; + +struct prog_test_pass2 { + int len; + short arr1[4]; + struct { + char arr2[4]; + unsigned long arr3[8]; + } x; +}; + +struct prog_test_fail1 { + void *p; + int x; +}; + +struct prog_test_fail2 { + int x8; + struct prog_test_pass1 x; +}; + +struct prog_test_fail3 { + int len; + char arr1[2]; + char arr2[]; +}; + +noinline void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) +{ +} + +noinline void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) +{ +} + +noinline void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) +{ +} + +noinline void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p) +{ +} + +noinline void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p) +{ +} + +noinline void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p) +{ +} + +noinline void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz) +{ +} + +noinline void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len) +{ +} + +noinline void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len) +{ +} + __diag_pop(); ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO); -BTF_SET_START(test_sk_kfunc_ids) +BTF_SET_START(test_sk_check_kfunc_ids) BTF_ID(func, bpf_kfunc_call_test1) BTF_ID(func, bpf_kfunc_call_test2) BTF_ID(func, bpf_kfunc_call_test3) -BTF_SET_END(test_sk_kfunc_ids) - -bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner) -{ - if (btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id)) - return true; - return bpf_check_mod_kfunc_call(&prog_test_kfunc_list, kfunc_id, owner); -} - -static void *bpf_test_init(const union bpf_attr *kattr, u32 size, - u32 headroom, u32 tailroom) +BTF_ID(func, bpf_kfunc_call_test_acquire) +BTF_ID(func, bpf_kfunc_call_test_release) +BTF_ID(func, bpf_kfunc_call_test_pass_ctx) +BTF_ID(func, bpf_kfunc_call_test_pass1) +BTF_ID(func, bpf_kfunc_call_test_pass2) +BTF_ID(func, bpf_kfunc_call_test_fail1) +BTF_ID(func, bpf_kfunc_call_test_fail2) +BTF_ID(func, bpf_kfunc_call_test_fail3) +BTF_ID(func, bpf_kfunc_call_test_mem_len_pass1) +BTF_ID(func, bpf_kfunc_call_test_mem_len_fail1) +BTF_ID(func, bpf_kfunc_call_test_mem_len_fail2) +BTF_SET_END(test_sk_check_kfunc_ids) + +BTF_SET_START(test_sk_acquire_kfunc_ids) +BTF_ID(func, bpf_kfunc_call_test_acquire) +BTF_SET_END(test_sk_acquire_kfunc_ids) + +BTF_SET_START(test_sk_release_kfunc_ids) +BTF_ID(func, bpf_kfunc_call_test_release) +BTF_SET_END(test_sk_release_kfunc_ids) + +BTF_SET_START(test_sk_ret_null_kfunc_ids) +BTF_ID(func, bpf_kfunc_call_test_acquire) +BTF_SET_END(test_sk_ret_null_kfunc_ids) + +static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, + u32 size, u32 headroom, u32 tailroom) { void __user *data_in = u64_to_user_ptr(kattr->test.data_in); - u32 user_size = kattr->test.data_size_in; void *data; if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom) @@ -581,7 +727,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, if (kattr->test.flags || kattr->test.cpu) return -EINVAL; - data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN, + data = bpf_test_init(kattr, kattr->test.data_size_in, + size, NET_SKB_PAD + NET_IP_ALIGN, SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); if (IS_ERR(data)) return PTR_ERR(data); @@ -683,7 +830,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, /* bpf program can never convert linear skb to non-linear */ if (WARN_ON_ONCE(skb_is_nonlinear(skb))) size = skb_headlen(skb); - ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration); + ret = bpf_test_finish(kattr, uattr, skb->data, NULL, size, retval, + duration); if (!ret) ret = bpf_ctx_finish(kattr, uattr, ctx, sizeof(struct __sk_buff)); @@ -758,16 +906,16 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - u32 headroom = XDP_PACKET_HEADROOM; u32 size = kattr->test.data_size_in; + u32 headroom = XDP_PACKET_HEADROOM; + u32 retval, duration, max_data_sz; u32 repeat = kattr->test.repeat; struct netdev_rx_queue *rxqueue; + struct skb_shared_info *sinfo; struct xdp_buff xdp = {}; - u32 retval, duration; + int i, ret = -EINVAL; struct xdp_md *ctx; - u32 max_data_sz; void *data; - int ret = -EINVAL; if (prog->expected_attach_type == BPF_XDP_DEVMAP || prog->expected_attach_type == BPF_XDP_CPUMAP) @@ -787,26 +935,60 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, headroom -= ctx->data; } - /* XDP have extra tailroom as (most) drivers use full page */ max_data_sz = 4096 - headroom - tailroom; + size = min_t(u32, size, max_data_sz); - data = bpf_test_init(kattr, max_data_sz, headroom, tailroom); + data = bpf_test_init(kattr, size, max_data_sz, headroom, tailroom); if (IS_ERR(data)) { ret = PTR_ERR(data); goto free_ctx; } rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0); - xdp_init_buff(&xdp, headroom + max_data_sz + tailroom, - &rxqueue->xdp_rxq); + rxqueue->xdp_rxq.frag_size = headroom + max_data_sz + tailroom; + xdp_init_buff(&xdp, rxqueue->xdp_rxq.frag_size, &rxqueue->xdp_rxq); xdp_prepare_buff(&xdp, data, headroom, size, true); + sinfo = xdp_get_shared_info_from_buff(&xdp); ret = xdp_convert_md_to_buff(ctx, &xdp); if (ret) goto free_data; + if (unlikely(kattr->test.data_size_in > size)) { + void __user *data_in = u64_to_user_ptr(kattr->test.data_in); + + while (size < kattr->test.data_size_in) { + struct page *page; + skb_frag_t *frag; + int data_len; + + page = alloc_page(GFP_KERNEL); + if (!page) { + ret = -ENOMEM; + goto out; + } + + frag = &sinfo->frags[sinfo->nr_frags++]; + __skb_frag_set_page(frag, page); + + data_len = min_t(int, kattr->test.data_size_in - size, + PAGE_SIZE); + skb_frag_size_set(frag, data_len); + + if (copy_from_user(page_address(page), data_in + size, + data_len)) { + ret = -EFAULT; + goto out; + } + sinfo->xdp_frags_size += data_len; + size += data_len; + } + xdp_buff_set_frags_flag(&xdp); + } + if (repeat > 1) bpf_prog_change_xdp(NULL, prog); + ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true); /* We convert the xdp_buff back to an xdp_md before checking the return * code so the reference count of any held netdevice will be decremented @@ -816,12 +998,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, if (ret) goto out; - if (xdp.data_meta != data + headroom || - xdp.data_end != xdp.data_meta + size) - size = xdp.data_end - xdp.data_meta; - - ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval, - duration); + size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size; + ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size, + retval, duration); if (!ret) ret = bpf_ctx_finish(kattr, uattr, ctx, sizeof(struct xdp_md)); @@ -830,6 +1009,8 @@ out: if (repeat > 1) bpf_prog_change_xdp(prog, NULL); free_data: + for (i = 0; i < sinfo->nr_frags; i++) + __free_page(skb_frag_page(&sinfo->frags[i])); kfree(data); free_ctx: kfree(ctx); @@ -876,7 +1057,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, if (size < ETH_HLEN) return -EINVAL; - data = bpf_test_init(kattr, size, 0, 0); + data = bpf_test_init(kattr, kattr->test.data_size_in, size, 0, 0); if (IS_ERR(data)) return PTR_ERR(data); @@ -911,8 +1092,8 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, if (ret < 0) goto out; - ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys), - retval, duration); + ret = bpf_test_finish(kattr, uattr, &flow_keys, NULL, + sizeof(flow_keys), retval, duration); if (!ret) ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(struct bpf_flow_keys)); @@ -1016,7 +1197,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat user_ctx->cookie = sock_gen_cookie(ctx.selected_sk); } - ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration); + ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration); if (!ret) ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx)); @@ -1067,3 +1248,17 @@ out: kfree(ctx); return err; } + +static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &test_sk_check_kfunc_ids, + .acquire_set = &test_sk_acquire_kfunc_ids, + .release_set = &test_sk_release_kfunc_ids, + .ret_null_set = &test_sk_ret_null_kfunc_ids, +}; + +static int __init bpf_prog_test_run_init(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set); +} +late_initcall(bpf_prog_test_run_init); diff --git a/net/core/filter.c b/net/core/filter.c index 4603b7cd3cd1..9615ae1ab530 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3783,6 +3783,28 @@ static const struct bpf_func_proto sk_skb_change_head_proto = { .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, }; + +BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff*, xdp) +{ + return xdp_get_buff_len(xdp); +} + +static const struct bpf_func_proto bpf_xdp_get_buff_len_proto = { + .func = bpf_xdp_get_buff_len, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + +BTF_ID_LIST_SINGLE(bpf_xdp_get_buff_len_bpf_ids, struct, xdp_buff) + +const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto = { + .func = bpf_xdp_get_buff_len, + .gpl_only = false, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &bpf_xdp_get_buff_len_bpf_ids[0], +}; + static unsigned long xdp_get_metalen(const struct xdp_buff *xdp) { return xdp_data_meta_unsupported(xdp) ? 0 : @@ -3817,11 +3839,208 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { .arg2_type = ARG_ANYTHING, }; +static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, + void *buf, unsigned long len, bool flush) +{ + unsigned long ptr_len, ptr_off = 0; + skb_frag_t *next_frag, *end_frag; + struct skb_shared_info *sinfo; + void *src, *dst; + u8 *ptr_buf; + + if (likely(xdp->data_end - xdp->data >= off + len)) { + src = flush ? buf : xdp->data + off; + dst = flush ? xdp->data + off : buf; + memcpy(dst, src, len); + return; + } + + sinfo = xdp_get_shared_info_from_buff(xdp); + end_frag = &sinfo->frags[sinfo->nr_frags]; + next_frag = &sinfo->frags[0]; + + ptr_len = xdp->data_end - xdp->data; + ptr_buf = xdp->data; + + while (true) { + if (off < ptr_off + ptr_len) { + unsigned long copy_off = off - ptr_off; + unsigned long copy_len = min(len, ptr_len - copy_off); + + src = flush ? buf : ptr_buf + copy_off; + dst = flush ? ptr_buf + copy_off : buf; + memcpy(dst, src, copy_len); + + off += copy_len; + len -= copy_len; + buf += copy_len; + } + + if (!len || next_frag == end_frag) + break; + + ptr_off += ptr_len; + ptr_buf = skb_frag_address(next_frag); + ptr_len = skb_frag_size(next_frag); + next_frag++; + } +} + +static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len) +{ + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); + u32 size = xdp->data_end - xdp->data; + void *addr = xdp->data; + int i; + + if (unlikely(offset > 0xffff || len > 0xffff)) + return ERR_PTR(-EFAULT); + + if (offset + len > xdp_get_buff_len(xdp)) + return ERR_PTR(-EINVAL); + + if (offset < size) /* linear area */ + goto out; + + offset -= size; + for (i = 0; i < sinfo->nr_frags; i++) { /* paged area */ + u32 frag_size = skb_frag_size(&sinfo->frags[i]); + + if (offset < frag_size) { + addr = skb_frag_address(&sinfo->frags[i]); + size = frag_size; + break; + } + offset -= frag_size; + } +out: + return offset + len < size ? addr + offset : NULL; +} + +BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset, + void *, buf, u32, len) +{ + void *ptr; + + ptr = bpf_xdp_pointer(xdp, offset, len); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + + if (!ptr) + bpf_xdp_copy_buf(xdp, offset, buf, len, false); + else + memcpy(buf, ptr, len); + + return 0; +} + +static const struct bpf_func_proto bpf_xdp_load_bytes_proto = { + .func = bpf_xdp_load_bytes, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_UNINIT_MEM, + .arg4_type = ARG_CONST_SIZE, +}; + +BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset, + void *, buf, u32, len) +{ + void *ptr; + + ptr = bpf_xdp_pointer(xdp, offset, len); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + + if (!ptr) + bpf_xdp_copy_buf(xdp, offset, buf, len, true); + else + memcpy(ptr, buf, len); + + return 0; +} + +static const struct bpf_func_proto bpf_xdp_store_bytes_proto = { + .func = bpf_xdp_store_bytes, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_UNINIT_MEM, + .arg4_type = ARG_CONST_SIZE, +}; + +static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset) +{ + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); + skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1]; + struct xdp_rxq_info *rxq = xdp->rxq; + unsigned int tailroom; + + if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz) + return -EOPNOTSUPP; + + tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag); + if (unlikely(offset > tailroom)) + return -EINVAL; + + memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset); + skb_frag_size_add(frag, offset); + sinfo->xdp_frags_size += offset; + + return 0; +} + +static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset) +{ + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); + int i, n_frags_free = 0, len_free = 0; + + if (unlikely(offset > (int)xdp_get_buff_len(xdp) - ETH_HLEN)) + return -EINVAL; + + for (i = sinfo->nr_frags - 1; i >= 0 && offset > 0; i--) { + skb_frag_t *frag = &sinfo->frags[i]; + int shrink = min_t(int, offset, skb_frag_size(frag)); + + len_free += shrink; + offset -= shrink; + + if (skb_frag_size(frag) == shrink) { + struct page *page = skb_frag_page(frag); + + __xdp_return(page_address(page), &xdp->rxq->mem, + false, NULL); + n_frags_free++; + } else { + skb_frag_size_sub(frag, shrink); + break; + } + } + sinfo->nr_frags -= n_frags_free; + sinfo->xdp_frags_size -= len_free; + + if (unlikely(!sinfo->nr_frags)) { + xdp_buff_clear_frags_flag(xdp); + xdp->data_end -= offset; + } + + return 0; +} + BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset) { void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */ void *data_end = xdp->data_end + offset; + if (unlikely(xdp_buff_has_frags(xdp))) { /* non-linear xdp buff */ + if (offset < 0) + return bpf_xdp_frags_shrink_tail(xdp, -offset); + + return bpf_xdp_frags_increase_tail(xdp, offset); + } + /* Notice that xdp_data_hard_end have reserved some tailroom */ if (unlikely(data_end > data_hard_end)) return -EINVAL; @@ -4047,6 +4266,14 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); enum bpf_map_type map_type = ri->map_type; + /* XDP_REDIRECT is not fully supported yet for xdp frags since + * not all XDP capable drivers can map non-linear xdp_frame in + * ndo_xdp_xmit. + */ + if (unlikely(xdp_buff_has_frags(xdp) && + map_type != BPF_MAP_TYPE_CPUMAP)) + return -EOPNOTSUPP; + if (map_type == BPF_MAP_TYPE_XSKMAP) return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog); @@ -4590,10 +4817,12 @@ static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = { }; #endif -static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff, +static unsigned long bpf_xdp_copy(void *dst, const void *ctx, unsigned long off, unsigned long len) { - memcpy(dst_buff, src_buff + off, len); + struct xdp_buff *xdp = (struct xdp_buff *)ctx; + + bpf_xdp_copy_buf(xdp, off, dst, len, false); return 0; } @@ -4604,11 +4833,11 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map, if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) return -EINVAL; - if (unlikely(!xdp || - xdp_size > (unsigned long)(xdp->data_end - xdp->data))) + + if (unlikely(!xdp || xdp_size > xdp_get_buff_len(xdp))) return -EFAULT; - return bpf_event_output(map, flags, meta, meta_size, xdp->data, + return bpf_event_output(map, flags, meta, meta_size, xdp, xdp_size, bpf_xdp_copy); } @@ -4862,6 +5091,13 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, case SO_REUSEPORT: sk->sk_reuseport = valbool; break; + case SO_TXREHASH: + if (val < -1 || val > 1) { + ret = -EINVAL; + break; + } + sk->sk_txrehash = (u8)val; + break; default: ret = -EINVAL; } @@ -5040,6 +5276,9 @@ static int _bpf_getsockopt(struct sock *sk, int level, int optname, case SO_REUSEPORT: *((int *)optval) = sk->sk_reuseport; break; + case SO_TXREHASH: + *((int *)optval) = sk->sk_txrehash; + break; default: goto err_clear; } @@ -7533,6 +7772,12 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_xdp_redirect_map_proto; case BPF_FUNC_xdp_adjust_tail: return &bpf_xdp_adjust_tail_proto; + case BPF_FUNC_xdp_get_buff_len: + return &bpf_xdp_get_buff_len_proto; + case BPF_FUNC_xdp_load_bytes: + return &bpf_xdp_load_bytes_proto; + case BPF_FUNC_xdp_store_bytes: + return &bpf_xdp_store_bytes_proto; case BPF_FUNC_fib_lookup: return &bpf_xdp_fib_lookup_proto; case BPF_FUNC_check_mtu: @@ -10062,7 +10307,6 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = { .convert_ctx_access = tc_cls_act_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, .gen_ld_abs = bpf_gen_ld_abs, - .check_kfunc_call = bpf_prog_test_check_kfunc_call, }; const struct bpf_prog_ops tc_cls_act_prog_ops = { diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index a5b5bb99c644..8711350085d6 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -301,6 +301,7 @@ struct net *get_net_ns_by_id(const struct net *net, int id) return peer; } +EXPORT_SYMBOL_GPL(get_net_ns_by_id); /* * setup_net runs the initializers for the network namespace object. @@ -363,6 +364,8 @@ out_undo: static int __net_init net_defaults_init_net(struct net *net) { net->core.sysctl_somaxconn = SOMAXCONN; + net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED; + return 0; } diff --git a/net/core/page_pool.c b/net/core/page_pool.c index bd62c01a2ec3..e25d359d84d9 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -423,11 +423,6 @@ static __always_inline struct page * __page_pool_put_page(struct page_pool *pool, struct page *page, unsigned int dma_sync_size, bool allow_direct) { - /* It is not the last user for the page frag case */ - if (pool->p.flags & PP_FLAG_PAGE_FRAG && - page_pool_atomic_sub_frag_count_return(page, 1)) - return NULL; - /* This allocator is optimized for the XDP mode that uses * one-frame-per-page, but have fallbacks that act like the * regular page allocator APIs. @@ -471,8 +466,8 @@ __page_pool_put_page(struct page_pool *pool, struct page *page, return NULL; } -void page_pool_put_page(struct page_pool *pool, struct page *page, - unsigned int dma_sync_size, bool allow_direct) +void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, + unsigned int dma_sync_size, bool allow_direct) { page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct); if (page && !page_pool_recycle_in_ring(pool, page)) { @@ -480,7 +475,7 @@ void page_pool_put_page(struct page_pool *pool, struct page *page, page_pool_return_page(pool, page); } } -EXPORT_SYMBOL(page_pool_put_page); +EXPORT_SYMBOL(page_pool_put_defragged_page); /* Caller must not use data area after call, as this function overwrites it */ void page_pool_put_page_bulk(struct page_pool *pool, void **data, @@ -491,6 +486,10 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, for (i = 0; i < count; i++) { struct page *page = virt_to_head_page(data[i]); + /* It is not the last user for the page frag case */ + if (!page_pool_is_last_frag(pool, page)) + continue; + page = __page_pool_put_page(pool, page, -1, false); /* Approved for bulk recycling in ptr_ring cache */ if (page) @@ -526,8 +525,7 @@ static struct page *page_pool_drain_frag(struct page_pool *pool, long drain_count = BIAS_MAX - pool->frag_users; /* Some user is still using the page frag */ - if (likely(page_pool_atomic_sub_frag_count_return(page, - drain_count))) + if (likely(page_pool_defrag_page(page, drain_count))) return NULL; if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) { @@ -548,8 +546,7 @@ static void page_pool_free_frag(struct page_pool *pool) pool->frag_page = NULL; - if (!page || - page_pool_atomic_sub_frag_count_return(page, drain_count)) + if (!page || page_pool_defrag_page(page, drain_count)) return; page_pool_return_page(pool, page); @@ -588,7 +585,7 @@ frag_reset: pool->frag_users = 1; *offset = 0; pool->frag_offset = size; - page_pool_set_frag_count(page, BIAS_MAX); + page_pool_fragment_page(page, BIAS_MAX); return page; } diff --git a/net/core/sock.c b/net/core/sock.c index 4ff806d71921..09d31a7dc68f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1447,6 +1447,15 @@ set_sndbuf: break; } + case SO_TXREHASH: + if (val < -1 || val > 1) { + ret = -EINVAL; + break; + } + /* Paired with READ_ONCE() in tcp_rtx_synack() */ + WRITE_ONCE(sk->sk_txrehash, (u8)val); + break; + default: ret = -ENOPROTOOPT; break; @@ -1834,6 +1843,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_reserved_mem; break; + case SO_TXREHASH: + v.val = sk->sk_txrehash; + break; + default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). @@ -2266,6 +2279,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; /* pairs with the WRITE_ONCE() in netif_set_gso_max_size() */ sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_max_size); + sk->sk_gso_max_size -= (MAX_TCP_HEADER + 1); /* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */ max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1); } @@ -2611,7 +2625,8 @@ int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg, switch (cmsg->cmsg_type) { case SO_MARK: - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && + !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) return -EINVAL; @@ -3278,6 +3293,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_pacing_rate = ~0UL; WRITE_ONCE(sk->sk_pacing_shift, 10); sk->sk_incoming_cpu = -1; + sk->sk_txrehash = SOCK_TXREHASH_DEFAULT; sk_rx_queue_clear(sk); /* diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 1827669eedd6..2d213c4011db 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -1416,38 +1416,50 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map) return NULL; } -static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, - struct bpf_prog *old, u32 which) +static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog, + u32 which) { struct sk_psock_progs *progs = sock_map_progs(map); - struct bpf_prog **pprog; if (!progs) return -EOPNOTSUPP; switch (which) { case BPF_SK_MSG_VERDICT: - pprog = &progs->msg_parser; + *pprog = &progs->msg_parser; break; #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) case BPF_SK_SKB_STREAM_PARSER: - pprog = &progs->stream_parser; + *pprog = &progs->stream_parser; break; #endif case BPF_SK_SKB_STREAM_VERDICT: if (progs->skb_verdict) return -EBUSY; - pprog = &progs->stream_verdict; + *pprog = &progs->stream_verdict; break; case BPF_SK_SKB_VERDICT: if (progs->stream_verdict) return -EBUSY; - pprog = &progs->skb_verdict; + *pprog = &progs->skb_verdict; break; default: return -EOPNOTSUPP; } + return 0; +} + +static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, + struct bpf_prog *old, u32 which) +{ + struct bpf_prog **pprog; + int ret; + + ret = sock_map_prog_lookup(map, &pprog, which); + if (ret) + return ret; + if (old) return psock_replace_prog(pprog, prog, old); @@ -1455,6 +1467,57 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, return 0; } +int sock_map_bpf_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); + u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd; + struct bpf_prog **pprog; + struct bpf_prog *prog; + struct bpf_map *map; + struct fd f; + u32 id = 0; + int ret; + + if (attr->query.query_flags) + return -EINVAL; + + f = fdget(ufd); + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); + + rcu_read_lock(); + + ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type); + if (ret) + goto end; + + prog = *pprog; + prog_cnt = !prog ? 0 : 1; + + if (!attr->query.prog_cnt || !prog_ids || !prog_cnt) + goto end; + + /* we do not hold the refcnt, the bpf prog may be released + * asynchronously and the id would be set to 0. + */ + id = data_race(prog->aux->id); + if (id == 0) + prog_cnt = 0; + +end: + rcu_read_unlock(); + + if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) || + (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) || + copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt))) + ret = -EFAULT; + + fdput(f); + return ret; +} + static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link) { switch (link->map->map_type) { diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 7b4d485aac7a..dbeb8ecbcd98 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -593,6 +593,15 @@ static struct ctl_table netns_core_table[] = { .extra1 = SYSCTL_ZERO, .proc_handler = proc_dointvec_minmax }, + { + .procname = "txrehash", + .data = &init_net.core.sysctl_txrehash, + .maxlen = sizeof(u8), + .mode = 0644, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + .proc_handler = proc_dou8vec_minmax, + }, { } }; @@ -611,7 +620,7 @@ __setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup); static __net_init int sysctl_core_net_init(struct net *net) { - struct ctl_table *tbl; + struct ctl_table *tbl, *tmp; tbl = netns_core_table; if (!net_eq(net, &init_net)) { @@ -619,7 +628,8 @@ static __net_init int sysctl_core_net_init(struct net *net) if (tbl == NULL) goto err_dup; - tbl[0].data = &net->core.sysctl_somaxconn; + for (tmp = tbl; tmp->procname; tmp++) + tmp->data += (char *)net - (char *)&init_net; /* Don't export any sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) { diff --git a/net/core/xdp.c b/net/core/xdp.c index 7aba35504986..361df312ee7f 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -162,8 +162,9 @@ static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq) } /* Returns 0 on success, negative on failure */ -int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, - struct net_device *dev, u32 queue_index, unsigned int napi_id) +int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, + struct net_device *dev, u32 queue_index, + unsigned int napi_id, u32 frag_size) { if (!dev) { WARN(1, "Missing net_device from driver"); @@ -185,11 +186,12 @@ int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, xdp_rxq->dev = dev; xdp_rxq->queue_index = queue_index; xdp_rxq->napi_id = napi_id; + xdp_rxq->frag_size = frag_size; xdp_rxq->reg_state = REG_STATE_REGISTERED; return 0; } -EXPORT_SYMBOL_GPL(xdp_rxq_info_reg); +EXPORT_SYMBOL_GPL(__xdp_rxq_info_reg); void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq) { @@ -369,8 +371,8 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); * is used for those calls sites. Thus, allowing for faster recycling * of xdp_frames/pages in those cases. */ -static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, - struct xdp_buff *xdp) +void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, + struct xdp_buff *xdp) { struct xdp_mem_allocator *xa; struct page *page; @@ -406,12 +408,38 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, void xdp_return_frame(struct xdp_frame *xdpf) { + struct skb_shared_info *sinfo; + int i; + + if (likely(!xdp_frame_has_frags(xdpf))) + goto out; + + sinfo = xdp_get_shared_info_from_frame(xdpf); + for (i = 0; i < sinfo->nr_frags; i++) { + struct page *page = skb_frag_page(&sinfo->frags[i]); + + __xdp_return(page_address(page), &xdpf->mem, false, NULL); + } +out: __xdp_return(xdpf->data, &xdpf->mem, false, NULL); } EXPORT_SYMBOL_GPL(xdp_return_frame); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf) { + struct skb_shared_info *sinfo; + int i; + + if (likely(!xdp_frame_has_frags(xdpf))) + goto out; + + sinfo = xdp_get_shared_info_from_frame(xdpf); + for (i = 0; i < sinfo->nr_frags; i++) { + struct page *page = skb_frag_page(&sinfo->frags[i]); + + __xdp_return(page_address(page), &xdpf->mem, true, NULL); + } +out: __xdp_return(xdpf->data, &xdpf->mem, true, NULL); } EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); @@ -447,7 +475,7 @@ void xdp_return_frame_bulk(struct xdp_frame *xdpf, struct xdp_mem_allocator *xa; if (mem->type != MEM_TYPE_PAGE_POOL) { - __xdp_return(xdpf->data, &xdpf->mem, false, NULL); + xdp_return_frame(xdpf); return; } @@ -466,12 +494,38 @@ void xdp_return_frame_bulk(struct xdp_frame *xdpf, bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); } + if (unlikely(xdp_frame_has_frags(xdpf))) { + struct skb_shared_info *sinfo; + int i; + + sinfo = xdp_get_shared_info_from_frame(xdpf); + for (i = 0; i < sinfo->nr_frags; i++) { + skb_frag_t *frag = &sinfo->frags[i]; + + bq->q[bq->count++] = skb_frag_address(frag); + if (bq->count == XDP_BULK_QUEUE_SIZE) + xdp_flush_frame_bulk(bq); + } + } bq->q[bq->count++] = xdpf->data; } EXPORT_SYMBOL_GPL(xdp_return_frame_bulk); void xdp_return_buff(struct xdp_buff *xdp) { + struct skb_shared_info *sinfo; + int i; + + if (likely(!xdp_buff_has_frags(xdp))) + goto out; + + sinfo = xdp_get_shared_info_from_buff(xdp); + for (i = 0; i < sinfo->nr_frags; i++) { + struct page *page = skb_frag_page(&sinfo->frags[i]); + + __xdp_return(page_address(page), &xdp->rxq->mem, true, xdp); + } +out: __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp); } @@ -561,8 +615,14 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf, struct sk_buff *skb, struct net_device *dev) { + struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf); unsigned int headroom, frame_size; void *hard_start; + u8 nr_frags; + + /* xdp frags frame */ + if (unlikely(xdp_frame_has_frags(xdpf))) + nr_frags = sinfo->nr_frags; /* Part of headroom was reserved to xdpf */ headroom = sizeof(*xdpf) + xdpf->headroom; @@ -582,6 +642,12 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf, if (xdpf->metasize) skb_metadata_set(skb, xdpf->metasize); + if (unlikely(xdp_frame_has_frags(xdpf))) + xdp_update_skb_shared_info(skb, nr_frags, + sinfo->xdp_frags_size, + nr_frags * xdpf->frame_sz, + xdp_frame_is_frag_pfmemalloc(xdpf)); + /* Essential SKB info: protocol and skb->dev */ skb->protocol = eth_type_trans(skb, dev); diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 5183e627468d..671c377f0889 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -136,11 +136,6 @@ static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3) return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16); } -static inline u64 max48(const u64 seq1, const u64 seq2) -{ - return after48(seq1, seq2) ? seq1 : seq2; -} - /** * dccp_loss_count - Approximate the number of lost data packets in a burst loss * @s1: last known sequence number before the loss ('hole') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 0ea29270d7e5..ae662567a6cb 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1030,15 +1030,9 @@ static void __net_exit dccp_v4_exit_net(struct net *net) inet_ctl_sock_destroy(pn->v4_ctl_sk); } -static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list) -{ - inet_twsk_purge(&dccp_hashinfo, AF_INET); -} - static struct pernet_operations dccp_v4_ops = { .init = dccp_v4_init_net, .exit = dccp_v4_exit_net, - .exit_batch = dccp_v4_exit_batch, .id = &dccp_v4_pernet_id, .size = sizeof(struct dccp_v4_pernet), }; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index fa663518fa0e..eab3bd1ee9a0 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1115,15 +1115,9 @@ static void __net_exit dccp_v6_exit_net(struct net *net) inet_ctl_sock_destroy(pn->v6_ctl_sk); } -static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list) -{ - inet_twsk_purge(&dccp_hashinfo, AF_INET6); -} - static struct pernet_operations dccp_v6_ops = { .init = dccp_v6_init_net, .exit = dccp_v6_exit_net, - .exit_batch = dccp_v6_exit_batch, .id = &dccp_v6_pernet_id, .size = sizeof(struct dccp_v6_pernet), }; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 91e7a2202697..64d805b27add 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -22,6 +22,7 @@ #include "feat.h" struct inet_timewait_death_row dccp_death_row = { + .tw_refcount = REFCOUNT_INIT(1), .sysctl_max_tw_buckets = NR_FILE * 2, .hashinfo = &dccp_hashinfo, }; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 3d21521453fe..909b045c9b11 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -15,6 +15,7 @@ #include <linux/of.h> #include <linux/of_net.h> #include <net/devlink.h> +#include <net/sch_generic.h> #include "dsa_priv.h" @@ -1064,9 +1065,18 @@ static int dsa_tree_setup_master(struct dsa_switch_tree *dst) list_for_each_entry(dp, &dst->ports, list) { if (dsa_port_is_cpu(dp)) { - err = dsa_master_setup(dp->master, dp); + struct net_device *master = dp->master; + bool admin_up = (master->flags & IFF_UP) && + !qdisc_tx_is_noop(master); + + err = dsa_master_setup(master, dp); if (err) return err; + + /* Replay master state event */ + dsa_tree_master_admin_state_change(dst, master, admin_up); + dsa_tree_master_oper_state_change(dst, master, + netif_oper_up(master)); } } @@ -1081,9 +1091,19 @@ static void dsa_tree_teardown_master(struct dsa_switch_tree *dst) rtnl_lock(); - list_for_each_entry(dp, &dst->ports, list) - if (dsa_port_is_cpu(dp)) - dsa_master_teardown(dp->master); + list_for_each_entry(dp, &dst->ports, list) { + if (dsa_port_is_cpu(dp)) { + struct net_device *master = dp->master; + + /* Synthesizing an "admin down" state is sufficient for + * the switches to get a notification if the master is + * currently up and running. + */ + dsa_tree_master_admin_state_change(dst, master, false); + + dsa_master_teardown(master); + } + } rtnl_unlock(); } @@ -1279,6 +1299,52 @@ out_unlock: return err; } +static void dsa_tree_master_state_change(struct dsa_switch_tree *dst, + struct net_device *master) +{ + struct dsa_notifier_master_state_info info; + struct dsa_port *cpu_dp = master->dsa_ptr; + + info.master = master; + info.operational = dsa_port_master_is_operational(cpu_dp); + + dsa_tree_notify(dst, DSA_NOTIFIER_MASTER_STATE_CHANGE, &info); +} + +void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst, + struct net_device *master, + bool up) +{ + struct dsa_port *cpu_dp = master->dsa_ptr; + bool notify = false; + + if ((dsa_port_master_is_operational(cpu_dp)) != + (up && cpu_dp->master_oper_up)) + notify = true; + + cpu_dp->master_admin_up = up; + + if (notify) + dsa_tree_master_state_change(dst, master); +} + +void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst, + struct net_device *master, + bool up) +{ + struct dsa_port *cpu_dp = master->dsa_ptr; + bool notify = false; + + if ((dsa_port_master_is_operational(cpu_dp)) != + (cpu_dp->master_admin_up && up)) + notify = true; + + cpu_dp->master_oper_up = up; + + if (notify) + dsa_tree_master_state_change(dst, master); +} + static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index) { struct dsa_switch_tree *dst = ds->dst; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 760306f0012f..2bbfa9efe9f8 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -40,6 +40,7 @@ enum { DSA_NOTIFIER_TAG_PROTO_DISCONNECT, DSA_NOTIFIER_TAG_8021Q_VLAN_ADD, DSA_NOTIFIER_TAG_8021Q_VLAN_DEL, + DSA_NOTIFIER_MASTER_STATE_CHANGE, }; /* DSA_NOTIFIER_AGEING_TIME */ @@ -109,6 +110,12 @@ struct dsa_notifier_tag_8021q_vlan_info { u16 vid; }; +/* DSA_NOTIFIER_MASTER_STATE_CHANGE */ +struct dsa_notifier_master_state_info { + const struct net_device *master; + bool operational; +}; + struct dsa_switchdev_event_work { struct dsa_switch *ds; int port; @@ -482,6 +489,12 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, struct net_device *master, const struct dsa_device_ops *tag_ops, const struct dsa_device_ops *old_tag_ops); +void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst, + struct net_device *master, + bool up); +void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst, + struct net_device *master, + bool up); unsigned int dsa_bridge_num_get(const struct net_device *bridge_dev, int max); void dsa_bridge_num_put(const struct net_device *bridge_dev, unsigned int bridge_num); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 22241afcac81..2b5b0f294233 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -2346,6 +2346,36 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, err = dsa_port_lag_change(dp, info->lower_state_info); return notifier_from_errno(err); } + case NETDEV_CHANGE: + case NETDEV_UP: { + /* Track state of master port. + * DSA driver may require the master port (and indirectly + * the tagger) to be available for some special operation. + */ + if (netdev_uses_dsa(dev)) { + struct dsa_port *cpu_dp = dev->dsa_ptr; + struct dsa_switch_tree *dst = cpu_dp->ds->dst; + + /* Track when the master port is UP */ + dsa_tree_master_oper_state_change(dst, dev, + netif_oper_up(dev)); + + /* Track when the master port is ready and can accept + * packet. + * NETDEV_UP event is not enough to flag a port as ready. + * We also have to wait for linkwatch_do_dev to dev_activate + * and emit a NETDEV_CHANGE event. + * We check if a master port is ready by checking if the dev + * have a qdisc assigned and is not noop. + */ + dsa_tree_master_admin_state_change(dst, dev, + !qdisc_tx_is_noop(dev)); + + return NOTIFY_OK; + } + + return NOTIFY_DONE; + } case NETDEV_GOING_DOWN: { struct dsa_port *dp, *cpu_dp; struct dsa_switch_tree *dst; @@ -2357,6 +2387,8 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, cpu_dp = dev->dsa_ptr; dst = cpu_dp->ds->dst; + dsa_tree_master_admin_state_change(dst, dev, false); + list_for_each_entry(dp, &dst->ports, list) { if (!dsa_port_is_user(dp)) continue; diff --git a/net/dsa/switch.c b/net/dsa/switch.c index e3c7d2627a61..4866b58649e4 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -113,26 +113,15 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds, return dsa_tag_8021q_bridge_join(ds, info); } -static int dsa_switch_bridge_leave(struct dsa_switch *ds, - struct dsa_notifier_bridge_info *info) +static int dsa_switch_sync_vlan_filtering(struct dsa_switch *ds, + struct dsa_notifier_bridge_info *info) { - struct dsa_switch_tree *dst = ds->dst; struct netlink_ext_ack extack = {0}; bool change_vlan_filtering = false; bool vlan_filtering; struct dsa_port *dp; int err; - if (dst->index == info->tree_index && ds->index == info->sw_index && - ds->ops->port_bridge_leave) - ds->ops->port_bridge_leave(ds, info->port, info->bridge); - - if ((dst->index != info->tree_index || ds->index != info->sw_index) && - ds->ops->crosschip_bridge_leave) - ds->ops->crosschip_bridge_leave(ds, info->tree_index, - info->sw_index, info->port, - info->bridge); - if (ds->needs_standalone_vlan_filtering && !br_vlan_enabled(info->bridge.dev)) { change_vlan_filtering = true; @@ -172,6 +161,31 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, return err; } + return 0; +} + +static int dsa_switch_bridge_leave(struct dsa_switch *ds, + struct dsa_notifier_bridge_info *info) +{ + struct dsa_switch_tree *dst = ds->dst; + int err; + + if (dst->index == info->tree_index && ds->index == info->sw_index && + ds->ops->port_bridge_leave) + ds->ops->port_bridge_leave(ds, info->port, info->bridge); + + if ((dst->index != info->tree_index || ds->index != info->sw_index) && + ds->ops->crosschip_bridge_leave) + ds->ops->crosschip_bridge_leave(ds, info->tree_index, + info->sw_index, info->port, + info->bridge); + + if (ds->dst->index == info->tree_index && ds->index == info->sw_index) { + err = dsa_switch_sync_vlan_filtering(ds, info); + if (err) + return err; + } + return dsa_tag_8021q_bridge_leave(ds, info); } @@ -683,6 +697,18 @@ dsa_switch_disconnect_tag_proto(struct dsa_switch *ds, return 0; } +static int +dsa_switch_master_state_change(struct dsa_switch *ds, + struct dsa_notifier_master_state_info *info) +{ + if (!ds->ops->master_state_change) + return 0; + + ds->ops->master_state_change(ds, info->master, info->operational); + + return 0; +} + static int dsa_switch_event(struct notifier_block *nb, unsigned long event, void *info) { @@ -756,6 +782,9 @@ static int dsa_switch_event(struct notifier_block *nb, case DSA_NOTIFIER_TAG_8021Q_VLAN_DEL: err = dsa_switch_tag_8021q_vlan_del(ds, info); break; + case DSA_NOTIFIER_MASTER_STATE_CHANGE: + err = dsa_switch_master_state_change(ds, info); + break; default: err = -EOPNOTSUPP; break; diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index 1ea9401b8ace..57d2e00f1e5d 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -4,30 +4,12 @@ */ #include <linux/etherdevice.h> +#include <linux/bitfield.h> +#include <net/dsa.h> +#include <linux/dsa/tag_qca.h> #include "dsa_priv.h" -#define QCA_HDR_LEN 2 -#define QCA_HDR_VERSION 0x2 - -#define QCA_HDR_RECV_VERSION_MASK GENMASK(15, 14) -#define QCA_HDR_RECV_VERSION_S 14 -#define QCA_HDR_RECV_PRIORITY_MASK GENMASK(13, 11) -#define QCA_HDR_RECV_PRIORITY_S 11 -#define QCA_HDR_RECV_TYPE_MASK GENMASK(10, 6) -#define QCA_HDR_RECV_TYPE_S 6 -#define QCA_HDR_RECV_FRAME_IS_TAGGED BIT(3) -#define QCA_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0) - -#define QCA_HDR_XMIT_VERSION_MASK GENMASK(15, 14) -#define QCA_HDR_XMIT_VERSION_S 14 -#define QCA_HDR_XMIT_PRIORITY_MASK GENMASK(13, 11) -#define QCA_HDR_XMIT_PRIORITY_S 11 -#define QCA_HDR_XMIT_CONTROL_MASK GENMASK(10, 8) -#define QCA_HDR_XMIT_CONTROL_S 8 -#define QCA_HDR_XMIT_FROM_CPU BIT(7) -#define QCA_HDR_XMIT_DP_BIT_MASK GENMASK(6, 0) - static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); @@ -40,8 +22,9 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) phdr = dsa_etype_header_pos_tx(skb); /* Set the version field, and set destination port information */ - hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S | - QCA_HDR_XMIT_FROM_CPU | BIT(dp->index); + hdr = FIELD_PREP(QCA_HDR_XMIT_VERSION, QCA_HDR_VERSION); + hdr |= QCA_HDR_XMIT_FROM_CPU; + hdr |= FIELD_PREP(QCA_HDR_XMIT_DP_BIT, BIT(dp->index)); *phdr = htons(hdr); @@ -50,10 +33,17 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev) { - u8 ver; - u16 hdr; - int port; + struct qca_tagger_data *tagger_data; + struct dsa_port *dp = dev->dsa_ptr; + struct dsa_switch *ds = dp->ds; + u8 ver, pk_type; __be16 *phdr; + int port; + u16 hdr; + + BUILD_BUG_ON(sizeof(struct qca_mgmt_ethhdr) != QCA_HDR_MGMT_HEADER_LEN + QCA_HDR_LEN); + + tagger_data = ds->tagger_data; if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN))) return NULL; @@ -62,16 +52,33 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev) hdr = ntohs(*phdr); /* Make sure the version is correct */ - ver = (hdr & QCA_HDR_RECV_VERSION_MASK) >> QCA_HDR_RECV_VERSION_S; + ver = FIELD_GET(QCA_HDR_RECV_VERSION, hdr); if (unlikely(ver != QCA_HDR_VERSION)) return NULL; + /* Get pk type */ + pk_type = FIELD_GET(QCA_HDR_RECV_TYPE, hdr); + + /* Ethernet mgmt read/write packet */ + if (pk_type == QCA_HDR_RECV_TYPE_RW_REG_ACK) { + if (likely(tagger_data->rw_reg_ack_handler)) + tagger_data->rw_reg_ack_handler(ds, skb); + return NULL; + } + + /* Ethernet MIB counter packet */ + if (pk_type == QCA_HDR_RECV_TYPE_MIB) { + if (likely(tagger_data->mib_autocast_handler)) + tagger_data->mib_autocast_handler(ds, skb); + return NULL; + } + /* Remove QCA tag and recalculate checksum */ skb_pull_rcsum(skb, QCA_HDR_LEN); dsa_strip_etype_header(skb, QCA_HDR_LEN); /* Get source port information */ - port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK); + port = FIELD_GET(QCA_HDR_RECV_SOURCE_PORT, hdr); skb->dev = dsa_master_find_slave(dev, 0, port); if (!skb->dev) @@ -80,12 +87,34 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev) return skb; } +static int qca_tag_connect(struct dsa_switch *ds) +{ + struct qca_tagger_data *tagger_data; + + tagger_data = kzalloc(sizeof(*tagger_data), GFP_KERNEL); + if (!tagger_data) + return -ENOMEM; + + ds->tagger_data = tagger_data; + + return 0; +} + +static void qca_tag_disconnect(struct dsa_switch *ds) +{ + kfree(ds->tagger_data); + ds->tagger_data = NULL; +} + static const struct dsa_device_ops qca_netdev_ops = { .name = "qca", .proto = DSA_TAG_PROTO_QCA, + .connect = qca_tag_connect, + .disconnect = qca_tag_disconnect, .xmit = qca_tag_xmit, .rcv = qca_tag_rcv, .needed_headroom = QCA_HDR_LEN, + .promisc_on_master = true, }; MODULE_LICENSE("GPL"); diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index c1d5f5e0fdc9..18a5035d3bee 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -53,7 +53,8 @@ static int rings_reply_size(const struct ethnl_req_info *req_base, nla_total_size(sizeof(u32)) + /* _RINGS_RX_MINI */ nla_total_size(sizeof(u32)) + /* _RINGS_RX_JUMBO */ nla_total_size(sizeof(u32)) + /* _RINGS_TX */ - nla_total_size(sizeof(u32)); /* _RINGS_RX_BUF_LEN */ + nla_total_size(sizeof(u32)) + /* _RINGS_RX_BUF_LEN */ + nla_total_size(sizeof(u8)); /* _RINGS_TCP_DATA_SPLIT */ } static int rings_fill_reply(struct sk_buff *skb, @@ -61,9 +62,11 @@ static int rings_fill_reply(struct sk_buff *skb, const struct ethnl_reply_data *reply_base) { const struct rings_reply_data *data = RINGS_REPDATA(reply_base); - const struct kernel_ethtool_ringparam *kernel_ringparam = &data->kernel_ringparam; + const struct kernel_ethtool_ringparam *kr = &data->kernel_ringparam; const struct ethtool_ringparam *ringparam = &data->ringparam; + WARN_ON(kr->tcp_data_split > ETHTOOL_TCP_DATA_SPLIT_ENABLED); + if ((ringparam->rx_max_pending && (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_MAX, ringparam->rx_max_pending) || @@ -84,9 +87,11 @@ static int rings_fill_reply(struct sk_buff *skb, ringparam->tx_max_pending) || nla_put_u32(skb, ETHTOOL_A_RINGS_TX, ringparam->tx_pending))) || - (kernel_ringparam->rx_buf_len && - (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_BUF_LEN, - kernel_ringparam->rx_buf_len)))) + (kr->rx_buf_len && + (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_BUF_LEN, kr->rx_buf_len))) || + (kr->tcp_data_split && + (nla_put_u8(skb, ETHTOOL_A_RINGS_TCP_DATA_SPLIT, + kr->tcp_data_split)))) return -EMSGSIZE; return 0; diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 043e4e9a1694..ff9ec7634218 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -259,11 +259,6 @@ static inline u16 prp_get_skb_sequence_nr(struct prp_rct *rct) return ntohs(rct->sequence_nr); } -static inline u16 get_prp_lan_id(struct prp_rct *rct) -{ - return ntohs(rct->lan_id_and_LSDU_size) >> 12; -} - /* assume there is a valid rct */ static inline bool prp_check_lsdu_size(struct sk_buff *skb, struct prp_rct *rct, diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index de610cb83694..b60c9fd7147e 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ +#include <linux/init.h> #include <linux/types.h> #include <linux/bpf_verifier.h> #include <linux/bpf.h> @@ -212,26 +213,23 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, } } -BTF_SET_START(bpf_tcp_ca_kfunc_ids) +BTF_SET_START(bpf_tcp_ca_check_kfunc_ids) BTF_ID(func, tcp_reno_ssthresh) BTF_ID(func, tcp_reno_cong_avoid) BTF_ID(func, tcp_reno_undo_cwnd) BTF_ID(func, tcp_slow_start) BTF_ID(func, tcp_cong_avoid_ai) -BTF_SET_END(bpf_tcp_ca_kfunc_ids) +BTF_SET_END(bpf_tcp_ca_check_kfunc_ids) -static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id, struct module *owner) -{ - if (btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id)) - return true; - return bpf_check_mod_kfunc_call(&bpf_tcp_ca_kfunc_list, kfunc_btf_id, owner); -} +static const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &bpf_tcp_ca_check_kfunc_ids, +}; static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = { .get_func_proto = bpf_tcp_ca_get_func_proto, .is_valid_access = bpf_tcp_ca_is_valid_access, .btf_struct_access = bpf_tcp_ca_btf_struct_access, - .check_kfunc_call = bpf_tcp_ca_check_kfunc_call, }; static int bpf_tcp_ca_init_member(const struct btf_type *t, @@ -300,3 +298,9 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = { .init = bpf_tcp_ca_init, .name = "tcp_congestion_ops", }; + +static int __init bpf_tcp_ca_kfunc_init(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set); +} +late_initcall(bpf_tcp_ca_kfunc_init); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index b4589861b84c..4c5399450682 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1257,34 +1257,13 @@ fib_info_laddrhash_bucket(const struct net *net, __be32 val) return &fib_info_laddrhash[slot]; } -static struct hlist_head *fib_info_hash_alloc(int bytes) -{ - if (bytes <= PAGE_SIZE) - return kzalloc(bytes, GFP_KERNEL); - else - return (struct hlist_head *) - __get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(bytes)); -} - -static void fib_info_hash_free(struct hlist_head *hash, int bytes) -{ - if (!hash) - return; - - if (bytes <= PAGE_SIZE) - kfree(hash); - else - free_pages((unsigned long) hash, get_order(bytes)); -} - static void fib_info_hash_move(struct hlist_head *new_info_hash, struct hlist_head *new_laddrhash, unsigned int new_size) { struct hlist_head *old_info_hash, *old_laddrhash; unsigned int old_size = fib_info_hash_size; - unsigned int i, bytes; + unsigned int i; spin_lock_bh(&fib_info_lock); old_info_hash = fib_info_hash; @@ -1325,9 +1304,8 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, spin_unlock_bh(&fib_info_lock); - bytes = old_size * sizeof(struct hlist_head *); - fib_info_hash_free(old_info_hash, bytes); - fib_info_hash_free(old_laddrhash, bytes); + kvfree(old_info_hash); + kvfree(old_laddrhash); } __be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc, @@ -1444,19 +1422,19 @@ struct fib_info *fib_create_info(struct fib_config *cfg, unsigned int new_size = fib_info_hash_size << 1; struct hlist_head *new_info_hash; struct hlist_head *new_laddrhash; - unsigned int bytes; + size_t bytes; if (!new_size) new_size = 16; - bytes = new_size * sizeof(struct hlist_head *); - new_info_hash = fib_info_hash_alloc(bytes); - new_laddrhash = fib_info_hash_alloc(bytes); + bytes = (size_t)new_size * sizeof(struct hlist_head *); + new_info_hash = kvzalloc(bytes, GFP_KERNEL); + new_laddrhash = kvzalloc(bytes, GFP_KERNEL); if (!new_info_hash || !new_laddrhash) { - fib_info_hash_free(new_info_hash, bytes); - fib_info_hash_free(new_laddrhash, bytes); - } else + kvfree(new_info_hash); + kvfree(new_laddrhash); + } else { fib_info_hash_move(new_info_hash, new_laddrhash, new_size); - + } if (!fib_info_hash_size) goto failure; } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index b7e277d8a84d..72a375c7f417 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -192,24 +192,14 @@ struct icmp_control { static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; -/* - * The ICMP socket(s). This is the most convenient way to flow control - * our ICMP output as well as maintain a clean interface throughout - * all layers. All Socketless IP sends will soon be gone. - * - * On SMP we have one ICMP socket per-cpu. - */ -static struct sock *icmp_sk(struct net *net) -{ - return this_cpu_read(*net->ipv4.icmp_sk); -} +static DEFINE_PER_CPU(struct sock *, ipv4_icmp_sk); /* Called with BH disabled */ static inline struct sock *icmp_xmit_lock(struct net *net) { struct sock *sk; - sk = icmp_sk(net); + sk = this_cpu_read(ipv4_icmp_sk); if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { /* This can happen if the output path signals a @@ -217,11 +207,13 @@ static inline struct sock *icmp_xmit_lock(struct net *net) */ return NULL; } + sock_net_set(sk, net); return sk; } static inline void icmp_xmit_unlock(struct sock *sk) { + sock_net_set(sk, &init_net); spin_unlock(&sk->sk_lock.slock); } @@ -363,14 +355,13 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd, return 0; } -static void icmp_push_reply(struct icmp_bxm *icmp_param, +static void icmp_push_reply(struct sock *sk, + struct icmp_bxm *icmp_param, struct flowi4 *fl4, struct ipcm_cookie *ipc, struct rtable **rt) { - struct sock *sk; struct sk_buff *skb; - sk = icmp_sk(dev_net((*rt)->dst.dev)); if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param, icmp_param->data_len+icmp_param->head_len, icmp_param->head_len, @@ -452,7 +443,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) if (IS_ERR(rt)) goto out_unlock; if (icmpv4_xrlim_allow(net, rt, &fl4, type, code)) - icmp_push_reply(icmp_param, &fl4, &ipc, &rt); + icmp_push_reply(sk, icmp_param, &fl4, &ipc, &rt); ip_rt_put(rt); out_unlock: icmp_xmit_unlock(sk); @@ -766,7 +757,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, if (!fl4.saddr) fl4.saddr = htonl(INADDR_DUMMY); - icmp_push_reply(&icmp_param, &fl4, &ipc, &rt); + icmp_push_reply(sk, &icmp_param, &fl4, &ipc, &rt); ende: ip_rt_put(rt); out_unlock: @@ -1434,46 +1425,8 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = { }, }; -static void __net_exit icmp_sk_exit(struct net *net) -{ - int i; - - for_each_possible_cpu(i) - inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i)); - free_percpu(net->ipv4.icmp_sk); - net->ipv4.icmp_sk = NULL; -} - static int __net_init icmp_sk_init(struct net *net) { - int i, err; - - net->ipv4.icmp_sk = alloc_percpu(struct sock *); - if (!net->ipv4.icmp_sk) - return -ENOMEM; - - for_each_possible_cpu(i) { - struct sock *sk; - - err = inet_ctl_sock_create(&sk, PF_INET, - SOCK_RAW, IPPROTO_ICMP, net); - if (err < 0) - goto fail; - - *per_cpu_ptr(net->ipv4.icmp_sk, i) = sk; - - /* Enough space for 2 64K ICMP packets, including - * sk_buff/skb_shared_info struct overhead. - */ - sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024); - - /* - * Speedup sock_wfree() - */ - sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); - inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT; - } - /* Control parameters for ECHO replies. */ net->ipv4.sysctl_icmp_echo_ignore_all = 0; net->ipv4.sysctl_icmp_echo_enable_probe = 0; @@ -1499,18 +1452,36 @@ static int __net_init icmp_sk_init(struct net *net) net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0; return 0; - -fail: - icmp_sk_exit(net); - return err; } static struct pernet_operations __net_initdata icmp_sk_ops = { .init = icmp_sk_init, - .exit = icmp_sk_exit, }; int __init icmp_init(void) { + int err, i; + + for_each_possible_cpu(i) { + struct sock *sk; + + err = inet_ctl_sock_create(&sk, PF_INET, + SOCK_RAW, IPPROTO_ICMP, &init_net); + if (err < 0) + return err; + + per_cpu(ipv4_icmp_sk, i) = sk; + + /* Enough space for 2 64K ICMP packets, including + * sk_buff/skb_shared_info struct overhead. + */ + sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024); + + /* + * Speedup sock_wfree() + */ + sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); + inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT; + } return register_pernet_subsys(&icmp_sk_ops); } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index fc2a985f6064..1e5b53c2bb26 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -866,12 +866,9 @@ static void reqsk_timer_handler(struct timer_list *t) (!resend || !inet_rtx_syn_ack(sk_listener, req) || inet_rsk(req)->acked)) { - unsigned long timeo; - if (req->num_timeout++ == 0) atomic_dec(&queue->young); - timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX); - mod_timer(&req->rsk_timer, jiffies + timeo); + mod_timer(&req->rsk_timer, jiffies + reqsk_timeout(req, TCP_RTO_MAX)); if (!nreq) return; @@ -1046,6 +1043,9 @@ int inet_csk_listen_start(struct sock *sk) sk->sk_ack_backlog = 0; inet_csk_delack_init(sk); + if (sk->sk_txrehash == SOCK_TXREHASH_DEFAULT) + sk->sk_txrehash = READ_ONCE(sock_net(sk)->core.sysctl_txrehash); + /* There is race window here: we announce ourselves listening, * but this transition is still not validated by get_port(). * It is OK, because this socket enters to hash table only diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 437afe392e66..9e0bbd026560 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -52,14 +52,15 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw) spin_unlock(lock); /* Disassociate with bind bucket. */ - bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, - hashinfo->bhash_size)]; + bhead = &hashinfo->bhash[tw->tw_bslot]; spin_lock(&bhead->lock); inet_twsk_bind_unhash(tw, hashinfo); spin_unlock(&bhead->lock); - atomic_dec(&tw->tw_dr->tw_count); + if (refcount_dec_and_test(&tw->tw_dr->tw_refcount)) + kfree(tw->tw_dr); + inet_twsk_put(tw); } @@ -110,8 +111,12 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, Note, that any socket with inet->num != 0 MUST be bound in binding cache, even if it is closed. */ - bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, - hashinfo->bhash_size)]; + /* Cache inet_bhashfn(), because 'struct net' might be no longer + * available later in inet_twsk_kill(). + */ + tw->tw_bslot = inet_bhashfn(twsk_net(tw), inet->inet_num, + hashinfo->bhash_size); + bhead = &hashinfo->bhash[tw->tw_bslot]; spin_lock(&bhead->lock); tw->tw_tb = icsk->icsk_bind_hash; WARN_ON(!icsk->icsk_bind_hash); @@ -145,10 +150,6 @@ static void tw_timer_handler(struct timer_list *t) { struct inet_timewait_sock *tw = from_timer(tw, t, tw_timer); - if (tw->tw_kill) - __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED); - else - __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITED); inet_twsk_kill(tw); } @@ -158,7 +159,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, { struct inet_timewait_sock *tw; - if (atomic_read(&dr->tw_count) >= dr->sysctl_max_tw_buckets) + if (refcount_read(&dr->tw_refcount) - 1 >= dr->sysctl_max_tw_buckets) return NULL; tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, @@ -244,59 +245,15 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm) * of PAWS. */ - tw->tw_kill = timeo <= 4*HZ; if (!rearm) { + bool kill = timeo <= 4*HZ; + + __NET_INC_STATS(twsk_net(tw), kill ? LINUX_MIB_TIMEWAITKILLED : + LINUX_MIB_TIMEWAITED); BUG_ON(mod_timer(&tw->tw_timer, jiffies + timeo)); - atomic_inc(&tw->tw_dr->tw_count); + refcount_inc(&tw->tw_dr->tw_refcount); } else { mod_timer_pending(&tw->tw_timer, jiffies + timeo); } } EXPORT_SYMBOL_GPL(__inet_twsk_schedule); - -void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family) -{ - struct inet_timewait_sock *tw; - struct sock *sk; - struct hlist_nulls_node *node; - unsigned int slot; - - for (slot = 0; slot <= hashinfo->ehash_mask; slot++) { - struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; -restart_rcu: - cond_resched(); - rcu_read_lock(); -restart: - sk_nulls_for_each_rcu(sk, node, &head->chain) { - if (sk->sk_state != TCP_TIME_WAIT) - continue; - tw = inet_twsk(sk); - if ((tw->tw_family != family) || - refcount_read(&twsk_net(tw)->ns.count)) - continue; - - if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt))) - continue; - - if (unlikely((tw->tw_family != family) || - refcount_read(&twsk_net(tw)->ns.count))) { - inet_twsk_put(tw); - goto restart; - } - - rcu_read_unlock(); - local_bh_disable(); - inet_twsk_deschedule_put(tw); - local_bh_enable(); - goto restart_rcu; - } - /* If the nulls value we got at the end of this lookup is - * not the expected one, we must restart lookup. - * We probably met an item that was moved to another chain. - */ - if (get_nulls_value(node) != slot) - goto restart; - rcu_read_unlock(); - } -} -EXPORT_SYMBOL_GPL(inet_twsk_purge); diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index da1b5038bdfd..a9e22a098872 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -42,7 +42,7 @@ */ void ip_options_build(struct sk_buff *skb, struct ip_options *opt, - __be32 daddr, struct rtable *rt, int is_frag) + __be32 daddr, struct rtable *rt) { unsigned char *iph = skb_network_header(skb); @@ -53,28 +53,15 @@ void ip_options_build(struct sk_buff *skb, struct ip_options *opt, if (opt->srr) memcpy(iph + opt->srr + iph[opt->srr + 1] - 4, &daddr, 4); - if (!is_frag) { - if (opt->rr_needaddr) - ip_rt_get_source(iph + opt->rr + iph[opt->rr + 2] - 5, skb, rt); - if (opt->ts_needaddr) - ip_rt_get_source(iph + opt->ts + iph[opt->ts + 2] - 9, skb, rt); - if (opt->ts_needtime) { - __be32 midtime; + if (opt->rr_needaddr) + ip_rt_get_source(iph + opt->rr + iph[opt->rr + 2] - 5, skb, rt); + if (opt->ts_needaddr) + ip_rt_get_source(iph + opt->ts + iph[opt->ts + 2] - 9, skb, rt); + if (opt->ts_needtime) { + __be32 midtime; - midtime = inet_current_timestamp(); - memcpy(iph + opt->ts + iph[opt->ts + 2] - 5, &midtime, 4); - } - return; - } - if (opt->rr) { - memset(iph + opt->rr, IPOPT_NOP, iph[opt->rr + 1]); - opt->rr = 0; - opt->rr_needaddr = 0; - } - if (opt->ts) { - memset(iph + opt->ts, IPOPT_NOP, iph[opt->ts + 1]); - opt->ts = 0; - opt->ts_needaddr = opt->ts_needtime = 0; + midtime = inet_current_timestamp(); + memcpy(iph + opt->ts + iph[opt->ts + 2] - 5, &midtime, 4); } } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 139cec29ed06..0c0574eb5f5b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -179,7 +179,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, if (opt && opt->opt.optlen) { iph->ihl += opt->opt.optlen>>2; - ip_options_build(skb, &opt->opt, daddr, rt, 0); + ip_options_build(skb, &opt->opt, daddr, rt); } skb->priority = sk->sk_priority; @@ -519,7 +519,7 @@ packet_routed: if (inet_opt && inet_opt->opt.optlen) { iph->ihl += inet_opt->opt.optlen >> 2; - ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); + ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt); } ip_select_ident_segs(net, skb, sk, @@ -1541,7 +1541,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, if (opt) { iph->ihl += opt->optlen >> 2; - ip_options_build(skb, opt, cork->addr, rt, 0); + ip_options_build(skb, opt, cork->addr, rt); } skb->priority = (cork->tos != -1) ? cork->priority: sk->sk_priority; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index f30273afb539..28836071f0a6 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -59,8 +59,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n", sock_prot_inuse_get(net, &tcp_prot), orphans, - atomic_read(&net->ipv4.tcp_death_row.tw_count), sockets, - proto_memory_allocated(&tcp_prot)); + refcount_read(&net->ipv4.tcp_death_row->tw_refcount) - 1, + sockets, proto_memory_allocated(&tcp_prot)); seq_printf(seq, "UDP: inuse %d mem %ld\n", sock_prot_inuse_get(net, &udp_prot), proto_memory_allocated(&udp_prot)); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ff6f91cdb6c4..8b35075088e1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -112,14 +112,13 @@ #define DEFAULT_MIN_PMTU (512 + 20 + 20) #define DEFAULT_MTU_EXPIRES (10 * 60 * HZ) - +#define DEFAULT_MIN_ADVMSS 256 static int ip_rt_max_size; static int ip_rt_redirect_number __read_mostly = 9; static int ip_rt_redirect_load __read_mostly = HZ / 50; static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1)); static int ip_rt_error_cost __read_mostly = HZ; static int ip_rt_error_burst __read_mostly = 5 * HZ; -static int ip_rt_min_advmss __read_mostly = 256; static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; @@ -458,7 +457,7 @@ static u32 *ip_tstamps __read_mostly; * if one generator is seldom used. This makes hard for an attacker * to infer how many packets were sent between two points in time. */ -u32 ip_idents_reserve(u32 hash, int segs) +static u32 ip_idents_reserve(u32 hash, int segs) { u32 bucket, old, now = (u32)jiffies; atomic_t *p_id; @@ -479,7 +478,6 @@ u32 ip_idents_reserve(u32 hash, int segs) */ return atomic_add_return(segs + delta, p_id) - segs; } -EXPORT_SYMBOL(ip_idents_reserve); void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) { @@ -1298,9 +1296,10 @@ static void set_class_tag(struct rtable *rt, u32 tag) static unsigned int ipv4_default_advmss(const struct dst_entry *dst) { + struct net *net = dev_net(dst->dev); unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr); unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, - ip_rt_min_advmss); + net->ipv4.ip_rt_min_advmss); return min(advmss, IPV4_MAX_PMTU - header_size); } @@ -3535,13 +3534,6 @@ static struct ctl_table ipv4_route_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "min_adv_mss", - .data = &ip_rt_min_advmss, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { } }; @@ -3569,6 +3561,13 @@ static struct ctl_table ipv4_route_netns_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "min_adv_mss", + .data = &init_net.ipv4.ip_rt_min_advmss, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { }, }; @@ -3631,6 +3630,7 @@ static __net_init int netns_ip_rt_init(struct net *net) /* Set default value for namespaceified sysctls */ net->ipv4.ip_rt_min_pmtu = DEFAULT_MIN_PMTU; net->ipv4.ip_rt_mtu_expires = DEFAULT_MTU_EXPIRES; + net->ipv4.ip_rt_min_advmss = DEFAULT_MIN_ADVMSS; return 0; } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 97eb54774924..1cae27b5dcd8 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -589,6 +589,14 @@ static struct ctl_table ipv4_table[] = { }; static struct ctl_table ipv4_net_table[] = { + /* tcp_max_tw_buckets must be first in this table. */ + { + .procname = "tcp_max_tw_buckets", +/* .data = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets, */ + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { .procname = "icmp_echo_ignore_all", .data = &init_net.ipv4.sysctl_icmp_echo_ignore_all, @@ -1001,13 +1009,6 @@ static struct ctl_table ipv4_net_table[] = { .extra2 = &two, }, { - .procname = "tcp_max_tw_buckets", - .data = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_max_syn_backlog", .data = &init_net.ipv4.sysctl_max_syn_backlog, .maxlen = sizeof(int), @@ -1400,7 +1401,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) if (!table) goto err_alloc; - for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) { + /* skip first entry (sysctl_max_tw_buckets) */ + for (i = 1; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) { if (table[i].data) { /* Update the variables to point into * the current struct net @@ -1415,6 +1417,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) } } + table[0].data = &net->ipv4.tcp_death_row->sysctl_max_tw_buckets; + net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); if (!net->ipv4.ipv4_hdr) goto err_reg; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bdf108f544a4..a03a6bfb4353 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -894,8 +894,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, return mss_now; /* Note : tcp_tso_autosize() will eventually split this later */ - new_size_goal = sk->sk_gso_max_size - 1 - MAX_TCP_HEADER; - new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal); + new_size_goal = tcp_bound_to_half_wnd(tp, sk->sk_gso_max_size); /* We try hard to avoid divides here */ size_goal = tp->gso_segs * mss_now; diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index ec5550089b4d..02e8626ccb27 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -1154,7 +1154,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { .set_state = bbr_set_state, }; -BTF_SET_START(tcp_bbr_kfunc_ids) +BTF_SET_START(tcp_bbr_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE BTF_ID(func, bbr_init) @@ -1167,25 +1167,27 @@ BTF_ID(func, bbr_min_tso_segs) BTF_ID(func, bbr_set_state) #endif #endif -BTF_SET_END(tcp_bbr_kfunc_ids) +BTF_SET_END(tcp_bbr_check_kfunc_ids) -static DEFINE_KFUNC_BTF_ID_SET(&tcp_bbr_kfunc_ids, tcp_bbr_kfunc_btf_set); +static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &tcp_bbr_check_kfunc_ids, +}; static int __init bbr_register(void) { int ret; BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE); - ret = tcp_register_congestion_control(&tcp_bbr_cong_ops); - if (ret) + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_bbr_kfunc_set); + if (ret < 0) return ret; - register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set); - return 0; + return tcp_register_congestion_control(&tcp_bbr_cong_ops); } static void __exit bbr_unregister(void) { - unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set); tcp_unregister_congestion_control(&tcp_bbr_cong_ops); } diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index e07837e23b3f..24d562dd6225 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -485,7 +485,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = { .name = "cubic", }; -BTF_SET_START(tcp_cubic_kfunc_ids) +BTF_SET_START(tcp_cubic_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE BTF_ID(func, cubictcp_init) @@ -496,9 +496,12 @@ BTF_ID(func, cubictcp_cwnd_event) BTF_ID(func, cubictcp_acked) #endif #endif -BTF_SET_END(tcp_cubic_kfunc_ids) +BTF_SET_END(tcp_cubic_check_kfunc_ids) -static DEFINE_KFUNC_BTF_ID_SET(&tcp_cubic_kfunc_ids, tcp_cubic_kfunc_btf_set); +static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &tcp_cubic_check_kfunc_ids, +}; static int __init cubictcp_register(void) { @@ -534,16 +537,14 @@ static int __init cubictcp_register(void) /* divide by bic_scale and by constant Srtt (100ms) */ do_div(cube_factor, bic_scale * 10); - ret = tcp_register_congestion_control(&cubictcp); - if (ret) + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_cubic_kfunc_set); + if (ret < 0) return ret; - register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set); - return 0; + return tcp_register_congestion_control(&cubictcp); } static void __exit cubictcp_unregister(void) { - unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set); tcp_unregister_congestion_control(&cubictcp); } diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 0d7ab3cc7b61..1943a6630341 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -238,7 +238,7 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = { .name = "dctcp-reno", }; -BTF_SET_START(tcp_dctcp_kfunc_ids) +BTF_SET_START(tcp_dctcp_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE BTF_ID(func, dctcp_init) @@ -249,25 +249,27 @@ BTF_ID(func, dctcp_cwnd_undo) BTF_ID(func, dctcp_state) #endif #endif -BTF_SET_END(tcp_dctcp_kfunc_ids) +BTF_SET_END(tcp_dctcp_check_kfunc_ids) -static DEFINE_KFUNC_BTF_ID_SET(&tcp_dctcp_kfunc_ids, tcp_dctcp_kfunc_btf_set); +static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &tcp_dctcp_check_kfunc_ids, +}; static int __init dctcp_register(void) { int ret; BUILD_BUG_ON(sizeof(struct dctcp) > ICSK_CA_PRIV_SIZE); - ret = tcp_register_congestion_control(&dctcp); - if (ret) + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_dctcp_kfunc_set); + if (ret < 0) return ret; - register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set); - return 0; + return tcp_register_congestion_control(&dctcp); } static void __exit dctcp_unregister(void) { - unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set); tcp_unregister_congestion_control(&dctcp); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bfe4112e000c..af94a6d22a9d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6725,6 +6725,7 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, ireq->ireq_state = TCP_NEW_SYN_RECV; write_pnet(&ireq->ireq_net, sock_net(sk_listener)); ireq->ireq_family = sk_listener->sk_family; + req->timeout = TCP_TIMEOUT_INIT; } return req; @@ -6941,9 +6942,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, sock_put(fastopen_sk); } else { tcp_rsk(req)->tfo_listener = false; - if (!want_cookie) - inet_csk_reqsk_queue_hash_add(sk, req, - tcp_timeout_init((struct sock *)req)); + if (!want_cookie) { + req->timeout = tcp_timeout_init((struct sock *)req); + inet_csk_reqsk_queue_hash_add(sk, req, req->timeout); + } af_ops->send_synack(sk, dst, &fl, req, &foc, !want_cookie ? TCP_SYNACK_NORMAL : TCP_SYNACK_COOKIE, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fec656f5a39e..6873f46fc8ba 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -91,6 +91,8 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, struct inet_hashinfo tcp_hashinfo; EXPORT_SYMBOL(tcp_hashinfo); +static DEFINE_PER_CPU(struct sock *, ipv4_tcp_sk); + static u32 tcp_v4_init_seq(const struct sk_buff *skb) { return secure_tcp_seq(ip_hdr(skb)->daddr, @@ -206,7 +208,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct rtable *rt; int err; struct ip_options_rcu *inet_opt; - struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; + struct inet_timewait_death_row *tcp_death_row = sock_net(sk)->ipv4.tcp_death_row; if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; @@ -810,7 +812,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) arg.tos = ip_hdr(skb)->tos; arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); local_bh_disable(); - ctl_sk = this_cpu_read(*net->ipv4.tcp_sk); + ctl_sk = this_cpu_read(ipv4_tcp_sk); + sock_net_set(ctl_sk, net); if (sk) { ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? inet_twsk(sk)->tw_mark : sk->sk_mark; @@ -825,6 +828,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) transmit_time); ctl_sk->sk_mark = 0; + sock_net_set(ctl_sk, &init_net); __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); local_bh_enable(); @@ -908,7 +912,8 @@ static void tcp_v4_send_ack(const struct sock *sk, arg.tos = tos; arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); local_bh_disable(); - ctl_sk = this_cpu_read(*net->ipv4.tcp_sk); + ctl_sk = this_cpu_read(ipv4_tcp_sk); + sock_net_set(ctl_sk, net); ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? inet_twsk(sk)->tw_mark : sk->sk_mark; ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ? @@ -921,6 +926,7 @@ static void tcp_v4_send_ack(const struct sock *sk, transmit_time); ctl_sk->sk_mark = 0; + sock_net_set(ctl_sk, &init_net); __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); local_bh_enable(); } @@ -3111,41 +3117,18 @@ EXPORT_SYMBOL(tcp_prot); static void __net_exit tcp_sk_exit(struct net *net) { - int cpu; + struct inet_timewait_death_row *tcp_death_row = net->ipv4.tcp_death_row; if (net->ipv4.tcp_congestion_control) bpf_module_put(net->ipv4.tcp_congestion_control, net->ipv4.tcp_congestion_control->owner); - - for_each_possible_cpu(cpu) - inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); - free_percpu(net->ipv4.tcp_sk); + if (refcount_dec_and_test(&tcp_death_row->tw_refcount)) + kfree(tcp_death_row); } static int __net_init tcp_sk_init(struct net *net) { - int res, cpu, cnt; - - net->ipv4.tcp_sk = alloc_percpu(struct sock *); - if (!net->ipv4.tcp_sk) - return -ENOMEM; - - for_each_possible_cpu(cpu) { - struct sock *sk; - - res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW, - IPPROTO_TCP, net); - if (res) - goto fail; - sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); - - /* Please enforce IP_DF and IPID==0 for RST and - * ACK sent in SYN-RECV and TIME-WAIT state. - */ - inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO; - - *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; - } + int cnt; net->ipv4.sysctl_tcp_ecn = 2; net->ipv4.sysctl_tcp_ecn_fallback = 1; @@ -3172,9 +3155,13 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_tw_reuse = 2; net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1; + net->ipv4.tcp_death_row = kzalloc(sizeof(struct inet_timewait_death_row), GFP_KERNEL); + if (!net->ipv4.tcp_death_row) + return -ENOMEM; + refcount_set(&net->ipv4.tcp_death_row->tw_refcount, 1); cnt = tcp_hashinfo.ehash_mask + 1; - net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2; - net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo; + net->ipv4.tcp_death_row->sysctl_max_tw_buckets = cnt / 2; + net->ipv4.tcp_death_row->hashinfo = &tcp_hashinfo; net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 128); net->ipv4.sysctl_tcp_sack = 1; @@ -3229,18 +3216,12 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.tcp_congestion_control = &tcp_reno; return 0; -fail: - tcp_sk_exit(net); - - return res; } static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) { struct net *net; - inet_twsk_purge(&tcp_hashinfo, AF_INET); - list_for_each_entry(net, net_exit_list, exit_list) tcp_fastopen_ctx_destroy(net); } @@ -3326,6 +3307,24 @@ static void __init bpf_iter_register(void) void __init tcp_v4_init(void) { + int cpu, res; + + for_each_possible_cpu(cpu) { + struct sock *sk; + + res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW, + IPPROTO_TCP, &init_net); + if (res) + panic("Failed to create the TCP control socket.\n"); + sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); + + /* Please enforce IP_DF and IPID==0 for RST and + * ACK sent in SYN-RECV and TIME-WAIT state. + */ + inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO; + + per_cpu(ipv4_tcp_sk, cpu) = sk; + } if (register_pernet_subsys(&tcp_sk_ops)) panic("Failed to create the TCP control socket.\n"); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 7c2d3ac2363a..6366df7aaf2a 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -248,7 +248,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); struct inet_timewait_sock *tw; - struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; + struct inet_timewait_death_row *tcp_death_row = sock_net(sk)->ipv4.tcp_death_row; tw = inet_twsk_alloc(sk, tcp_death_row, state); @@ -583,7 +583,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * it can be estimated (approximately) * from another data. */ - tmp_opt.ts_recent_stamp = ktime_get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout); + tmp_opt.ts_recent_stamp = ktime_get_seconds() - reqsk_timeout(req, TCP_RTO_MAX) / HZ; paws_reject = tcp_paws_reject(&tmp_opt, th->rst); } } @@ -622,8 +622,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, !inet_rtx_syn_ack(sk, req)) { unsigned long expires = jiffies; - expires += min(TCP_TIMEOUT_INIT << req->num_timeout, - TCP_RTO_MAX); + expires += reqsk_timeout(req, TCP_RTO_MAX); if (!fastopen) mod_timer_pending(&req->rsk_timer, expires); else diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5079832af5c1..e76bf1e9251e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1960,7 +1960,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, bytes = min_t(unsigned long, sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift), - sk->sk_gso_max_size - 1 - MAX_TCP_HEADER); + sk->sk_gso_max_size); /* Goal is to send at least one packet per ms, * not one big TSO packet every 100 ms. @@ -4092,7 +4092,9 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) struct flowi fl; int res; - tcp_rsk(req)->txhash = net_tx_rndhash(); + /* Paired with WRITE_ONCE() in sock_setsockopt() */ + if (READ_ONCE(sk->sk_txrehash) == SOCK_TXREHASH_ENABLED) + tcp_rsk(req)->txhash = net_tx_rndhash(); res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL, NULL); if (!res) { diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 77e34aec7e82..658d5eabaf7e 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -1344,14 +1344,14 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, return opt2; } -struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, - struct ipv6_txoptions *opt) +struct ipv6_txoptions *__ipv6_fixup_options(struct ipv6_txoptions *opt_space, + struct ipv6_txoptions *opt) { /* * ignore the dest before srcrt unless srcrt is being included. * --yoshfuji */ - if (opt && opt->dst0opt && !opt->srcrt) { + if (opt->dst0opt && !opt->srcrt) { if (opt_space != opt) { memcpy(opt_space, opt, sizeof(*opt_space)); opt = opt_space; @@ -1362,7 +1362,7 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, return opt; } -EXPORT_SYMBOL_GPL(ipv6_fixup_options); +EXPORT_SYMBOL_GPL(__ipv6_fixup_options); /** * fl6_update_dst - update flowi destination address with info given diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 96c5cc0f30ce..e6b978ea0e87 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -69,17 +69,7 @@ #include <linux/uaccess.h> -/* - * The ICMP socket(s). This is the most convenient way to flow control - * our ICMP output as well as maintain a clean interface throughout - * all layers. All Socketless IP sends will soon be gone. - * - * On SMP we have one ICMP socket per-cpu. - */ -static struct sock *icmpv6_sk(struct net *net) -{ - return this_cpu_read(*net->ipv6.icmp_sk); -} +static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk); static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) @@ -110,11 +100,11 @@ static const struct inet6_protocol icmpv6_protocol = { }; /* Called with BH disabled */ -static __inline__ struct sock *icmpv6_xmit_lock(struct net *net) +static struct sock *icmpv6_xmit_lock(struct net *net) { struct sock *sk; - sk = icmpv6_sk(net); + sk = this_cpu_read(ipv6_icmp_sk); if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { /* This can happen if the output path (f.e. SIT or * ip6ip6 tunnel) signals dst_link_failure() for an @@ -122,11 +112,13 @@ static __inline__ struct sock *icmpv6_xmit_lock(struct net *net) */ return NULL; } + sock_net_set(sk, net); return sk; } -static __inline__ void icmpv6_xmit_unlock(struct sock *sk) +static void icmpv6_xmit_unlock(struct sock *sk) { + sock_net_set(sk, &init_net); spin_unlock(&sk->sk_lock.slock); } @@ -1034,59 +1026,27 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6, security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); } -static void __net_exit icmpv6_sk_exit(struct net *net) -{ - int i; - - for_each_possible_cpu(i) - inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i)); - free_percpu(net->ipv6.icmp_sk); -} - -static int __net_init icmpv6_sk_init(struct net *net) +int __init icmpv6_init(void) { struct sock *sk; int err, i; - net->ipv6.icmp_sk = alloc_percpu(struct sock *); - if (!net->ipv6.icmp_sk) - return -ENOMEM; - for_each_possible_cpu(i) { err = inet_ctl_sock_create(&sk, PF_INET6, - SOCK_RAW, IPPROTO_ICMPV6, net); + SOCK_RAW, IPPROTO_ICMPV6, &init_net); if (err < 0) { pr_err("Failed to initialize the ICMP6 control socket (err %d)\n", err); - goto fail; + return err; } - *per_cpu_ptr(net->ipv6.icmp_sk, i) = sk; + per_cpu(ipv6_icmp_sk, i) = sk; /* Enough space for 2 64K ICMP packets, including * sk_buff struct overhead. */ sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024); } - return 0; - - fail: - icmpv6_sk_exit(net); - return err; -} - -static struct pernet_operations icmpv6_sk_ops = { - .init = icmpv6_sk_init, - .exit = icmpv6_sk_exit, -}; - -int __init icmpv6_init(void) -{ - int err; - - err = register_pernet_subsys(&icmpv6_sk_ops); - if (err < 0) - return err; err = -EAGAIN; if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) @@ -1101,14 +1061,12 @@ sender_reg_err: inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); fail: pr_err("Failed to register ICMP6 protocol\n"); - unregister_pernet_subsys(&icmpv6_sk_ops); return err; } void icmpv6_cleanup(void) { inet6_unregister_icmp_sender(icmp6_send); - unregister_pernet_subsys(&icmpv6_sk_ops); inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index b29e9ba5e113..d37a79a8554e 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -249,7 +249,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head, if ((first_word & htonl(0xF00FFFFF)) || !ipv6_addr_equal(&iph->saddr, &iph2->saddr) || !ipv6_addr_equal(&iph->daddr, &iph2->daddr) || - *(u16 *)&iph->nexthdr != *(u16 *)&iph2->nexthdr) { + iph->nexthdr != iph2->nexthdr) { not_same_flow: NAPI_GRO_CB(p)->same_flow = 0; continue; @@ -260,7 +260,8 @@ not_same_flow: goto not_same_flow; } /* flush if Traffic Class fields are different */ - NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000)); + NAPI_GRO_CB(p)->flush |= !!((first_word & htonl(0x0FF00000)) | + (__force __be32)(iph->hop_limit ^ iph2->hop_limit)); NAPI_GRO_CB(p)->flush |= flush; /* If the previous IP ID value was based on an atomic diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 2995f8d89e7e..0c6c971ce0a5 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1350,11 +1350,16 @@ static void ip6_append_data_mtu(unsigned int *mtu, static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6, - struct rt6_info *rt, struct flowi6 *fl6) + struct rt6_info *rt) { struct ipv6_pinfo *np = inet6_sk(sk); unsigned int mtu; - struct ipv6_txoptions *opt = ipc6->opt; + struct ipv6_txoptions *nopt, *opt = ipc6->opt; + + /* callers pass dst together with a reference, set it first so + * ip6_cork_release() can put it down even in case of an error. + */ + cork->base.dst = &rt->dst; /* * setup for corking @@ -1363,39 +1368,32 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, if (WARN_ON(v6_cork->opt)) return -EINVAL; - v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation); - if (unlikely(!v6_cork->opt)) + nopt = v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation); + if (unlikely(!nopt)) return -ENOBUFS; - v6_cork->opt->tot_len = sizeof(*opt); - v6_cork->opt->opt_flen = opt->opt_flen; - v6_cork->opt->opt_nflen = opt->opt_nflen; + nopt->tot_len = sizeof(*opt); + nopt->opt_flen = opt->opt_flen; + nopt->opt_nflen = opt->opt_nflen; - v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt, - sk->sk_allocation); - if (opt->dst0opt && !v6_cork->opt->dst0opt) + nopt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation); + if (opt->dst0opt && !nopt->dst0opt) return -ENOBUFS; - v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt, - sk->sk_allocation); - if (opt->dst1opt && !v6_cork->opt->dst1opt) + nopt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation); + if (opt->dst1opt && !nopt->dst1opt) return -ENOBUFS; - v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt, - sk->sk_allocation); - if (opt->hopopt && !v6_cork->opt->hopopt) + nopt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation); + if (opt->hopopt && !nopt->hopopt) return -ENOBUFS; - v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt, - sk->sk_allocation); - if (opt->srcrt && !v6_cork->opt->srcrt) + nopt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation); + if (opt->srcrt && !nopt->srcrt) return -ENOBUFS; /* need source address above miyazawa*/ } - dst_hold(&rt->dst); - cork->base.dst = &rt->dst; - cork->fl.u.ip6 = *fl6; v6_cork->hop_limit = ipc6->hlimit; v6_cork->tclass = ipc6->tclass; if (rt->dst.flags & DST_XFRM_TUNNEL) @@ -1426,9 +1424,8 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, } static int __ip6_append_data(struct sock *sk, - struct flowi6 *fl6, struct sk_buff_head *queue, - struct inet_cork *cork, + struct inet_cork_full *cork_full, struct inet6_cork *v6_cork, struct page_frag *pfrag, int getfrag(void *from, char *to, int offset, @@ -1437,6 +1434,8 @@ static int __ip6_append_data(struct sock *sk, unsigned int flags, struct ipcm6_cookie *ipc6) { struct sk_buff *skb, *skb_prev = NULL; + struct inet_cork *cork = &cork_full->base; + struct flowi6 *fl6 = &cork_full->fl.u.ip6; unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu; struct ubuf_info *uarg = NULL; int exthdrlen = 0; @@ -1788,34 +1787,46 @@ int ip6_append_data(struct sock *sk, /* * setup for corking */ + dst_hold(&rt->dst); err = ip6_setup_cork(sk, &inet->cork, &np->cork, - ipc6, rt, fl6); + ipc6, rt); if (err) return err; + inet->cork.fl.u.ip6 = *fl6; exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0); length += exthdrlen; transhdrlen += exthdrlen; } else { - fl6 = &inet->cork.fl.u.ip6; transhdrlen = 0; } - return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base, + return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork, &np->cork, sk_page_frag(sk), getfrag, from, length, transhdrlen, flags, ipc6); } EXPORT_SYMBOL_GPL(ip6_append_data); +static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork) +{ + struct dst_entry *dst = cork->base.dst; + + cork->base.dst = NULL; + cork->base.flags &= ~IPCORK_ALLFRAG; + skb_dst_set(skb, dst); +} + static void ip6_cork_release(struct inet_cork_full *cork, struct inet6_cork *v6_cork) { if (v6_cork->opt) { - kfree(v6_cork->opt->dst0opt); - kfree(v6_cork->opt->dst1opt); - kfree(v6_cork->opt->hopopt); - kfree(v6_cork->opt->srcrt); - kfree(v6_cork->opt); + struct ipv6_txoptions *opt = v6_cork->opt; + + kfree(opt->dst0opt); + kfree(opt->dst1opt); + kfree(opt->hopopt); + kfree(opt->srcrt); + kfree(opt); v6_cork->opt = NULL; } @@ -1824,7 +1835,6 @@ static void ip6_cork_release(struct inet_cork_full *cork, cork->base.dst = NULL; cork->base.flags &= ~IPCORK_ALLFRAG; } - memset(&cork->fl, 0, sizeof(cork->fl)); } struct sk_buff *__ip6_make_skb(struct sock *sk, @@ -1834,7 +1844,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, { struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; - struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; + struct in6_addr *final_dst; struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6hdr *hdr; @@ -1864,9 +1874,9 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, /* Allow local fragmentation. */ skb->ignore_df = ip6_sk_ignore_df(sk); - - *final_dst = fl6->daddr; __skb_pull(skb, skb_network_header_len(skb)); + + final_dst = &fl6->daddr; if (opt && opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); if (opt && opt->opt_nflen) @@ -1886,10 +1896,9 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, skb->priority = sk->sk_priority; skb->mark = cork->base.mark; - skb->tstamp = cork->base.transmit_time; - skb_dst_set(skb, dst_clone(&rt->dst)); + ip6_cork_steal_dst(skb, cork); IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); @@ -1961,26 +1970,26 @@ struct sk_buff *ip6_make_skb(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, - struct ipcm6_cookie *ipc6, struct flowi6 *fl6, - struct rt6_info *rt, unsigned int flags, - struct inet_cork_full *cork) + struct ipcm6_cookie *ipc6, struct rt6_info *rt, + unsigned int flags, struct inet_cork_full *cork) { struct inet6_cork v6_cork; struct sk_buff_head queue; int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0); int err; - if (flags & MSG_PROBE) + if (flags & MSG_PROBE) { + dst_release(&rt->dst); return NULL; + } __skb_queue_head_init(&queue); cork->base.flags = 0; cork->base.addr = 0; cork->base.opt = NULL; - cork->base.dst = NULL; v6_cork.opt = NULL; - err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6); + err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt); if (err) { ip6_cork_release(cork, &v6_cork); return ERR_PTR(err); @@ -1988,7 +1997,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, if (ipc6->dontfrag < 0) ipc6->dontfrag = inet6_sk(sk)->dontfrag; - err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork, + err = __ip6_append_data(sk, &queue, cork, &v6_cork, ¤t->task_frag, getfrag, from, length + exthdrlen, transhdrlen + exthdrlen, flags, ipc6); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 97ade833f58c..b47ffc81f9e5 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1121,6 +1121,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); neigh_release(neigh); + } else if (skb->protocol == htons(ETH_P_IP)) { + struct rtable *rt = skb_rtable(skb); + + if (rt->rt_gw_family == AF_INET6) + memcpy(&fl6->daddr, &rt->rt_gw6, sizeof(fl6->daddr)); } } else if (t->parms.proto != 0 && !(t->parms.flags & (IP6_TNL_F_USE_ORIG_TCLASS | diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 075ee8a2df3b..0c648bf07f39 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -148,6 +148,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_timewait_death_row *tcp_death_row; struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; @@ -156,7 +157,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct dst_entry *dst; int addr_type; int err; - struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; @@ -308,6 +308,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, inet->inet_dport = usin->sin6_port; tcp_set_state(sk, TCP_SYN_SENT); + tcp_death_row = sock_net(sk)->ipv4.tcp_death_row; err = inet6_hash_connect(tcp_death_row, sk); if (err) goto late_failure; @@ -2237,15 +2238,9 @@ static void __net_exit tcpv6_net_exit(struct net *net) inet_ctl_sock_destroy(net->ipv6.tcp_sk); } -static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list) -{ - inet_twsk_purge(&tcp_hashinfo, AF_INET6); -} - static struct pernet_operations tcpv6_net_ops = { .init = tcpv6_net_init, .exit = tcpv6_net_exit, - .exit_batch = tcpv6_net_exit_batch, }; int __init tcpv6_init(void) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 528b81ef19c9..c6872596b408 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1266,23 +1266,17 @@ static int udp_v6_push_pending_frames(struct sock *sk) { struct sk_buff *skb; struct udp_sock *up = udp_sk(sk); - struct flowi6 fl6; int err = 0; if (up->pending == AF_INET) return udp_push_pending_frames(sk); - /* ip6_finish_skb will release the cork, so make a copy of - * fl6 here. - */ - fl6 = inet_sk(sk)->cork.fl.u.ip6; - skb = ip6_finish_skb(sk); if (!skb) goto out; - err = udp_v6_send_skb(skb, &fl6, &inet_sk(sk)->cork.base); - + err = udp_v6_send_skb(skb, &inet_sk(sk)->cork.fl.u.ip6, + &inet_sk(sk)->cork.base); out: up->len = 0; up->pending = 0; @@ -1300,7 +1294,8 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct ipv6_txoptions *opt = NULL; struct ipv6_txoptions *opt_to_free = NULL; struct ip6_flowlabel *flowlabel = NULL; - struct flowi6 fl6; + struct inet_cork_full cork; + struct flowi6 *fl6 = &cork.fl.u.ip6; struct dst_entry *dst; struct ipcm6_cookie ipc6; int addr_len = msg->msg_namelen; @@ -1363,9 +1358,6 @@ do_udp_sendmsg: } } - if (up->pending == AF_INET) - return udp_sendmsg(sk, msg, len); - /* Rough check on arithmetic overflow, better check is made in ip6_append_data(). */ @@ -1374,6 +1366,8 @@ do_udp_sendmsg: getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; if (up->pending) { + if (up->pending == AF_INET) + return udp_sendmsg(sk, msg, len); /* * There are pending frames. * The socket lock must be held while it's corked. @@ -1391,19 +1385,19 @@ do_udp_sendmsg: } ulen += sizeof(struct udphdr); - memset(&fl6, 0, sizeof(fl6)); + memset(fl6, 0, sizeof(*fl6)); if (sin6) { if (sin6->sin6_port == 0) return -EINVAL; - fl6.fl6_dport = sin6->sin6_port; + fl6->fl6_dport = sin6->sin6_port; daddr = &sin6->sin6_addr; if (np->sndflow) { - fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; - if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { - flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); + fl6->flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; + if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) { + flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); if (IS_ERR(flowlabel)) return -EINVAL; } @@ -1420,24 +1414,24 @@ do_udp_sendmsg: if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr))) - fl6.flowi6_oif = sin6->sin6_scope_id; + fl6->flowi6_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - fl6.fl6_dport = inet->inet_dport; + fl6->fl6_dport = inet->inet_dport; daddr = &sk->sk_v6_daddr; - fl6.flowlabel = np->flow_label; + fl6->flowlabel = np->flow_label; connected = true; } - if (!fl6.flowi6_oif) - fl6.flowi6_oif = sk->sk_bound_dev_if; + if (!fl6->flowi6_oif) + fl6->flowi6_oif = sk->sk_bound_dev_if; - if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; + if (!fl6->flowi6_oif) + fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; - fl6.flowi6_uid = sk->sk_uid; + fl6->flowi6_uid = sk->sk_uid; if (msg->msg_controllen) { opt = &opt_space; @@ -1447,14 +1441,14 @@ do_udp_sendmsg: err = udp_cmsg_send(sk, msg, &ipc6.gso_size); if (err > 0) - err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, + err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, fl6, &ipc6); if (err < 0) { fl6_sock_release(flowlabel); return err; } - if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { - flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); + if ((fl6->flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { + flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); if (IS_ERR(flowlabel)) return -EINVAL; } @@ -1471,16 +1465,17 @@ do_udp_sendmsg: opt = ipv6_fixup_options(&opt_space, opt); ipc6.opt = opt; - fl6.flowi6_proto = sk->sk_protocol; - fl6.flowi6_mark = ipc6.sockc.mark; - fl6.daddr = *daddr; - if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) - fl6.saddr = np->saddr; - fl6.fl6_sport = inet->inet_sport; + fl6->flowi6_proto = sk->sk_protocol; + fl6->flowi6_mark = ipc6.sockc.mark; + fl6->daddr = *daddr; + if (ipv6_addr_any(&fl6->saddr) && !ipv6_addr_any(&np->saddr)) + fl6->saddr = np->saddr; + fl6->fl6_sport = inet->inet_sport; if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) { err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, - (struct sockaddr *)sin6, &fl6.saddr); + (struct sockaddr *)sin6, + &fl6->saddr); if (err) goto out_no_dst; if (sin6) { @@ -1496,32 +1491,32 @@ do_udp_sendmsg: err = -EINVAL; goto out_no_dst; } - fl6.fl6_dport = sin6->sin6_port; - fl6.daddr = sin6->sin6_addr; + fl6->fl6_dport = sin6->sin6_port; + fl6->daddr = sin6->sin6_addr; } } - if (ipv6_addr_any(&fl6.daddr)) - fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ + if (ipv6_addr_any(&fl6->daddr)) + fl6->daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ - final_p = fl6_update_dst(&fl6, opt, &final); + final_p = fl6_update_dst(fl6, opt, &final); if (final_p) connected = false; - if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) { - fl6.flowi6_oif = np->mcast_oif; + if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr)) { + fl6->flowi6_oif = np->mcast_oif; connected = false; - } else if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->ucast_oif; + } else if (!fl6->flowi6_oif) + fl6->flowi6_oif = np->ucast_oif; - security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); if (ipc6.tclass < 0) ipc6.tclass = np->tclass; - fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); + fl6->flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6->flowlabel); - dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, connected); + dst = ip6_sk_dst_lookup_flow(sk, fl6, final_p, connected); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; @@ -1529,7 +1524,7 @@ do_udp_sendmsg: } if (ipc6.hlimit < 0) - ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); + ipc6.hlimit = ip6_sk_dst_hoplimit(np, fl6, dst); if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; @@ -1537,17 +1532,17 @@ back_from_confirm: /* Lockless fast path for the non-corking case */ if (!corkreq) { - struct inet_cork_full cork; struct sk_buff *skb; skb = ip6_make_skb(sk, getfrag, msg, ulen, sizeof(struct udphdr), &ipc6, - &fl6, (struct rt6_info *)dst, + (struct rt6_info *)dst, msg->msg_flags, &cork); err = PTR_ERR(skb); if (!IS_ERR_OR_NULL(skb)) - err = udp_v6_send_skb(skb, &fl6, &cork.base); - goto out; + err = udp_v6_send_skb(skb, fl6, &cork.base); + /* ip6_make_skb steals dst reference */ + goto out_no_dst; } lock_sock(sk); @@ -1568,7 +1563,7 @@ do_append_data: ipc6.dontfrag = np->dontfrag; up->len += ulen; err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), - &ipc6, &fl6, (struct rt6_info *)dst, + &ipc6, fl6, (struct rt6_info *)dst, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_v6_flush_pending_frames(sk); @@ -1603,7 +1598,7 @@ out_no_dst: do_confirm: if (msg->msg_flags & MSG_PROBE) - dst_confirm_neigh(dst, &fl6.daddr); + dst_confirm_neigh(dst, &fl6->daddr); if (!(msg->msg_flags&MSG_PROBE) || len) goto back_from_confirm; err = 0; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 645dd984fef0..3e82ac24d548 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -336,6 +336,8 @@ static void mptcp_parse_option(const struct sk_buff *skb, flags = *ptr++; mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT; mp_opt->reset_reason = *ptr; + pr_debug("MP_RST: transient=%u reason=%u", + mp_opt->reset_transient, mp_opt->reset_reason); break; case MPTCPOPT_MP_FAIL: @@ -1264,22 +1266,30 @@ static u16 mptcp_make_csum(const struct mptcp_ext *mpext) void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, struct mptcp_out_options *opts) { - if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) { - const struct sock *ssk = (const struct sock *)tp; - struct mptcp_subflow_context *subflow; - - subflow = mptcp_subflow_ctx(ssk); - subflow->send_mp_fail = 0; - - *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL, - TCPOLEN_MPTCP_FAIL, - 0, 0); - put_unaligned_be64(opts->fail_seq, ptr); - ptr += 2; - } - - /* DSS, MPC, MPJ, ADD_ADDR, FASTCLOSE and RST are mutually exclusive, - * see mptcp_established_options*() + const struct sock *ssk = (const struct sock *)tp; + struct mptcp_subflow_context *subflow; + + /* Which options can be used together? + * + * X: mutually exclusive + * O: often used together + * C: can be used together in some cases + * P: could be used together but we prefer not to (optimisations) + * + * Opt: | MPC | MPJ | DSS | ADD | RM | PRIO | FAIL | FC | + * ------|------|------|------|------|------|------|------|------| + * MPC |------|------|------|------|------|------|------|------| + * MPJ | X |------|------|------|------|------|------|------| + * DSS | X | X |------|------|------|------|------|------| + * ADD | X | X | P |------|------|------|------|------| + * RM | C | C | C | P |------|------|------|------| + * PRIO | X | C | C | C | C |------|------|------| + * FAIL | X | X | C | X | X | X |------|------| + * FC | X | X | X | X | X | X | X |------| + * RST | X | X | X | X | X | X | O | O | + * ------|------|------|------|------|------|------|------|------| + * + * The same applies in mptcp_established_options() function. */ if (likely(OPTION_MPTCP_DSS & opts->suboptions)) { struct mptcp_ext *mpext = &opts->ext_copy; @@ -1336,6 +1346,10 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, } ptr += 1; } + + /* We might need to add MP_FAIL options in rare cases */ + if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) + goto mp_fail; } else if (OPTIONS_MPTCP_MPC & opts->suboptions) { u8 len, flag = MPTCP_CAP_HMAC_SHA256; @@ -1479,6 +1493,21 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, if (OPTION_MPTCP_RST & opts->suboptions) goto mp_rst; return; + } else if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) { +mp_fail: + /* MP_FAIL is mutually exclusive with others except RST */ + subflow = mptcp_subflow_ctx(ssk); + subflow->send_mp_fail = 0; + + *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL, + TCPOLEN_MPTCP_FAIL, + 0, 0); + put_unaligned_be64(opts->fail_seq, ptr); + ptr += 2; + + if (OPTION_MPTCP_RST & opts->suboptions) + goto mp_rst; + return; } else if (unlikely(OPTION_MPTCP_RST & opts->suboptions)) { mp_rst: *ptr++ = mptcp_option(MPTCPOPT_RST, @@ -1489,9 +1518,6 @@ mp_rst: } if (OPTION_MPTCP_PRIO & opts->suboptions) { - const struct sock *ssk = (const struct sock *)tp; - struct mptcp_subflow_context *subflow; - subflow = mptcp_subflow_ctx(ssk); subflow->send_mp_prio = 0; diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 782b1d452269..d47795748ad7 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1728,9 +1728,20 @@ fail: return -EMSGSIZE; } -static int mptcp_nl_addr_backup(struct net *net, - struct mptcp_addr_info *addr, - u8 bkup) +static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, + struct mptcp_addr_info *addr) +{ + struct mptcp_rm_list list = { .nr = 0 }; + + list.ids[list.nr++] = addr->id; + + mptcp_pm_nl_rm_subflow_received(msk, &list); + mptcp_pm_create_subflow_or_signal_addr(msk); +} + +static int mptcp_nl_set_flags(struct net *net, + struct mptcp_addr_info *addr, + u8 bkup, u8 changed) { long s_slot = 0, s_num = 0; struct mptcp_sock *msk; @@ -1744,7 +1755,10 @@ static int mptcp_nl_addr_backup(struct net *net, lock_sock(sk); spin_lock_bh(&msk->pm.lock); - ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, bkup); + if (changed & MPTCP_PM_ADDR_FLAG_BACKUP) + ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, bkup); + if (changed & MPTCP_PM_ADDR_FLAG_FULLMESH) + mptcp_pm_nl_fullmesh(msk, addr); spin_unlock_bh(&msk->pm.lock); release_sock(sk); @@ -1761,6 +1775,8 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }, *entry; struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); + u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | + MPTCP_PM_ADDR_FLAG_FULLMESH; struct net *net = sock_net(skb->sk); u8 bkup = 0, lookup_by_id = 0; int ret; @@ -1783,15 +1799,18 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) spin_unlock_bh(&pernet->lock); return -EINVAL; } + if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && + (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { + spin_unlock_bh(&pernet->lock); + return -EINVAL; + } - if (bkup) - entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP; - else - entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; + changed = (addr.flags ^ entry->flags) & mask; + entry->flags = (entry->flags & ~mask) | (addr.flags & mask); addr = *entry; spin_unlock_bh(&pernet->lock); - mptcp_nl_addr_backup(net, &addr.addr, bkup); + mptcp_nl_set_flags(net, &addr.addr, bkup, changed); return 0; } diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index a135b1a46014..238b6a620e88 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -14,6 +14,11 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o +ifeq ($(CONFIG_NF_CONNTRACK),m) +nf_conntrack-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_conntrack_bpf.o +else ifeq ($(CONFIG_NF_CONNTRACK),y) +nf_conntrack-$(CONFIG_DEBUG_INFO_BTF) += nf_conntrack_bpf.o +endif obj-$(CONFIG_NETFILTER) = netfilter.o diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c new file mode 100644 index 000000000000..8ad3f52579f3 --- /dev/null +++ b/net/netfilter/nf_conntrack_bpf.c @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Unstable Conntrack Helpers for XDP and TC-BPF hook + * + * These are called from the XDP and SCHED_CLS BPF programs. Note that it is + * allowed to break compatibility for these functions since the interface they + * are exposed through to BPF programs is explicitly unstable. + */ + +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/types.h> +#include <linux/btf_ids.h> +#include <linux/net_namespace.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> + +/* bpf_ct_opts - Options for CT lookup helpers + * + * Members: + * @netns_id - Specify the network namespace for lookup + * Values: + * BPF_F_CURRENT_NETNS (-1) + * Use namespace associated with ctx (xdp_md, __sk_buff) + * [0, S32_MAX] + * Network Namespace ID + * @error - Out parameter, set for any errors encountered + * Values: + * -EINVAL - Passed NULL for bpf_tuple pointer + * -EINVAL - opts->reserved is not 0 + * -EINVAL - netns_id is less than -1 + * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12) + * -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP + * -ENONET - No network namespace found for netns_id + * -ENOENT - Conntrack lookup could not find entry for tuple + * -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4) + * or sizeof(tuple->ipv6) + * @l4proto - Layer 4 protocol + * Values: + * IPPROTO_TCP, IPPROTO_UDP + * @reserved - Reserved member, will be reused for more options in future + * Values: + * 0 + */ +struct bpf_ct_opts { + s32 netns_id; + s32 error; + u8 l4proto; + u8 reserved[3]; +}; + +enum { + NF_BPF_CT_OPTS_SZ = 12, +}; + +static struct nf_conn *__bpf_nf_ct_lookup(struct net *net, + struct bpf_sock_tuple *bpf_tuple, + u32 tuple_len, u8 protonum, + s32 netns_id) +{ + struct nf_conntrack_tuple_hash *hash; + struct nf_conntrack_tuple tuple; + + if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP)) + return ERR_PTR(-EPROTO); + if (unlikely(netns_id < BPF_F_CURRENT_NETNS)) + return ERR_PTR(-EINVAL); + + memset(&tuple, 0, sizeof(tuple)); + switch (tuple_len) { + case sizeof(bpf_tuple->ipv4): + tuple.src.l3num = AF_INET; + tuple.src.u3.ip = bpf_tuple->ipv4.saddr; + tuple.src.u.tcp.port = bpf_tuple->ipv4.sport; + tuple.dst.u3.ip = bpf_tuple->ipv4.daddr; + tuple.dst.u.tcp.port = bpf_tuple->ipv4.dport; + break; + case sizeof(bpf_tuple->ipv6): + tuple.src.l3num = AF_INET6; + memcpy(tuple.src.u3.ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr)); + tuple.src.u.tcp.port = bpf_tuple->ipv6.sport; + memcpy(tuple.dst.u3.ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr)); + tuple.dst.u.tcp.port = bpf_tuple->ipv6.dport; + break; + default: + return ERR_PTR(-EAFNOSUPPORT); + } + + tuple.dst.protonum = protonum; + + if (netns_id >= 0) { + net = get_net_ns_by_id(net, netns_id); + if (unlikely(!net)) + return ERR_PTR(-ENONET); + } + + hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple); + if (netns_id >= 0) + put_net(net); + if (!hash) + return ERR_PTR(-ENOENT); + return nf_ct_tuplehash_to_ctrack(hash); +} + +__diag_push(); +__diag_ignore(GCC, 8, "-Wmissing-prototypes", + "Global functions as their definitions will be in nf_conntrack BTF"); + +/* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a + * reference to it + * + * Parameters: + * @xdp_ctx - Pointer to ctx (xdp_md) in XDP program + * Cannot be NULL + * @bpf_tuple - Pointer to memory representing the tuple to look up + * Cannot be NULL + * @tuple__sz - Length of the tuple structure + * Must be one of sizeof(bpf_tuple->ipv4) or + * sizeof(bpf_tuple->ipv6) + * @opts - Additional options for lookup (documented above) + * Cannot be NULL + * @opts__sz - Length of the bpf_ct_opts structure + * Must be NF_BPF_CT_OPTS_SZ (12) + */ +struct nf_conn * +bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, + u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz) +{ + struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx; + struct net *caller_net; + struct nf_conn *nfct; + + BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ); + + if (!opts) + return NULL; + if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] || + opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) { + opts->error = -EINVAL; + return NULL; + } + caller_net = dev_net(ctx->rxq->dev); + nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto, + opts->netns_id); + if (IS_ERR(nfct)) { + opts->error = PTR_ERR(nfct); + return NULL; + } + return nfct; +} + +/* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a + * reference to it + * + * Parameters: + * @skb_ctx - Pointer to ctx (__sk_buff) in TC program + * Cannot be NULL + * @bpf_tuple - Pointer to memory representing the tuple to look up + * Cannot be NULL + * @tuple__sz - Length of the tuple structure + * Must be one of sizeof(bpf_tuple->ipv4) or + * sizeof(bpf_tuple->ipv6) + * @opts - Additional options for lookup (documented above) + * Cannot be NULL + * @opts__sz - Length of the bpf_ct_opts structure + * Must be NF_BPF_CT_OPTS_SZ (12) + */ +struct nf_conn * +bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple, + u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz) +{ + struct sk_buff *skb = (struct sk_buff *)skb_ctx; + struct net *caller_net; + struct nf_conn *nfct; + + BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ); + + if (!opts) + return NULL; + if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] || + opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) { + opts->error = -EINVAL; + return NULL; + } + caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk); + nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto, + opts->netns_id); + if (IS_ERR(nfct)) { + opts->error = PTR_ERR(nfct); + return NULL; + } + return nfct; +} + +/* bpf_ct_release - Release acquired nf_conn object + * + * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects + * the program if any references remain in the program in all of the explored + * states. + * + * Parameters: + * @nf_conn - Pointer to referenced nf_conn object, obtained using + * bpf_xdp_ct_lookup or bpf_skb_ct_lookup. + */ +void bpf_ct_release(struct nf_conn *nfct) +{ + if (!nfct) + return; + nf_ct_put(nfct); +} + +__diag_pop() + +BTF_SET_START(nf_ct_xdp_check_kfunc_ids) +BTF_ID(func, bpf_xdp_ct_lookup) +BTF_ID(func, bpf_ct_release) +BTF_SET_END(nf_ct_xdp_check_kfunc_ids) + +BTF_SET_START(nf_ct_tc_check_kfunc_ids) +BTF_ID(func, bpf_skb_ct_lookup) +BTF_ID(func, bpf_ct_release) +BTF_SET_END(nf_ct_tc_check_kfunc_ids) + +BTF_SET_START(nf_ct_acquire_kfunc_ids) +BTF_ID(func, bpf_xdp_ct_lookup) +BTF_ID(func, bpf_skb_ct_lookup) +BTF_SET_END(nf_ct_acquire_kfunc_ids) + +BTF_SET_START(nf_ct_release_kfunc_ids) +BTF_ID(func, bpf_ct_release) +BTF_SET_END(nf_ct_release_kfunc_ids) + +/* Both sets are identical */ +#define nf_ct_ret_null_kfunc_ids nf_ct_acquire_kfunc_ids + +static const struct btf_kfunc_id_set nf_conntrack_xdp_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &nf_ct_xdp_check_kfunc_ids, + .acquire_set = &nf_ct_acquire_kfunc_ids, + .release_set = &nf_ct_release_kfunc_ids, + .ret_null_set = &nf_ct_ret_null_kfunc_ids, +}; + +static const struct btf_kfunc_id_set nf_conntrack_tc_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &nf_ct_tc_check_kfunc_ids, + .acquire_set = &nf_ct_acquire_kfunc_ids, + .release_set = &nf_ct_release_kfunc_ids, + .ret_null_set = &nf_ct_ret_null_kfunc_ids, +}; + +int register_nf_conntrack_bpf(void) +{ + int ret; + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_xdp_kfunc_set); + return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_tc_kfunc_set); +} diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index d6aa5b47031e..d38d689de23c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -34,6 +34,7 @@ #include <linux/rculist_nulls.h> #include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_bpf.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_helper.h> @@ -2750,8 +2751,15 @@ int nf_conntrack_init_start(void) conntrack_gc_work_init(&conntrack_gc_work); queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ); + ret = register_nf_conntrack_bpf(); + if (ret < 0) + goto err_kfunc; + return 0; +err_kfunc: + cancel_delayed_work_sync(&conntrack_gc_work.dwork); + nf_conntrack_proto_fini(); err_proto: nf_conntrack_seqadj_fini(); err_seqadj: diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 8c89d0b0ca18..00b2e9deabb0 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -2626,8 +2626,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, sk->sk_state != SMC_CLOSED) { if (!val) { SMC_STAT_INC(smc, cork_cnt); - mod_delayed_work(smc->conn.lgr->tx_wq, - &smc->conn.tx_work, 0); + smc_tx_pending(&smc->conn); + cancel_delayed_work(&smc->conn.tx_work); } } break; @@ -2765,8 +2765,10 @@ static ssize_t smc_sendpage(struct socket *sock, struct page *page, rc = kernel_sendpage(smc->clcsock, page, offset, size, flags); } else { + lock_sock(sk); + rc = smc_tx_sendpage(smc, page, offset, size, flags); + release_sock(sk); SMC_STAT_INC(smc, sendpage_cnt); - rc = sock_no_sendpage(sock, page, offset, size, flags); } out: diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index be241d53020f..a96ce162825e 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -31,7 +31,6 @@ #include "smc_tracepoint.h" #define SMC_TX_WORK_DELAY 0 -#define SMC_TX_CORK_DELAY (HZ >> 2) /* 250 ms */ /***************************** sndbuf producer *******************************/ @@ -236,16 +235,15 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) */ if ((msg->msg_flags & MSG_OOB) && !send_remaining) conn->urg_tx_pend = true; - if ((msg->msg_flags & MSG_MORE || smc_tx_is_corked(smc)) && - (atomic_read(&conn->sndbuf_space) > - (conn->sndbuf_desc->len >> 1))) - /* for a corked socket defer the RDMA writes if there - * is still sufficient sndbuf_space available + if ((msg->msg_flags & MSG_MORE || smc_tx_is_corked(smc) || + msg->msg_flags & MSG_SENDPAGE_NOTLAST) && + (atomic_read(&conn->sndbuf_space))) + /* for a corked socket defer the RDMA writes if + * sndbuf_space is still available. The applications + * should known how/when to uncork it. */ - queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, - SMC_TX_CORK_DELAY); - else - smc_tx_sndbuf_nonempty(conn); + continue; + smc_tx_sndbuf_nonempty(conn); trace_smc_tx_sendmsg(smc, copylen); } /* while (msg_data_left(msg)) */ @@ -260,6 +258,22 @@ out_err: return rc; } +int smc_tx_sendpage(struct smc_sock *smc, struct page *page, int offset, + size_t size, int flags) +{ + struct msghdr msg = {.msg_flags = flags}; + char *kaddr = kmap(page); + struct kvec iov; + int rc; + + iov.iov_base = kaddr + offset; + iov.iov_len = size; + iov_iter_kvec(&msg.msg_iter, WRITE, &iov, 1, size); + rc = smc_tx_sendmsg(smc, &msg, size); + kunmap(page); + return rc; +} + /***************************** sndbuf consumer *******************************/ /* sndbuf consumer: actual data transfer of one target chunk with ISM write */ @@ -597,27 +611,32 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn) return rc; } -/* Wakeup sndbuf consumers from process context - * since there is more data to transmit - */ -void smc_tx_work(struct work_struct *work) +void smc_tx_pending(struct smc_connection *conn) { - struct smc_connection *conn = container_of(to_delayed_work(work), - struct smc_connection, - tx_work); struct smc_sock *smc = container_of(conn, struct smc_sock, conn); int rc; - lock_sock(&smc->sk); if (smc->sk.sk_err) - goto out; + return; rc = smc_tx_sndbuf_nonempty(conn); if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked && !atomic_read(&conn->bytes_to_rcv)) conn->local_rx_ctrl.prod_flags.write_blocked = 0; +} + +/* Wakeup sndbuf consumers from process context + * since there is more data to transmit + */ +void smc_tx_work(struct work_struct *work) +{ + struct smc_connection *conn = container_of(to_delayed_work(work), + struct smc_connection, + tx_work); + struct smc_sock *smc = container_of(conn, struct smc_sock, conn); -out: + lock_sock(&smc->sk); + smc_tx_pending(conn); release_sock(&smc->sk); } diff --git a/net/smc/smc_tx.h b/net/smc/smc_tx.h index 07e6ad76224a..34b578498b1f 100644 --- a/net/smc/smc_tx.h +++ b/net/smc/smc_tx.h @@ -27,9 +27,12 @@ static inline int smc_tx_prepared_sends(struct smc_connection *conn) return smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep); } +void smc_tx_pending(struct smc_connection *conn); void smc_tx_work(struct work_struct *work); void smc_tx_init(struct smc_sock *smc); int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len); +int smc_tx_sendpage(struct smc_sock *smc, struct page *page, int offset, + size_t size, int flags); int smc_tx_sndbuf_nonempty(struct smc_connection *conn); void smc_tx_sndbuf_nonfull(struct smc_sock *smc); void smc_tx_consumer_update(struct smc_connection *conn, bool force); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 5f42aa5fc612..8eb7e8544815 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -72,7 +72,8 @@ struct gss_auth { struct gss_api_mech *mech; enum rpc_gss_svc service; struct rpc_clnt *client; - struct net *net; + struct net *net; + netns_tracker ns_tracker; /* * There are two upcall pipes; dentry[1], named "gssd", is used * for the new text-based upcall; dentry[0] is named after the @@ -1013,7 +1014,8 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) goto err_free; } gss_auth->client = clnt; - gss_auth->net = get_net(rpc_net_ns(clnt)); + gss_auth->net = get_net_track(rpc_net_ns(clnt), &gss_auth->ns_tracker, + GFP_KERNEL); err = -EINVAL; gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor); if (!gss_auth->mech) @@ -1068,7 +1070,7 @@ err_destroy_credcache: err_put_mech: gss_mech_put(gss_auth->mech); err_put_net: - put_net(gss_auth->net); + put_net_track(gss_auth->net, &gss_auth->ns_tracker); err_free: kfree(gss_auth->target_name); kfree(gss_auth); @@ -1084,7 +1086,7 @@ gss_free(struct gss_auth *gss_auth) gss_pipe_free(gss_auth->gss_pipe[0]); gss_pipe_free(gss_auth->gss_pipe[1]); gss_mech_put(gss_auth->mech); - put_net(gss_auth->net); + put_net_track(gss_auth->net, &gss_auth->ns_tracker); kfree(gss_auth->target_name); kfree(gss_auth); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index b21ad7994147..db878e833b67 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -162,7 +162,7 @@ static void svc_xprt_free(struct kref *kref) if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) svcauth_unix_info_release(xprt); put_cred(xprt->xpt_cred); - put_net(xprt->xpt_net); + put_net_track(xprt->xpt_net, &xprt->ns_tracker); /* See comment on corresponding get in xs_setup_bc_tcp(): */ if (xprt->xpt_bc_xprt) xprt_put(xprt->xpt_bc_xprt); @@ -198,7 +198,7 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl, mutex_init(&xprt->xpt_mutex); spin_lock_init(&xprt->xpt_lock); set_bit(XPT_BUSY, &xprt->xpt_flags); - xprt->xpt_net = get_net(net); + xprt->xpt_net = get_net_track(net, &xprt->ns_tracker, GFP_ATOMIC); strcpy(xprt->xpt_remotebuf, "uninitialized"); } EXPORT_SYMBOL_GPL(svc_xprt_init); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index a02de2bddb28..5af484d6ba5e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1835,7 +1835,7 @@ EXPORT_SYMBOL_GPL(xprt_alloc); void xprt_free(struct rpc_xprt *xprt) { - put_net(xprt->xprt_net); + put_net_track(xprt->xprt_net, &xprt->ns_tracker); xprt_free_all_slots(xprt); xprt_free_id(xprt); rpc_sysfs_xprt_destroy(xprt); @@ -2027,7 +2027,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net) xprt_init_xid(xprt); - xprt->xprt_net = get_net(net); + xprt->xprt_net = get_net_track(net, &xprt->ns_tracker, GFP_KERNEL); } /** diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 64ae4c4c44f8..c5eec16213d7 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -226,14 +226,6 @@ static inline void msg_set_bits(struct tipc_msg *m, u32 w, m->hdr[w] |= htonl(val); } -static inline void msg_swap_words(struct tipc_msg *msg, u32 a, u32 b) -{ - u32 temp = msg->hdr[a]; - - msg->hdr[a] = msg->hdr[b]; - msg->hdr[b] = temp; -} - /* * Word 0 */ @@ -480,11 +472,6 @@ static inline void msg_incr_reroute_cnt(struct tipc_msg *m) msg_set_bits(m, 1, 21, 0xf, msg_reroute_cnt(m) + 1); } -static inline void msg_reset_reroute_cnt(struct tipc_msg *m) -{ - msg_set_bits(m, 1, 21, 0xf, 0); -} - static inline u32 msg_lookup_scope(struct tipc_msg *m) { return msg_bits(m, 1, 19, 0x3); @@ -800,11 +787,6 @@ static inline void msg_set_dest_domain(struct tipc_msg *m, u32 n) msg_set_word(m, 2, n); } -static inline u32 msg_bcgap_after(struct tipc_msg *m) -{ - return msg_bits(m, 2, 16, 0xffff); -} - static inline void msg_set_bcgap_after(struct tipc_msg *m, u32 n) { msg_set_bits(m, 2, 16, 0xffff, n); @@ -868,11 +850,6 @@ static inline void msg_set_next_sent(struct tipc_msg *m, u16 n) msg_set_bits(m, 4, 0, 0xffff, n); } -static inline void msg_set_long_msgno(struct tipc_msg *m, u32 n) -{ - msg_set_bits(m, 4, 0, 0xffff, n); -} - static inline u32 msg_bc_netid(struct tipc_msg *m) { return msg_word(m, 4); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c19569819866..3e0d6281fd1e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -3240,49 +3240,58 @@ static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) return sk; } -static struct sock *unix_next_socket(struct seq_file *seq, - struct sock *sk, - loff_t *pos) +static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos) { unsigned long bucket = get_bucket(*pos); + struct sock *sk; - while (sk > (struct sock *)SEQ_START_TOKEN) { - sk = sk_next(sk); - if (!sk) - goto next_bucket; - if (sock_net(sk) == seq_file_net(seq)) - return sk; - } - - do { + while (bucket < ARRAY_SIZE(unix_socket_table)) { spin_lock(&unix_table_locks[bucket]); + sk = unix_from_bucket(seq, pos); if (sk) return sk; -next_bucket: - spin_unlock(&unix_table_locks[bucket++]); - *pos = set_bucket_offset(bucket, 1); - } while (bucket < ARRAY_SIZE(unix_socket_table)); + spin_unlock(&unix_table_locks[bucket]); + + *pos = set_bucket_offset(++bucket, 1); + } return NULL; } +static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk, + loff_t *pos) +{ + unsigned long bucket = get_bucket(*pos); + + for (sk = sk_next(sk); sk; sk = sk_next(sk)) + if (sock_net(sk) == seq_file_net(seq)) + return sk; + + spin_unlock(&unix_table_locks[bucket]); + + *pos = set_bucket_offset(++bucket, 1); + + return unix_get_first(seq, pos); +} + static void *unix_seq_start(struct seq_file *seq, loff_t *pos) { if (!*pos) return SEQ_START_TOKEN; - if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table)) - return NULL; - - return unix_next_socket(seq, NULL, pos); + return unix_get_first(seq, pos); } static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; - return unix_next_socket(seq, v, pos); + + if (v == SEQ_START_TOKEN) + return unix_get_first(seq, pos); + + return unix_get_next(seq, v, pos); } static void unix_seq_stop(struct seq_file *seq, void *v) @@ -3347,6 +3356,15 @@ static const struct seq_operations unix_seq_ops = { }; #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) +struct bpf_unix_iter_state { + struct seq_net_private p; + unsigned int cur_sk; + unsigned int end_sk; + unsigned int max_sk; + struct sock **batch; + bool st_bucket_done; +}; + struct bpf_iter__unix { __bpf_md_ptr(struct bpf_iter_meta *, meta); __bpf_md_ptr(struct unix_sock *, unix_sk); @@ -3365,24 +3383,156 @@ static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, return bpf_iter_run_prog(prog, &ctx); } +static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) + +{ + struct bpf_unix_iter_state *iter = seq->private; + unsigned int expected = 1; + struct sock *sk; + + sock_hold(start_sk); + iter->batch[iter->end_sk++] = start_sk; + + for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) { + if (sock_net(sk) != seq_file_net(seq)) + continue; + + if (iter->end_sk < iter->max_sk) { + sock_hold(sk); + iter->batch[iter->end_sk++] = sk; + } + + expected++; + } + + spin_unlock(&unix_table_locks[start_sk->sk_hash]); + + return expected; +} + +static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter) +{ + while (iter->cur_sk < iter->end_sk) + sock_put(iter->batch[iter->cur_sk++]); +} + +static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter, + unsigned int new_batch_sz) +{ + struct sock **new_batch; + + new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz, + GFP_USER | __GFP_NOWARN); + if (!new_batch) + return -ENOMEM; + + bpf_iter_unix_put_batch(iter); + kvfree(iter->batch); + iter->batch = new_batch; + iter->max_sk = new_batch_sz; + + return 0; +} + +static struct sock *bpf_iter_unix_batch(struct seq_file *seq, + loff_t *pos) +{ + struct bpf_unix_iter_state *iter = seq->private; + unsigned int expected; + bool resized = false; + struct sock *sk; + + if (iter->st_bucket_done) + *pos = set_bucket_offset(get_bucket(*pos) + 1, 1); + +again: + /* Get a new batch */ + iter->cur_sk = 0; + iter->end_sk = 0; + + sk = unix_get_first(seq, pos); + if (!sk) + return NULL; /* Done */ + + expected = bpf_iter_unix_hold_batch(seq, sk); + + if (iter->end_sk == expected) { + iter->st_bucket_done = true; + return sk; + } + + if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) { + resized = true; + goto again; + } + + return sk; +} + +static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos) +{ + if (!*pos) + return SEQ_START_TOKEN; + + /* bpf iter does not support lseek, so it always + * continue from where it was stop()-ped. + */ + return bpf_iter_unix_batch(seq, pos); +} + +static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct bpf_unix_iter_state *iter = seq->private; + struct sock *sk; + + /* Whenever seq_next() is called, the iter->cur_sk is + * done with seq_show(), so advance to the next sk in + * the batch. + */ + if (iter->cur_sk < iter->end_sk) + sock_put(iter->batch[iter->cur_sk++]); + + ++*pos; + + if (iter->cur_sk < iter->end_sk) + sk = iter->batch[iter->cur_sk]; + else + sk = bpf_iter_unix_batch(seq, pos); + + return sk; +} + static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v) { struct bpf_iter_meta meta; struct bpf_prog *prog; struct sock *sk = v; uid_t uid; + bool slow; + int ret; if (v == SEQ_START_TOKEN) return 0; + slow = lock_sock_fast(sk); + + if (unlikely(sk_unhashed(sk))) { + ret = SEQ_SKIP; + goto unlock; + } + uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); meta.seq = seq; prog = bpf_iter_get_info(&meta, false); - return unix_prog_seq_show(prog, &meta, v, uid); + ret = unix_prog_seq_show(prog, &meta, v, uid); +unlock: + unlock_sock_fast(sk, slow); + return ret; } static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v) { + struct bpf_unix_iter_state *iter = seq->private; struct bpf_iter_meta meta; struct bpf_prog *prog; @@ -3393,12 +3543,13 @@ static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v) (void)unix_prog_seq_show(prog, &meta, v, 0); } - unix_seq_stop(seq, v); + if (iter->cur_sk < iter->end_sk) + bpf_iter_unix_put_batch(iter); } static const struct seq_operations bpf_iter_unix_seq_ops = { - .start = unix_seq_start, - .next = unix_seq_next, + .start = bpf_iter_unix_seq_start, + .next = bpf_iter_unix_seq_next, .stop = bpf_iter_unix_seq_stop, .show = bpf_iter_unix_seq_show, }; @@ -3447,13 +3598,55 @@ static struct pernet_operations unix_net_ops = { DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta, struct unix_sock *unix_sk, uid_t uid) +#define INIT_BATCH_SZ 16 + +static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux) +{ + struct bpf_unix_iter_state *iter = priv_data; + int err; + + err = bpf_iter_init_seq_net(priv_data, aux); + if (err) + return err; + + err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ); + if (err) { + bpf_iter_fini_seq_net(priv_data); + return err; + } + + return 0; +} + +static void bpf_iter_fini_unix(void *priv_data) +{ + struct bpf_unix_iter_state *iter = priv_data; + + bpf_iter_fini_seq_net(priv_data); + kvfree(iter->batch); +} + static const struct bpf_iter_seq_info unix_seq_info = { .seq_ops = &bpf_iter_unix_seq_ops, - .init_seq_private = bpf_iter_init_seq_net, - .fini_seq_private = bpf_iter_fini_seq_net, - .seq_priv_size = sizeof(struct seq_net_private), + .init_seq_private = bpf_iter_init_unix, + .fini_seq_private = bpf_iter_fini_unix, + .seq_priv_size = sizeof(struct bpf_unix_iter_state), }; +static const struct bpf_func_proto * +bpf_iter_unix_get_func_proto(enum bpf_func_id func_id, + const struct bpf_prog *prog) +{ + switch (func_id) { + case BPF_FUNC_setsockopt: + return &bpf_sk_setsockopt_proto; + case BPF_FUNC_getsockopt: + return &bpf_sk_getsockopt_proto; + default: + return NULL; + } +} + static struct bpf_iter_reg unix_reg_info = { .target = "unix", .ctx_arg_info_size = 1, @@ -3461,6 +3654,7 @@ static struct bpf_iter_reg unix_reg_info = { { offsetof(struct bpf_iter__unix, unix_sk), PTR_TO_BTF_ID_OR_NULL }, }, + .get_func_proto = bpf_iter_unix_get_func_proto, .seq_info = &unix_seq_info, }; diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index 8675fa5273df..3ec8ad9c1750 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -26,12 +26,12 @@ static void int_exit(int sig) { __u32 curr_prog_id = 0; - if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { - printf("bpf_get_link_xdp_id failed\n"); + if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) { + printf("bpf_xdp_query_id failed\n"); exit(1); } if (prog_id == curr_prog_id) - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + bpf_xdp_detach(ifindex, xdp_flags, NULL); else if (!curr_prog_id) printf("couldn't find a prog id on a given interface\n"); else @@ -143,7 +143,7 @@ int main(int argc, char **argv) signal(SIGINT, int_exit); signal(SIGTERM, int_exit); - if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { + if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) { printf("link set xdp fd failed\n"); return 1; } diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c index a70b094c8ec5..6c61d5f570fb 100644 --- a/samples/bpf/xdp_adjust_tail_user.c +++ b/samples/bpf/xdp_adjust_tail_user.c @@ -34,12 +34,12 @@ static void int_exit(int sig) __u32 curr_prog_id = 0; if (ifindex > -1) { - if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { - printf("bpf_get_link_xdp_id failed\n"); + if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) { + printf("bpf_xdp_query_id failed\n"); exit(1); } if (prog_id == curr_prog_id) - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + bpf_xdp_detach(ifindex, xdp_flags, NULL); else if (!curr_prog_id) printf("couldn't find a prog id on a given iface\n"); else @@ -173,7 +173,7 @@ int main(int argc, char **argv) signal(SIGINT, int_exit); signal(SIGTERM, int_exit); - if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { + if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) { printf("link set xdp fd failed\n"); return 1; } diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c index 4ad896782f77..79ccd9891924 100644 --- a/samples/bpf/xdp_fwd_user.c +++ b/samples/bpf/xdp_fwd_user.c @@ -33,7 +33,7 @@ static int do_attach(int idx, int prog_fd, int map_fd, const char *name) { int err; - err = bpf_set_link_xdp_fd(idx, prog_fd, xdp_flags); + err = bpf_xdp_attach(idx, prog_fd, xdp_flags, NULL); if (err < 0) { printf("ERROR: failed to attach program to %s\n", name); return err; @@ -51,7 +51,7 @@ static int do_detach(int idx, const char *name) { int err; - err = bpf_set_link_xdp_fd(idx, -1, xdp_flags); + err = bpf_xdp_detach(idx, xdp_flags, NULL); if (err < 0) printf("ERROR: failed to detach program from %s\n", name); diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index cfaf7e50e431..2d565ba54b8c 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -43,13 +43,13 @@ static void int_exit(int sig) int i = 0; for (i = 0; i < total_ifindex; i++) { - if (bpf_get_link_xdp_id(ifindex_list[i], &prog_id, flags)) { - printf("bpf_get_link_xdp_id on iface %d failed\n", + if (bpf_xdp_query_id(ifindex_list[i], flags, &prog_id)) { + printf("bpf_xdp_query_id on iface %d failed\n", ifindex_list[i]); exit(1); } if (prog_id_list[i] == prog_id) - bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); + bpf_xdp_detach(ifindex_list[i], flags, NULL); else if (!prog_id) printf("couldn't find a prog id on iface %d\n", ifindex_list[i]); @@ -716,12 +716,12 @@ int main(int ac, char **argv) } prog_id_list = (__u32 *)calloc(total_ifindex, sizeof(__u32 *)); for (i = 0; i < total_ifindex; i++) { - if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd, flags) < 0) { + if (bpf_xdp_attach(ifindex_list[i], prog_fd, flags, NULL) < 0) { printf("link set xdp fd failed\n"); int recovery_index = i; for (i = 0; i < recovery_index; i++) - bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); + bpf_xdp_detach(ifindex_list[i], flags, NULL); return 1; } diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c index 74a2926eba08..fb2532d13aac 100644 --- a/samples/bpf/xdp_rxq_info_user.c +++ b/samples/bpf/xdp_rxq_info_user.c @@ -62,15 +62,15 @@ static void int_exit(int sig) __u32 curr_prog_id = 0; if (ifindex > -1) { - if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { - printf("bpf_get_link_xdp_id failed\n"); + if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) { + printf("bpf_xdp_query_id failed\n"); exit(EXIT_FAIL); } if (prog_id == curr_prog_id) { fprintf(stderr, "Interrupted: Removing XDP program on ifindex:%d device:%s\n", ifindex, ifname); - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + bpf_xdp_detach(ifindex, xdp_flags, NULL); } else if (!curr_prog_id) { printf("couldn't find a prog id on a given iface\n"); } else { @@ -209,7 +209,7 @@ static struct datarec *alloc_record_per_cpu(void) static struct record *alloc_record_per_rxq(void) { - unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; + unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); struct record *array; array = calloc(nr_rxqs, sizeof(struct record)); @@ -222,7 +222,7 @@ static struct record *alloc_record_per_rxq(void) static struct stats_record *alloc_stats_record(void) { - unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; + unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); struct stats_record *rec; int i; @@ -241,7 +241,7 @@ static struct stats_record *alloc_stats_record(void) static void free_stats_record(struct stats_record *r) { - unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; + unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); int i; for (i = 0; i < nr_rxqs; i++) @@ -289,7 +289,7 @@ static void stats_collect(struct stats_record *rec) map_collect_percpu(fd, 0, &rec->stats); fd = bpf_map__fd(rx_queue_index_map); - max_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; + max_rxqs = bpf_map__max_entries(rx_queue_index_map); for (i = 0; i < max_rxqs; i++) map_collect_percpu(fd, i, &rec->rxq[i]); } @@ -335,7 +335,7 @@ static void stats_print(struct stats_record *stats_rec, struct stats_record *stats_prev, int action, __u32 cfg_opt) { - unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; + unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); unsigned int nr_cpus = bpf_num_possible_cpus(); double pps = 0, err = 0; struct record *rec, *prev; @@ -582,7 +582,7 @@ int main(int argc, char **argv) signal(SIGINT, int_exit); signal(SIGTERM, int_exit); - if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { + if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) { fprintf(stderr, "link set xdp fd failed\n"); return EXIT_FAIL_XDP; } diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c index 587eacb49103..0a2b3e997aed 100644 --- a/samples/bpf/xdp_sample_pkts_user.c +++ b/samples/bpf/xdp_sample_pkts_user.c @@ -30,7 +30,7 @@ static int do_attach(int idx, int fd, const char *name) __u32 info_len = sizeof(info); int err; - err = bpf_set_link_xdp_fd(idx, fd, xdp_flags); + err = bpf_xdp_attach(idx, fd, xdp_flags, NULL); if (err < 0) { printf("ERROR: failed to attach program to %s\n", name); return err; @@ -51,13 +51,13 @@ static int do_detach(int idx, const char *name) __u32 curr_prog_id = 0; int err = 0; - err = bpf_get_link_xdp_id(idx, &curr_prog_id, xdp_flags); + err = bpf_xdp_query_id(idx, xdp_flags, &curr_prog_id); if (err) { - printf("bpf_get_link_xdp_id failed\n"); + printf("bpf_xdp_query_id failed\n"); return err; } if (prog_id == curr_prog_id) { - err = bpf_set_link_xdp_fd(idx, -1, xdp_flags); + err = bpf_xdp_detach(idx, xdp_flags, NULL); if (err < 0) printf("ERROR: failed to detach prog from %s\n", name); } else if (!curr_prog_id) { diff --git a/samples/bpf/xdp_sample_user.c b/samples/bpf/xdp_sample_user.c index 8740838e7767..ae70a7943d85 100644 --- a/samples/bpf/xdp_sample_user.c +++ b/samples/bpf/xdp_sample_user.c @@ -1265,7 +1265,7 @@ static int __sample_remove_xdp(int ifindex, __u32 prog_id, int xdp_flags) int ret; if (prog_id) { - ret = bpf_get_link_xdp_id(ifindex, &cur_prog_id, xdp_flags); + ret = bpf_xdp_query_id(ifindex, xdp_flags, &cur_prog_id); if (ret < 0) return -errno; @@ -1278,7 +1278,7 @@ static int __sample_remove_xdp(int ifindex, __u32 prog_id, int xdp_flags) } } - return bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + return bpf_xdp_detach(ifindex, xdp_flags, NULL); } int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic, @@ -1295,8 +1295,7 @@ int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic, xdp_flags |= !force ? XDP_FLAGS_UPDATE_IF_NOEXIST : 0; xdp_flags |= generic ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE; - ret = bpf_set_link_xdp_fd(ifindex, bpf_program__fd(xdp_prog), - xdp_flags); + ret = bpf_xdp_attach(ifindex, bpf_program__fd(xdp_prog), xdp_flags, NULL); if (ret < 0) { ret = -errno; fprintf(stderr, @@ -1308,7 +1307,7 @@ int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic, return ret; } - ret = bpf_get_link_xdp_id(ifindex, &prog_id, xdp_flags); + ret = bpf_xdp_query_id(ifindex, xdp_flags, &prog_id); if (ret < 0) { ret = -errno; fprintf(stderr, diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index 1d4f305d02aa..7370c03c96fc 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -32,12 +32,12 @@ static void int_exit(int sig) __u32 curr_prog_id = 0; if (ifindex > -1) { - if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { - printf("bpf_get_link_xdp_id failed\n"); + if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) { + printf("bpf_xdp_query_id failed\n"); exit(1); } if (prog_id == curr_prog_id) - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + bpf_xdp_detach(ifindex, xdp_flags, NULL); else if (!curr_prog_id) printf("couldn't find a prog id on a given iface\n"); else @@ -288,7 +288,7 @@ int main(int argc, char **argv) } } - if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { + if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) { printf("link set xdp fd failed\n"); return 1; } @@ -302,7 +302,7 @@ int main(int argc, char **argv) poll_stats(kill_after_s); - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + bpf_xdp_detach(ifindex, xdp_flags, NULL); return 0; } diff --git a/samples/bpf/xdpsock_ctrl_proc.c b/samples/bpf/xdpsock_ctrl_proc.c index cc4408797ab7..28b5f2a9fa08 100644 --- a/samples/bpf/xdpsock_ctrl_proc.c +++ b/samples/bpf/xdpsock_ctrl_proc.c @@ -173,7 +173,7 @@ main(int argc, char **argv) unlink(SOCKET_NAME); /* Unset fd for given ifindex */ - err = bpf_set_link_xdp_fd(ifindex, -1, 0); + err = bpf_xdp_detach(ifindex, 0, NULL); if (err) { fprintf(stderr, "Error when unsetting bpf prog_fd for ifindex(%d)\n", ifindex); return err; diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index aa50864e4415..19288a2bbc75 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -571,13 +571,13 @@ static void remove_xdp_program(void) { u32 curr_prog_id = 0; - if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) { - printf("bpf_get_link_xdp_id failed\n"); + if (bpf_xdp_query_id(opt_ifindex, opt_xdp_flags, &curr_prog_id)) { + printf("bpf_xdp_query_id failed\n"); exit(EXIT_FAILURE); } if (prog_id == curr_prog_id) - bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags); + bpf_xdp_detach(opt_ifindex, opt_xdp_flags, NULL); else if (!curr_prog_id) printf("couldn't find a prog id on a given interface\n"); else @@ -1027,7 +1027,7 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem, if (ret) exit_with_error(-ret); - ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags); + ret = bpf_xdp_query_id(opt_ifindex, opt_xdp_flags, &prog_id); if (ret) exit_with_error(-ret); @@ -1760,7 +1760,7 @@ static void load_xdp_program(char **argv, struct bpf_object **obj) exit(EXIT_FAILURE); } - if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) { + if (bpf_xdp_attach(opt_ifindex, prog_fd, opt_xdp_flags, NULL) < 0) { fprintf(stderr, "ERROR: link set xdp fd failed\n"); exit(EXIT_FAILURE); } diff --git a/samples/bpf/xsk_fwd.c b/samples/bpf/xsk_fwd.c index 52e7c4ffd228..2220509588a0 100644 --- a/samples/bpf/xsk_fwd.c +++ b/samples/bpf/xsk_fwd.c @@ -974,8 +974,8 @@ static void remove_xdp_program(void) int i; for (i = 0 ; i < n_ports; i++) - bpf_set_link_xdp_fd(if_nametoindex(port_params[i].iface), -1, - port_params[i].xsk_cfg.xdp_flags); + bpf_xdp_detach(if_nametoindex(port_params[i].iface), + port_params[i].xsk_cfg.xdp_flags, NULL); } int main(int argc, char **argv) diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index a6403ddf5de7..096625242475 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -87,21 +87,25 @@ class HeaderParser(object): self.line = '' self.helpers = [] self.commands = [] + self.desc_unique_helpers = set() + self.define_unique_helpers = [] + self.desc_syscalls = [] + self.enum_syscalls = [] def parse_element(self): proto = self.parse_symbol() - desc = self.parse_desc() - ret = self.parse_ret() + desc = self.parse_desc(proto) + ret = self.parse_ret(proto) return APIElement(proto=proto, desc=desc, ret=ret) def parse_helper(self): proto = self.parse_proto() - desc = self.parse_desc() - ret = self.parse_ret() + desc = self.parse_desc(proto) + ret = self.parse_ret(proto) return Helper(proto=proto, desc=desc, ret=ret) def parse_symbol(self): - p = re.compile(' \* ?(.+)$') + p = re.compile(' \* ?(BPF\w+)$') capture = p.match(self.line) if not capture: raise NoSyscallCommandFound @@ -127,16 +131,15 @@ class HeaderParser(object): self.line = self.reader.readline() return capture.group(1) - def parse_desc(self): + def parse_desc(self, proto): p = re.compile(' \* ?(?:\t| {5,8})Description$') capture = p.match(self.line) if not capture: - # Helper can have empty description and we might be parsing another - # attribute: return but do not consume. - return '' + raise Exception("No description section found for " + proto) # Description can be several lines, some of them possibly empty, and it # stops when another subsection title is met. desc = '' + desc_present = False while True: self.line = self.reader.readline() if self.line == ' *\n': @@ -145,21 +148,24 @@ class HeaderParser(object): p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)') capture = p.match(self.line) if capture: + desc_present = True desc += capture.group(1) + '\n' else: break + + if not desc_present: + raise Exception("No description found for " + proto) return desc - def parse_ret(self): + def parse_ret(self, proto): p = re.compile(' \* ?(?:\t| {5,8})Return$') capture = p.match(self.line) if not capture: - # Helper can have empty retval and we might be parsing another - # attribute: return but do not consume. - return '' + raise Exception("No return section found for " + proto) # Return value description can be several lines, some of them possibly # empty, and it stops when another subsection title is met. ret = '' + ret_present = False while True: self.line = self.reader.readline() if self.line == ' *\n': @@ -168,44 +174,101 @@ class HeaderParser(object): p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)') capture = p.match(self.line) if capture: + ret_present = True ret += capture.group(1) + '\n' else: break + + if not ret_present: + raise Exception("No return found for " + proto) return ret - def seek_to(self, target, help_message): + def seek_to(self, target, help_message, discard_lines = 1): self.reader.seek(0) offset = self.reader.read().find(target) if offset == -1: raise Exception(help_message) self.reader.seek(offset) self.reader.readline() - self.reader.readline() + for _ in range(discard_lines): + self.reader.readline() self.line = self.reader.readline() - def parse_syscall(self): + def parse_desc_syscall(self): self.seek_to('* DOC: eBPF Syscall Commands', 'Could not find start of eBPF syscall descriptions list') while True: try: command = self.parse_element() self.commands.append(command) + self.desc_syscalls.append(command.proto) + except NoSyscallCommandFound: break - def parse_helpers(self): + def parse_enum_syscall(self): + self.seek_to('enum bpf_cmd {', + 'Could not find start of bpf_cmd enum', 0) + # Searches for either one or more BPF\w+ enums + bpf_p = re.compile('\s*(BPF\w+)+') + # Searches for an enum entry assigned to another entry, + # for e.g. BPF_PROG_RUN = BPF_PROG_TEST_RUN, which is + # not documented hence should be skipped in check to + # determine if the right number of syscalls are documented + assign_p = re.compile('\s*(BPF\w+)\s*=\s*(BPF\w+)') + bpf_cmd_str = '' + while True: + capture = assign_p.match(self.line) + if capture: + # Skip line if an enum entry is assigned to another entry + self.line = self.reader.readline() + continue + capture = bpf_p.match(self.line) + if capture: + bpf_cmd_str += self.line + else: + break + self.line = self.reader.readline() + # Find the number of occurences of BPF\w+ + self.enum_syscalls = re.findall('(BPF\w+)+', bpf_cmd_str) + + def parse_desc_helpers(self): self.seek_to('* Start of BPF helper function descriptions:', 'Could not find start of eBPF helper descriptions list') while True: try: helper = self.parse_helper() self.helpers.append(helper) + proto = helper.proto_break_down() + self.desc_unique_helpers.add(proto['name']) except NoHelperFound: break + def parse_define_helpers(self): + # Parse the number of FN(...) in #define __BPF_FUNC_MAPPER to compare + # later with the number of unique function names present in description. + # Note: seek_to(..) discards the first line below the target search text, + # resulting in FN(unspec) being skipped and not added to self.define_unique_helpers. + self.seek_to('#define __BPF_FUNC_MAPPER(FN)', + 'Could not find start of eBPF helper definition list') + # Searches for either one or more FN(\w+) defines or a backslash for newline + p = re.compile('\s*(FN\(\w+\))+|\\\\') + fn_defines_str = '' + while True: + capture = p.match(self.line) + if capture: + fn_defines_str += self.line + else: + break + self.line = self.reader.readline() + # Find the number of occurences of FN(\w+) + self.define_unique_helpers = re.findall('FN\(\w+\)', fn_defines_str) + def run(self): - self.parse_syscall() - self.parse_helpers() + self.parse_desc_syscall() + self.parse_enum_syscall() + self.parse_desc_helpers() + self.parse_define_helpers() self.reader.close() ############################################################################### @@ -235,6 +298,25 @@ class Printer(object): self.print_one(elem) self.print_footer() + def elem_number_check(self, desc_unique_elem, define_unique_elem, type, instance): + """ + Checks the number of helpers/syscalls documented within the header file + description with those defined as part of enum/macro and raise an + Exception if they don't match. + """ + nr_desc_unique_elem = len(desc_unique_elem) + nr_define_unique_elem = len(define_unique_elem) + if nr_desc_unique_elem != nr_define_unique_elem: + exception_msg = ''' +The number of unique %s in description (%d) doesn\'t match the number of unique %s defined in %s (%d) +''' % (type, nr_desc_unique_elem, type, instance, nr_define_unique_elem) + if nr_desc_unique_elem < nr_define_unique_elem: + # Function description is parsed until no helper is found (which can be due to + # misformatting). Hence, only print the first missing/misformatted helper/enum. + exception_msg += ''' +The description for %s is not present or formatted correctly. +''' % (define_unique_elem[nr_desc_unique_elem]) + raise Exception(exception_msg) class PrinterRST(Printer): """ @@ -295,7 +377,6 @@ class PrinterRST(Printer): print('') - class PrinterHelpersRST(PrinterRST): """ A printer for dumping collected information about helpers as a ReStructured @@ -305,6 +386,7 @@ class PrinterHelpersRST(PrinterRST): """ def __init__(self, parser): self.elements = parser.helpers + self.elem_number_check(parser.desc_unique_helpers, parser.define_unique_helpers, 'helper', '__BPF_FUNC_MAPPER') def print_header(self): header = '''\ @@ -478,6 +560,7 @@ class PrinterSyscallRST(PrinterRST): """ def __init__(self, parser): self.elements = parser.commands + self.elem_number_check(parser.desc_syscalls, parser.enum_syscalls, 'syscall', 'bpf_cmd') def print_header(self): header = '''\ @@ -509,6 +592,7 @@ class PrinterHelpers(Printer): """ def __init__(self, parser): self.elements = parser.helpers + self.elem_number_check(parser.desc_unique_helpers, parser.define_unique_helpers, 'helper', '__BPF_FUNC_MAPPER') type_fwds = [ 'struct bpf_fib_lookup', diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 842889f3dcb7..a9f8c63a96d1 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -838,7 +838,7 @@ int devcgroup_check_permission(short type, u32 major, u32 minor, short access) int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access); if (rc) - return -EPERM; + return rc; #ifdef CONFIG_CGROUP_DEVICE return devcgroup_legacy_check_permission(type, major, minor, access); diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 59833125ac0a..a2c665beda87 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -902,7 +902,7 @@ static int do_show(int argc, char **argv) equal_fn_for_key_as_id, NULL); btf_map_table = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); - if (!btf_prog_table || !btf_map_table) { + if (IS_ERR(btf_prog_table) || IS_ERR(btf_map_table)) { hashmap__free(btf_prog_table); hashmap__free(btf_map_table); if (fd >= 0) diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index 3571a281c43f..effe136119d7 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -50,6 +50,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type, const char *attach_flags_str, int level) { + char prog_name[MAX_PROG_FULL_NAME]; struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); int prog_fd; @@ -63,6 +64,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type, return -1; } + get_prog_full_name(&info, prog_fd, prog_name, sizeof(prog_name)); if (json_output) { jsonw_start_object(json_wtr); jsonw_uint_field(json_wtr, "id", info.id); @@ -73,7 +75,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type, jsonw_uint_field(json_wtr, "attach_type", attach_type); jsonw_string_field(json_wtr, "attach_flags", attach_flags_str); - jsonw_string_field(json_wtr, "name", info.name); + jsonw_string_field(json_wtr, "name", prog_name); jsonw_end_object(json_wtr); } else { printf("%s%-8u ", level ? " " : "", info.id); @@ -81,7 +83,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type, printf("%-15s", attach_type_name[attach_type]); else printf("type %-10u", attach_type); - printf(" %-15s %-15s\n", attach_flags_str, info.name); + printf(" %-15s %-15s\n", attach_flags_str, prog_name); } close(prog_fd); diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index fa8eb8134344..111dff809c7b 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -24,6 +24,7 @@ #include <bpf/bpf.h> #include <bpf/hashmap.h> #include <bpf/libbpf.h> /* libbpf_num_possible_cpus */ +#include <bpf/btf.h> #include "main.h" @@ -304,6 +305,49 @@ const char *get_fd_type_name(enum bpf_obj_type type) return names[type]; } +void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd, + char *name_buff, size_t buff_len) +{ + const char *prog_name = prog_info->name; + const struct btf_type *func_type; + const struct bpf_func_info finfo; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + struct btf *prog_btf = NULL; + + if (buff_len <= BPF_OBJ_NAME_LEN || + strlen(prog_info->name) < BPF_OBJ_NAME_LEN - 1) + goto copy_name; + + if (!prog_info->btf_id || prog_info->nr_func_info == 0) + goto copy_name; + + info.nr_func_info = 1; + info.func_info_rec_size = prog_info->func_info_rec_size; + if (info.func_info_rec_size > sizeof(finfo)) + info.func_info_rec_size = sizeof(finfo); + info.func_info = ptr_to_u64(&finfo); + + if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) + goto copy_name; + + prog_btf = btf__load_from_kernel_by_id(info.btf_id); + if (!prog_btf) + goto copy_name; + + func_type = btf__type_by_id(prog_btf, finfo.type_id); + if (!func_type || !btf_is_func(func_type)) + goto copy_name; + + prog_name = btf__name_by_offset(prog_btf, func_type->name_off); + +copy_name: + snprintf(name_buff, buff_len, "%s", prog_name); + + if (prog_btf) + btf__free(prog_btf); +} + int get_fd_type(int fd) { char path[PATH_MAX]; diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index b4695df2ea3d..43e3f8700ecc 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -227,7 +227,7 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name) /* only generate definitions for memory-mapped internal maps */ if (!bpf_map__is_internal(map)) continue; - if (!(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) + if (!(bpf_map__map_flags(map) & BPF_F_MMAPABLE)) continue; if (!get_map_ident(map, map_ident, sizeof(map_ident))) @@ -468,7 +468,7 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name) if (!get_map_ident(map, ident, sizeof(ident))) continue; if (bpf_map__is_internal(map) && - (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) + (bpf_map__map_flags(map) & BPF_F_MMAPABLE)) printf("\tmunmap(skel->%1$s, %2$zd);\n", ident, bpf_map_mmap_sz(map)); codegen("\ @@ -536,7 +536,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h continue; if (!bpf_map__is_internal(map) || - !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) + !(bpf_map__map_flags(map) & BPF_F_MMAPABLE)) continue; codegen("\ @@ -600,10 +600,10 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h continue; if (!bpf_map__is_internal(map) || - !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) + !(bpf_map__map_flags(map) & BPF_F_MMAPABLE)) continue; - if (bpf_map__def(map)->map_flags & BPF_F_RDONLY_PROG) + if (bpf_map__map_flags(map) & BPF_F_RDONLY_PROG) mmap_flags = "PROT_READ"; else mmap_flags = "PROT_READ | PROT_WRITE"; @@ -927,7 +927,6 @@ static int do_skeleton(int argc, char **argv) s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\ if (!s) \n\ goto err; \n\ - obj->skeleton = s; \n\ \n\ s->sz = sizeof(*s); \n\ s->name = \"%1$s\"; \n\ @@ -962,7 +961,7 @@ static int do_skeleton(int argc, char **argv) i, bpf_map__name(map), i, ident); /* memory-mapped internal maps */ if (bpf_map__is_internal(map) && - (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) { + (bpf_map__map_flags(map) & BPF_F_MMAPABLE)) { printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n", i, ident); } @@ -1000,6 +999,7 @@ static int do_skeleton(int argc, char **argv) \n\ s->data = (void *)%2$s__elf_bytes(&s->data_sz); \n\ \n\ + obj->skeleton = s; \n\ return 0; \n\ err: \n\ bpf_object__destroy_skeleton(s); \n\ diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 2c258db0d352..97dec81950e5 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -2,6 +2,7 @@ /* Copyright (C) 2020 Facebook */ #include <errno.h> +#include <linux/err.h> #include <net/if.h> #include <stdio.h> #include <unistd.h> @@ -306,7 +307,7 @@ static int do_show(int argc, char **argv) if (show_pinned) { link_table = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); - if (!link_table) { + if (IS_ERR(link_table)) { p_err("failed to create hashmap for pinned paths"); return -1; } diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 020e91a542d5..9d01fa9de033 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -478,7 +478,14 @@ int main(int argc, char **argv) } if (!legacy_libbpf) { - ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + enum libbpf_strict_mode mode; + + /* Allow legacy map definitions for skeleton generation. + * It will still be rejected if users use LIBBPF_STRICT_ALL + * mode for loading generated skeleton. + */ + mode = (__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS; + ret = libbpf_set_strict_mode(mode); if (ret) p_err("failed to enable libbpf strict mode: %d", ret); } diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 8d76d937a62b..0c3840596b5a 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -140,6 +140,10 @@ struct cmd { int cmd_select(const struct cmd *cmds, int argc, char **argv, int (*help)(int argc, char **argv)); +#define MAX_PROG_FULL_NAME 128 +void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd, + char *name_buff, size_t buff_len); + int get_fd_type(int fd); const char *get_fd_type_name(enum bpf_obj_type type); char *get_fdinfo(int fd, const char *key); diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index cc530a229812..c66a3c979b7a 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -699,7 +699,7 @@ static int do_show(int argc, char **argv) if (show_pinned) { map_table = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); - if (!map_table) { + if (IS_ERR(map_table)) { p_err("failed to create hashmap for pinned paths"); return -1; } diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 649053704bd7..526a332c48e6 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -551,7 +551,7 @@ static int do_attach_detach_xdp(int progfd, enum net_attach_type attach_type, if (attach_type == NET_ATTACH_TYPE_XDP_OFFLOAD) flags |= XDP_FLAGS_HW_MODE; - return bpf_set_link_xdp_fd(ifindex, progfd, flags); + return bpf_xdp_attach(ifindex, progfd, flags, NULL); } static int do_attach(int argc, char **argv) diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index 56b598eee043..7c384d10e95f 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2020 Facebook */ #include <errno.h> +#include <linux/err.h> #include <stdbool.h> #include <stdio.h> #include <stdlib.h> @@ -101,7 +102,7 @@ int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type) libbpf_print_fn_t default_print; *map = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); - if (!*map) { + if (IS_ERR(*map)) { p_err("failed to create hashmap for PID references"); return -1; } diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 2a21d50516bc..cf935c63e6f5 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -424,8 +424,10 @@ out_free: free(value); } -static void print_prog_header_json(struct bpf_prog_info *info) +static void print_prog_header_json(struct bpf_prog_info *info, int fd) { + char prog_name[MAX_PROG_FULL_NAME]; + jsonw_uint_field(json_wtr, "id", info->id); if (info->type < ARRAY_SIZE(prog_type_name)) jsonw_string_field(json_wtr, "type", @@ -433,8 +435,10 @@ static void print_prog_header_json(struct bpf_prog_info *info) else jsonw_uint_field(json_wtr, "type", info->type); - if (*info->name) - jsonw_string_field(json_wtr, "name", info->name); + if (*info->name) { + get_prog_full_name(info, fd, prog_name, sizeof(prog_name)); + jsonw_string_field(json_wtr, "name", prog_name); + } jsonw_name(json_wtr, "tag"); jsonw_printf(json_wtr, "\"" BPF_TAG_FMT "\"", @@ -455,7 +459,7 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) char *memlock; jsonw_start_object(json_wtr); - print_prog_header_json(info); + print_prog_header_json(info, fd); print_dev_json(info->ifindex, info->netns_dev, info->netns_ino); if (info->load_time) { @@ -507,16 +511,20 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) jsonw_end_object(json_wtr); } -static void print_prog_header_plain(struct bpf_prog_info *info) +static void print_prog_header_plain(struct bpf_prog_info *info, int fd) { + char prog_name[MAX_PROG_FULL_NAME]; + printf("%u: ", info->id); if (info->type < ARRAY_SIZE(prog_type_name)) printf("%s ", prog_type_name[info->type]); else printf("type %u ", info->type); - if (*info->name) - printf("name %s ", info->name); + if (*info->name) { + get_prog_full_name(info, fd, prog_name, sizeof(prog_name)); + printf("name %s ", prog_name); + } printf("tag "); fprint_hex(stdout, info->tag, BPF_TAG_SIZE, ""); @@ -534,7 +542,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) { char *memlock; - print_prog_header_plain(info); + print_prog_header_plain(info, fd); if (info->load_time) { char buf[32]; @@ -641,7 +649,7 @@ static int do_show(int argc, char **argv) if (show_pinned) { prog_table = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); - if (!prog_table) { + if (IS_ERR(prog_table)) { p_err("failed to create hashmap for pinned paths"); return -1; } @@ -972,10 +980,10 @@ static int do_dump(int argc, char **argv) if (json_output && nb_fds > 1) { jsonw_start_object(json_wtr); /* prog object */ - print_prog_header_json(&info); + print_prog_header_json(&info, fds[i]); jsonw_name(json_wtr, "insns"); } else if (nb_fds > 1) { - print_prog_header_plain(&info); + print_prog_header_plain(&info, fds[i]); } err = prog_dump(&info, mode, filepath, opcodes, visual, linum); diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c index 2f693b082bdb..e08a6ff2866c 100644 --- a/tools/bpf/bpftool/struct_ops.c +++ b/tools/bpf/bpftool/struct_ops.c @@ -480,7 +480,6 @@ static int do_unregister(int argc, char **argv) static int do_register(int argc, char **argv) { LIBBPF_OPTS(bpf_object_open_opts, open_opts); - const struct bpf_map_def *def; struct bpf_map_info info = {}; __u32 info_len = sizeof(info); int nr_errs = 0, nr_maps = 0; @@ -510,8 +509,7 @@ static int do_register(int argc, char **argv) } bpf_object__for_each_map(map, obj) { - def = bpf_map__def(map); - if (def->type != BPF_MAP_TYPE_STRUCT_OPS) + if (bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS) continue; link = bpf_map__attach_struct_ops(map); diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index 320a88ac28c9..19a3112e271a 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -24,6 +24,8 @@ LD = $(HOSTLD) ARCH = $(HOSTARCH) RM ?= rm CROSS_COMPILE = +CFLAGS := $(KBUILD_HOSTCFLAGS) +LDFLAGS := $(KBUILD_HOSTLDFLAGS) OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/ @@ -51,10 +53,10 @@ $(SUBCMDOBJ): fixdep FORCE | $(OUTPUT)/libsubcmd $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUT) $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(LIBBPF_OUT) \ - DESTDIR=$(LIBBPF_DESTDIR) prefix= \ + DESTDIR=$(LIBBPF_DESTDIR) prefix= EXTRA_CFLAGS="$(CFLAGS)" \ $(abspath $@) install_headers -CFLAGS := -g \ +CFLAGS += -g \ -I$(srctree)/tools/include \ -I$(srctree)/tools/include/uapi \ -I$(LIBBPF_INCLUDE) \ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b0383d371b9a..16a7574292a5 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -330,6 +330,8 @@ union bpf_iter_link_info { * *ctx_out*, *data_in* and *data_out* must be NULL. * *repeat* must be zero. * + * BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN. + * * Return * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. @@ -1111,6 +1113,11 @@ enum bpf_link_type { */ #define BPF_F_SLEEPABLE (1U << 4) +/* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program + * fully support xdp frags. + */ +#define BPF_F_XDP_HAS_FRAGS (1U << 5) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: * @@ -1775,6 +1782,8 @@ union bpf_attr { * 0 on success, or a negative error in case of failure. * * u64 bpf_get_current_pid_tgid(void) + * Description + * Get the current pid and tgid. * Return * A 64-bit integer containing the current tgid and pid, and * created as such: @@ -1782,6 +1791,8 @@ union bpf_attr { * *current_task*\ **->pid**. * * u64 bpf_get_current_uid_gid(void) + * Description + * Get the current uid and gid. * Return * A 64-bit integer containing the current GID and UID, and * created as such: *current_gid* **<< 32 \|** *current_uid*. @@ -2256,6 +2267,8 @@ union bpf_attr { * The 32-bit hash. * * u64 bpf_get_current_task(void) + * Description + * Get the current task. * Return * A pointer to the current task struct. * @@ -2369,6 +2382,8 @@ union bpf_attr { * indicate that the hash is outdated and to trigger a * recalculation the next time the kernel tries to access this * hash or when the **bpf_get_hash_recalc**\ () helper is called. + * Return + * void. * * long bpf_get_numa_node_id(void) * Description @@ -2466,6 +2481,8 @@ union bpf_attr { * A 8-byte long unique number or 0 if *sk* is NULL. * * u32 bpf_get_socket_uid(struct sk_buff *skb) + * Description + * Get the owner UID of the socked associated to *skb*. * Return * The owner UID of the socket associated to *skb*. If the socket * is **NULL**, or if it is not a full socket (i.e. if it is a @@ -3240,6 +3257,9 @@ union bpf_attr { * The id is returned or 0 in case the id could not be retrieved. * * u64 bpf_get_current_cgroup_id(void) + * Description + * Get the current cgroup id based on the cgroup within which + * the current task is running. * Return * A 64-bit integer containing the current cgroup id based * on the cgroup within which the current task is running. @@ -5018,6 +5038,44 @@ union bpf_attr { * * Return * The number of arguments of the traced function. + * + * int bpf_get_retval(void) + * Description + * Get the syscall's return value that will be returned to userspace. + * + * This helper is currently supported by cgroup programs only. + * Return + * The syscall's return value. + * + * int bpf_set_retval(int retval) + * Description + * Set the syscall's return value that will be returned to userspace. + * + * This helper is currently supported by cgroup programs only. + * Return + * 0 on success, or a negative error in case of failure. + * + * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md) + * Description + * Get the total size of a given xdp buff (linear and paged area) + * Return + * The total size of a given xdp buffer. + * + * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) + * Description + * This helper is provided as an easy way to load data from a + * xdp buffer. It can be used to load *len* bytes from *offset* from + * the frame associated to *xdp_md*, into the buffer pointed by + * *buf*. + * Return + * 0 on success, or a negative error in case of failure. + * + * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) + * Description + * Store *len* bytes from buffer *buf* into the frame + * associated to *xdp_md*, at *offset*. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5206,6 +5264,11 @@ union bpf_attr { FN(get_func_arg), \ FN(get_func_ret), \ FN(get_func_arg_cnt), \ + FN(get_retval), \ + FN(set_retval), \ + FN(xdp_get_buff_len), \ + FN(xdp_load_bytes), \ + FN(xdp_store_bytes), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 550b4cbb6c99..418b259166f8 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -754,10 +754,10 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, .flags = flags, ); - return bpf_prog_attach_xattr(prog_fd, target_fd, type, &opts); + return bpf_prog_attach_opts(prog_fd, target_fd, type, &opts); } -int bpf_prog_attach_xattr(int prog_fd, int target_fd, +int bpf_prog_attach_opts(int prog_fd, int target_fd, enum bpf_attach_type type, const struct bpf_prog_attach_opts *opts) { @@ -778,6 +778,11 @@ int bpf_prog_attach_xattr(int prog_fd, int target_fd, return libbpf_err_errno(ret); } +__attribute__((alias("bpf_prog_attach_opts"))) +int bpf_prog_attach_xattr(int prog_fd, int target_fd, + enum bpf_attach_type type, + const struct bpf_prog_attach_opts *opts); + int bpf_prog_detach(int target_fd, enum bpf_attach_type type) { union bpf_attr attr; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 14e0d97ad2cf..c2e8327010f9 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -391,6 +391,10 @@ struct bpf_prog_attach_opts { LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, unsigned int flags); +LIBBPF_API int bpf_prog_attach_opts(int prog_fd, int attachable_fd, + enum bpf_attach_type type, + const struct bpf_prog_attach_opts *opts); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_prog_attach_opts() instead") LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd, enum bpf_attach_type type, const struct bpf_prog_attach_opts *opts); diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 963b1060d944..44df982d2a5c 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -133,7 +133,7 @@ struct bpf_map_def { unsigned int value_size; unsigned int max_entries; unsigned int map_flags; -}; +} __attribute__((deprecated("use BTF-defined maps in .maps section"))); enum libbpf_pin_type { LIBBPF_PIN_NONE, diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 9aa19c89f758..1383e26c5d1f 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1620,20 +1620,37 @@ static int btf_commit_type(struct btf *btf, int data_sz) struct btf_pipe { const struct btf *src; struct btf *dst; + struct hashmap *str_off_map; /* map string offsets from src to dst */ }; static int btf_rewrite_str(__u32 *str_off, void *ctx) { struct btf_pipe *p = ctx; - int off; + void *mapped_off; + int off, err; if (!*str_off) /* nothing to do for empty strings */ return 0; + if (p->str_off_map && + hashmap__find(p->str_off_map, (void *)(long)*str_off, &mapped_off)) { + *str_off = (__u32)(long)mapped_off; + return 0; + } + off = btf__add_str(p->dst, btf__str_by_offset(p->src, *str_off)); if (off < 0) return off; + /* Remember string mapping from src to dst. It avoids + * performing expensive string comparisons. + */ + if (p->str_off_map) { + err = hashmap__append(p->str_off_map, (void *)(long)*str_off, (void *)(long)off); + if (err) + return err; + } + *str_off = off; return 0; } @@ -1680,6 +1697,9 @@ static int btf_rewrite_type_ids(__u32 *type_id, void *ctx) return 0; } +static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx); +static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx); + int btf__add_btf(struct btf *btf, const struct btf *src_btf) { struct btf_pipe p = { .src = src_btf, .dst = btf }; @@ -1713,6 +1733,11 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) if (!off) return libbpf_err(-ENOMEM); + /* Map the string offsets from src_btf to the offsets from btf to improve performance */ + p.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL); + if (IS_ERR(p.str_off_map)) + return libbpf_err(-ENOMEM); + /* bulk copy types data for all types from src_btf */ memcpy(t, src_btf->types_data, data_sz); @@ -1754,6 +1779,8 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) btf->hdr->str_off += data_sz; btf->nr_types += cnt; + hashmap__free(p.str_off_map); + /* return type ID of the first added BTF type */ return btf->start_id + btf->nr_types - cnt; err_out: @@ -1767,6 +1794,8 @@ err_out: * wasn't modified, so doesn't need restoring, see big comment above */ btf->hdr->str_len = old_strs_len; + hashmap__free(p.str_off_map); + return libbpf_err(err); } diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 061839f04525..51862fdee850 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -375,8 +375,28 @@ btf_dump__dump_type_data(struct btf_dump *d, __u32 id, const struct btf_dump_type_data_opts *opts); /* - * A set of helpers for easier BTF types handling + * A set of helpers for easier BTF types handling. + * + * The inline functions below rely on constants from the kernel headers which + * may not be available for applications including this header file. To avoid + * compilation errors, we define all the constants here that were added after + * the initial introduction of the BTF_KIND* constants. */ +#ifndef BTF_KIND_FUNC +#define BTF_KIND_FUNC 12 /* Function */ +#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ +#endif +#ifndef BTF_KIND_VAR +#define BTF_KIND_VAR 14 /* Variable */ +#define BTF_KIND_DATASEC 15 /* Section */ +#endif +#ifndef BTF_KIND_FLOAT +#define BTF_KIND_FLOAT 16 /* Floating point */ +#endif +/* The kernel header switched to enums, so these two were never #defined */ +#define BTF_KIND_DECL_TAG 17 /* Decl Tag */ +#define BTF_KIND_TYPE_TAG 18 /* Type Tag */ + static inline __u16 btf_kind(const struct btf_type *t) { return BTF_INFO_KIND(t->info); diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c index 3c20b126d60d..aeb09c288716 100644 --- a/tools/lib/bpf/hashmap.c +++ b/tools/lib/bpf/hashmap.c @@ -75,7 +75,7 @@ void hashmap__clear(struct hashmap *map) void hashmap__free(struct hashmap *map) { - if (!map) + if (IS_ERR_OR_NULL(map)) return; hashmap__clear(map); @@ -238,4 +238,3 @@ bool hashmap__delete(struct hashmap *map, const void *key, return true; } - diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7f10dd501a52..a8c750373ad5 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -235,6 +235,8 @@ enum sec_def_flags { SEC_SLEEPABLE = 8, /* allow non-strict prefix matching */ SEC_SLOPPY_PFX = 16, + /* BPF program support non-linear XDP buffer */ + SEC_XDP_FRAGS = 32, }; struct bpf_sec_def { @@ -1937,6 +1939,11 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) if (obj->efile.maps_shndx < 0) return 0; + if (libbpf_mode & LIBBPF_STRICT_MAP_DEFINITIONS) { + pr_warn("legacy map definitions in SEC(\"maps\") are not supported\n"); + return -EOPNOTSUPP; + } + if (!symbols) return -EINVAL; @@ -1999,6 +2006,8 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) return -LIBBPF_ERRNO__FORMAT; } + pr_warn("map '%s' (legacy): legacy map definitions are deprecated, use BTF-defined maps instead\n", map_name); + if (ELF64_ST_BIND(sym->st_info) == STB_LOCAL) { pr_warn("map '%s' (legacy): static maps are not supported\n", map_name); return -ENOTSUP; @@ -4190,6 +4199,7 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map) return 0; if (!bpf_map__is_internal(map)) { + pr_warn("Use of BPF_ANNOTATE_KV_PAIR is deprecated, use BTF-defined maps in .maps section instead\n"); ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size, def->value_size, &key_type_id, &value_type_id); @@ -6562,6 +6572,9 @@ static int libbpf_preload_prog(struct bpf_program *prog, if (def & SEC_SLEEPABLE) opts->prog_flags |= BPF_F_SLEEPABLE; + if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS)) + opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; + if ((prog->type == BPF_PROG_TYPE_TRACING || prog->type == BPF_PROG_TYPE_LSM || prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { @@ -8600,8 +8613,11 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("lsm.s/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), SEC_DEF("iter/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), + SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS), SEC_DEF("xdp_devmap/", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), + SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS), SEC_DEF("xdp_cpumap/", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), + SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS), SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE | SEC_SLOPPY_PFX), SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE | SEC_SLOPPY_PFX), @@ -11795,6 +11811,9 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) { + if (!s) + return; + if (s->progs) bpf_object__detach_skeleton(s); if (s->obj) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 8b9bc5e90c2b..94670066de62 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -706,7 +706,8 @@ bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map); LIBBPF_API int bpf_map__fd(const struct bpf_map *map); LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); /* get map definition */ -LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use appropriate getters or setters instead") +const struct bpf_map_def *bpf_map__def(const struct bpf_map *map); /* get map name */ LIBBPF_API const char *bpf_map__name(const struct bpf_map *map); /* get/set map type */ @@ -832,13 +833,42 @@ struct bpf_xdp_set_link_opts { }; #define bpf_xdp_set_link_opts__last_field old_fd +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead") LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead") LIBBPF_API int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags, const struct bpf_xdp_set_link_opts *opts); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query_id() instead") LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query() instead") LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, size_t info_size, __u32 flags); +struct bpf_xdp_attach_opts { + size_t sz; + int old_prog_fd; + size_t :0; +}; +#define bpf_xdp_attach_opts__last_field old_prog_fd + +struct bpf_xdp_query_opts { + size_t sz; + __u32 prog_id; /* output */ + __u32 drv_prog_id; /* output */ + __u32 hw_prog_id; /* output */ + __u32 skb_prog_id; /* output */ + __u8 attach_mode; /* output */ + size_t :0; +}; +#define bpf_xdp_query_opts__last_field attach_mode + +LIBBPF_API int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, + const struct bpf_xdp_attach_opts *opts); +LIBBPF_API int bpf_xdp_detach(int ifindex, __u32 flags, + const struct bpf_xdp_attach_opts *opts); +LIBBPF_API int bpf_xdp_query(int ifindex, int flags, struct bpf_xdp_query_opts *opts); +LIBBPF_API int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id); + /* TC related API */ enum bpf_tc_attach_point { BPF_TC_INGRESS = 1 << 0, diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 529783967793..e10f0822845a 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -247,6 +247,7 @@ LIBBPF_0.0.8 { bpf_link_create; bpf_link_update; bpf_map__set_initial_value; + bpf_prog_attach_opts; bpf_program__attach_cgroup; bpf_program__attach_lsm; bpf_program__is_lsm; @@ -427,6 +428,10 @@ LIBBPF_0.7.0 { bpf_program__log_level; bpf_program__set_log_buf; bpf_program__set_log_level; + bpf_xdp_attach; + bpf_xdp_detach; + bpf_xdp_query; + bpf_xdp_query_id; libbpf_probe_bpf_helper; libbpf_probe_bpf_map_type; libbpf_probe_bpf_prog_type; diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h index 79131f761a27..3c2b281c2bc3 100644 --- a/tools/lib/bpf/libbpf_legacy.h +++ b/tools/lib/bpf/libbpf_legacy.h @@ -73,6 +73,11 @@ enum libbpf_strict_mode { * operation. */ LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK = 0x10, + /* + * Error out on any SEC("maps") map definition, which are deprecated + * in favor of BTF-defined map definitions in SEC(".maps"). + */ + LIBBPF_STRICT_MAP_DEFINITIONS = 0x20, __LIBBPF_STRICT_LAST, }; diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 39f25e09b51e..c39c37f99d5c 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -217,6 +217,28 @@ static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd, return libbpf_netlink_send_recv(&req, NULL, NULL, NULL); } +int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, const struct bpf_xdp_attach_opts *opts) +{ + int old_prog_fd, err; + + if (!OPTS_VALID(opts, bpf_xdp_attach_opts)) + return libbpf_err(-EINVAL); + + old_prog_fd = OPTS_GET(opts, old_prog_fd, 0); + if (old_prog_fd) + flags |= XDP_FLAGS_REPLACE; + else + old_prog_fd = -1; + + err = __bpf_set_link_xdp_fd_replace(ifindex, prog_fd, old_prog_fd, flags); + return libbpf_err(err); +} + +int bpf_xdp_detach(int ifindex, __u32 flags, const struct bpf_xdp_attach_opts *opts) +{ + return bpf_xdp_attach(ifindex, -1, flags, opts); +} + int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags, const struct bpf_xdp_set_link_opts *opts) { @@ -303,69 +325,98 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb) return 0; } -int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, - size_t info_size, __u32 flags) +int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts) { - struct xdp_id_md xdp_id = {}; - __u32 mask; - int ret; struct libbpf_nla_req req = { .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), .nh.nlmsg_type = RTM_GETLINK, .nh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, .ifinfo.ifi_family = AF_PACKET, }; + struct xdp_id_md xdp_id = {}; + int err; - if (flags & ~XDP_FLAGS_MASK || !info_size) + if (!OPTS_VALID(opts, bpf_xdp_query_opts)) + return libbpf_err(-EINVAL); + + if (xdp_flags & ~XDP_FLAGS_MASK) return libbpf_err(-EINVAL); /* Check whether the single {HW,DRV,SKB} mode is set */ - flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE); - mask = flags - 1; - if (flags && flags & mask) + xdp_flags &= XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE; + if (xdp_flags & (xdp_flags - 1)) return libbpf_err(-EINVAL); xdp_id.ifindex = ifindex; - xdp_id.flags = flags; + xdp_id.flags = xdp_flags; - ret = libbpf_netlink_send_recv(&req, __dump_link_nlmsg, + err = libbpf_netlink_send_recv(&req, __dump_link_nlmsg, get_xdp_info, &xdp_id); - if (!ret) { - size_t sz = min(info_size, sizeof(xdp_id.info)); + if (err) + return libbpf_err(err); - memcpy(info, &xdp_id.info, sz); - memset((void *) info + sz, 0, info_size - sz); - } + OPTS_SET(opts, prog_id, xdp_id.info.prog_id); + OPTS_SET(opts, drv_prog_id, xdp_id.info.drv_prog_id); + OPTS_SET(opts, hw_prog_id, xdp_id.info.hw_prog_id); + OPTS_SET(opts, skb_prog_id, xdp_id.info.skb_prog_id); + OPTS_SET(opts, attach_mode, xdp_id.info.attach_mode); - return libbpf_err(ret); + return 0; } -static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags) +int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, + size_t info_size, __u32 flags) { - flags &= XDP_FLAGS_MODES; + LIBBPF_OPTS(bpf_xdp_query_opts, opts); + size_t sz; + int err; + + if (!info_size) + return libbpf_err(-EINVAL); - if (info->attach_mode != XDP_ATTACHED_MULTI && !flags) - return info->prog_id; - if (flags & XDP_FLAGS_DRV_MODE) - return info->drv_prog_id; - if (flags & XDP_FLAGS_HW_MODE) - return info->hw_prog_id; - if (flags & XDP_FLAGS_SKB_MODE) - return info->skb_prog_id; + err = bpf_xdp_query(ifindex, flags, &opts); + if (err) + return libbpf_err(err); + + /* struct xdp_link_info field layout matches struct bpf_xdp_query_opts + * layout after sz field + */ + sz = min(info_size, offsetofend(struct xdp_link_info, attach_mode)); + memcpy(info, &opts.prog_id, sz); + memset((void *)info + sz, 0, info_size - sz); return 0; } -int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) +int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id) { - struct xdp_link_info info; + LIBBPF_OPTS(bpf_xdp_query_opts, opts); int ret; - ret = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags); - if (!ret) - *prog_id = get_xdp_id(&info, flags); + ret = bpf_xdp_query(ifindex, flags, &opts); + if (ret) + return libbpf_err(ret); + + flags &= XDP_FLAGS_MODES; - return libbpf_err(ret); + if (opts.attach_mode != XDP_ATTACHED_MULTI && !flags) + *prog_id = opts.prog_id; + else if (flags & XDP_FLAGS_DRV_MODE) + *prog_id = opts.drv_prog_id; + else if (flags & XDP_FLAGS_HW_MODE) + *prog_id = opts.hw_prog_id; + else if (flags & XDP_FLAGS_SKB_MODE) + *prog_id = opts.skb_prog_id; + else + *prog_id = 0; + + return 0; +} + + +int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) +{ + return bpf_xdp_query_id(ifindex, flags, prog_id); } typedef int (*qdisc_config_t)(struct libbpf_nla_req *req); diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 7ecfaac7536a..ef2832b4d5cc 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -1005,24 +1005,22 @@ __bpf_map__config_value(struct bpf_map *map, { struct bpf_map_op *op; const char *map_name = bpf_map__name(map); - const struct bpf_map_def *def = bpf_map__def(map); - if (IS_ERR(def)) { - pr_debug("Unable to get map definition from '%s'\n", - map_name); + if (!map) { + pr_debug("Map '%s' is invalid\n", map_name); return -BPF_LOADER_ERRNO__INTERNAL; } - if (def->type != BPF_MAP_TYPE_ARRAY) { + if (bpf_map__type(map) != BPF_MAP_TYPE_ARRAY) { pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n", map_name); return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; } - if (def->key_size < sizeof(unsigned int)) { + if (bpf_map__key_size(map) < sizeof(unsigned int)) { pr_debug("Map %s has incorrect key size\n", map_name); return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE; } - switch (def->value_size) { + switch (bpf_map__value_size(map)) { case 1: case 2: case 4: @@ -1064,7 +1062,6 @@ __bpf_map__config_event(struct bpf_map *map, struct parse_events_term *term, struct evlist *evlist) { - const struct bpf_map_def *def; struct bpf_map_op *op; const char *map_name = bpf_map__name(map); struct evsel *evsel = evlist__find_evsel_by_str(evlist, term->val.str); @@ -1075,18 +1072,16 @@ __bpf_map__config_event(struct bpf_map *map, return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT; } - def = bpf_map__def(map); - if (IS_ERR(def)) { - pr_debug("Unable to get map definition from '%s'\n", - map_name); - return PTR_ERR(def); + if (!map) { + pr_debug("Map '%s' is invalid\n", map_name); + return PTR_ERR(map); } /* * No need to check key_size and value_size: * kernel has already checked them. */ - if (def->type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { + if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n", map_name); return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; @@ -1135,7 +1130,6 @@ config_map_indices_range_check(struct parse_events_term *term, const char *map_name) { struct parse_events_array *array = &term->array; - const struct bpf_map_def *def; unsigned int i; if (!array->nr_ranges) @@ -1146,10 +1140,8 @@ config_map_indices_range_check(struct parse_events_term *term, return -BPF_LOADER_ERRNO__INTERNAL; } - def = bpf_map__def(map); - if (IS_ERR(def)) { - pr_debug("ERROR: Unable to get map definition from '%s'\n", - map_name); + if (!map) { + pr_debug("Map '%s' is invalid\n", map_name); return -BPF_LOADER_ERRNO__INTERNAL; } @@ -1158,7 +1150,7 @@ config_map_indices_range_check(struct parse_events_term *term, size_t length = array->ranges[i].length; unsigned int idx = start + length - 1; - if (idx >= def->max_entries) { + if (idx >= bpf_map__max_entries(map)) { pr_debug("ERROR: index %d too large\n", idx); return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG; } @@ -1251,21 +1243,21 @@ out: } typedef int (*map_config_func_t)(const char *name, int map_fd, - const struct bpf_map_def *pdef, + const struct bpf_map *map, struct bpf_map_op *op, void *pkey, void *arg); static int foreach_key_array_all(map_config_func_t func, void *arg, const char *name, - int map_fd, const struct bpf_map_def *pdef, + int map_fd, const struct bpf_map *map, struct bpf_map_op *op) { unsigned int i; int err; - for (i = 0; i < pdef->max_entries; i++) { - err = func(name, map_fd, pdef, op, &i, arg); + for (i = 0; i < bpf_map__max_entries(map); i++) { + err = func(name, map_fd, map, op, &i, arg); if (err) { pr_debug("ERROR: failed to insert value to %s[%u]\n", name, i); @@ -1278,7 +1270,7 @@ foreach_key_array_all(map_config_func_t func, static int foreach_key_array_ranges(map_config_func_t func, void *arg, const char *name, int map_fd, - const struct bpf_map_def *pdef, + const struct bpf_map *map, struct bpf_map_op *op) { unsigned int i, j; @@ -1291,7 +1283,7 @@ foreach_key_array_ranges(map_config_func_t func, void *arg, for (j = 0; j < length; j++) { unsigned int idx = start + j; - err = func(name, map_fd, pdef, op, &idx, arg); + err = func(name, map_fd, map, op, &idx, arg); if (err) { pr_debug("ERROR: failed to insert value to %s[%u]\n", name, idx); @@ -1307,9 +1299,8 @@ bpf_map_config_foreach_key(struct bpf_map *map, map_config_func_t func, void *arg) { - int err, map_fd; + int err, map_fd, type; struct bpf_map_op *op; - const struct bpf_map_def *def; const char *name = bpf_map__name(map); struct bpf_map_priv *priv = bpf_map__priv(map); @@ -1322,9 +1313,8 @@ bpf_map_config_foreach_key(struct bpf_map *map, return 0; } - def = bpf_map__def(map); - if (IS_ERR(def)) { - pr_debug("ERROR: failed to get definition from map %s\n", name); + if (!map) { + pr_debug("Map '%s' is invalid\n", name); return -BPF_LOADER_ERRNO__INTERNAL; } map_fd = bpf_map__fd(map); @@ -1333,19 +1323,19 @@ bpf_map_config_foreach_key(struct bpf_map *map, return map_fd; } + type = bpf_map__type(map); list_for_each_entry(op, &priv->ops_list, list) { - switch (def->type) { + switch (type) { case BPF_MAP_TYPE_ARRAY: case BPF_MAP_TYPE_PERF_EVENT_ARRAY: switch (op->key_type) { case BPF_MAP_KEY_ALL: err = foreach_key_array_all(func, arg, name, - map_fd, def, op); + map_fd, map, op); break; case BPF_MAP_KEY_RANGES: err = foreach_key_array_ranges(func, arg, name, - map_fd, def, - op); + map_fd, map, op); break; default: pr_debug("ERROR: keytype for map '%s' invalid\n", @@ -1454,7 +1444,7 @@ apply_config_evsel_for_key(const char *name, int map_fd, void *pkey, static int apply_obj_config_map_for_key(const char *name, int map_fd, - const struct bpf_map_def *pdef, + const struct bpf_map *map, struct bpf_map_op *op, void *pkey, void *arg __maybe_unused) { @@ -1463,7 +1453,7 @@ apply_obj_config_map_for_key(const char *name, int map_fd, switch (op->op_type) { case BPF_MAP_OP_SET_VALUE: err = apply_config_value_for_key(map_fd, pkey, - pdef->value_size, + bpf_map__value_size(map), op->v.value); break; case BPF_MAP_OP_SET_EVSEL: diff --git a/tools/perf/util/bpf_map.c b/tools/perf/util/bpf_map.c index eb853ca67cf4..c863ae0c5cb5 100644 --- a/tools/perf/util/bpf_map.c +++ b/tools/perf/util/bpf_map.c @@ -9,25 +9,25 @@ #include <stdlib.h> #include <unistd.h> -static bool bpf_map_def__is_per_cpu(const struct bpf_map_def *def) +static bool bpf_map__is_per_cpu(enum bpf_map_type type) { - return def->type == BPF_MAP_TYPE_PERCPU_HASH || - def->type == BPF_MAP_TYPE_PERCPU_ARRAY || - def->type == BPF_MAP_TYPE_LRU_PERCPU_HASH || - def->type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE; + return type == BPF_MAP_TYPE_PERCPU_HASH || + type == BPF_MAP_TYPE_PERCPU_ARRAY || + type == BPF_MAP_TYPE_LRU_PERCPU_HASH || + type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE; } -static void *bpf_map_def__alloc_value(const struct bpf_map_def *def) +static void *bpf_map__alloc_value(const struct bpf_map *map) { - if (bpf_map_def__is_per_cpu(def)) - return malloc(round_up(def->value_size, 8) * sysconf(_SC_NPROCESSORS_CONF)); + if (bpf_map__is_per_cpu(bpf_map__type(map))) + return malloc(round_up(bpf_map__value_size(map), 8) * + sysconf(_SC_NPROCESSORS_CONF)); - return malloc(def->value_size); + return malloc(bpf_map__value_size(map)); } int bpf_map__fprintf(struct bpf_map *map, FILE *fp) { - const struct bpf_map_def *def = bpf_map__def(map); void *prev_key = NULL, *key, *value; int fd = bpf_map__fd(map), err; int printed = 0; @@ -35,15 +35,15 @@ int bpf_map__fprintf(struct bpf_map *map, FILE *fp) if (fd < 0) return fd; - if (IS_ERR(def)) - return PTR_ERR(def); + if (!map) + return PTR_ERR(map); err = -ENOMEM; - key = malloc(def->key_size); + key = malloc(bpf_map__key_size(map)); if (key == NULL) goto out; - value = bpf_map_def__alloc_value(def); + value = bpf_map__alloc_value(map); if (value == NULL) goto out_free_key; diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 42ffc24e9e71..945f92d71db3 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -21,7 +21,7 @@ endif BPF_GCC ?= $(shell command -v bpf-gcc;) SAN_CFLAGS ?= -CFLAGS += -g -O0 -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS) \ +CFLAGS += -g -O0 -rdynamic -Wall -Werror $(GENFLAGS) $(SAN_CFLAGS) \ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) LDFLAGS += $(SAN_CFLAGS) @@ -292,7 +292,7 @@ IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \ MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian) CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) -BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ +BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ -I$(abspath $(OUTPUT)/../usr/include) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index df3b292a8ffe..bdbacf5adcd2 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -109,26 +109,31 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .write = bpf_testmod_test_write, }; -BTF_SET_START(bpf_testmod_kfunc_ids) +BTF_SET_START(bpf_testmod_check_kfunc_ids) BTF_ID(func, bpf_testmod_test_mod_kfunc) -BTF_SET_END(bpf_testmod_kfunc_ids) +BTF_SET_END(bpf_testmod_check_kfunc_ids) -static DEFINE_KFUNC_BTF_ID_SET(&bpf_testmod_kfunc_ids, bpf_testmod_kfunc_btf_set); +static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &bpf_testmod_check_kfunc_ids, +}; + +extern int bpf_fentry_test1(int a); static int bpf_testmod_init(void) { int ret; - ret = sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); - if (ret) + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set); + if (ret < 0) return ret; - register_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set); - return 0; + if (bpf_fentry_test1(0) < 0) + return -EINVAL; + return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); } static void bpf_testmod_exit(void) { - unregister_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set); return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); } diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index f6287132fa89..763db63a3890 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -48,3 +48,8 @@ CONFIG_IMA_READ_POLICY=y CONFIG_BLK_DEV_LOOP=y CONFIG_FUNCTION_TRACER=y CONFIG_DYNAMIC_FTRACE=y +CONFIG_NETFILTER=y +CONFIG_NF_DEFRAG_IPV4=y +CONFIG_NF_DEFRAG_IPV6=y +CONFIG_NF_CONNTRACK=y +CONFIG_USERFAULTFD=y diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c index d0f06e40c16d..eac71fbb24ce 100644 --- a/tools/testing/selftests/bpf/prog_tests/bind_perm.c +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c @@ -1,13 +1,24 @@ // SPDX-License-Identifier: GPL-2.0 -#include <test_progs.h> -#include "bind_perm.skel.h" - +#define _GNU_SOURCE +#include <sched.h> +#include <stdlib.h> #include <sys/types.h> #include <sys/socket.h> #include <sys/capability.h> +#include "test_progs.h" +#include "bind_perm.skel.h" + static int duration; +static int create_netns(void) +{ + if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) + return -1; + + return 0; +} + void try_bind(int family, int port, int expected_errno) { struct sockaddr_storage addr = {}; @@ -75,6 +86,9 @@ void test_bind_perm(void) struct bind_perm *skel; int cgroup_fd; + if (create_netns()) + return; + cgroup_fd = test__join_cgroup("/bind_perm"); if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c new file mode 100644 index 000000000000..ee725d4d98a5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ +#include <sys/socket.h> +#include <sys/un.h> +#include <test_progs.h> +#include "bpf_iter_setsockopt_unix.skel.h" + +#define NR_CASES 5 + +static int create_unix_socket(struct bpf_iter_setsockopt_unix *skel) +{ + struct sockaddr_un addr = { + .sun_family = AF_UNIX, + .sun_path = "", + }; + socklen_t len; + int fd, err; + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (!ASSERT_NEQ(fd, -1, "socket")) + return -1; + + len = offsetof(struct sockaddr_un, sun_path); + err = bind(fd, (struct sockaddr *)&addr, len); + if (!ASSERT_OK(err, "bind")) + return -1; + + len = sizeof(addr); + err = getsockname(fd, (struct sockaddr *)&addr, &len); + if (!ASSERT_OK(err, "getsockname")) + return -1; + + memcpy(&skel->bss->sun_path, &addr.sun_path, + len - offsetof(struct sockaddr_un, sun_path)); + + return fd; +} + +static void test_sndbuf(struct bpf_iter_setsockopt_unix *skel, int fd) +{ + socklen_t optlen; + int i, err; + + for (i = 0; i < NR_CASES; i++) { + if (!ASSERT_NEQ(skel->data->sndbuf_getsockopt[i], -1, + "bpf_(get|set)sockopt")) + return; + + err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, + &(skel->data->sndbuf_setsockopt[i]), + sizeof(skel->data->sndbuf_setsockopt[i])); + if (!ASSERT_OK(err, "setsockopt")) + return; + + optlen = sizeof(skel->bss->sndbuf_getsockopt_expected[i]); + err = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, + &(skel->bss->sndbuf_getsockopt_expected[i]), + &optlen); + if (!ASSERT_OK(err, "getsockopt")) + return; + + if (!ASSERT_EQ(skel->data->sndbuf_getsockopt[i], + skel->bss->sndbuf_getsockopt_expected[i], + "bpf_(get|set)sockopt")) + return; + } +} + +void test_bpf_iter_setsockopt_unix(void) +{ + struct bpf_iter_setsockopt_unix *skel; + int err, unix_fd, iter_fd; + char buf; + + skel = bpf_iter_setsockopt_unix__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + unix_fd = create_unix_socket(skel); + if (!ASSERT_NEQ(unix_fd, -1, "create_unix_server")) + goto destroy; + + skel->links.change_sndbuf = bpf_program__attach_iter(skel->progs.change_sndbuf, NULL); + if (!ASSERT_OK_PTR(skel->links.change_sndbuf, "bpf_program__attach_iter")) + goto destroy; + + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.change_sndbuf)); + if (!ASSERT_GE(iter_fd, 0, "bpf_iter_create")) + goto destroy; + + while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 && + errno == EAGAIN) + ; + if (!ASSERT_OK(err, "read iter error")) + goto destroy; + + test_sndbuf(skel, unix_fd); +destroy: + bpf_iter_setsockopt_unix__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c new file mode 100644 index 000000000000..d43f548c572c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <unistd.h> +#include <pthread.h> +#include <sys/mman.h> +#include <stdatomic.h> +#include <test_progs.h> +#include <sys/syscall.h> +#include <linux/module.h> +#include <linux/userfaultfd.h> + +#include "ksym_race.skel.h" +#include "bpf_mod_race.skel.h" +#include "kfunc_call_race.skel.h" + +/* This test crafts a race between btf_try_get_module and do_init_module, and + * checks whether btf_try_get_module handles the invocation for a well-formed + * but uninitialized module correctly. Unless the module has completed its + * initcalls, the verifier should fail the program load and return ENXIO. + * + * userfaultfd is used to trigger a fault in an fmod_ret program, and make it + * sleep, then the BPF program is loaded and the return value from verifier is + * inspected. After this, the userfaultfd is closed so that the module loading + * thread makes forward progress, and fmod_ret injects an error so that the + * module load fails and it is freed. + * + * If the verifier succeeded in loading the supplied program, it will end up + * taking reference to freed module, and trigger a crash when the program fd + * is closed later. This is true for both kfuncs and ksyms. In both cases, + * the crash is triggered inside bpf_prog_free_deferred, when module reference + * is finally released. + */ + +struct test_config { + const char *str_open; + void *(*bpf_open_and_load)(); + void (*bpf_destroy)(void *); +}; + +enum test_state { + _TS_INVALID, + TS_MODULE_LOAD, + TS_MODULE_LOAD_FAIL, +}; + +static _Atomic enum test_state state = _TS_INVALID; + +static int sys_finit_module(int fd, const char *param_values, int flags) +{ + return syscall(__NR_finit_module, fd, param_values, flags); +} + +static int sys_delete_module(const char *name, unsigned int flags) +{ + return syscall(__NR_delete_module, name, flags); +} + +static int load_module(const char *mod) +{ + int ret, fd; + + fd = open("bpf_testmod.ko", O_RDONLY); + if (fd < 0) + return fd; + + ret = sys_finit_module(fd, "", 0); + close(fd); + if (ret < 0) + return ret; + return 0; +} + +static void *load_module_thread(void *p) +{ + + if (!ASSERT_NEQ(load_module("bpf_testmod.ko"), 0, "load_module_thread must fail")) + atomic_store(&state, TS_MODULE_LOAD); + else + atomic_store(&state, TS_MODULE_LOAD_FAIL); + return p; +} + +static int sys_userfaultfd(int flags) +{ + return syscall(__NR_userfaultfd, flags); +} + +static int test_setup_uffd(void *fault_addr) +{ + struct uffdio_register uffd_register = {}; + struct uffdio_api uffd_api = {}; + int uffd; + + uffd = sys_userfaultfd(O_CLOEXEC); + if (uffd < 0) + return -errno; + + uffd_api.api = UFFD_API; + uffd_api.features = 0; + if (ioctl(uffd, UFFDIO_API, &uffd_api)) { + close(uffd); + return -1; + } + + uffd_register.range.start = (unsigned long)fault_addr; + uffd_register.range.len = 4096; + uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (ioctl(uffd, UFFDIO_REGISTER, &uffd_register)) { + close(uffd); + return -1; + } + return uffd; +} + +static void test_bpf_mod_race_config(const struct test_config *config) +{ + void *fault_addr, *skel_fail; + struct bpf_mod_race *skel; + struct uffd_msg uffd_msg; + pthread_t load_mod_thrd; + _Atomic int *blockingp; + int uffd, ret; + + fault_addr = mmap(0, 4096, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (!ASSERT_NEQ(fault_addr, MAP_FAILED, "mmap for uffd registration")) + return; + + if (!ASSERT_OK(sys_delete_module("bpf_testmod", 0), "unload bpf_testmod")) + goto end_mmap; + + skel = bpf_mod_race__open(); + if (!ASSERT_OK_PTR(skel, "bpf_mod_kfunc_race__open")) + goto end_module; + + skel->rodata->bpf_mod_race_config.tgid = getpid(); + skel->rodata->bpf_mod_race_config.inject_error = -4242; + skel->rodata->bpf_mod_race_config.fault_addr = fault_addr; + if (!ASSERT_OK(bpf_mod_race__load(skel), "bpf_mod___load")) + goto end_destroy; + blockingp = (_Atomic int *)&skel->bss->bpf_blocking; + + if (!ASSERT_OK(bpf_mod_race__attach(skel), "bpf_mod_kfunc_race__attach")) + goto end_destroy; + + uffd = test_setup_uffd(fault_addr); + if (!ASSERT_GE(uffd, 0, "userfaultfd open + register address")) + goto end_destroy; + + if (!ASSERT_OK(pthread_create(&load_mod_thrd, NULL, load_module_thread, NULL), + "load module thread")) + goto end_uffd; + + /* Now, we either fail loading module, or block in bpf prog, spin to find out */ + while (!atomic_load(&state) && !atomic_load(blockingp)) + ; + if (!ASSERT_EQ(state, _TS_INVALID, "module load should block")) + goto end_join; + if (!ASSERT_EQ(*blockingp, 1, "module load blocked")) { + pthread_kill(load_mod_thrd, SIGKILL); + goto end_uffd; + } + + /* We might have set bpf_blocking to 1, but may have not blocked in + * bpf_copy_from_user. Read userfaultfd descriptor to verify that. + */ + if (!ASSERT_EQ(read(uffd, &uffd_msg, sizeof(uffd_msg)), sizeof(uffd_msg), + "read uffd block event")) + goto end_join; + if (!ASSERT_EQ(uffd_msg.event, UFFD_EVENT_PAGEFAULT, "read uffd event is pagefault")) + goto end_join; + + /* We know that load_mod_thrd is blocked in the fmod_ret program, the + * module state is still MODULE_STATE_COMING because mod->init hasn't + * returned. This is the time we try to load a program calling kfunc and + * check if we get ENXIO from verifier. + */ + skel_fail = config->bpf_open_and_load(); + ret = errno; + if (!ASSERT_EQ(skel_fail, NULL, config->str_open)) { + /* Close uffd to unblock load_mod_thrd */ + close(uffd); + uffd = -1; + while (atomic_load(blockingp) != 2) + ; + ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu"); + config->bpf_destroy(skel_fail); + goto end_join; + + } + ASSERT_EQ(ret, ENXIO, "verifier returns ENXIO"); + ASSERT_EQ(skel->data->res_try_get_module, false, "btf_try_get_module == false"); + + close(uffd); + uffd = -1; +end_join: + pthread_join(load_mod_thrd, NULL); + if (uffd < 0) + ASSERT_EQ(atomic_load(&state), TS_MODULE_LOAD_FAIL, "load_mod_thrd success"); +end_uffd: + if (uffd >= 0) + close(uffd); +end_destroy: + bpf_mod_race__destroy(skel); + ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu"); +end_module: + sys_delete_module("bpf_testmod", 0); + ASSERT_OK(load_module("bpf_testmod.ko"), "restore bpf_testmod"); +end_mmap: + munmap(fault_addr, 4096); + atomic_store(&state, _TS_INVALID); +} + +static const struct test_config ksym_config = { + .str_open = "ksym_race__open_and_load", + .bpf_open_and_load = (void *)ksym_race__open_and_load, + .bpf_destroy = (void *)ksym_race__destroy, +}; + +static const struct test_config kfunc_config = { + .str_open = "kfunc_call_race__open_and_load", + .bpf_open_and_load = (void *)kfunc_call_race__open_and_load, + .bpf_destroy = (void *)kfunc_call_race__destroy, +}; + +void serial_test_bpf_mod_race(void) +{ + if (test__start_subtest("ksym (used_btfs UAF)")) + test_bpf_mod_race_config(&ksym_config); + if (test__start_subtest("kfunc (kfunc_btf_tab UAF)")) + test_bpf_mod_race_config(&kfunc_config); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c new file mode 100644 index 000000000000..e3166a81e989 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <network_helpers.h> +#include "test_bpf_nf.skel.h" + +enum { + TEST_XDP, + TEST_TC_BPF, +}; + +void test_bpf_nf_ct(int mode) +{ + struct test_bpf_nf *skel; + int prog_fd, err, retval; + + skel = test_bpf_nf__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load")) + return; + + if (mode == TEST_XDP) + prog_fd = bpf_program__fd(skel->progs.nf_xdp_ct_test); + else + prog_fd = bpf_program__fd(skel->progs.nf_skb_ct_test); + + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL, + (__u32 *)&retval, NULL); + if (!ASSERT_OK(err, "bpf_prog_test_run")) + goto end; + + ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple"); + ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0"); + ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1"); + ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ"); + ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP"); + ASSERT_EQ(skel->bss->test_enonet_netns_id, -ENONET, "Test ENONET for bad but valid netns_id"); + ASSERT_EQ(skel->bss->test_enoent_lookup, -ENOENT, "Test ENOENT for failed lookup"); + ASSERT_EQ(skel->bss->test_eafnosupport, -EAFNOSUPPORT, "Test EAFNOSUPPORT for invalid len__tuple"); +end: + test_bpf_nf__destroy(skel); +} + +void test_bpf_nf(void) +{ + if (test__start_subtest("xdp-ct")) + test_bpf_nf_ct(TEST_XDP); + if (test__start_subtest("tc-bpf-ct")) + test_bpf_nf_ct(TEST_TC_BPF); +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 8ba53acf9eb4..14f9b6136794 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -4560,6 +4560,8 @@ static void do_test_file(unsigned int test_num) has_btf_ext = btf_ext != NULL; btf_ext__free(btf_ext); + /* temporary disable LIBBPF_STRICT_MAP_DEFINITIONS to test legacy maps */ + libbpf_set_strict_mode((__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS); obj = bpf_object__open(test->file); err = libbpf_get_error(obj); if (CHECK(err, "obj: %d", err)) @@ -4684,6 +4686,8 @@ skip: fprintf(stderr, "OK"); done: + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + btf__free(btf); free(func_info); bpf_object__close(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c index d3e8f729c623..38b3c47293da 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c @@ -194,14 +194,14 @@ void serial_test_cgroup_attach_multi(void) attach_opts.flags = BPF_F_ALLOW_OVERRIDE | BPF_F_REPLACE; attach_opts.replace_prog_fd = allow_prog[0]; - if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1, + if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1, BPF_CGROUP_INET_EGRESS, &attach_opts), "fail_prog_replace_override", "unexpected success\n")) goto err; CHECK_FAIL(errno != EINVAL); attach_opts.flags = BPF_F_REPLACE; - if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1, + if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1, BPF_CGROUP_INET_EGRESS, &attach_opts), "fail_prog_replace_no_multi", "unexpected success\n")) goto err; @@ -209,7 +209,7 @@ void serial_test_cgroup_attach_multi(void) attach_opts.flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE; attach_opts.replace_prog_fd = -1; - if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1, + if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1, BPF_CGROUP_INET_EGRESS, &attach_opts), "fail_prog_replace_bad_fd", "unexpected success\n")) goto err; @@ -217,7 +217,7 @@ void serial_test_cgroup_attach_multi(void) /* replacing a program that is not attached to cgroup should fail */ attach_opts.replace_prog_fd = allow_prog[3]; - if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1, + if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1, BPF_CGROUP_INET_EGRESS, &attach_opts), "fail_prog_replace_no_ent", "unexpected success\n")) goto err; @@ -225,14 +225,14 @@ void serial_test_cgroup_attach_multi(void) /* replace 1st from the top program */ attach_opts.replace_prog_fd = allow_prog[0]; - if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1, + if (CHECK(bpf_prog_attach_opts(allow_prog[6], cg1, BPF_CGROUP_INET_EGRESS, &attach_opts), "prog_replace", "errno=%d\n", errno)) goto err; /* replace program with itself */ attach_opts.replace_prog_fd = allow_prog[6]; - if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1, + if (CHECK(bpf_prog_attach_opts(allow_prog[6], cg1, BPF_CGROUP_INET_EGRESS, &attach_opts), "prog_replace", "errno=%d\n", errno)) goto err; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c new file mode 100644 index 000000000000..0b47c3c000c7 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c @@ -0,0 +1,481 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright 2021 Google LLC. + */ + +#include <test_progs.h> +#include <cgroup_helpers.h> +#include <network_helpers.h> + +#include "cgroup_getset_retval_setsockopt.skel.h" +#include "cgroup_getset_retval_getsockopt.skel.h" + +#define SOL_CUSTOM 0xdeadbeef + +static int zero; + +static void test_setsockopt_set(int cgroup_fd, int sock_fd) +{ + struct cgroup_getset_retval_setsockopt *obj; + struct bpf_link *link_set_eunatch = NULL; + + obj = cgroup_getset_retval_setsockopt__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) + return; + + /* Attach setsockopt that sets EUNATCH, assert that + * we actually get that error when we run setsockopt() + */ + link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch, + cgroup_fd); + if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch")) + goto close_bpf_object; + + if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR, + &zero, sizeof(int)), "setsockopt")) + goto close_bpf_object; + if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno")) + goto close_bpf_object; + + if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations")) + goto close_bpf_object; + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) + goto close_bpf_object; + +close_bpf_object: + bpf_link__destroy(link_set_eunatch); + + cgroup_getset_retval_setsockopt__destroy(obj); +} + +static void test_setsockopt_set_and_get(int cgroup_fd, int sock_fd) +{ + struct cgroup_getset_retval_setsockopt *obj; + struct bpf_link *link_set_eunatch = NULL, *link_get_retval = NULL; + + obj = cgroup_getset_retval_setsockopt__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) + return; + + /* Attach setsockopt that sets EUNATCH, and one that gets the + * previously set errno. Assert that we get the same errno back. + */ + link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch, + cgroup_fd); + if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch")) + goto close_bpf_object; + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, + cgroup_fd); + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) + goto close_bpf_object; + + if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR, + &zero, sizeof(int)), "setsockopt")) + goto close_bpf_object; + if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno")) + goto close_bpf_object; + + if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations")) + goto close_bpf_object; + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) + goto close_bpf_object; + if (!ASSERT_EQ(obj->bss->retval_value, -EUNATCH, "retval_value")) + goto close_bpf_object; + +close_bpf_object: + bpf_link__destroy(link_set_eunatch); + bpf_link__destroy(link_get_retval); + + cgroup_getset_retval_setsockopt__destroy(obj); +} + +static void test_setsockopt_default_zero(int cgroup_fd, int sock_fd) +{ + struct cgroup_getset_retval_setsockopt *obj; + struct bpf_link *link_get_retval = NULL; + + obj = cgroup_getset_retval_setsockopt__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) + return; + + /* Attach setsockopt that gets the previously set errno. + * Assert that, without anything setting one, we get 0. + */ + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, + cgroup_fd); + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) + goto close_bpf_object; + + if (!ASSERT_OK(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR, + &zero, sizeof(int)), "setsockopt")) + goto close_bpf_object; + + if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations")) + goto close_bpf_object; + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) + goto close_bpf_object; + if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value")) + goto close_bpf_object; + +close_bpf_object: + bpf_link__destroy(link_get_retval); + + cgroup_getset_retval_setsockopt__destroy(obj); +} + +static void test_setsockopt_default_zero_and_set(int cgroup_fd, int sock_fd) +{ + struct cgroup_getset_retval_setsockopt *obj; + struct bpf_link *link_get_retval = NULL, *link_set_eunatch = NULL; + + obj = cgroup_getset_retval_setsockopt__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) + return; + + /* Attach setsockopt that gets the previously set errno, and then + * one that sets the errno to EUNATCH. Assert that the get does not + * see EUNATCH set later, and does not prevent EUNATCH from being set. + */ + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, + cgroup_fd); + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) + goto close_bpf_object; + link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch, + cgroup_fd); + if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch")) + goto close_bpf_object; + + if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR, + &zero, sizeof(int)), "setsockopt")) + goto close_bpf_object; + if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno")) + goto close_bpf_object; + + if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations")) + goto close_bpf_object; + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) + goto close_bpf_object; + if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value")) + goto close_bpf_object; + +close_bpf_object: + bpf_link__destroy(link_get_retval); + bpf_link__destroy(link_set_eunatch); + + cgroup_getset_retval_setsockopt__destroy(obj); +} + +static void test_setsockopt_override(int cgroup_fd, int sock_fd) +{ + struct cgroup_getset_retval_setsockopt *obj; + struct bpf_link *link_set_eunatch = NULL, *link_set_eisconn = NULL; + struct bpf_link *link_get_retval = NULL; + + obj = cgroup_getset_retval_setsockopt__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) + return; + + /* Attach setsockopt that sets EUNATCH, then one that sets EISCONN, + * and then one that gets the exported errno. Assert both the syscall + * and the helper sees the last set errno. + */ + link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch, + cgroup_fd); + if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch")) + goto close_bpf_object; + link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn, + cgroup_fd); + if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn")) + goto close_bpf_object; + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, + cgroup_fd); + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) + goto close_bpf_object; + + if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR, + &zero, sizeof(int)), "setsockopt")) + goto close_bpf_object; + if (!ASSERT_EQ(errno, EISCONN, "setsockopt-errno")) + goto close_bpf_object; + + if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations")) + goto close_bpf_object; + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) + goto close_bpf_object; + if (!ASSERT_EQ(obj->bss->retval_value, -EISCONN, "retval_value")) + goto close_bpf_object; + +close_bpf_object: + bpf_link__destroy(link_set_eunatch); + bpf_link__destroy(link_set_eisconn); + bpf_link__destroy(link_get_retval); + + cgroup_getset_retval_setsockopt__destroy(obj); +} + +static void test_setsockopt_legacy_eperm(int cgroup_fd, int sock_fd) +{ + struct cgroup_getset_retval_setsockopt *obj; + struct bpf_link *link_legacy_eperm = NULL, *link_get_retval = NULL; + + obj = cgroup_getset_retval_setsockopt__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) + return; + + /* Attach setsockopt that return a reject without setting errno + * (legacy reject), and one that gets the errno. Assert that for + * backward compatibility the syscall result in EPERM, and this + * is also visible to the helper. + */ + link_legacy_eperm = bpf_program__attach_cgroup(obj->progs.legacy_eperm, + cgroup_fd); + if (!ASSERT_OK_PTR(link_legacy_eperm, "cg-attach-legacy_eperm")) + goto close_bpf_object; + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, + cgroup_fd); + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) + goto close_bpf_object; + + if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR, + &zero, sizeof(int)), "setsockopt")) + goto close_bpf_object; + if (!ASSERT_EQ(errno, EPERM, "setsockopt-errno")) + goto close_bpf_object; + + if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations")) + goto close_bpf_object; + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) + goto close_bpf_object; + if (!ASSERT_EQ(obj->bss->retval_value, -EPERM, "retval_value")) + goto close_bpf_object; + +close_bpf_object: + bpf_link__destroy(link_legacy_eperm); + bpf_link__destroy(link_get_retval); + + cgroup_getset_retval_setsockopt__destroy(obj); +} + +static void test_setsockopt_legacy_no_override(int cgroup_fd, int sock_fd) +{ + struct cgroup_getset_retval_setsockopt *obj; + struct bpf_link *link_set_eunatch = NULL, *link_legacy_eperm = NULL; + struct bpf_link *link_get_retval = NULL; + + obj = cgroup_getset_retval_setsockopt__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) + return; + + /* Attach setsockopt that sets EUNATCH, then one that return a reject + * without setting errno, and then one that gets the exported errno. + * Assert both the syscall and the helper's errno are unaffected by + * the second prog (i.e. legacy rejects does not override the errno + * to EPERM). + */ + link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch, + cgroup_fd); + if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch")) + goto close_bpf_object; + link_legacy_eperm = bpf_program__attach_cgroup(obj->progs.legacy_eperm, + cgroup_fd); + if (!ASSERT_OK_PTR(link_legacy_eperm, "cg-attach-legacy_eperm")) + goto close_bpf_object; + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, + cgroup_fd); + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) + goto close_bpf_object; + + if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR, + &zero, sizeof(int)), "setsockopt")) + goto close_bpf_object; + if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno")) + goto close_bpf_object; + + if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations")) + goto close_bpf_object; + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) + goto close_bpf_object; + if (!ASSERT_EQ(obj->bss->retval_value, -EUNATCH, "retval_value")) + goto close_bpf_object; + +close_bpf_object: + bpf_link__destroy(link_set_eunatch); + bpf_link__destroy(link_legacy_eperm); + bpf_link__destroy(link_get_retval); + + cgroup_getset_retval_setsockopt__destroy(obj); +} + +static void test_getsockopt_get(int cgroup_fd, int sock_fd) +{ + struct cgroup_getset_retval_getsockopt *obj; + struct bpf_link *link_get_retval = NULL; + int buf; + socklen_t optlen = sizeof(buf); + + obj = cgroup_getset_retval_getsockopt__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) + return; + + /* Attach getsockopt that gets previously set errno. Assert that the + * error from kernel is in both ctx_retval_value and retval_value. + */ + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, + cgroup_fd); + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) + goto close_bpf_object; + + if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0, + &buf, &optlen), "getsockopt")) + goto close_bpf_object; + if (!ASSERT_EQ(errno, EOPNOTSUPP, "getsockopt-errno")) + goto close_bpf_object; + + if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations")) + goto close_bpf_object; + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) + goto close_bpf_object; + if (!ASSERT_EQ(obj->bss->retval_value, -EOPNOTSUPP, "retval_value")) + goto close_bpf_object; + if (!ASSERT_EQ(obj->bss->ctx_retval_value, -EOPNOTSUPP, "ctx_retval_value")) + goto close_bpf_object; + +close_bpf_object: + bpf_link__destroy(link_get_retval); + + cgroup_getset_retval_getsockopt__destroy(obj); +} + +static void test_getsockopt_override(int cgroup_fd, int sock_fd) +{ + struct cgroup_getset_retval_getsockopt *obj; + struct bpf_link *link_set_eisconn = NULL; + int buf; + socklen_t optlen = sizeof(buf); + + obj = cgroup_getset_retval_getsockopt__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) + return; + + /* Attach getsockopt that sets retval to -EISCONN. Assert that this + * overrides the value from kernel. + */ + link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn, + cgroup_fd); + if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn")) + goto close_bpf_object; + + if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0, + &buf, &optlen), "getsockopt")) + goto close_bpf_object; + if (!ASSERT_EQ(errno, EISCONN, "getsockopt-errno")) + goto close_bpf_object; + + if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations")) + goto close_bpf_object; + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) + goto close_bpf_object; + +close_bpf_object: + bpf_link__destroy(link_set_eisconn); + + cgroup_getset_retval_getsockopt__destroy(obj); +} + +static void test_getsockopt_retval_sync(int cgroup_fd, int sock_fd) +{ + struct cgroup_getset_retval_getsockopt *obj; + struct bpf_link *link_set_eisconn = NULL, *link_clear_retval = NULL; + struct bpf_link *link_get_retval = NULL; + int buf; + socklen_t optlen = sizeof(buf); + + obj = cgroup_getset_retval_getsockopt__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) + return; + + /* Attach getsockopt that sets retval to -EISCONN, and one that clears + * ctx retval. Assert that the clearing ctx retval is synced to helper + * and clears any errors both from kernel and BPF.. + */ + link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn, + cgroup_fd); + if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn")) + goto close_bpf_object; + link_clear_retval = bpf_program__attach_cgroup(obj->progs.clear_retval, + cgroup_fd); + if (!ASSERT_OK_PTR(link_clear_retval, "cg-attach-clear_retval")) + goto close_bpf_object; + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, + cgroup_fd); + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) + goto close_bpf_object; + + if (!ASSERT_OK(getsockopt(sock_fd, SOL_CUSTOM, 0, + &buf, &optlen), "getsockopt")) + goto close_bpf_object; + + if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations")) + goto close_bpf_object; + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) + goto close_bpf_object; + if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value")) + goto close_bpf_object; + if (!ASSERT_EQ(obj->bss->ctx_retval_value, 0, "ctx_retval_value")) + goto close_bpf_object; + +close_bpf_object: + bpf_link__destroy(link_set_eisconn); + bpf_link__destroy(link_clear_retval); + bpf_link__destroy(link_get_retval); + + cgroup_getset_retval_getsockopt__destroy(obj); +} + +void test_cgroup_getset_retval(void) +{ + int cgroup_fd = -1; + int sock_fd = -1; + + cgroup_fd = test__join_cgroup("/cgroup_getset_retval"); + if (!ASSERT_GE(cgroup_fd, 0, "cg-create")) + goto close_fd; + + sock_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 0, 0); + if (!ASSERT_GE(sock_fd, 0, "start-server")) + goto close_fd; + + if (test__start_subtest("setsockopt-set")) + test_setsockopt_set(cgroup_fd, sock_fd); + + if (test__start_subtest("setsockopt-set_and_get")) + test_setsockopt_set_and_get(cgroup_fd, sock_fd); + + if (test__start_subtest("setsockopt-default_zero")) + test_setsockopt_default_zero(cgroup_fd, sock_fd); + + if (test__start_subtest("setsockopt-default_zero_and_set")) + test_setsockopt_default_zero_and_set(cgroup_fd, sock_fd); + + if (test__start_subtest("setsockopt-override")) + test_setsockopt_override(cgroup_fd, sock_fd); + + if (test__start_subtest("setsockopt-legacy_eperm")) + test_setsockopt_legacy_eperm(cgroup_fd, sock_fd); + + if (test__start_subtest("setsockopt-legacy_no_override")) + test_setsockopt_legacy_no_override(cgroup_fd, sock_fd); + + if (test__start_subtest("getsockopt-get")) + test_getsockopt_get(cgroup_fd, sock_fd); + + if (test__start_subtest("getsockopt-override")) + test_getsockopt_override(cgroup_fd, sock_fd); + + if (test__start_subtest("getsockopt-retval_sync")) + test_getsockopt_retval_sync(cgroup_fd, sock_fd); + +close_fd: + close(cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index ac54e3f91d42..dfafd62df50b 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -457,7 +457,7 @@ static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array) if (map_fd < 0) return -1; - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { snprintf(prog_name, sizeof(prog_name), "flow_dissector_%d", i); prog = bpf_object__find_program_by_name(obj, prog_name); diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c index 9da131b32e13..917165e04427 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_data.c +++ b/tools/testing/selftests/bpf/prog_tests/global_data.c @@ -121,7 +121,7 @@ static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration) if (CHECK_FAIL(map_fd < 0)) return; - buff = malloc(bpf_map__def(map)->value_size); + buff = malloc(bpf_map__value_size(map)); if (buff) err = bpf_map_update_elem(map_fd, &zero, buff, 0); free(buff); diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c index 1db86eab101b..57331c606964 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c +++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c @@ -20,7 +20,7 @@ void test_global_data_init(void) if (CHECK_FAIL(!map || !bpf_map__is_internal(map))) goto out; - sz = bpf_map__def(map)->value_size; + sz = bpf_map__value_size(map); newval = malloc(sz); if (CHECK_FAIL(!newval)) goto out; diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c index 7d7445ccc141..b39a4f09aefd 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -27,6 +27,12 @@ static void test_main(void) ASSERT_OK(err, "bpf_prog_test_run(test2)"); ASSERT_EQ(retval, 3, "test2-retval"); + prog_fd = skel->progs.kfunc_call_test_ref_btf_id.prog_fd; + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, (__u32 *)&retval, NULL); + ASSERT_OK(err, "bpf_prog_test_run(test_ref_btf_id)"); + ASSERT_EQ(retval, 0, "test_ref_btf_id-retval"); + kfunc_call_test_lskel__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 85db0f4cdd95..b97a8f236b3a 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -8,6 +8,7 @@ #include "test_sockmap_update.skel.h" #include "test_sockmap_invalid_update.skel.h" #include "test_sockmap_skb_verdict_attach.skel.h" +#include "test_sockmap_progs_query.skel.h" #include "bpf_iter_sockmap.skel.h" #define TCP_REPAIR 19 /* TCP sock is under repair right now */ @@ -315,6 +316,63 @@ out: test_sockmap_skb_verdict_attach__destroy(skel); } +static __u32 query_prog_id(int prog_fd) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + int err; + + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd") || + !ASSERT_EQ(info_len, sizeof(info), "bpf_obj_get_info_by_fd")) + return 0; + + return info.id; +} + +static void test_sockmap_progs_query(enum bpf_attach_type attach_type) +{ + struct test_sockmap_progs_query *skel; + int err, map_fd, verdict_fd; + __u32 attach_flags = 0; + __u32 prog_ids[3] = {}; + __u32 prog_cnt = 3; + + skel = test_sockmap_progs_query__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_sockmap_progs_query__open_and_load")) + return; + + map_fd = bpf_map__fd(skel->maps.sock_map); + + if (attach_type == BPF_SK_MSG_VERDICT) + verdict_fd = bpf_program__fd(skel->progs.prog_skmsg_verdict); + else + verdict_fd = bpf_program__fd(skel->progs.prog_skb_verdict); + + err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */, + &attach_flags, prog_ids, &prog_cnt); + ASSERT_OK(err, "bpf_prog_query failed"); + ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query"); + ASSERT_EQ(prog_cnt, 0, "wrong program count on query"); + + err = bpf_prog_attach(verdict_fd, map_fd, attach_type, 0); + if (!ASSERT_OK(err, "bpf_prog_attach failed")) + goto out; + + prog_cnt = 1; + err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */, + &attach_flags, prog_ids, &prog_cnt); + ASSERT_OK(err, "bpf_prog_query failed"); + ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query"); + ASSERT_EQ(prog_cnt, 1, "wrong program count on query"); + ASSERT_EQ(prog_ids[0], query_prog_id(verdict_fd), + "wrong prog_ids on query"); + + bpf_prog_detach2(verdict_fd, map_fd, attach_type); +out: + test_sockmap_progs_query__destroy(skel); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -341,4 +399,12 @@ void test_sockmap_basic(void) test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT, BPF_SK_SKB_VERDICT); } + if (test__start_subtest("sockmap msg_verdict progs query")) + test_sockmap_progs_query(BPF_SK_MSG_VERDICT); + if (test__start_subtest("sockmap stream_parser progs query")) + test_sockmap_progs_query(BPF_SK_SKB_STREAM_PARSER); + if (test__start_subtest("sockmap stream_verdict progs query")) + test_sockmap_progs_query(BPF_SK_SKB_STREAM_VERDICT); + if (test__start_subtest("sockmap skb_verdict progs query")) + test_sockmap_progs_query(BPF_SK_SKB_VERDICT); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 7e21bfab6358..2cf0c7a3fe23 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -1413,14 +1413,12 @@ close_srv1: static void test_ops_cleanup(const struct bpf_map *map) { - const struct bpf_map_def *def; int err, mapfd; u32 key; - def = bpf_map__def(map); mapfd = bpf_map__fd(map); - for (key = 0; key < def->max_entries; key++) { + for (key = 0; key < bpf_map__max_entries(map); key++) { err = bpf_map_delete_elem(mapfd, &key); if (err && errno != EINVAL && errno != ENOENT) FAIL_ERRNO("map_delete: expected EINVAL/ENOENT"); @@ -1443,13 +1441,13 @@ static const char *family_str(sa_family_t family) static const char *map_type_str(const struct bpf_map *map) { - const struct bpf_map_def *def; + int type; - def = bpf_map__def(map); - if (IS_ERR(def)) + if (!map) return "invalid"; + type = bpf_map__type(map); - switch (def->type) { + switch (type) { case BPF_MAP_TYPE_SOCKMAP: return "sockmap"; case BPF_MAP_TYPE_SOCKHASH: diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index 4b937e5dbaca..30a99d2ed5c6 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c @@ -173,11 +173,11 @@ static int getsetsockopt(void) } memset(&buf, 0, sizeof(buf)); - buf.zc.address = 12345; /* rejected by BPF */ + buf.zc.address = 12345; /* Not page aligned. Rejected by tcp_zerocopy_receive() */ optlen = sizeof(buf.zc); errno = 0; err = getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen); - if (errno != EPERM) { + if (errno != EINVAL) { log_err("Unexpected getsockopt(TCP_ZEROCOPY_RECEIVE) err=%d errno=%d", err, errno); goto err; diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index 5dc0f425bd11..796f231582f8 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -37,7 +37,7 @@ static void test_tailcall_1(void) if (CHECK_FAIL(map_fd < 0)) goto out; - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); prog = bpf_object__find_program_by_name(obj, prog_name); @@ -53,7 +53,7 @@ static void test_tailcall_1(void) goto out; } - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, &duration, &retval, NULL); CHECK(err || retval != i, "tailcall", @@ -69,7 +69,7 @@ static void test_tailcall_1(void) CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n", err, errno, retval); - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); prog = bpf_object__find_program_by_name(obj, prog_name); @@ -90,8 +90,8 @@ static void test_tailcall_1(void) CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n", err, errno, retval); - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { - j = bpf_map__def(prog_array)->max_entries - 1 - i; + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { + j = bpf_map__max_entries(prog_array) - 1 - i; snprintf(prog_name, sizeof(prog_name), "classifier_%d", j); prog = bpf_object__find_program_by_name(obj, prog_name); @@ -107,8 +107,8 @@ static void test_tailcall_1(void) goto out; } - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { - j = bpf_map__def(prog_array)->max_entries - 1 - i; + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { + j = bpf_map__max_entries(prog_array) - 1 - i; err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, &duration, &retval, NULL); @@ -125,7 +125,7 @@ static void test_tailcall_1(void) CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n", err, errno, retval); - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { err = bpf_map_delete_elem(map_fd, &i); if (CHECK_FAIL(err >= 0 || errno != ENOENT)) goto out; @@ -175,7 +175,7 @@ static void test_tailcall_2(void) if (CHECK_FAIL(map_fd < 0)) goto out; - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); prog = bpf_object__find_program_by_name(obj, prog_name); @@ -353,7 +353,7 @@ static void test_tailcall_4(void) if (CHECK_FAIL(map_fd < 0)) return; - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); prog = bpf_object__find_program_by_name(obj, prog_name); @@ -369,7 +369,7 @@ static void test_tailcall_4(void) goto out; } - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY); if (CHECK_FAIL(err)) goto out; @@ -380,7 +380,7 @@ static void test_tailcall_4(void) "err %d errno %d retval %d\n", err, errno, retval); } - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY); if (CHECK_FAIL(err)) goto out; @@ -441,7 +441,7 @@ static void test_tailcall_5(void) if (CHECK_FAIL(map_fd < 0)) return; - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); prog = bpf_object__find_program_by_name(obj, prog_name); @@ -457,7 +457,7 @@ static void test_tailcall_5(void) goto out; } - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY); if (CHECK_FAIL(err)) goto out; @@ -468,7 +468,7 @@ static void test_tailcall_5(void) "err %d errno %d retval %d\n", err, errno, retval); } - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY); if (CHECK_FAIL(err)) goto out; @@ -520,7 +520,7 @@ static void test_tailcall_bpf2bpf_1(void) goto out; /* nop -> jmp */ - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); prog = bpf_object__find_program_by_name(obj, prog_name); @@ -681,7 +681,7 @@ static void test_tailcall_bpf2bpf_3(void) if (CHECK_FAIL(map_fd < 0)) goto out; - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); prog = bpf_object__find_program_by_name(obj, prog_name); @@ -778,7 +778,7 @@ static void test_tailcall_bpf2bpf_4(bool noise) if (CHECK_FAIL(map_fd < 0)) goto out; - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); prog = bpf_object__find_program_by_name(obj, prog_name); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c new file mode 100644 index 000000000000..31c188666e81 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <network_helpers.h> + +void test_xdp_update_frags(void) +{ + const char *file = "./test_xdp_update_frags.o"; + __u32 duration, retval, size; + struct bpf_program *prog; + struct bpf_object *obj; + int err, prog_fd; + __u32 *offset; + __u8 *buf; + + obj = bpf_object__open(file); + if (libbpf_get_error(obj)) + return; + + prog = bpf_object__next_program(obj, NULL); + if (bpf_object__load(obj)) + return; + + prog_fd = bpf_program__fd(prog); + + buf = malloc(128); + if (!ASSERT_OK_PTR(buf, "alloc buf 128b")) + goto out; + + memset(buf, 0, 128); + offset = (__u32 *)buf; + *offset = 16; + buf[*offset] = 0xaa; /* marker at offset 16 (head) */ + buf[*offset + 15] = 0xaa; /* marker at offset 31 (head) */ + + err = bpf_prog_test_run(prog_fd, 1, buf, 128, + buf, &size, &retval, &duration); + + /* test_xdp_update_frags: buf[16,31]: 0xaa -> 0xbb */ + ASSERT_OK(err, "xdp_update_frag"); + ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval"); + ASSERT_EQ(buf[16], 0xbb, "xdp_update_frag buf[16]"); + ASSERT_EQ(buf[31], 0xbb, "xdp_update_frag buf[31]"); + + free(buf); + + buf = malloc(9000); + if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb")) + goto out; + + memset(buf, 0, 9000); + offset = (__u32 *)buf; + *offset = 5000; + buf[*offset] = 0xaa; /* marker at offset 5000 (frag0) */ + buf[*offset + 15] = 0xaa; /* marker at offset 5015 (frag0) */ + + err = bpf_prog_test_run(prog_fd, 1, buf, 9000, + buf, &size, &retval, &duration); + + /* test_xdp_update_frags: buf[5000,5015]: 0xaa -> 0xbb */ + ASSERT_OK(err, "xdp_update_frag"); + ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval"); + ASSERT_EQ(buf[5000], 0xbb, "xdp_update_frag buf[5000]"); + ASSERT_EQ(buf[5015], 0xbb, "xdp_update_frag buf[5015]"); + + memset(buf, 0, 9000); + offset = (__u32 *)buf; + *offset = 3510; + buf[*offset] = 0xaa; /* marker at offset 3510 (head) */ + buf[*offset + 15] = 0xaa; /* marker at offset 3525 (frag0) */ + + err = bpf_prog_test_run(prog_fd, 1, buf, 9000, + buf, &size, &retval, &duration); + + /* test_xdp_update_frags: buf[3510,3525]: 0xaa -> 0xbb */ + ASSERT_OK(err, "xdp_update_frag"); + ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval"); + ASSERT_EQ(buf[3510], 0xbb, "xdp_update_frag buf[3510]"); + ASSERT_EQ(buf[3525], 0xbb, "xdp_update_frag buf[3525]"); + + memset(buf, 0, 9000); + offset = (__u32 *)buf; + *offset = 7606; + buf[*offset] = 0xaa; /* marker at offset 7606 (frag0) */ + buf[*offset + 15] = 0xaa; /* marker at offset 7621 (frag1) */ + + err = bpf_prog_test_run(prog_fd, 1, buf, 9000, + buf, &size, &retval, &duration); + + /* test_xdp_update_frags: buf[7606,7621]: 0xaa -> 0xbb */ + ASSERT_OK(err, "xdp_update_frag"); + ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval"); + ASSERT_EQ(buf[7606], 0xbb, "xdp_update_frag buf[7606]"); + ASSERT_EQ(buf[7621], 0xbb, "xdp_update_frag buf[7621]"); + + free(buf); +out: + bpf_object__close(obj); +} + +void test_xdp_adjust_frags(void) +{ + if (test__start_subtest("xdp_adjust_frags")) + test_xdp_update_frags(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index 3f5a17c38be5..ccc9e63254a8 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -11,22 +11,21 @@ static void test_xdp_adjust_tail_shrink(void) char buf[128]; err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); - if (CHECK_FAIL(err)) + if (ASSERT_OK(err, "test_xdp_adjust_tail_shrink")) return; err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - - CHECK(err || retval != XDP_DROP, - "ipv4", "err %d errno %d retval %d size %d\n", - err, errno, retval, size); + ASSERT_OK(err, "ipv4"); + ASSERT_EQ(retval, XDP_DROP, "ipv4 retval"); expect_sz = sizeof(pkt_v6) - 20; /* Test shrink with 20 bytes */ err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); - CHECK(err || retval != XDP_TX || size != expect_sz, - "ipv6", "err %d errno %d retval %d size %d expect-size %d\n", - err, errno, retval, size, expect_sz); + ASSERT_OK(err, "ipv6"); + ASSERT_EQ(retval, XDP_TX, "ipv6 retval"); + ASSERT_EQ(size, expect_sz, "ipv6 size"); + bpf_object__close(obj); } @@ -39,21 +38,20 @@ static void test_xdp_adjust_tail_grow(void) int err, prog_fd; err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); - if (CHECK_FAIL(err)) + if (ASSERT_OK(err, "test_xdp_adjust_tail_grow")) return; err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - CHECK(err || retval != XDP_DROP, - "ipv4", "err %d errno %d retval %d size %d\n", - err, errno, retval, size); + ASSERT_OK(err, "ipv4"); + ASSERT_EQ(retval, XDP_DROP, "ipv4 retval"); expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */ err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6) /* 74 */, buf, &size, &retval, &duration); - CHECK(err || retval != XDP_TX || size != expect_sz, - "ipv6", "err %d errno %d retval %d size %d expect-size %d\n", - err, errno, retval, size, expect_sz); + ASSERT_OK(err, "ipv6"); + ASSERT_EQ(retval, XDP_TX, "ipv6 retval"); + ASSERT_EQ(size, expect_sz, "ipv6 size"); bpf_object__close(obj); } @@ -76,7 +74,7 @@ static void test_xdp_adjust_tail_grow2(void) }; err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd); - if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno)) + if (ASSERT_OK(err, "test_xdp_adjust_tail_grow")) return; /* Test case-64 */ @@ -86,21 +84,17 @@ static void test_xdp_adjust_tail_grow2(void) /* Kernel side alloc packet memory area that is zero init */ err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(errno != ENOSPC /* Due limit copy_size in bpf_test_finish */ - || tattr.retval != XDP_TX - || tattr.data_size_out != 192, /* Expected grow size */ - "case-64", - "err %d errno %d retval %d size %d\n", - err, errno, tattr.retval, tattr.data_size_out); + ASSERT_EQ(errno, ENOSPC, "case-64 errno"); /* Due limit copy_size in bpf_test_finish */ + ASSERT_EQ(tattr.retval, XDP_TX, "case-64 retval"); + ASSERT_EQ(tattr.data_size_out, 192, "case-64 data_size_out"); /* Expected grow size */ /* Extra checks for data contents */ - CHECK_ATTR(tattr.data_size_out != 192 - || buf[0] != 1 || buf[63] != 1 /* 0-63 memset to 1 */ - || buf[64] != 0 || buf[127] != 0 /* 64-127 memset to 0 */ - || buf[128] != 1 || buf[191] != 1, /*128-191 memset to 1 */ - "case-64-data", - "err %d errno %d retval %d size %d\n", - err, errno, tattr.retval, tattr.data_size_out); + ASSERT_EQ(buf[0], 1, "case-64-data buf[0]"); /* 0-63 memset to 1 */ + ASSERT_EQ(buf[63], 1, "case-64-data buf[63]"); + ASSERT_EQ(buf[64], 0, "case-64-data buf[64]"); /* 64-127 memset to 0 */ + ASSERT_EQ(buf[127], 0, "case-64-data buf[127]"); + ASSERT_EQ(buf[128], 1, "case-64-data buf[128]"); /* 128-191 memset to 1 */ + ASSERT_EQ(buf[191], 1, "case-64-data buf[191]"); /* Test case-128 */ memset(buf, 2, sizeof(buf)); @@ -109,24 +103,139 @@ static void test_xdp_adjust_tail_grow2(void) err = bpf_prog_test_run_xattr(&tattr); max_grow = 4096 - XDP_PACKET_HEADROOM - tailroom; /* 3520 */ - CHECK_ATTR(err - || tattr.retval != XDP_TX - || tattr.data_size_out != max_grow,/* Expect max grow size */ - "case-128", - "err %d errno %d retval %d size %d expect-size %d\n", - err, errno, tattr.retval, tattr.data_size_out, max_grow); + ASSERT_OK(err, "case-128"); + ASSERT_EQ(tattr.retval, XDP_TX, "case-128 retval"); + ASSERT_EQ(tattr.data_size_out, max_grow, "case-128 data_size_out"); /* Expect max grow */ /* Extra checks for data content: Count grow size, will contain zeros */ for (i = 0, cnt = 0; i < sizeof(buf); i++) { if (buf[i] == 0) cnt++; } - CHECK_ATTR((cnt != (max_grow - tattr.data_size_in)) /* Grow increase */ - || tattr.data_size_out != max_grow, /* Total grow size */ - "case-128-data", - "err %d errno %d retval %d size %d grow-size %d\n", - err, errno, tattr.retval, tattr.data_size_out, cnt); + ASSERT_EQ(cnt, max_grow - tattr.data_size_in, "case-128-data cnt"); /* Grow increase */ + ASSERT_EQ(tattr.data_size_out, max_grow, "case-128-data data_size_out"); /* Total grow */ + + bpf_object__close(obj); +} + +void test_xdp_adjust_frags_tail_shrink(void) +{ + const char *file = "./test_xdp_adjust_tail_shrink.o"; + __u32 duration, retval, size, exp_size; + struct bpf_program *prog; + struct bpf_object *obj; + int err, prog_fd; + __u8 *buf; + + /* For the individual test cases, the first byte in the packet + * indicates which test will be run. + */ + obj = bpf_object__open(file); + if (libbpf_get_error(obj)) + return; + + prog = bpf_object__next_program(obj, NULL); + if (bpf_object__load(obj)) + return; + + prog_fd = bpf_program__fd(prog); + + buf = malloc(9000); + if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb")) + goto out; + + memset(buf, 0, 9000); + + /* Test case removing 10 bytes from last frag, NOT freeing it */ + exp_size = 8990; /* 9000 - 10 */ + err = bpf_prog_test_run(prog_fd, 1, buf, 9000, + buf, &size, &retval, &duration); + + ASSERT_OK(err, "9Kb-10b"); + ASSERT_EQ(retval, XDP_TX, "9Kb-10b retval"); + ASSERT_EQ(size, exp_size, "9Kb-10b size"); + + /* Test case removing one of two pages, assuming 4K pages */ + buf[0] = 1; + exp_size = 4900; /* 9000 - 4100 */ + err = bpf_prog_test_run(prog_fd, 1, buf, 9000, + buf, &size, &retval, &duration); + + ASSERT_OK(err, "9Kb-4Kb"); + ASSERT_EQ(retval, XDP_TX, "9Kb-4Kb retval"); + ASSERT_EQ(size, exp_size, "9Kb-4Kb size"); + + /* Test case removing two pages resulting in a linear xdp_buff */ + buf[0] = 2; + exp_size = 800; /* 9000 - 8200 */ + err = bpf_prog_test_run(prog_fd, 1, buf, 9000, + buf, &size, &retval, &duration); + + ASSERT_OK(err, "9Kb-9Kb"); + ASSERT_EQ(retval, XDP_TX, "9Kb-9Kb retval"); + ASSERT_EQ(size, exp_size, "9Kb-9Kb size"); + + free(buf); +out: + bpf_object__close(obj); +} + +void test_xdp_adjust_frags_tail_grow(void) +{ + const char *file = "./test_xdp_adjust_tail_grow.o"; + __u32 duration, retval, size, exp_size; + struct bpf_program *prog; + struct bpf_object *obj; + int err, i, prog_fd; + __u8 *buf; + + obj = bpf_object__open(file); + if (libbpf_get_error(obj)) + return; + + prog = bpf_object__next_program(obj, NULL); + if (bpf_object__load(obj)) + return; + + prog_fd = bpf_program__fd(prog); + + buf = malloc(16384); + if (!ASSERT_OK_PTR(buf, "alloc buf 16Kb")) + goto out; + + /* Test case add 10 bytes to last frag */ + memset(buf, 1, 16384); + size = 9000; + exp_size = size + 10; + err = bpf_prog_test_run(prog_fd, 1, buf, size, + buf, &size, &retval, &duration); + + ASSERT_OK(err, "9Kb+10b"); + ASSERT_EQ(retval, XDP_TX, "9Kb+10b retval"); + ASSERT_EQ(size, exp_size, "9Kb+10b size"); + + for (i = 0; i < 9000; i++) + ASSERT_EQ(buf[i], 1, "9Kb+10b-old"); + + for (i = 9000; i < 9010; i++) + ASSERT_EQ(buf[i], 0, "9Kb+10b-new"); + + for (i = 9010; i < 16384; i++) + ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched"); + + /* Test a too large grow */ + memset(buf, 1, 16384); + size = 9001; + exp_size = size; + err = bpf_prog_test_run(prog_fd, 1, buf, size, + buf, &size, &retval, &duration); + + ASSERT_OK(err, "9Kb+10b"); + ASSERT_EQ(retval, XDP_DROP, "9Kb+10b retval"); + ASSERT_EQ(size, exp_size, "9Kb+10b size"); + free(buf); +out: bpf_object__close(obj); } @@ -138,4 +247,8 @@ void test_xdp_adjust_tail(void) test_xdp_adjust_tail_grow(); if (test__start_subtest("xdp_adjust_tail_grow2")) test_xdp_adjust_tail_grow2(); + if (test__start_subtest("xdp_adjust_frags_tail_shrink")) + test_xdp_adjust_frags_tail_shrink(); + if (test__start_subtest("xdp_adjust_frags_tail_grow")) + test_xdp_adjust_frags_tail_grow(); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c index c98a897ad692..9c395ea680c6 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c @@ -10,40 +10,97 @@ struct meta { int pkt_len; }; +struct test_ctx_s { + bool passed; + int pkt_size; +}; + +struct test_ctx_s test_ctx; + static void on_sample(void *ctx, int cpu, void *data, __u32 size) { - int duration = 0; struct meta *meta = (struct meta *)data; struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta); + unsigned char *raw_pkt = data + sizeof(*meta); + struct test_ctx_s *tst_ctx = ctx; + + ASSERT_GE(size, sizeof(pkt_v4) + sizeof(*meta), "check_size"); + ASSERT_EQ(meta->ifindex, if_nametoindex("lo"), "check_meta_ifindex"); + ASSERT_EQ(meta->pkt_len, tst_ctx->pkt_size, "check_meta_pkt_len"); + ASSERT_EQ(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)), 0, + "check_packet_content"); + + if (meta->pkt_len > sizeof(pkt_v4)) { + for (int i = 0; i < meta->pkt_len - sizeof(pkt_v4); i++) + ASSERT_EQ(raw_pkt[i + sizeof(pkt_v4)], (unsigned char)i, + "check_packet_content"); + } + + tst_ctx->passed = true; +} - if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta), - "check_size", "size %u < %zu\n", - size, sizeof(pkt_v4) + sizeof(*meta))) - return; +#define BUF_SZ 9000 - if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex", - "meta->ifindex = %d\n", meta->ifindex)) +static void run_xdp_bpf2bpf_pkt_size(int pkt_fd, struct perf_buffer *pb, + struct test_xdp_bpf2bpf *ftrace_skel, + int pkt_size) +{ + __u32 duration = 0, retval, size; + __u8 *buf, *buf_in; + int err; + + if (!ASSERT_LE(pkt_size, BUF_SZ, "pkt_size") || + !ASSERT_GE(pkt_size, sizeof(pkt_v4), "pkt_size")) return; - if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len", - "meta->pkt_len = %zd\n", sizeof(pkt_v4))) + buf_in = malloc(BUF_SZ); + if (!ASSERT_OK_PTR(buf_in, "buf_in malloc()")) return; - if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)), - "check_packet_content", "content not the same\n")) + buf = malloc(BUF_SZ); + if (!ASSERT_OK_PTR(buf, "buf malloc()")) { + free(buf_in); return; + } + + test_ctx.passed = false; + test_ctx.pkt_size = pkt_size; + + memcpy(buf_in, &pkt_v4, sizeof(pkt_v4)); + if (pkt_size > sizeof(pkt_v4)) { + for (int i = 0; i < (pkt_size - sizeof(pkt_v4)); i++) + buf_in[i + sizeof(pkt_v4)] = i; + } + + /* Run test program */ + err = bpf_prog_test_run(pkt_fd, 1, buf_in, pkt_size, + buf, &size, &retval, &duration); + + ASSERT_OK(err, "ipv4"); + ASSERT_EQ(retval, XDP_PASS, "ipv4 retval"); + ASSERT_EQ(size, pkt_size, "ipv4 size"); + + /* Make sure bpf_xdp_output() was triggered and it sent the expected + * data to the perf ring buffer. + */ + err = perf_buffer__poll(pb, 100); - *(bool *)ctx = true; + ASSERT_GE(err, 0, "perf_buffer__poll"); + ASSERT_TRUE(test_ctx.passed, "test passed"); + /* Verify test results */ + ASSERT_EQ(ftrace_skel->bss->test_result_fentry, if_nametoindex("lo"), + "fentry result"); + ASSERT_EQ(ftrace_skel->bss->test_result_fexit, XDP_PASS, "fexit result"); + + free(buf); + free(buf_in); } void test_xdp_bpf2bpf(void) { - __u32 duration = 0, retval, size; - char buf[128]; int err, pkt_fd, map_fd; - bool passed = false; - struct iphdr iph; - struct iptnl_info value4 = {.family = AF_INET}; + int pkt_sizes[] = {sizeof(pkt_v4), 1024, 4100, 8200}; + struct iptnl_info value4 = {.family = AF_INET6}; struct test_xdp *pkt_skel = NULL; struct test_xdp_bpf2bpf *ftrace_skel = NULL; struct vip key4 = {.protocol = 6, .family = AF_INET}; @@ -52,7 +109,7 @@ void test_xdp_bpf2bpf(void) /* Load XDP program to introspect */ pkt_skel = test_xdp__open_and_load(); - if (CHECK(!pkt_skel, "pkt_skel_load", "test_xdp skeleton failed\n")) + if (!ASSERT_OK_PTR(pkt_skel, "test_xdp__open_and_load")) return; pkt_fd = bpf_program__fd(pkt_skel->progs._xdp_tx_iptunnel); @@ -62,7 +119,7 @@ void test_xdp_bpf2bpf(void) /* Load trace program */ ftrace_skel = test_xdp_bpf2bpf__open(); - if (CHECK(!ftrace_skel, "__open", "ftrace skeleton failed\n")) + if (!ASSERT_OK_PTR(ftrace_skel, "test_xdp_bpf2bpf__open")) goto out; /* Demonstrate the bpf_program__set_attach_target() API rather than @@ -77,50 +134,24 @@ void test_xdp_bpf2bpf(void) bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel"); err = test_xdp_bpf2bpf__load(ftrace_skel); - if (CHECK(err, "__load", "ftrace skeleton failed\n")) + if (!ASSERT_OK(err, "test_xdp_bpf2bpf__load")) goto out; err = test_xdp_bpf2bpf__attach(ftrace_skel); - if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err)) + if (!ASSERT_OK(err, "test_xdp_bpf2bpf__attach")) goto out; /* Set up perf buffer */ - pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 1, - on_sample, NULL, &passed, NULL); + pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 8, + on_sample, NULL, &test_ctx, NULL); if (!ASSERT_OK_PTR(pb, "perf_buf__new")) goto out; - /* Run test program */ - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4), - buf, &size, &retval, &duration); - memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph)); - if (CHECK(err || retval != XDP_TX || size != 74 || - iph.protocol != IPPROTO_IPIP, "ipv4", - "err %d errno %d retval %d size %d\n", - err, errno, retval, size)) - goto out; - - /* Make sure bpf_xdp_output() was triggered and it sent the expected - * data to the perf ring buffer. - */ - err = perf_buffer__poll(pb, 100); - if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err)) - goto out; - - CHECK_FAIL(!passed); - - /* Verify test results */ - if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"), - "result", "fentry failed err %llu\n", - ftrace_skel->bss->test_result_fentry)) - goto out; - - CHECK(ftrace_skel->bss->test_result_fexit != XDP_TX, "result", - "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit); - + for (int i = 0; i < ARRAY_SIZE(pkt_sizes); i++) + run_xdp_bpf2bpf_pkt_size(pkt_fd, pb, ftrace_skel, + pkt_sizes[i]); out: - if (pb) - perf_buffer__free(pb); + perf_buffer__free(pb); test_xdp__destroy(pkt_skel); test_xdp_bpf2bpf__destroy(ftrace_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c index fd812bd43600..13aabb3b6cf2 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c @@ -3,11 +3,12 @@ #include <linux/if_link.h> #include <test_progs.h> +#include "test_xdp_with_cpumap_frags_helpers.skel.h" #include "test_xdp_with_cpumap_helpers.skel.h" #define IFINDEX_LO 1 -void serial_test_xdp_cpumap_attach(void) +void test_xdp_with_cpumap_helpers(void) { struct test_xdp_with_cpumap_helpers *skel; struct bpf_prog_info info = {}; @@ -54,6 +55,67 @@ void serial_test_xdp_cpumap_attach(void) err = bpf_map_update_elem(map_fd, &idx, &val, 0); ASSERT_NEQ(err, 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry"); + /* Try to attach BPF_XDP program with frags to cpumap when we have + * already loaded a BPF_XDP program on the map + */ + idx = 1; + val.qsize = 192; + val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags); + err = bpf_map_update_elem(map_fd, &idx, &val, 0); + ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to cpumap entry"); + out_close: test_xdp_with_cpumap_helpers__destroy(skel); } + +void test_xdp_with_cpumap_frags_helpers(void) +{ + struct test_xdp_with_cpumap_frags_helpers *skel; + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + struct bpf_cpumap_val val = { + .qsize = 192, + }; + int err, frags_prog_fd, map_fd; + __u32 idx = 0; + + skel = test_xdp_with_cpumap_frags_helpers__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_helpers__open_and_load")) + return; + + frags_prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags); + map_fd = bpf_map__fd(skel->maps.cpu_map); + err = bpf_obj_get_info_by_fd(frags_prog_fd, &info, &len); + if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd")) + goto out_close; + + val.bpf_prog.fd = frags_prog_fd; + err = bpf_map_update_elem(map_fd, &idx, &val, 0); + ASSERT_OK(err, "Add program to cpumap entry"); + + err = bpf_map_lookup_elem(map_fd, &idx, &val); + ASSERT_OK(err, "Read cpumap entry"); + ASSERT_EQ(info.id, val.bpf_prog.id, + "Match program id to cpumap entry prog_id"); + + /* Try to attach BPF_XDP program to cpumap when we have + * already loaded a BPF_XDP program with frags on the map + */ + idx = 1; + val.qsize = 192; + val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm); + err = bpf_map_update_elem(map_fd, &idx, &val, 0); + ASSERT_NEQ(err, 0, "Add BPF_XDP program to cpumap entry"); + +out_close: + test_xdp_with_cpumap_frags_helpers__destroy(skel); +} + +void serial_test_xdp_cpumap_attach(void) +{ + if (test__start_subtest("CPUMAP with programs in entries")) + test_xdp_with_cpumap_helpers(); + + if (test__start_subtest("CPUMAP with frags programs in entries")) + test_xdp_with_cpumap_frags_helpers(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c index 3079d5568f8f..2a784ccd3136 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c @@ -4,6 +4,7 @@ #include <test_progs.h> #include "test_xdp_devmap_helpers.skel.h" +#include "test_xdp_with_devmap_frags_helpers.skel.h" #include "test_xdp_with_devmap_helpers.skel.h" #define IFINDEX_LO 1 @@ -56,6 +57,15 @@ static void test_xdp_with_devmap_helpers(void) err = bpf_map_update_elem(map_fd, &idx, &val, 0); ASSERT_NEQ(err, 0, "Add non-BPF_XDP_DEVMAP program to devmap entry"); + /* Try to attach BPF_XDP program with frags to devmap when we have + * already loaded a BPF_XDP program on the map + */ + idx = 1; + val.ifindex = 1; + val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm_frags); + err = bpf_map_update_elem(map_fd, &idx, &val, 0); + ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to devmap entry"); + out_close: test_xdp_with_devmap_helpers__destroy(skel); } @@ -71,12 +81,57 @@ static void test_neg_xdp_devmap_helpers(void) } } +void test_xdp_with_devmap_frags_helpers(void) +{ + struct test_xdp_with_devmap_frags_helpers *skel; + struct bpf_prog_info info = {}; + struct bpf_devmap_val val = { + .ifindex = IFINDEX_LO, + }; + __u32 len = sizeof(info); + int err, dm_fd_frags, map_fd; + __u32 idx = 0; + + skel = test_xdp_with_devmap_frags_helpers__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_helpers__open_and_load")) + return; + + dm_fd_frags = bpf_program__fd(skel->progs.xdp_dummy_dm_frags); + map_fd = bpf_map__fd(skel->maps.dm_ports); + err = bpf_obj_get_info_by_fd(dm_fd_frags, &info, &len); + if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd")) + goto out_close; + + val.bpf_prog.fd = dm_fd_frags; + err = bpf_map_update_elem(map_fd, &idx, &val, 0); + ASSERT_OK(err, "Add frags program to devmap entry"); + + err = bpf_map_lookup_elem(map_fd, &idx, &val); + ASSERT_OK(err, "Read devmap entry"); + ASSERT_EQ(info.id, val.bpf_prog.id, + "Match program id to devmap entry prog_id"); + + /* Try to attach BPF_XDP program to devmap when we have + * already loaded a BPF_XDP program with frags on the map + */ + idx = 1; + val.ifindex = 1; + val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm); + err = bpf_map_update_elem(map_fd, &idx, &val, 0); + ASSERT_NEQ(err, 0, "Add BPF_XDP program to devmap entry"); + +out_close: + test_xdp_with_devmap_frags_helpers__destroy(skel); +} void serial_test_xdp_devmap_attach(void) { if (test__start_subtest("DEVMAP with programs in entries")) test_xdp_with_devmap_helpers(); + if (test__start_subtest("DEVMAP with frags programs in entries")) + test_xdp_with_devmap_frags_helpers(); + if (test__start_subtest("Verifier check of DEVMAP programs")) test_neg_xdp_devmap_helpers(); } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c new file mode 100644 index 000000000000..eafc877ea460 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ +#include "bpf_iter.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <limits.h> + +#define AUTOBIND_LEN 6 +char sun_path[AUTOBIND_LEN]; + +#define NR_CASES 5 +int sndbuf_setsockopt[NR_CASES] = {-1, 0, 8192, INT_MAX / 2, INT_MAX}; +int sndbuf_getsockopt[NR_CASES] = {-1, -1, -1, -1, -1}; +int sndbuf_getsockopt_expected[NR_CASES]; + +static inline int cmpname(struct unix_sock *unix_sk) +{ + int i; + + for (i = 0; i < AUTOBIND_LEN; i++) { + if (unix_sk->addr->name->sun_path[i] != sun_path[i]) + return -1; + } + + return 0; +} + +SEC("iter/unix") +int change_sndbuf(struct bpf_iter__unix *ctx) +{ + struct unix_sock *unix_sk = ctx->unix_sk; + int i, err; + + if (!unix_sk || !unix_sk->addr) + return 0; + + if (unix_sk->addr->name->sun_path[0]) + return 0; + + if (cmpname(unix_sk)) + return 0; + + for (i = 0; i < NR_CASES; i++) { + err = bpf_setsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF, + &sndbuf_setsockopt[i], + sizeof(sndbuf_setsockopt[i])); + if (err) + break; + + err = bpf_getsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF, + &sndbuf_getsockopt[i], + sizeof(sndbuf_getsockopt[i])); + if (err) + break; + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c index c21e3f545371..e6aefae38894 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c @@ -63,7 +63,7 @@ int dump_unix(struct bpf_iter__unix *ctx) BPF_SEQ_PRINTF(seq, " @"); for (i = 1; i < len; i++) { - /* unix_mkname() tests this upper bound. */ + /* unix_validate_addr() tests this upper bound. */ if (i >= sizeof(struct sockaddr_un)) break; diff --git a/tools/testing/selftests/bpf/progs/bpf_mod_race.c b/tools/testing/selftests/bpf/progs/bpf_mod_race.c new file mode 100644 index 000000000000..82a5c6c6ba83 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_mod_race.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +const volatile struct { + /* thread to activate trace programs for */ + pid_t tgid; + /* return error from __init function */ + int inject_error; + /* uffd monitored range start address */ + void *fault_addr; +} bpf_mod_race_config = { -1 }; + +int bpf_blocking = 0; +int res_try_get_module = -1; + +static __always_inline bool check_thread_id(void) +{ + struct task_struct *task = bpf_get_current_task_btf(); + + return task->tgid == bpf_mod_race_config.tgid; +} + +/* The trace of execution is something like this: + * + * finit_module() + * load_module() + * prepare_coming_module() + * notifier_call(MODULE_STATE_COMING) + * btf_parse_module() + * btf_alloc_id() // Visible to userspace at this point + * list_add(btf_mod->list, &btf_modules) + * do_init_module() + * freeinit = kmalloc() + * ret = mod->init() + * bpf_prog_widen_race() + * bpf_copy_from_user() + * ...<sleep>... + * if (ret < 0) + * ... + * free_module() + * return ret + * + * At this point, module loading thread is blocked, we now load the program: + * + * bpf_check + * add_kfunc_call/check_pseudo_btf_id + * btf_try_get_module + * try_get_module_live == false + * return -ENXIO + * + * Without the fix (try_get_module_live in btf_try_get_module): + * + * bpf_check + * add_kfunc_call/check_pseudo_btf_id + * btf_try_get_module + * try_get_module == true + * <store module reference in btf_kfunc_tab or used_btf array> + * ... + * return fd + * + * Now, if we inject an error in the blocked program, our module will be freed + * (going straight from MODULE_STATE_COMING to MODULE_STATE_GOING). + * Later, when bpf program is freed, it will try to module_put already freed + * module. This is why try_get_module_live returns false if mod->state is not + * MODULE_STATE_LIVE. + */ + +SEC("fmod_ret.s/bpf_fentry_test1") +int BPF_PROG(widen_race, int a, int ret) +{ + char dst; + + if (!check_thread_id()) + return 0; + /* Indicate that we will attempt to block */ + bpf_blocking = 1; + bpf_copy_from_user(&dst, 1, bpf_mod_race_config.fault_addr); + return bpf_mod_race_config.inject_error; +} + +SEC("fexit/do_init_module") +int BPF_PROG(fexit_init_module, struct module *mod, int ret) +{ + if (!check_thread_id()) + return 0; + /* Indicate that we finished blocking */ + bpf_blocking = 2; + return 0; +} + +SEC("fexit/btf_try_get_module") +int BPF_PROG(fexit_module_get, const struct btf *btf, struct module *mod) +{ + res_try_get_module = !!mod; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index e0f42601be9b..1c1289ba5fc5 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -5,6 +5,8 @@ #define AF_INET 2 #define AF_INET6 10 +#define SOL_SOCKET 1 +#define SO_SNDBUF 7 #define __SO_ACCEPTCON (1 << 16) #define SOL_TCP 6 diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c new file mode 100644 index 000000000000..b2a409e6382a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright 2021 Google LLC. + */ + +#include <errno.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +__u32 invocations = 0; +__u32 assertion_error = 0; +__u32 retval_value = 0; +__u32 ctx_retval_value = 0; + +SEC("cgroup/getsockopt") +int get_retval(struct bpf_sockopt *ctx) +{ + retval_value = bpf_get_retval(); + ctx_retval_value = ctx->retval; + __sync_fetch_and_add(&invocations, 1); + + return 1; +} + +SEC("cgroup/getsockopt") +int set_eisconn(struct bpf_sockopt *ctx) +{ + __sync_fetch_and_add(&invocations, 1); + + if (bpf_set_retval(-EISCONN)) + assertion_error = 1; + + return 1; +} + +SEC("cgroup/getsockopt") +int clear_retval(struct bpf_sockopt *ctx) +{ + __sync_fetch_and_add(&invocations, 1); + + ctx->retval = 0; + + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c new file mode 100644 index 000000000000..d6e5903e06ba --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright 2021 Google LLC. + */ + +#include <errno.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +__u32 invocations = 0; +__u32 assertion_error = 0; +__u32 retval_value = 0; + +SEC("cgroup/setsockopt") +int get_retval(struct bpf_sockopt *ctx) +{ + retval_value = bpf_get_retval(); + __sync_fetch_and_add(&invocations, 1); + + return 1; +} + +SEC("cgroup/setsockopt") +int set_eunatch(struct bpf_sockopt *ctx) +{ + __sync_fetch_and_add(&invocations, 1); + + if (bpf_set_retval(-EUNATCH)) + assertion_error = 1; + + return 0; +} + +SEC("cgroup/setsockopt") +int set_eisconn(struct bpf_sockopt *ctx) +{ + __sync_fetch_and_add(&invocations, 1); + + if (bpf_set_retval(-EISCONN)) + assertion_error = 1; + + return 0; +} + +SEC("cgroup/setsockopt") +int legacy_eperm(struct bpf_sockopt *ctx) +{ + __sync_fetch_and_add(&invocations, 1); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c index 68a5a9db928a..7e94412d47a5 100644 --- a/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c +++ b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c @@ -7,12 +7,12 @@ #include <bpf/bpf_endian.h> #include <bpf/bpf_helpers.h> -struct bpf_map_def SEC("maps") sock_map = { - .type = BPF_MAP_TYPE_SOCKMAP, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 2, -}; +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __type(key, int); + __type(value, int); + __uint(max_entries, 2); +} sock_map SEC(".maps"); SEC("freplace/cls_redirect") int freplace_cls_redirect_test(struct __sk_buff *skb) diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_race.c b/tools/testing/selftests/bpf/progs/kfunc_call_race.c new file mode 100644 index 000000000000..4e8fed75a4e0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kfunc_call_race.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> + +extern void bpf_testmod_test_mod_kfunc(int i) __ksym; + +SEC("tc") +int kfunc_call_fail(struct __sk_buff *ctx) +{ + bpf_testmod_test_mod_kfunc(0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c index 8a8cf59017aa..5aecbb9fdc68 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c @@ -1,13 +1,20 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021 Facebook */ -#include <linux/bpf.h> +#include <vmlinux.h> #include <bpf/bpf_helpers.h> -#include "bpf_tcp_helpers.h" extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, __u32 c, __u64 d) __ksym; +extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; +extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; +extern void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym; +extern void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym; +extern void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym; +extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; +extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym; + SEC("tc") int kfunc_call_test2(struct __sk_buff *skb) { @@ -44,4 +51,45 @@ int kfunc_call_test1(struct __sk_buff *skb) return ret; } +SEC("tc") +int kfunc_call_test_ref_btf_id(struct __sk_buff *skb) +{ + struct prog_test_ref_kfunc *pt; + unsigned long s = 0; + int ret = 0; + + pt = bpf_kfunc_call_test_acquire(&s); + if (pt) { + if (pt->a != 42 || pt->b != 108) + ret = -1; + bpf_kfunc_call_test_release(pt); + } + return ret; +} + +SEC("tc") +int kfunc_call_test_pass(struct __sk_buff *skb) +{ + struct prog_test_pass1 p1 = {}; + struct prog_test_pass2 p2 = {}; + short a = 0; + __u64 b = 0; + long c = 0; + char d = 0; + int e = 0; + + bpf_kfunc_call_test_pass_ctx(skb); + bpf_kfunc_call_test_pass1(&p1); + bpf_kfunc_call_test_pass2(&p2); + + bpf_kfunc_call_test_mem_len_pass1(&a, sizeof(a)); + bpf_kfunc_call_test_mem_len_pass1(&b, sizeof(b)); + bpf_kfunc_call_test_mem_len_pass1(&c, sizeof(c)); + bpf_kfunc_call_test_mem_len_pass1(&d, sizeof(d)); + bpf_kfunc_call_test_mem_len_pass1(&e, sizeof(e)); + bpf_kfunc_call_test_mem_len_fail2(&b, -1); + + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/ksym_race.c b/tools/testing/selftests/bpf/progs/ksym_race.c new file mode 100644 index 000000000000..def97f2fed90 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/ksym_race.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> + +extern int bpf_testmod_ksym_percpu __ksym; + +SEC("tc") +int ksym_fail(struct __sk_buff *ctx) +{ + return *(int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/sample_map_ret0.c b/tools/testing/selftests/bpf/progs/sample_map_ret0.c index 1612a32007b6..495990d355ef 100644 --- a/tools/testing/selftests/bpf/progs/sample_map_ret0.c +++ b/tools/testing/selftests/bpf/progs/sample_map_ret0.c @@ -2,19 +2,19 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -struct bpf_map_def SEC("maps") htab = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(__u32), - .value_size = sizeof(long), - .max_entries = 2, -}; +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u32); + __type(value, long); + __uint(max_entries, 2); +} htab SEC(".maps"); -struct bpf_map_def SEC("maps") array = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(__u32), - .value_size = sizeof(long), - .max_entries = 2, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, long); + __uint(max_entries, 2); +} array SEC(".maps"); /* Sample program which should always load for testing control paths. */ SEC(".text") int func() diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c index 95d5b941bc1f..c9abfe3a11af 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c @@ -7,8 +7,6 @@ int bpf_prog1(struct __sk_buff *skb) { void *data_end = (void *)(long) skb->data_end; void *data = (void *)(long) skb->data; - __u32 lport = skb->local_port; - __u32 rport = skb->remote_port; __u8 *d = data; int err; diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c index 79c8139b63b8..d0298dccedcd 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_sk.c +++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c @@ -73,17 +73,17 @@ int _getsockopt(struct bpf_sockopt *ctx) */ if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end) - return 0; /* EPERM, bounds check */ + return 0; /* bounds check */ if (((struct tcp_zerocopy_receive *)optval)->address != 0) - return 0; /* EPERM, unexpected data */ + return 0; /* unexpected data */ return 1; } if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { if (optval + 1 > optval_end) - return 0; /* EPERM, bounds check */ + return 0; /* bounds check */ ctx->retval = 0; /* Reset system call return value to zero */ @@ -96,24 +96,24 @@ int _getsockopt(struct bpf_sockopt *ctx) * bytes of data. */ if (optval_end - optval != page_size) - return 0; /* EPERM, unexpected data size */ + return 0; /* unexpected data size */ return 1; } if (ctx->level != SOL_CUSTOM) - return 0; /* EPERM, deny everything except custom level */ + return 0; /* deny everything except custom level */ if (optval + 1 > optval_end) - return 0; /* EPERM, bounds check */ + return 0; /* bounds check */ storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0, BPF_SK_STORAGE_GET_F_CREATE); if (!storage) - return 0; /* EPERM, couldn't get sk storage */ + return 0; /* couldn't get sk storage */ if (!ctx->retval) - return 0; /* EPERM, kernel should not have handled + return 0; /* kernel should not have handled * SOL_CUSTOM, something is wrong! */ ctx->retval = 0; /* Reset system call return value to zero */ @@ -152,7 +152,7 @@ int _setsockopt(struct bpf_sockopt *ctx) /* Overwrite SO_SNDBUF value */ if (optval + sizeof(__u32) > optval_end) - return 0; /* EPERM, bounds check */ + return 0; /* bounds check */ *(__u32 *)optval = 0x55AA; ctx->optlen = 4; @@ -164,7 +164,7 @@ int _setsockopt(struct bpf_sockopt *ctx) /* Always use cubic */ if (optval + 5 > optval_end) - return 0; /* EPERM, bounds check */ + return 0; /* bounds check */ memcpy(optval, "cubic", 5); ctx->optlen = 5; @@ -175,10 +175,10 @@ int _setsockopt(struct bpf_sockopt *ctx) if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { /* Original optlen is larger than PAGE_SIZE. */ if (ctx->optlen != page_size * 2) - return 0; /* EPERM, unexpected data size */ + return 0; /* unexpected data size */ if (optval + 1 > optval_end) - return 0; /* EPERM, bounds check */ + return 0; /* bounds check */ /* Make sure we can trim the buffer. */ optval[0] = 0; @@ -189,21 +189,21 @@ int _setsockopt(struct bpf_sockopt *ctx) * bytes of data. */ if (optval_end - optval != page_size) - return 0; /* EPERM, unexpected data size */ + return 0; /* unexpected data size */ return 1; } if (ctx->level != SOL_CUSTOM) - return 0; /* EPERM, deny everything except custom level */ + return 0; /* deny everything except custom level */ if (optval + 1 > optval_end) - return 0; /* EPERM, bounds check */ + return 0; /* bounds check */ storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0, BPF_SK_STORAGE_GET_F_CREATE); if (!storage) - return 0; /* EPERM, couldn't get sk storage */ + return 0; /* couldn't get sk storage */ storage->val = optval[0]; ctx->optlen = -1; /* BPF has consumed this option, don't call kernel diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c new file mode 100644 index 000000000000..f00a9731930e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> + +#define EAFNOSUPPORT 97 +#define EPROTO 71 +#define ENONET 64 +#define EINVAL 22 +#define ENOENT 2 + +int test_einval_bpf_tuple = 0; +int test_einval_reserved = 0; +int test_einval_netns_id = 0; +int test_einval_len_opts = 0; +int test_eproto_l4proto = 0; +int test_enonet_netns_id = 0; +int test_enoent_lookup = 0; +int test_eafnosupport = 0; + +struct nf_conn; + +struct bpf_ct_opts___local { + s32 netns_id; + s32 error; + u8 l4proto; + u8 reserved[3]; +} __attribute__((preserve_access_index)); + +struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32) __ksym; +struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32) __ksym; +void bpf_ct_release(struct nf_conn *) __ksym; + +static __always_inline void +nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32), + void *ctx) +{ + struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 }; + struct bpf_sock_tuple bpf_tuple; + struct nf_conn *ct; + + __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4)); + + ct = func(ctx, NULL, 0, &opts_def, sizeof(opts_def)); + if (ct) + bpf_ct_release(ct); + else + test_einval_bpf_tuple = opts_def.error; + + opts_def.reserved[0] = 1; + ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + opts_def.reserved[0] = 0; + opts_def.l4proto = IPPROTO_TCP; + if (ct) + bpf_ct_release(ct); + else + test_einval_reserved = opts_def.error; + + opts_def.netns_id = -2; + ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + opts_def.netns_id = -1; + if (ct) + bpf_ct_release(ct); + else + test_einval_netns_id = opts_def.error; + + ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def) - 1); + if (ct) + bpf_ct_release(ct); + else + test_einval_len_opts = opts_def.error; + + opts_def.l4proto = IPPROTO_ICMP; + ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + opts_def.l4proto = IPPROTO_TCP; + if (ct) + bpf_ct_release(ct); + else + test_eproto_l4proto = opts_def.error; + + opts_def.netns_id = 0xf00f; + ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + opts_def.netns_id = -1; + if (ct) + bpf_ct_release(ct); + else + test_enonet_netns_id = opts_def.error; + + ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + if (ct) + bpf_ct_release(ct); + else + test_enoent_lookup = opts_def.error; + + ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4) - 1, &opts_def, sizeof(opts_def)); + if (ct) + bpf_ct_release(ct); + else + test_eafnosupport = opts_def.error; +} + +SEC("xdp") +int nf_xdp_ct_test(struct xdp_md *ctx) +{ + nf_ct_test((void *)bpf_xdp_ct_lookup, ctx); + return 0; +} + +SEC("tc") +int nf_skb_ct_test(struct __sk_buff *ctx) +{ + nf_ct_test((void *)bpf_skb_ct_lookup, ctx); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c index 160ead6c67b2..07c94df13660 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_haskv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_haskv.c @@ -9,12 +9,15 @@ struct ipv_counts { unsigned int v6; }; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" struct bpf_map_def SEC("maps") btf_map = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(struct ipv_counts), .max_entries = 4, }; +#pragma GCC diagnostic pop BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts); diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c index 1884a5bd10f5..762671a2e90c 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c @@ -9,6 +9,8 @@ struct ipv_counts { unsigned int v6; }; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" /* just to validate we can handle maps in multiple sections */ struct bpf_map_def SEC("maps") btf_map_legacy = { .type = BPF_MAP_TYPE_ARRAY, @@ -16,6 +18,7 @@ struct bpf_map_def SEC("maps") btf_map_legacy = { .value_size = sizeof(long long), .max_entries = 4, }; +#pragma GCC diagnostic pop BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts); diff --git a/tools/testing/selftests/bpf/progs/test_btf_nokv.c b/tools/testing/selftests/bpf/progs/test_btf_nokv.c index 15e0f9945fe4..1dabb88f8cb4 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_nokv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_nokv.c @@ -8,12 +8,12 @@ struct ipv_counts { unsigned int v6; }; -struct bpf_map_def SEC("maps") btf_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(struct ipv_counts), - .max_entries = 4, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(struct ipv_counts)); + __uint(max_entries, 4); +} btf_map SEC(".maps"); __attribute__((noinline)) int test_long_fname_2(void) diff --git a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c index c304cd5b8cad..37aacc66cd68 100644 --- a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c +++ b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c @@ -10,12 +10,12 @@ #define NUM_CGROUP_LEVELS 4 -struct bpf_map_def SEC("maps") cgroup_ids = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(__u32), - .value_size = sizeof(__u64), - .max_entries = NUM_CGROUP_LEVELS, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u64); + __uint(max_entries, NUM_CGROUP_LEVELS); +} cgroup_ids SEC(".maps"); static __always_inline void log_nth_level(struct __sk_buff *skb, __u32 level) { diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c b/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c new file mode 100644 index 000000000000..9d58d61c0dee --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} sock_map SEC(".maps"); + +SEC("sk_skb") +int prog_skb_verdict(struct __sk_buff *skb) +{ + return SK_PASS; +} + +SEC("sk_msg") +int prog_skmsg_verdict(struct sk_msg_md *msg) +{ + return SK_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_edt.c b/tools/testing/selftests/bpf/progs/test_tc_edt.c index bf28814bfde5..950a70b61e74 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_edt.c +++ b/tools/testing/selftests/bpf/progs/test_tc_edt.c @@ -17,12 +17,12 @@ #define THROTTLE_RATE_BPS (5 * 1000 * 1000) /* flow_key => last_tstamp timestamp used */ -struct bpf_map_def SEC("maps") flow_map = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(uint32_t), - .value_size = sizeof(uint64_t), - .max_entries = 1, -}; +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, uint32_t); + __type(value, uint64_t); + __uint(max_entries, 1); +} flow_map SEC(".maps"); static inline int throttle_flow(struct __sk_buff *skb) { diff --git a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c index cd747cd93dbe..6edebce563b5 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c @@ -16,12 +16,12 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -struct bpf_map_def SEC("maps") results = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(__u32), - .value_size = sizeof(__u32), - .max_entries = 3, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 3); +} results SEC(".maps"); static __always_inline __s64 gen_syncookie(void *data_end, struct bpf_sock *sk, void *iph, __u32 ip_size, diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c index 199c61b7d062..53b64c999450 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c @@ -7,11 +7,10 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp) { void *data_end = (void *)(long)xdp->data_end; void *data = (void *)(long)xdp->data; - unsigned int data_len; + int data_len = bpf_xdp_get_buff_len(xdp); int offset = 0; /* Data length determine test case */ - data_len = data_end - data; if (data_len == 54) { /* sizeof(pkt_v4) */ offset = 4096; /* test too large offset */ @@ -20,7 +19,12 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp) } else if (data_len == 64) { offset = 128; } else if (data_len == 128) { - offset = 4096 - 256 - 320 - data_len; /* Max tail grow 3520 */ + /* Max tail grow 3520 */ + offset = 4096 - 256 - 320 - data_len; + } else if (data_len == 9000) { + offset = 10; + } else if (data_len == 9001) { + offset = 4096; } else { return XDP_ABORTED; /* No matching test */ } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c index b7448253d135..ca68c038357c 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c @@ -12,14 +12,38 @@ SEC("xdp") int _xdp_adjust_tail_shrink(struct xdp_md *xdp) { - void *data_end = (void *)(long)xdp->data_end; - void *data = (void *)(long)xdp->data; + __u8 *data_end = (void *)(long)xdp->data_end; + __u8 *data = (void *)(long)xdp->data; int offset = 0; - if (data_end - data == 54) /* sizeof(pkt_v4) */ + switch (bpf_xdp_get_buff_len(xdp)) { + case 54: + /* sizeof(pkt_v4) */ offset = 256; /* shrink too much */ - else + break; + case 9000: + /* non-linear buff test cases */ + if (data + 1 > data_end) + return XDP_DROP; + + switch (data[0]) { + case 0: + offset = 10; + break; + case 1: + offset = 4100; + break; + case 2: + offset = 8200; + break; + default: + return XDP_DROP; + } + break; + default: offset = 20; + break; + } if (bpf_xdp_adjust_tail(xdp, 0 - offset)) return XDP_DROP; return XDP_TX; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c index 58cf4345f5cc..3379d303f41a 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c @@ -49,7 +49,7 @@ int BPF_PROG(trace_on_entry, struct xdp_buff *xdp) void *data = (void *)(long)xdp->data; meta.ifindex = xdp->rxq->dev->ifindex; - meta.pkt_len = data_end - data; + meta.pkt_len = bpf_xdp_get_buff_len((struct xdp_md *)xdp); bpf_xdp_output(xdp, &perf_buf_map, ((__u64) meta.pkt_len << 32) | BPF_F_CURRENT_CPU, diff --git a/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c b/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c new file mode 100644 index 000000000000..2a3496d8e327 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <bpf/bpf_helpers.h> + +int _version SEC("version") = 1; + +SEC("xdp.frags") +int xdp_adjust_frags(struct xdp_md *xdp) +{ + __u8 *data_end = (void *)(long)xdp->data_end; + __u8 *data = (void *)(long)xdp->data; + __u8 val[16] = {}; + __u32 offset; + int err; + + if (data + sizeof(__u32) > data_end) + return XDP_DROP; + + offset = *(__u32 *)data; + err = bpf_xdp_load_bytes(xdp, offset, val, sizeof(val)); + if (err < 0) + return XDP_DROP; + + if (val[0] != 0xaa || val[15] != 0xaa) /* marker */ + return XDP_DROP; + + val[0] = 0xbb; /* update the marker */ + val[15] = 0xbb; + err = bpf_xdp_store_bytes(xdp, offset, val, sizeof(val)); + if (err < 0) + return XDP_DROP; + + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c new file mode 100644 index 000000000000..62fb7cd4d87a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +#define IFINDEX_LO 1 + +struct { + __uint(type, BPF_MAP_TYPE_CPUMAP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_cpumap_val)); + __uint(max_entries, 4); +} cpu_map SEC(".maps"); + +SEC("xdp_cpumap/dummy_cm") +int xdp_dummy_cm(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +SEC("xdp.frags/cpumap") +int xdp_dummy_cm_frags(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c index 532025057711..48007f17dfa8 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c @@ -33,4 +33,10 @@ int xdp_dummy_cm(struct xdp_md *ctx) return XDP_PASS; } +SEC("xdp.frags/cpumap") +int xdp_dummy_cm_frags(struct xdp_md *ctx) +{ + return XDP_PASS; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c new file mode 100644 index 000000000000..e1caf510b7d2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_devmap_val)); + __uint(max_entries, 4); +} dm_ports SEC(".maps"); + +/* valid program on DEVMAP entry via SEC name; + * has access to egress and ingress ifindex + */ +SEC("xdp_devmap/map_prog") +int xdp_dummy_dm(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +SEC("xdp.frags/devmap") +int xdp_dummy_dm_frags(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c index 1e6b9c38ea6d..8ae11fab8316 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c @@ -40,4 +40,11 @@ int xdp_dummy_dm(struct xdp_md *ctx) return XDP_PASS; } + +SEC("xdp.frags/devmap") +int xdp_dummy_dm_frags(struct xdp_md *ctx) +{ + return XDP_PASS; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 76cd903117af..29bbaa58233c 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -31,6 +31,7 @@ #include <linux/if_ether.h> #include <linux/btf.h> +#include <bpf/btf.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> @@ -66,6 +67,11 @@ static bool unpriv_disabled = false; static int skips; static bool verbose = false; +struct kfunc_btf_id_pair { + const char *kfunc; + int insn_idx; +}; + struct bpf_test { const char *descr; struct bpf_insn insns[MAX_INSNS]; @@ -92,6 +98,7 @@ struct bpf_test { int fixup_map_reuseport_array[MAX_FIXUPS]; int fixup_map_ringbuf[MAX_FIXUPS]; int fixup_map_timer[MAX_FIXUPS]; + struct kfunc_btf_id_pair fixup_kfunc_btf_id[MAX_FIXUPS]; /* Expected verifier log output for result REJECT or VERBOSE_ACCEPT. * Can be a tab-separated sequence of expected strings. An empty string * means no log verification. @@ -744,6 +751,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, int *fixup_map_reuseport_array = test->fixup_map_reuseport_array; int *fixup_map_ringbuf = test->fixup_map_ringbuf; int *fixup_map_timer = test->fixup_map_timer; + struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id; if (test->fill_helper) { test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn)); @@ -936,6 +944,26 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, fixup_map_timer++; } while (*fixup_map_timer); } + + /* Patch in kfunc BTF IDs */ + if (fixup_kfunc_btf_id->kfunc) { + struct btf *btf; + int btf_id; + + do { + btf_id = 0; + btf = btf__load_vmlinux_btf(); + if (btf) { + btf_id = btf__find_by_name_kind(btf, + fixup_kfunc_btf_id->kfunc, + BTF_KIND_FUNC); + btf_id = btf_id < 0 ? 0 : btf_id; + } + btf__free(btf); + prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id; + fixup_kfunc_btf_id++; + } while (fixup_kfunc_btf_id->kfunc); + } } struct libcap { diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index d7b74eb28333..829be2b9e08e 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -22,6 +22,81 @@ .result = ACCEPT, }, { + "calls: invalid kfunc call: ptr_to_mem to struct with non-scalar", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "arg#0 pointer type STRUCT prog_test_fail1 must point to scalar", + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_test_fail1", 2 }, + }, +}, +{ + "calls: invalid kfunc call: ptr_to_mem to struct with nesting depth > 4", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "max struct nesting depth exceeded\narg#0 pointer type STRUCT prog_test_fail2", + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_test_fail2", 2 }, + }, +}, +{ + "calls: invalid kfunc call: ptr_to_mem to struct with FAM", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "arg#0 pointer type STRUCT prog_test_fail3 must point to scalar", + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_test_fail3", 2 }, + }, +}, +{ + "calls: invalid kfunc call: reg->type != PTR_TO_CTX", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "arg#0 expected pointer to ctx, but got PTR", + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_test_pass_ctx", 2 }, + }, +}, +{ + "calls: invalid kfunc call: void * not allowed in func proto without mem size arg", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "arg#0 pointer type UNKNOWN must point to scalar", + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_test_mem_len_fail1", 2 }, + }, +}, +{ "calls: basic sanity", .insns = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 0a5d23da486d..ffa5502ad95e 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -906,7 +906,10 @@ static bool rx_stats_are_valid(struct ifobject *ifobject) return true; case STAT_TEST_RX_FULL: xsk_stat = stats.rx_ring_full; - expected_stat -= RX_FULL_RXQSIZE; + if (ifobject->umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS) + expected_stat = ifobject->umem->num_frames - RX_FULL_RXQSIZE; + else + expected_stat = XSK_RING_PROD__DEFAULT_NUM_DESCS - RX_FULL_RXQSIZE; break; case STAT_TEST_RX_FILL_EMPTY: xsk_stat = stats.rx_fill_ring_empty_descs; diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 43ea8407a82e..3b0489910422 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -96,7 +96,7 @@ fib_rule6_del() fib_rule6_del_by_pref() { - pref=$($IP -6 rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1) + pref=$($IP -6 rule show $1 table $RTABLE | cut -d ":" -f 1) $IP -6 rule del pref $pref } @@ -104,17 +104,21 @@ fib_rule6_test_match_n_redirect() { local match="$1" local getmatch="$2" + local description="$3" $IP -6 rule add $match table $RTABLE $IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE" - log_test $? 0 "rule6 check: $1" + log_test $? 0 "rule6 check: $description" fib_rule6_del_by_pref "$match" - log_test $? 0 "rule6 del by pref: $match" + log_test $? 0 "rule6 del by pref: $description" } fib_rule6_test() { + local getmatch + local match + # setup the fib rule redirect route $IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink @@ -165,7 +169,7 @@ fib_rule4_del() fib_rule4_del_by_pref() { - pref=$($IP rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1) + pref=$($IP rule show $1 table $RTABLE | cut -d ":" -f 1) $IP rule del pref $pref } @@ -173,17 +177,21 @@ fib_rule4_test_match_n_redirect() { local match="$1" local getmatch="$2" + local description="$3" $IP rule add $match table $RTABLE $IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE" - log_test $? 0 "rule4 check: $1" + log_test $? 0 "rule4 check: $description" fib_rule4_del_by_pref "$match" - log_test $? 0 "rule4 del by pref: $match" + log_test $? 0 "rule4 del by pref: $description" } fib_rule4_test() { + local getmatch + local match + # setup the fib rule redirect route $IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink @@ -192,11 +200,11 @@ fib_rule4_test() # need enable forwarding and disable rp_filter temporarily as all the # addresses are in the same subnet and egress device == ingress device. - ip netns exec testns sysctl -w net.ipv4.ip_forward=1 - ip netns exec testns sysctl -w net.ipv4.conf.$DEV.rp_filter=0 + ip netns exec testns sysctl -qw net.ipv4.ip_forward=1 + ip netns exec testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0 match="from $SRC_IP iif $DEV" fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table" - ip netns exec testns sysctl -w net.ipv4.ip_forward=0 + ip netns exec testns sysctl -qw net.ipv4.ip_forward=0 match="tos 0x10" fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table" diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh index b90dff8d3a94..64bd00fe9a4f 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -28,8 +28,9 @@ h2_destroy() switch_create() { - # 10 Seconds ageing time. - ip link add dev br0 type bridge vlan_filtering 1 ageing_time 1000 \ + ip link add dev br0 type bridge \ + vlan_filtering 1 \ + ageing_time $LOW_AGEING_TIME \ mcast_snooping 0 ip link set dev $swp1 master br0 diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh index c15c6c85c984..1c8a26046589 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh @@ -27,8 +27,9 @@ h2_destroy() switch_create() { - # 10 Seconds ageing time. - ip link add dev br0 type bridge ageing_time 1000 mcast_snooping 0 + ip link add dev br0 type bridge \ + ageing_time $LOW_AGEING_TIME \ + mcast_snooping 0 ip link set dev $swp1 master br0 ip link set dev $swp2 master br0 diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample index b0980a2efa31..4a546509de90 100644 --- a/tools/testing/selftests/net/forwarding/forwarding.config.sample +++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample @@ -41,6 +41,8 @@ NETIF_CREATE=yes # Timeout (in seconds) before ping exits regardless of how many packets have # been sent or received PING_TIMEOUT=5 +# Minimum ageing_time (in centiseconds) supported by hardware +LOW_AGEING_TIME=1000 # Flag for tc match, supposed to be skip_sw/skip_hw which means do not process # filter by software/hardware TC_FLAG=skip_hw diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 7da783d6f453..e7e434a4758b 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -24,6 +24,7 @@ PING_COUNT=${PING_COUNT:=10} PING_TIMEOUT=${PING_TIMEOUT:=5} WAIT_TIMEOUT=${WAIT_TIMEOUT:=20} INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600} +LOW_AGEING_TIME=${LOW_AGEING_TIME:=1000} REQUIRE_JQ=${REQUIRE_JQ:=yes} REQUIRE_MZ=${REQUIRE_MZ:=yes} diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index b8bdbec0cf69..bd106c7ec232 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -289,7 +289,7 @@ do_transfer() addr_nr_ns1="$7" addr_nr_ns2="$8" speed="$9" - bkup="${10}" + sflags="${10}" port=$((10000+$TEST_COUNT)) TEST_COUNT=$((TEST_COUNT+1)) @@ -461,14 +461,13 @@ do_transfer() fi fi - if [ ! -z $bkup ]; then + if [ ! -z $sflags ]; then sleep 1 for netns in "$ns1" "$ns2"; do dump=(`ip netns exec $netns ./pm_nl_ctl dump`) if [ ${#dump[@]} -gt 0 ]; then addr=${dump[${#dump[@]} - 1]} - backup="ip netns exec $netns ./pm_nl_ctl set $addr flags $bkup" - $backup + ip netns exec $netns ./pm_nl_ctl set $addr flags $sflags fi done fi @@ -545,7 +544,7 @@ run_tests() addr_nr_ns1="${5:-0}" addr_nr_ns2="${6:-0}" speed="${7:-fast}" - bkup="${8:-""}" + sflags="${8:-""}" lret=0 oldin="" @@ -574,7 +573,7 @@ run_tests() fi do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \ - ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${bkup} + ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${sflags} lret=$? } @@ -1888,6 +1887,44 @@ fullmesh_tests() run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow chk_join_nr "fullmesh test 1x2, limited" 4 4 4 chk_add_nr 1 1 + + # set fullmesh flag + reset + ip netns exec $ns1 ./pm_nl_ctl limits 4 4 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow + ip netns exec $ns2 ./pm_nl_ctl limits 4 4 + run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow fullmesh + chk_join_nr "set fullmesh flag test" 2 2 2 + chk_rm_nr 0 1 + + # set nofullmesh flag + reset + ip netns exec $ns1 ./pm_nl_ctl limits 4 4 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow,fullmesh + ip netns exec $ns2 ./pm_nl_ctl limits 4 4 + run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow nofullmesh + chk_join_nr "set nofullmesh flag test" 2 2 2 + chk_rm_nr 0 1 + + # set backup,fullmesh flags + reset + ip netns exec $ns1 ./pm_nl_ctl limits 4 4 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow + ip netns exec $ns2 ./pm_nl_ctl limits 4 4 + run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow backup,fullmesh + chk_join_nr "set backup,fullmesh flags test" 2 2 2 + chk_prio_nr 0 1 + chk_rm_nr 0 1 + + # set nobackup,nofullmesh flags + reset + ip netns exec $ns1 ./pm_nl_ctl limits 4 4 + ip netns exec $ns2 ./pm_nl_ctl limits 4 4 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow,backup,fullmesh + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup,nofullmesh + chk_join_nr "set nobackup,nofullmesh flags test" 2 2 2 + chk_prio_nr 0 1 + chk_rm_nr 0 1 } all_tests() diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 354784512748..152b84e44d69 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -28,7 +28,7 @@ static void syntax(char *argv[]) fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n"); fprintf(stderr, "\tdel <id> [<ip>]\n"); fprintf(stderr, "\tget <id>\n"); - fprintf(stderr, "\tset <ip> [flags backup|nobackup]\n"); + fprintf(stderr, "\tset <ip> [flags backup|nobackup|fullmesh|nofullmesh]\n"); fprintf(stderr, "\tflush\n"); fprintf(stderr, "\tdump\n"); fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n"); @@ -704,12 +704,14 @@ int set_flags(int fd, int pm_family, int argc, char *argv[]) if (++arg >= argc) error(1, 0, " missing flags value"); - /* do not support flag list yet */ for (str = argv[arg]; (tok = strtok(str, ",")); str = NULL) { if (!strcmp(tok, "backup")) flags |= MPTCP_PM_ADDR_FLAG_BACKUP; - else if (strcmp(tok, "nobackup")) + else if (!strcmp(tok, "fullmesh")) + flags |= MPTCP_PM_ADDR_FLAG_FULLMESH; + else if (strcmp(tok, "nobackup") && + strcmp(tok, "nofullmesh")) error(1, errno, "unknown flag %s", argv[arg]); } diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c index aee631c5284e..044bc0e9ed81 100644 --- a/tools/testing/selftests/net/timestamping.c +++ b/tools/testing/selftests/net/timestamping.c @@ -325,8 +325,8 @@ int main(int argc, char **argv) struct ifreq device; struct ifreq hwtstamp; struct hwtstamp_config hwconfig, hwconfig_requested; - struct so_timestamping so_timestamping_get = { 0, -1 }; - struct so_timestamping so_timestamping = { 0, -1 }; + struct so_timestamping so_timestamping_get = { 0, 0 }; + struct so_timestamping so_timestamping = { 0, 0 }; struct sockaddr_in addr; struct ip_mreq imr; struct in_addr iaddr; |