diff --git a/.mailmap b/.mailmap index 63c11ea7e35d70..7d14504daf24b8 100644 --- a/.mailmap +++ b/.mailmap @@ -327,6 +327,7 @@ Henrik Rydberg Herbert Xu Huacai Chen Huacai Chen +Ignat Korchagin Ike Panhc J. Bruce Fields J. Bruce Fields @@ -586,6 +587,7 @@ Morten Welinder Morten Welinder Morten Welinder Morten Welinder +Muhammad Usama Anjum Mukesh Ojha Muna Sinada Murali Nalajala diff --git a/Documentation/core-api/dma-attributes.rst b/Documentation/core-api/dma-attributes.rst index 1d7bfad73b1c7a..123c8468d58f21 100644 --- a/Documentation/core-api/dma-attributes.rst +++ b/Documentation/core-api/dma-attributes.rst @@ -149,11 +149,33 @@ For architectures that require cache flushing for DMA coherence DMA_ATTR_MMIO will not perform any cache flushing. The address provided must never be mapped cacheable into the CPU. -DMA_ATTR_CPU_CACHE_CLEAN ------------------------- - -This attribute indicates the CPU will not dirty any cacheline overlapping this -DMA_FROM_DEVICE/DMA_BIDIRECTIONAL buffer while it is mapped. This allows -multiple small buffers to safely share a cacheline without risk of data -corruption, suppressing DMA debug warnings about overlapping mappings. -All mappings sharing a cacheline should have this attribute. +DMA_ATTR_DEBUGGING_IGNORE_CACHELINES +------------------------------------ + +This attribute indicates that CPU cache lines may overlap for buffers mapped +with DMA_FROM_DEVICE or DMA_BIDIRECTIONAL. + +Such overlap may occur when callers map multiple small buffers that reside +within the same cache line. In this case, callers must guarantee that the CPU +will not dirty these cache lines after the mappings are established. When this +condition is met, multiple buffers can safely share a cache line without risking +data corruption. + +All mappings that share a cache line must set this attribute to suppress DMA +debug warnings about overlapping mappings. + +DMA_ATTR_REQUIRE_COHERENT +------------------------- + +DMA mapping requests with the DMA_ATTR_REQUIRE_COHERENT fail on any +system where SWIOTLB or cache management is required. This should only +be used to support uAPI designs that require continuous HW DMA +coherence with userspace processes, for example RDMA and DRM. At a +minimum the memory being mapped must be userspace memory from +pin_user_pages() or similar. + +Drivers should consider using dma_mmap_pages() instead of this +interface when building their uAPIs, when possible. + +It must never be used in an in-kernel driver that only works with +kernel memory. diff --git a/Documentation/dev-tools/kunit/run_wrapper.rst b/Documentation/dev-tools/kunit/run_wrapper.rst index 3c0b585dcfffbd..770bb09a475ae7 100644 --- a/Documentation/dev-tools/kunit/run_wrapper.rst +++ b/Documentation/dev-tools/kunit/run_wrapper.rst @@ -336,6 +336,8 @@ command line arguments: - ``--list_tests_attr``: If set, lists all tests that will be run and all of their attributes. +- ``--list_suites``: If set, lists all suites that will be run. + Command-line completion ============================== diff --git a/Documentation/devicetree/bindings/mtd/st,spear600-smi.yaml b/Documentation/devicetree/bindings/mtd/st,spear600-smi.yaml index 8fe27aae752790..e7385d906591ba 100644 --- a/Documentation/devicetree/bindings/mtd/st,spear600-smi.yaml +++ b/Documentation/devicetree/bindings/mtd/st,spear600-smi.yaml @@ -19,9 +19,6 @@ description: Flash sub nodes describe the memory range and optional per-flash properties. -allOf: - - $ref: mtd.yaml# - properties: compatible: const: st,spear600-smi @@ -42,14 +39,29 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32 description: Functional clock rate of the SMI controller in Hz. - st,smi-fast-mode: - type: boolean - description: Indicates that the attached flash supports fast read mode. +patternProperties: + "^flash@.*$": + $ref: /schemas/mtd/mtd.yaml# + + properties: + reg: + maxItems: 1 + + st,smi-fast-mode: + type: boolean + description: Indicates that the attached flash supports fast read mode. + + unevaluatedProperties: false + + required: + - reg required: - compatible - reg - clock-rate + - "#address-cells" + - "#size-cells" unevaluatedProperties: false @@ -64,7 +76,7 @@ examples: interrupts = <12>; clock-rate = <50000000>; /* 50 MHz */ - flash@f8000000 { + flash@fc000000 { reg = <0xfc000000 0x1000>; st,smi-fast-mode; }; diff --git a/Documentation/devicetree/bindings/regulator/regulator.yaml b/Documentation/devicetree/bindings/regulator/regulator.yaml index 042e56396399f9..019aeb664caeca 100644 --- a/Documentation/devicetree/bindings/regulator/regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/regulator.yaml @@ -168,7 +168,7 @@ properties: offset from voltage set to regulator. regulator-uv-protection-microvolt: - description: Set over under voltage protection limit. This is a limit where + description: Set under voltage protection limit. This is a limit where hardware performs emergency shutdown. Zero can be passed to disable protection and value '1' indicates that protection should be enabled but limit setting can be omitted. Limit is given as microvolt offset from @@ -182,7 +182,7 @@ properties: is given as microvolt offset from voltage set to regulator. regulator-uv-warn-microvolt: - description: Set over under voltage warning limit. This is a limit where + description: Set under voltage warning limit. This is a limit where hardware is assumed still to be functional but approaching limit where it gets damaged. Recovery actions should be initiated. Zero can be passed to disable detection and value '1' indicates that detection should diff --git a/Documentation/driver-api/driver-model/binding.rst b/Documentation/driver-api/driver-model/binding.rst index d1d311a4011fee..fa0888c2b3b92d 100644 --- a/Documentation/driver-api/driver-model/binding.rst +++ b/Documentation/driver-api/driver-model/binding.rst @@ -99,3 +99,51 @@ of the driver is decremented. All symlinks between the two are removed. When a driver is removed, the list of devices that it supports is iterated over, and the driver's remove callback is called for each one. The device is removed from that list and the symlinks removed. + + +Driver Override +~~~~~~~~~~~~~~~ + +Userspace may override the standard matching by writing a driver name to +a device's ``driver_override`` sysfs attribute. When set, only a driver +whose name matches the override will be considered during binding. This +bypasses all bus-specific matching (OF, ACPI, ID tables, etc.). + +The override may be cleared by writing an empty string, which returns +the device to standard matching rules. Writing to ``driver_override`` +does not automatically unbind the device from its current driver or +make any attempt to load the specified driver. + +Buses opt into this mechanism by setting the ``driver_override`` flag in +their ``struct bus_type``:: + + const struct bus_type example_bus_type = { + ... + .driver_override = true, + }; + +When the flag is set, the driver core automatically creates the +``driver_override`` sysfs attribute for every device on that bus. + +The bus's ``match()`` callback should check the override before performing +its own matching, using ``device_match_driver_override()``:: + + static int example_match(struct device *dev, const struct device_driver *drv) + { + int ret; + + ret = device_match_driver_override(dev, drv); + if (ret >= 0) + return ret; + + /* Fall through to bus-specific matching... */ + } + +``device_match_driver_override()`` returns > 0 if the override matches +the given driver, 0 if the override is set but does not match, or < 0 if +no override is set at all. + +Additional helpers are available: + +- ``device_set_driver_override()`` - set or clear the override from kernel code. +- ``device_has_driver_override()`` - check whether an override is set. diff --git a/Documentation/netlink/specs/net_shaper.yaml b/Documentation/netlink/specs/net_shaper.yaml index 0b1b54be48f92c..3f2ad772b64b15 100644 --- a/Documentation/netlink/specs/net_shaper.yaml +++ b/Documentation/netlink/specs/net_shaper.yaml @@ -247,8 +247,8 @@ operations: flags: [admin-perm] do: - pre: net-shaper-nl-pre-doit - post: net-shaper-nl-post-doit + pre: net-shaper-nl-pre-doit-write + post: net-shaper-nl-post-doit-write request: attributes: - ifindex @@ -278,8 +278,8 @@ operations: flags: [admin-perm] do: - pre: net-shaper-nl-pre-doit - post: net-shaper-nl-post-doit + pre: net-shaper-nl-pre-doit-write + post: net-shaper-nl-post-doit-write request: attributes: *ns-binding @@ -309,8 +309,8 @@ operations: flags: [admin-perm] do: - pre: net-shaper-nl-pre-doit - post: net-shaper-nl-post-doit + pre: net-shaper-nl-pre-doit-write + post: net-shaper-nl-post-doit-write request: attributes: - ifindex diff --git a/Documentation/userspace-api/landlock.rst b/Documentation/userspace-api/landlock.rst index 13134bccdd39d7..7f86d7a37dc232 100644 --- a/Documentation/userspace-api/landlock.rst +++ b/Documentation/userspace-api/landlock.rst @@ -8,7 +8,7 @@ Landlock: unprivileged access control ===================================== :Author: Mickaël Salaün -:Date: January 2026 +:Date: March 2026 The goal of Landlock is to enable restriction of ambient rights (e.g. global filesystem or network access) for a set of processes. Because Landlock @@ -197,12 +197,27 @@ similar backwards compatibility check is needed for the restrict flags .. code-block:: c - __u32 restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON; - if (abi < 7) { - /* Clear logging flags unsupported before ABI 7. */ + __u32 restrict_flags = + LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON | + LANDLOCK_RESTRICT_SELF_TSYNC; + switch (abi) { + case 1 ... 6: + /* Removes logging flags for ABI < 7 */ restrict_flags &= ~(LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF | LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON | LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF); + __attribute__((fallthrough)); + case 7: + /* + * Removes multithreaded enforcement flag for ABI < 8 + * + * WARNING: Without this flag, calling landlock_restrict_self(2) is + * only equivalent if the calling process is single-threaded. Below + * ABI v8 (and as of ABI v8, when not using this flag), a Landlock + * policy would only be enforced for the calling thread and its + * children (and not for all threads, including parents and siblings). + */ + restrict_flags &= ~LANDLOCK_RESTRICT_SELF_TSYNC; } The next step is to restrict the current thread from gaining more privileges diff --git a/MAINTAINERS b/MAINTAINERS index 96ea84948d76af..0481aca2286ca1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3986,7 +3986,7 @@ F: drivers/hwmon/asus-ec-sensors.c ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS M: Corentin Chary M: Luke D. Jones -M: Denis Benato +M: Denis Benato L: platform-driver-x86@vger.kernel.org S: Maintained W: https://asus-linux.org/ @@ -4022,7 +4022,7 @@ F: drivers/hwmon/asus_wmi_sensors.c ASYMMETRIC KEYS M: David Howells M: Lukas Wunner -M: Ignat Korchagin +M: Ignat Korchagin L: keyrings@vger.kernel.org L: linux-crypto@vger.kernel.org S: Maintained @@ -4035,7 +4035,7 @@ F: include/linux/verification.h ASYMMETRIC KEYS - ECDSA M: Lukas Wunner -M: Ignat Korchagin +M: Ignat Korchagin R: Stefan Berger L: linux-crypto@vger.kernel.org S: Maintained @@ -4045,14 +4045,14 @@ F: include/crypto/ecc* ASYMMETRIC KEYS - GOST M: Lukas Wunner -M: Ignat Korchagin +M: Ignat Korchagin L: linux-crypto@vger.kernel.org S: Odd fixes F: crypto/ecrdsa* ASYMMETRIC KEYS - RSA M: Lukas Wunner -M: Ignat Korchagin +M: Ignat Korchagin L: linux-crypto@vger.kernel.org S: Maintained F: crypto/rsa* @@ -7998,7 +7998,9 @@ F: Documentation/devicetree/bindings/display/himax,hx8357.yaml F: drivers/gpu/drm/tiny/hx8357d.c DRM DRIVER FOR HYPERV SYNTHETIC VIDEO DEVICE -M: Deepak Rawat +M: Dexuan Cui +M: Long Li +M: Saurabh Sengar L: linux-hyperv@vger.kernel.org L: dri-devel@lists.freedesktop.org S: Maintained @@ -24900,9 +24902,9 @@ F: drivers/clk/spear/ F: drivers/pinctrl/spear/ SPI NOR SUBSYSTEM -M: Tudor Ambarus M: Pratyush Yadav M: Michael Walle +R: Takahiro Kuwano L: linux-mtd@lists.infradead.org S: Maintained W: http://www.linux-mtd.infradead.org/ diff --git a/Makefile b/Makefile index c9b7bee102e811..e1279c4d5b2446 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 7 PATCHLEVEL = 0 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Baby Opossum Posse # *DOCUMENTATION* @@ -1654,7 +1654,7 @@ CLEAN_FILES += vmlinux.symvers modules-only.symvers \ modules.builtin.ranges vmlinux.o.map vmlinux.unstripped \ compile_commands.json rust/test \ rust-project.json .vmlinux.objs .vmlinux.export.c \ - .builtin-dtbs-list .builtin-dtb.S + .builtin-dtbs-list .builtin-dtbs.S # Directories & files removed with 'make mrproper' MRPROPER_FILES += include/config include/generated \ diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index f75d75cf91c88c..70d05f74049e84 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -279,7 +279,6 @@ CONFIG_TI_CPSW_SWITCHDEV=y CONFIG_TI_CPTS=y CONFIG_TI_KEYSTONE_NETCP=y CONFIG_TI_KEYSTONE_NETCP_ETHSS=y -CONFIG_TI_PRUSS=m CONFIG_TI_PRUETH=m CONFIG_XILINX_EMACLITE=y CONFIG_SFP=m diff --git a/arch/arm64/boot/dts/renesas/r8a78000.dtsi b/arch/arm64/boot/dts/renesas/r8a78000.dtsi index 4c97298fa76348..3e1c98903cea08 100644 --- a/arch/arm64/boot/dts/renesas/r8a78000.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a78000.dtsi @@ -698,7 +698,7 @@ compatible = "renesas,scif-r8a78000", "renesas,rcar-gen5-scif", "renesas,scif"; reg = <0 0xc0700000 0 0x40>; - interrupts = ; + interrupts = ; clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; status = "disabled"; @@ -708,7 +708,7 @@ compatible = "renesas,scif-r8a78000", "renesas,rcar-gen5-scif", "renesas,scif"; reg = <0 0xc0704000 0 0x40>; - interrupts = ; + interrupts = ; clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; status = "disabled"; @@ -718,7 +718,7 @@ compatible = "renesas,scif-r8a78000", "renesas,rcar-gen5-scif", "renesas,scif"; reg = <0 0xc0708000 0 0x40>; - interrupts = ; + interrupts = ; clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; status = "disabled"; @@ -728,7 +728,7 @@ compatible = "renesas,scif-r8a78000", "renesas,rcar-gen5-scif", "renesas,scif"; reg = <0 0xc070c000 0 0x40>; - interrupts = ; + interrupts = ; clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; status = "disabled"; @@ -738,7 +738,7 @@ compatible = "renesas,hscif-r8a78000", "renesas,rcar-gen5-hscif", "renesas,hscif"; reg = <0 0xc0710000 0 0x60>; - interrupts = ; + interrupts = ; clocks = <&dummy_clk_sgasyncd4>, <&dummy_clk_sgasyncd4>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; status = "disabled"; @@ -748,7 +748,7 @@ compatible = "renesas,hscif-r8a78000", "renesas,rcar-gen5-hscif", "renesas,hscif"; reg = <0 0xc0714000 0 0x60>; - interrupts = ; + interrupts = ; clocks = <&dummy_clk_sgasyncd4>, <&dummy_clk_sgasyncd4>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; status = "disabled"; @@ -758,7 +758,7 @@ compatible = "renesas,hscif-r8a78000", "renesas,rcar-gen5-hscif", "renesas,hscif"; reg = <0 0xc0718000 0 0x60>; - interrupts = ; + interrupts = ; clocks = <&dummy_clk_sgasyncd4>, <&dummy_clk_sgasyncd4>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; status = "disabled"; @@ -768,7 +768,7 @@ compatible = "renesas,hscif-r8a78000", "renesas,rcar-gen5-hscif", "renesas,hscif"; reg = <0 0xc071c000 0 0x60>; - interrupts = ; + interrupts = ; clocks = <&dummy_clk_sgasyncd4>, <&dummy_clk_sgasyncd4>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; status = "disabled"; diff --git a/arch/arm64/boot/dts/renesas/r9a09g057.dtsi b/arch/arm64/boot/dts/renesas/r9a09g057.dtsi index 80cba9fcfe7bfa..504c2838662252 100644 --- a/arch/arm64/boot/dts/renesas/r9a09g057.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a09g057.dtsi @@ -581,16 +581,6 @@ status = "disabled"; }; - wdt0: watchdog@11c00400 { - compatible = "renesas,r9a09g057-wdt"; - reg = <0 0x11c00400 0 0x400>; - clocks = <&cpg CPG_MOD 0x4b>, <&cpg CPG_MOD 0x4c>; - clock-names = "pclk", "oscclk"; - resets = <&cpg 0x75>; - power-domains = <&cpg>; - status = "disabled"; - }; - wdt1: watchdog@14400000 { compatible = "renesas,r9a09g057-wdt"; reg = <0 0x14400000 0 0x400>; @@ -601,26 +591,6 @@ status = "disabled"; }; - wdt2: watchdog@13000000 { - compatible = "renesas,r9a09g057-wdt"; - reg = <0 0x13000000 0 0x400>; - clocks = <&cpg CPG_MOD 0x4f>, <&cpg CPG_MOD 0x50>; - clock-names = "pclk", "oscclk"; - resets = <&cpg 0x77>; - power-domains = <&cpg>; - status = "disabled"; - }; - - wdt3: watchdog@13000400 { - compatible = "renesas,r9a09g057-wdt"; - reg = <0 0x13000400 0 0x400>; - clocks = <&cpg CPG_MOD 0x51>, <&cpg CPG_MOD 0x52>; - clock-names = "pclk", "oscclk"; - resets = <&cpg 0x78>; - power-domains = <&cpg>; - status = "disabled"; - }; - rtc: rtc@11c00800 { compatible = "renesas,r9a09g057-rtca3", "renesas,rz-rtca3"; reg = <0 0x11c00800 0 0x400>; diff --git a/arch/arm64/boot/dts/renesas/r9a09g077.dtsi b/arch/arm64/boot/dts/renesas/r9a09g077.dtsi index 14d7fb6f8952e1..9d0b4d8d3d5bb1 100644 --- a/arch/arm64/boot/dts/renesas/r9a09g077.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a09g077.dtsi @@ -974,8 +974,8 @@ cpg: clock-controller@80280000 { compatible = "renesas,r9a09g077-cpg-mssr"; - reg = <0 0x80280000 0 0x1000>, - <0 0x81280000 0 0x9000>; + reg = <0 0x80280000 0 0x10000>, + <0 0x81280000 0 0x10000>; clocks = <&extal_clk>; clock-names = "extal"; #clock-cells = <2>; diff --git a/arch/arm64/boot/dts/renesas/r9a09g087.dtsi b/arch/arm64/boot/dts/renesas/r9a09g087.dtsi index 4a133956133218..d407c48f996695 100644 --- a/arch/arm64/boot/dts/renesas/r9a09g087.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a09g087.dtsi @@ -977,8 +977,8 @@ cpg: clock-controller@80280000 { compatible = "renesas,r9a09g087-cpg-mssr"; - reg = <0 0x80280000 0 0x1000>, - <0 0x81280000 0 0x9000>; + reg = <0 0x80280000 0 0x10000>, + <0 0x81280000 0 0x10000>; clocks = <&extal_clk>; clock-names = "extal"; #clock-cells = <2>; diff --git a/arch/arm64/boot/dts/renesas/rzg3s-smarc-som.dtsi b/arch/arm64/boot/dts/renesas/rzg3s-smarc-som.dtsi index 982f17aafbc507..b45acfe6288a7c 100644 --- a/arch/arm64/boot/dts/renesas/rzg3s-smarc-som.dtsi +++ b/arch/arm64/boot/dts/renesas/rzg3s-smarc-som.dtsi @@ -162,7 +162,7 @@ <100000000>; renesas,settings = [ 80 00 11 19 4c 42 dc 2f 06 7d 20 1a 5f 1e f2 27 - 00 40 00 00 00 00 00 00 06 0c 19 02 3f f0 90 86 + 00 40 00 00 00 00 00 00 06 0c 19 02 3b f0 90 86 a0 80 30 30 9c ]; }; diff --git a/arch/arm64/boot/dts/renesas/rzt2h-n2h-evk-common.dtsi b/arch/arm64/boot/dts/renesas/rzt2h-n2h-evk-common.dtsi index 510399febf2956..f87c2492f414a4 100644 --- a/arch/arm64/boot/dts/renesas/rzt2h-n2h-evk-common.dtsi +++ b/arch/arm64/boot/dts/renesas/rzt2h-n2h-evk-common.dtsi @@ -53,6 +53,7 @@ regulator-max-microvolt = <3300000>; gpios-states = <0>; states = <3300000 0>, <1800000 1>; + regulator-ramp-delay = <60>; }; #endif diff --git a/arch/arm64/boot/dts/renesas/rzv2-evk-cn15-sd.dtso b/arch/arm64/boot/dts/renesas/rzv2-evk-cn15-sd.dtso index 0af1e0a6c7f482..fc53c1aae3b522 100644 --- a/arch/arm64/boot/dts/renesas/rzv2-evk-cn15-sd.dtso +++ b/arch/arm64/boot/dts/renesas/rzv2-evk-cn15-sd.dtso @@ -25,6 +25,7 @@ regulator-max-microvolt = <3300000>; gpios-states = <0>; states = <3300000 0>, <1800000 1>; + regulator-ramp-delay = <60>; }; }; diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index cb87c8fc66b3b0..00530b29101023 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -76,19 +76,24 @@ static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); - struct crypto_aes_ctx rk; + struct crypto_aes_ctx *rk; int err; - err = aes_expandkey(&rk, in_key, key_len); + rk = kmalloc(sizeof(*rk), GFP_KERNEL); + if (!rk) + return -ENOMEM; + + err = aes_expandkey(rk, in_key, key_len); if (err) - return err; + goto out; ctx->rounds = 6 + key_len / 4; scoped_ksimd() - aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds); - - return 0; + aesbs_convert_key(ctx->rk, rk->key_enc, ctx->rounds); +out: + kfree_sensitive(rk); + return err; } static int __ecb_crypt(struct skcipher_request *req, @@ -133,22 +138,26 @@ static int aesbs_cbc_ctr_setkey(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); - struct crypto_aes_ctx rk; + struct crypto_aes_ctx *rk; int err; - err = aes_expandkey(&rk, in_key, key_len); + rk = kmalloc(sizeof(*rk), GFP_KERNEL); + if (!rk) + return -ENOMEM; + + err = aes_expandkey(rk, in_key, key_len); if (err) - return err; + goto out; ctx->key.rounds = 6 + key_len / 4; - memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc)); + memcpy(ctx->enc, rk->key_enc, sizeof(ctx->enc)); scoped_ksimd() - aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds); - memzero_explicit(&rk, sizeof(rk)); - - return 0; + aesbs_convert_key(ctx->key.rk, rk->key_enc, ctx->key.rounds); +out: + kfree_sensitive(rk); + return err; } static int cbc_encrypt(struct skcipher_request *req) diff --git a/arch/arm64/kernel/pi/patch-scs.c b/arch/arm64/kernel/pi/patch-scs.c index bbe7d30ed12b32..dac568e4a54f23 100644 --- a/arch/arm64/kernel/pi/patch-scs.c +++ b/arch/arm64/kernel/pi/patch-scs.c @@ -192,6 +192,14 @@ static int scs_handle_fde_frame(const struct eh_frame *frame, size -= 2; break; + case DW_CFA_advance_loc4: + loc += *opcode++ * code_alignment_factor; + loc += (*opcode++ << 8) * code_alignment_factor; + loc += (*opcode++ << 16) * code_alignment_factor; + loc += (*opcode++ << 24) * code_alignment_factor; + size -= 4; + break; + case DW_CFA_def_cfa: case DW_CFA_offset_extended: size = skip_xleb128(&opcode, size); diff --git a/arch/arm64/kernel/rsi.c b/arch/arm64/kernel/rsi.c index c64a06f58c0bc0..9e846ce4ef9ca7 100644 --- a/arch/arm64/kernel/rsi.c +++ b/arch/arm64/kernel/rsi.c @@ -12,6 +12,7 @@ #include #include +#include #include static struct realm_config config; @@ -146,7 +147,7 @@ void __init arm64_rsi_init(void) return; if (WARN_ON(rsi_get_realm_config(&config))) return; - prot_ns_shared = BIT(config.ipa_bits - 1); + prot_ns_shared = __phys_to_pte_val(BIT(config.ipa_bits - 1)); if (arm64_ioremap_prot_hook_register(realm_ioremap_hook)) return; diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index c5c5644b1878e7..a024d9a770dc74 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -1753,7 +1753,7 @@ int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new) if (!writable) return -EPERM; - ptep = (u64 __user *)hva + offset; + ptep = (void __user *)hva + offset; if (cpus_have_final_cap(ARM64_HAS_LSE_ATOMICS)) r = __lse_swap_desc(ptep, old, new); else diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 959532422d3a30..b963fd975aacaf 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -247,6 +247,20 @@ void kvm_reset_vcpu(struct kvm_vcpu *vcpu) kvm_vcpu_set_be(vcpu); *vcpu_pc(vcpu) = target_pc; + + /* + * We may come from a state where either a PC update was + * pending (SMC call resulting in PC being increpented to + * skip the SMC) or a pending exception. Make sure we get + * rid of all that, as this cannot be valid out of reset. + * + * Note that clearing the exception mask also clears PC + * updates, but that's an implementation detail, and we + * really want to make it explicit. + */ + vcpu_clear_flag(vcpu, PENDING_EXCEPTION); + vcpu_clear_flag(vcpu, EXCEPT_MASK); + vcpu_clear_flag(vcpu, INCREMENT_PC); vcpu_set_reg(vcpu, 0, reset_state.r0); } diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index d211c6572b0a67..92068ff38685eb 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -304,6 +304,9 @@ config AS_HAS_LBT_EXTENSION config AS_HAS_LVZ_EXTENSION def_bool $(as-instr,hvcl 0) +config AS_HAS_SCQ_EXTENSION + def_bool $(as-instr,sc.q \$t0$(comma)\$t1$(comma)\$t2) + config CC_HAS_ANNOTATE_TABLEJUMP def_bool $(cc-option,-mannotate-tablejump) diff --git a/arch/loongarch/include/asm/cmpxchg.h b/arch/loongarch/include/asm/cmpxchg.h index 58cabab6d90d1a..909f9274fe71aa 100644 --- a/arch/loongarch/include/asm/cmpxchg.h +++ b/arch/loongarch/include/asm/cmpxchg.h @@ -238,6 +238,8 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, unsigned int arch_cmpxchg((ptr), (o), (n)); \ }) +#ifdef CONFIG_AS_HAS_SCQ_EXTENSION + union __u128_halves { u128 full; struct { @@ -290,6 +292,9 @@ union __u128_halves { BUILD_BUG_ON(sizeof(*(ptr)) != 16); \ __arch_cmpxchg128(ptr, o, n, ""); \ }) + +#endif /* CONFIG_AS_HAS_SCQ_EXTENSION */ + #else #include #define arch_cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n)) diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h index 4e259d490e4567..438269313e78c4 100644 --- a/arch/loongarch/include/asm/uaccess.h +++ b/arch/loongarch/include/asm/uaccess.h @@ -253,8 +253,13 @@ do { \ \ __get_kernel_common(*((type *)(dst)), sizeof(type), \ (__force type *)(src)); \ - if (unlikely(__gu_err)) \ + if (unlikely(__gu_err)) { \ + pr_info("%s: memory access failed, ecode 0x%x\n", \ + __func__, read_csr_excode()); \ + pr_info("%s: the caller is %pS\n", \ + __func__, __builtin_return_address(0)); \ goto err_label; \ + } \ } while (0) #define __put_kernel_nofault(dst, src, type, err_label) \ @@ -264,8 +269,13 @@ do { \ \ __pu_val = *(__force type *)(src); \ __put_kernel_common(((type *)(dst)), sizeof(type)); \ - if (unlikely(__pu_err)) \ + if (unlikely(__pu_err)) { \ + pr_info("%s: memory access failed, ecode 0x%x\n", \ + __func__, read_csr_excode()); \ + pr_info("%s: the caller is %pS\n", \ + __func__, __builtin_return_address(0)); \ goto err_label; \ + } \ } while (0) extern unsigned long __copy_user(void *to, const void *from, __kernel_size_t n); diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c index bf037f0c6b26c9..1a728082944cb2 100644 --- a/arch/loongarch/kernel/inst.c +++ b/arch/loongarch/kernel/inst.c @@ -246,32 +246,51 @@ static int text_copy_cb(void *data) if (smp_processor_id() == copy->cpu) { ret = copy_to_kernel_nofault(copy->dst, copy->src, copy->len); - if (ret) + if (ret) { pr_err("%s: operation failed\n", __func__); + return ret; + } } flush_icache_range((unsigned long)copy->dst, (unsigned long)copy->dst + copy->len); - return ret; + return 0; } int larch_insn_text_copy(void *dst, void *src, size_t len) { int ret = 0; + int err = 0; size_t start, end; struct insn_copy copy = { .dst = dst, .src = src, .len = len, - .cpu = smp_processor_id(), + .cpu = raw_smp_processor_id(), }; + /* + * Ensure copy.cpu won't be hot removed before stop_machine. + * If it is removed nobody will really update the text. + */ + lockdep_assert_cpus_held(); + start = round_down((size_t)dst, PAGE_SIZE); end = round_up((size_t)dst + len, PAGE_SIZE); - set_memory_rw(start, (end - start) / PAGE_SIZE); - ret = stop_machine(text_copy_cb, ©, cpu_online_mask); - set_memory_rox(start, (end - start) / PAGE_SIZE); + err = set_memory_rw(start, (end - start) / PAGE_SIZE); + if (err) { + pr_info("%s: set_memory_rw() failed\n", __func__); + return err; + } + + ret = stop_machine_cpuslocked(text_copy_cb, ©, cpu_online_mask); + + err = set_memory_rox(start, (end - start) / PAGE_SIZE); + if (err) { + pr_info("%s: set_memory_rox() failed\n", __func__); + return err; + } return ret; } diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index 17b3d5b36cfc03..8cc5ee1c53efbe 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -49,8 +49,8 @@ static void kvm_vm_init_features(struct kvm *kvm) kvm->arch.kvm_features |= BIT(KVM_LOONGARCH_VM_FEAT_PMU); /* Enable all PV features by default */ - kvm->arch.pv_features = BIT(KVM_FEATURE_IPI); - kvm->arch.kvm_features = BIT(KVM_LOONGARCH_VM_FEAT_PV_IPI); + kvm->arch.pv_features |= BIT(KVM_FEATURE_IPI); + kvm->arch.kvm_features |= BIT(KVM_LOONGARCH_VM_FEAT_PV_IPI); if (kvm_pvtime_supported()) { kvm->arch.pv_features |= BIT(KVM_FEATURE_PREEMPT); kvm->arch.pv_features |= BIT(KVM_FEATURE_STEAL_TIME); diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 3bd89f55960d98..9cb796e1637941 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -1379,9 +1379,11 @@ void *bpf_arch_text_copy(void *dst, void *src, size_t len) { int ret; + cpus_read_lock(); mutex_lock(&text_mutex); ret = larch_insn_text_copy(dst, src, len); mutex_unlock(&text_mutex); + cpus_read_unlock(); return ret ? ERR_PTR(-EINVAL) : dst; } @@ -1429,10 +1431,12 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t, if (ret) return ret; + cpus_read_lock(); mutex_lock(&text_mutex); if (memcmp(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES)) ret = larch_insn_text_copy(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES); mutex_unlock(&text_mutex); + cpus_read_unlock(); return ret; } @@ -1450,10 +1454,12 @@ int bpf_arch_text_invalidate(void *dst, size_t len) for (i = 0; i < (len / sizeof(u32)); i++) inst[i] = INSN_BREAK; + cpus_read_lock(); mutex_lock(&text_mutex); if (larch_insn_text_copy(dst, inst, len)) ret = -EINVAL; mutex_unlock(&text_mutex); + cpus_read_unlock(); kvfree(inst); @@ -1568,6 +1574,11 @@ void arch_free_bpf_trampoline(void *image, unsigned int size) bpf_prog_pack_free(image, size); } +int arch_protect_bpf_trampoline(void *image, unsigned int size) +{ + return 0; +} + /* * Sign-extend the register if necessary */ diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 4c5240d3a3c7cf..b189265785dc79 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -953,7 +953,7 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes, #else "1: cmpb,<<,n %0,%2,1b\n" #endif - " fic,m %3(%4,%0)\n" + " fdc,m %3(%4,%0)\n" "2: sync\n" ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b, "%1") : "+r" (start), "+r" (error) @@ -968,7 +968,7 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes, #else "1: cmpb,<<,n %0,%2,1b\n" #endif - " fdc,m %3(%4,%0)\n" + " fic,m %3(%4,%0)\n" "2: sync\n" ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b, "%1") : "+r" (start), "+r" (error) diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi index 5c2963e269b836..a0ffedc2d34406 100644 --- a/arch/riscv/boot/dts/microchip/mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi @@ -428,6 +428,7 @@ clocks = <&clkcfg CLK_CAN0>, <&clkcfg CLK_MSSPLL3>; interrupt-parent = <&plic>; interrupts = <56>; + resets = <&mss_top_sysreg CLK_CAN0>; status = "disabled"; }; @@ -437,6 +438,7 @@ clocks = <&clkcfg CLK_CAN1>, <&clkcfg CLK_MSSPLL3>; interrupt-parent = <&plic>; interrupts = <57>; + resets = <&mss_top_sysreg CLK_CAN1>; status = "disabled"; }; diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 64a50f0862aabb..3039c88daa6333 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -710,6 +710,9 @@ void kvm_arch_crypto_clear_masks(struct kvm *kvm); void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, unsigned long *aqm, unsigned long *adm); +#define SIE64_RETURN_NORMAL 0 +#define SIE64_RETURN_MCCK 1 + int __sie64a(phys_addr_t sie_block_phys, struct kvm_s390_sie_block *sie_block, u64 *rsa, unsigned long gasce); diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index c9ae680a28af91..ac3606c3babe49 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -62,7 +62,7 @@ struct stack_frame { struct { unsigned long sie_control_block; unsigned long sie_savearea; - unsigned long sie_reason; + unsigned long sie_return; unsigned long sie_flags; unsigned long sie_control_block_phys; unsigned long sie_guest_asce; diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index e1a5b5b54e4f8e..fbd26f3e9f96bb 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -63,7 +63,7 @@ int main(void) OFFSET(__SF_EMPTY, stack_frame, empty[0]); OFFSET(__SF_SIE_CONTROL, stack_frame, sie_control_block); OFFSET(__SF_SIE_SAVEAREA, stack_frame, sie_savearea); - OFFSET(__SF_SIE_REASON, stack_frame, sie_reason); + OFFSET(__SF_SIE_RETURN, stack_frame, sie_return); OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags); OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys); OFFSET(__SF_SIE_GUEST_ASCE, stack_frame, sie_guest_asce); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 4873fe9d891ba2..5817cb47b2d0be 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -200,7 +200,7 @@ SYM_FUNC_START(__sie64a) stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area stg %r5,__SF_SIE_GUEST_ASCE(%r15) # save guest asce - xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0 + xc __SF_SIE_RETURN(8,%r15),__SF_SIE_RETURN(%r15) # return code = 0 mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r14) # copy thread flags lmg %r0,%r13,0(%r4) # load guest gprs 0-13 mvi __TI_sie(%r14),1 @@ -237,7 +237,7 @@ SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL) xgr %r4,%r4 xgr %r5,%r5 lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers - lg %r2,__SF_SIE_REASON(%r15) # return exit reason code + lg %r2,__SF_SIE_RETURN(%r15) # return sie return code BR_EX %r14 SYM_FUNC_END(__sie64a) EXPORT_SYMBOL(__sie64a) diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index a55abbf65333a1..94fbfad49f6205 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -487,8 +487,8 @@ void notrace s390_do_machine_check(struct pt_regs *regs) mcck_dam_code = (mci.val & MCIC_SUBCLASS_MASK); if (test_cpu_flag(CIF_MCCK_GUEST) && (mcck_dam_code & MCCK_CODE_NO_GUEST) != mcck_dam_code) { - /* Set exit reason code for host's later handling */ - *((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR; + /* Set sie return code for host's later handling */ + ((struct stack_frame *)regs->gprs[15])->sie_return = SIE64_RETURN_MCCK; } clear_cpu_flag(CIF_MCCK_GUEST); diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 4630b2a067ea61..a9da9390867d3e 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1434,7 +1434,8 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union if (rc) return rc; - pgste = pgste_get_lock(ptep_h); + if (!pgste_get_trylock(ptep_h, &pgste)) + return -EAGAIN; newpte = _pte(f->pfn, f->writable, !p, 0); newpte.s.d |= ptep->s.d; newpte.s.sd |= ptep->s.sd; @@ -1444,7 +1445,8 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union pgste_set_unlock(ptep_h, pgste); newpte = _pte(f->pfn, 0, !p, 0); - pgste = pgste_get_lock(ptep); + if (!pgste_get_trylock(ptep, &pgste)) + return -EAGAIN; pgste = __dat_ptep_xchg(ptep, pgste, newpte, gpa_to_gfn(raddr), sg->asce, uses_skeys(sg)); pgste_set_unlock(ptep, pgste); diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 18932a65ca6831..7cb8ce833b6254 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2724,6 +2724,9 @@ static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap) bit = bit_nr + (addr % PAGE_SIZE) * 8; + /* kvm_set_routing_entry() should never allow this to happen */ + WARN_ON_ONCE(bit > (PAGE_SIZE * BITS_PER_BYTE - 1)); + return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit; } @@ -2824,6 +2827,12 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu, int rc; mci.val = mcck_info->mcic; + + /* log machine checks being reinjected on all debugs */ + VCPU_EVENT(vcpu, 2, "guest machine check %lx", mci.val); + KVM_EVENT(2, "guest machine check %lx", mci.val); + pr_info("guest machine check pid %d: %lx", current->pid, mci.val); + if (mci.sr) cr14 |= CR14_RECOVERY_SUBMASK; if (mci.dg) @@ -2852,6 +2861,7 @@ int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { + const struct kvm_irq_routing_s390_adapter *adapter; u64 uaddr_s, uaddr_i; int idx; @@ -2862,6 +2872,14 @@ int kvm_set_routing_entry(struct kvm *kvm, return -EINVAL; e->set = set_adapter_int; + adapter = &ue->u.adapter; + if (adapter->summary_addr + (adapter->summary_offset / 8) >= + (adapter->summary_addr & PAGE_MASK) + PAGE_SIZE) + return -EINVAL; + if (adapter->ind_addr + (adapter->ind_offset / 8) >= + (adapter->ind_addr & PAGE_MASK) + PAGE_SIZE) + return -EINVAL; + idx = srcu_read_lock(&kvm->srcu); uaddr_s = gpa_to_hva(kvm, ue->u.adapter.summary_addr); uaddr_i = gpa_to_hva(kvm, ue->u.adapter.ind_addr); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 3eb60aa932ecc6..b2c01fa7b852c8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4617,7 +4617,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) return 0; } -static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) +static int vcpu_post_run(struct kvm_vcpu *vcpu, int sie_return) { struct mcck_volatile_info *mcck_info; struct sie_page *sie_page; @@ -4633,14 +4633,14 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; - if (exit_reason == -EINTR) { - VCPU_EVENT(vcpu, 3, "%s", "machine check"); + if (sie_return == SIE64_RETURN_MCCK) { sie_page = container_of(vcpu->arch.sie_block, struct sie_page, sie_block); mcck_info = &sie_page->mcck_info; kvm_s390_reinject_machine_check(vcpu, mcck_info); return 0; } + WARN_ON_ONCE(sie_return != SIE64_RETURN_NORMAL); if (vcpu->arch.sie_block->icptcode > 0) { rc = kvm_handle_sie_intercept(vcpu); @@ -4679,7 +4679,7 @@ int noinstr kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb, #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK) static int __vcpu_run(struct kvm_vcpu *vcpu) { - int rc, exit_reason; + int rc, sie_return; struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block; /* @@ -4719,9 +4719,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) guest_timing_enter_irqoff(); __disable_cpu_timer_accounting(vcpu); - exit_reason = kvm_s390_enter_exit_sie(vcpu->arch.sie_block, - vcpu->run->s.regs.gprs, - vcpu->arch.gmap->asce.val); + sie_return = kvm_s390_enter_exit_sie(vcpu->arch.sie_block, + vcpu->run->s.regs.gprs, + vcpu->arch.gmap->asce.val); __enable_cpu_timer_accounting(vcpu); guest_timing_exit_irqoff(); @@ -4744,7 +4744,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) } kvm_vcpu_srcu_read_lock(vcpu); - rc = vcpu_post_run(vcpu, exit_reason); + rc = vcpu_post_run(vcpu, sie_return); if (rc || guestdbg_exit_pending(vcpu)) { kvm_vcpu_srcu_read_unlock(vcpu); break; diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index d249b10044eb75..0330829b4046cd 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1122,6 +1122,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, struc { struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; + unsigned long sie_return = SIE64_RETURN_NORMAL; int guest_bp_isolation; int rc = 0; @@ -1163,7 +1164,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, struc goto xfer_to_guest_mode_check; } guest_timing_enter_irqoff(); - rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, sg->asce.val); + sie_return = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, sg->asce.val); guest_timing_exit_irqoff(); local_irq_enable(); } @@ -1178,12 +1179,13 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, struc kvm_vcpu_srcu_read_lock(vcpu); - if (rc == -EINTR) { - VCPU_EVENT(vcpu, 3, "%s", "machine check"); + if (sie_return == SIE64_RETURN_MCCK) { kvm_s390_reinject_machine_check(vcpu, &vsie_page->mcck_info); return 0; } + WARN_ON_ONCE(sie_return != SIE64_RETURN_NORMAL); + if (rc > 0) rc = 0; /* we could still have an icpt */ else if (current->thread.gmap_int_code) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index a52aa7a99b6bfd..191cc53caead39 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -441,10 +441,17 @@ void do_secure_storage_access(struct pt_regs *regs) folio = phys_to_folio(addr); if (unlikely(!folio_try_get(folio))) return; - rc = arch_make_folio_accessible(folio); + rc = uv_convert_from_secure(folio_to_phys(folio)); + if (!rc) + clear_bit(PG_arch_1, &folio->flags.f); folio_put(folio); + /* + * There are some valid fixup types for kernel + * accesses to donated secure memory. zeropad is one + * of them. + */ if (rc) - BUG(); + return handle_fault_error_nolock(regs, 0); } else { if (faulthandler_disabled()) return handle_fault_error_nolock(regs, 0); diff --git a/arch/sh/drivers/platform_early.c b/arch/sh/drivers/platform_early.c index 143747c45206fe..48ddbc547bd9ac 100644 --- a/arch/sh/drivers/platform_early.c +++ b/arch/sh/drivers/platform_early.c @@ -26,10 +26,6 @@ static int platform_match(struct device *dev, struct device_driver *drv) struct platform_device *pdev = to_platform_device(dev); struct platform_driver *pdrv = to_platform_driver(drv); - /* When driver_override is set, only bind to the matching driver */ - if (pdev->driver_override) - return !strcmp(pdev->driver_override, drv->name); - /* Then try to match against the id table */ if (pdrv->id_table) return platform_match_id(pdrv->id_table, pdev) != NULL; diff --git a/arch/x86/entry/vdso/common/vclock_gettime.c b/arch/x86/entry/vdso/common/vclock_gettime.c index 027b7e88d753a7..57066f346b3faf 100644 --- a/arch/x86/entry/vdso/common/vclock_gettime.c +++ b/arch/x86/entry/vdso/common/vclock_gettime.c @@ -13,7 +13,7 @@ #include #include -#include "../../../../lib/vdso/gettimeofday.c" +#include "lib/vdso/gettimeofday.c" int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 03ce1bc7ef2ea7..810ab21ffd9913 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1372,14 +1372,17 @@ static void x86_pmu_enable(struct pmu *pmu) else if (i < n_running) continue; - if (hwc->state & PERF_HES_ARCH) + cpuc->events[hwc->idx] = event; + + if (hwc->state & PERF_HES_ARCH) { + static_call(x86_pmu_set_period)(event); continue; + } /* * if cpuc->enabled = 0, then no wrmsr as * per x86_pmu_enable_event() */ - cpuc->events[hwc->idx] = event; x86_pmu_start(event, PERF_EF_RELOAD); } cpuc->n_added = 0; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index cf3a4fe06ff26c..36c68210d4d2fe 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4628,6 +4628,19 @@ static inline void intel_pmu_set_acr_caused_constr(struct perf_event *event, event->hw.dyn_constraint &= hybrid(event->pmu, acr_cause_mask64); } +static inline int intel_set_branch_counter_constr(struct perf_event *event, + int *num) +{ + if (branch_sample_call_stack(event)) + return -EINVAL; + if (branch_sample_counters(event)) { + (*num)++; + event->hw.dyn_constraint &= x86_pmu.lbr_counters; + } + + return 0; +} + static int intel_pmu_hw_config(struct perf_event *event) { int ret = x86_pmu_hw_config(event); @@ -4698,21 +4711,19 @@ static int intel_pmu_hw_config(struct perf_event *event) * group, which requires the extra space to store the counters. */ leader = event->group_leader; - if (branch_sample_call_stack(leader)) + if (intel_set_branch_counter_constr(leader, &num)) return -EINVAL; - if (branch_sample_counters(leader)) { - num++; - leader->hw.dyn_constraint &= x86_pmu.lbr_counters; - } leader->hw.flags |= PERF_X86_EVENT_BRANCH_COUNTERS; for_each_sibling_event(sibling, leader) { - if (branch_sample_call_stack(sibling)) + if (intel_set_branch_counter_constr(sibling, &num)) + return -EINVAL; + } + + /* event isn't installed as a sibling yet. */ + if (event != leader) { + if (intel_set_branch_counter_constr(event, &num)) return -EINVAL; - if (branch_sample_counters(sibling)) { - num++; - sibling->hw.dyn_constraint &= x86_pmu.lbr_counters; - } } if (num > fls(x86_pmu.lbr_counters)) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 5027afc97b6559..7f0d515c07c54f 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -345,12 +345,12 @@ static u64 parse_omr_data_source(u8 dse) if (omr.omr_remote) val |= REM; - val |= omr.omr_hitm ? P(SNOOP, HITM) : P(SNOOP, HIT); - if (omr.omr_source == 0x2) { - u8 snoop = omr.omr_snoop | omr.omr_promoted; + u8 snoop = omr.omr_snoop | (omr.omr_promoted << 1); - if (snoop == 0x0) + if (omr.omr_hitm) + val |= P(SNOOP, HITM); + else if (snoop == 0x0) val |= P(SNOOP, NA); else if (snoop == 0x1) val |= P(SNOOP, MISS); @@ -359,7 +359,10 @@ static u64 parse_omr_data_source(u8 dse) else if (snoop == 0x3) val |= P(SNOOP, NONE); } else if (omr.omr_source > 0x2 && omr.omr_source < 0x7) { + val |= omr.omr_hitm ? P(SNOOP, HITM) : P(SNOOP, HIT); val |= omr.omr_snoop ? P(SNOOPX, FWD) : 0; + } else { + val |= P(SNOOP, NONE); } return val; diff --git a/arch/x86/hyperv/hv_crash.c b/arch/x86/hyperv/hv_crash.c index 92da1b4f2e7306..5ffcc23255de8e 100644 --- a/arch/x86/hyperv/hv_crash.c +++ b/arch/x86/hyperv/hv_crash.c @@ -107,14 +107,12 @@ static void __noreturn hv_panic_timeout_reboot(void) cpu_relax(); } -/* This cannot be inlined as it needs stack */ -static noinline __noclone void hv_crash_restore_tss(void) +static void hv_crash_restore_tss(void) { load_TR_desc(); } -/* This cannot be inlined as it needs stack */ -static noinline void hv_crash_clear_kernpt(void) +static void hv_crash_clear_kernpt(void) { pgd_t *pgd; p4d_t *p4d; @@ -125,6 +123,25 @@ static noinline void hv_crash_clear_kernpt(void) native_p4d_clear(p4d); } + +static void __noreturn hv_crash_handle(void) +{ + hv_crash_restore_tss(); + hv_crash_clear_kernpt(); + + /* we are now fully in devirtualized normal kernel mode */ + __crash_kexec(NULL); + + hv_panic_timeout_reboot(); +} + +/* + * __naked functions do not permit function calls, not even to __always_inline + * functions that only contain asm() blocks themselves. So use a macro instead. + */ +#define hv_wrmsr(msr, val) \ + asm volatile("wrmsr" :: "c"(msr), "a"((u32)val), "d"((u32)(val >> 32)) : "memory") + /* * This is the C entry point from the asm glue code after the disable hypercall. * We enter here in IA32-e long mode, ie, full 64bit mode running on kernel @@ -133,51 +150,38 @@ static noinline void hv_crash_clear_kernpt(void) * available. We restore kernel GDT, and rest of the context, and continue * to kexec. */ -static asmlinkage void __noreturn hv_crash_c_entry(void) +static void __naked hv_crash_c_entry(void) { - struct hv_crash_ctxt *ctxt = &hv_crash_ctxt; - /* first thing, restore kernel gdt */ - native_load_gdt(&ctxt->gdtr); + asm volatile("lgdt %0" : : "m" (hv_crash_ctxt.gdtr)); - asm volatile("movw %%ax, %%ss" : : "a"(ctxt->ss)); - asm volatile("movq %0, %%rsp" : : "m"(ctxt->rsp)); + asm volatile("movw %0, %%ss\n\t" + "movq %1, %%rsp" + :: "m"(hv_crash_ctxt.ss), "m"(hv_crash_ctxt.rsp)); - asm volatile("movw %%ax, %%ds" : : "a"(ctxt->ds)); - asm volatile("movw %%ax, %%es" : : "a"(ctxt->es)); - asm volatile("movw %%ax, %%fs" : : "a"(ctxt->fs)); - asm volatile("movw %%ax, %%gs" : : "a"(ctxt->gs)); + asm volatile("movw %0, %%ds" : : "m"(hv_crash_ctxt.ds)); + asm volatile("movw %0, %%es" : : "m"(hv_crash_ctxt.es)); + asm volatile("movw %0, %%fs" : : "m"(hv_crash_ctxt.fs)); + asm volatile("movw %0, %%gs" : : "m"(hv_crash_ctxt.gs)); - native_wrmsrq(MSR_IA32_CR_PAT, ctxt->pat); - asm volatile("movq %0, %%cr0" : : "r"(ctxt->cr0)); + hv_wrmsr(MSR_IA32_CR_PAT, hv_crash_ctxt.pat); + asm volatile("movq %0, %%cr0" : : "r"(hv_crash_ctxt.cr0)); - asm volatile("movq %0, %%cr8" : : "r"(ctxt->cr8)); - asm volatile("movq %0, %%cr4" : : "r"(ctxt->cr4)); - asm volatile("movq %0, %%cr2" : : "r"(ctxt->cr4)); + asm volatile("movq %0, %%cr8" : : "r"(hv_crash_ctxt.cr8)); + asm volatile("movq %0, %%cr4" : : "r"(hv_crash_ctxt.cr4)); + asm volatile("movq %0, %%cr2" : : "r"(hv_crash_ctxt.cr2)); - native_load_idt(&ctxt->idtr); - native_wrmsrq(MSR_GS_BASE, ctxt->gsbase); - native_wrmsrq(MSR_EFER, ctxt->efer); + asm volatile("lidt %0" : : "m" (hv_crash_ctxt.idtr)); + hv_wrmsr(MSR_GS_BASE, hv_crash_ctxt.gsbase); + hv_wrmsr(MSR_EFER, hv_crash_ctxt.efer); /* restore the original kernel CS now via far return */ - asm volatile("movzwq %0, %%rax\n\t" - "pushq %%rax\n\t" - "pushq $1f\n\t" - "lretq\n\t" - "1:nop\n\t" : : "m"(ctxt->cs) : "rax"); - - /* We are in asmlinkage without stack frame, hence make C function - * calls which will buy stack frames. - */ - hv_crash_restore_tss(); - hv_crash_clear_kernpt(); - - /* we are now fully in devirtualized normal kernel mode */ - __crash_kexec(NULL); - - hv_panic_timeout_reboot(); + asm volatile("pushq %q0\n\t" + "pushq %q1\n\t" + "lretq" + :: "r"(hv_crash_ctxt.cs), "r"(hv_crash_handle)); } -/* Tell gcc we are using lretq long jump in the above function intentionally */ +/* Tell objtool we are using lretq long jump in the above function intentionally */ STACK_FRAME_NON_STANDARD(hv_crash_c_entry); static void hv_mark_tss_not_busy(void) @@ -195,20 +199,20 @@ static void hv_hvcrash_ctxt_save(void) { struct hv_crash_ctxt *ctxt = &hv_crash_ctxt; - asm volatile("movq %%rsp,%0" : "=m"(ctxt->rsp)); + ctxt->rsp = current_stack_pointer; ctxt->cr0 = native_read_cr0(); ctxt->cr4 = native_read_cr4(); - asm volatile("movq %%cr2, %0" : "=a"(ctxt->cr2)); - asm volatile("movq %%cr8, %0" : "=a"(ctxt->cr8)); + asm volatile("movq %%cr2, %0" : "=r"(ctxt->cr2)); + asm volatile("movq %%cr8, %0" : "=r"(ctxt->cr8)); - asm volatile("movl %%cs, %%eax" : "=a"(ctxt->cs)); - asm volatile("movl %%ss, %%eax" : "=a"(ctxt->ss)); - asm volatile("movl %%ds, %%eax" : "=a"(ctxt->ds)); - asm volatile("movl %%es, %%eax" : "=a"(ctxt->es)); - asm volatile("movl %%fs, %%eax" : "=a"(ctxt->fs)); - asm volatile("movl %%gs, %%eax" : "=a"(ctxt->gs)); + asm volatile("movw %%cs, %0" : "=m"(ctxt->cs)); + asm volatile("movw %%ss, %0" : "=m"(ctxt->ss)); + asm volatile("movw %%ds, %0" : "=m"(ctxt->ds)); + asm volatile("movw %%es, %0" : "=m"(ctxt->es)); + asm volatile("movw %%fs, %0" : "=m"(ctxt->fs)); + asm volatile("movw %%gs, %0" : "=m"(ctxt->gs)); native_store_gdt(&ctxt->gdtr); store_idt(&ctxt->idtr); diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 15209f220e1fda..42568ceec48162 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -1708,8 +1708,22 @@ static void __init uv_system_init_hub(void) struct uv_hub_info_s *new_hub; /* Allocate & fill new per hub info list */ - new_hub = (bid == 0) ? &uv_hub_info_node0 - : kzalloc_node(bytes, GFP_KERNEL, uv_blade_to_node(bid)); + if (bid == 0) { + new_hub = &uv_hub_info_node0; + } else { + int nid; + + /* + * Deconfigured sockets are mapped to SOCK_EMPTY. Use + * NUMA_NO_NODE to allocate on a valid node. + */ + nid = uv_blade_to_node(bid); + if (nid == SOCK_EMPTY) + nid = NUMA_NO_NODE; + + new_hub = kzalloc_node(bytes, GFP_KERNEL, nid); + } + if (WARN_ON_ONCE(!new_hub)) { /* do not kfree() bid 0, which is statically allocated */ while (--bid > 0) diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index da13c1e37f87a9..a030ee4cecc2fe 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -875,13 +875,18 @@ void amd_clear_bank(struct mce *m) { amd_reset_thr_limit(m->bank); - /* Clear MCA_DESTAT for all deferred errors even those logged in MCA_STATUS. */ - if (m->status & MCI_STATUS_DEFERRED) - mce_wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank), 0); + if (mce_flags.smca) { + /* + * Clear MCA_DESTAT for all deferred errors even those + * logged in MCA_STATUS. + */ + if (m->status & MCI_STATUS_DEFERRED) + mce_wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank), 0); - /* Don't clear MCA_STATUS if MCA_DESTAT was used exclusively. */ - if (m->kflags & MCE_CHECK_DFR_REGS) - return; + /* Don't clear MCA_STATUS if MCA_DESTAT was used exclusively. */ + if (m->kflags & MCE_CHECK_DFR_REGS) + return; + } mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0); } diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 89a2eb8a07221d..9befdc557d9e50 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -496,8 +496,9 @@ static void hv_reserve_irq_vectors(void) test_and_set_bit(HYPERV_DBG_FASTFAIL_VECTOR, system_vectors)) BUG(); - pr_info("Hyper-V: reserve vectors: %d %d %d\n", HYPERV_DBG_ASSERT_VECTOR, - HYPERV_DBG_SERVICE_VECTOR, HYPERV_DBG_FASTFAIL_VECTOR); + pr_info("Hyper-V: reserve vectors: 0x%x 0x%x 0x%x\n", + HYPERV_DBG_ASSERT_VECTOR, HYPERV_DBG_SERVICE_VECTOR, + HYPERV_DBG_FASTFAIL_VECTOR); } static void __init ms_hyperv_init_platform(void) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index b34a48068a8d12..b1652cab631aa3 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -113,6 +113,10 @@ static int acpi_processor_errata_piix4(struct pci_dev *dev) PCI_ANY_ID, PCI_ANY_ID, NULL); if (ide_dev) { errata.piix4.bmisx = pci_resource_start(ide_dev, 4); + if (errata.piix4.bmisx) + dev_dbg(&ide_dev->dev, + "Bus master activity detection (BM-IDE) erratum enabled\n"); + pci_dev_put(ide_dev); } @@ -131,20 +135,17 @@ static int acpi_processor_errata_piix4(struct pci_dev *dev) if (isa_dev) { pci_read_config_byte(isa_dev, 0x76, &value1); pci_read_config_byte(isa_dev, 0x77, &value2); - if ((value1 & 0x80) || (value2 & 0x80)) + if ((value1 & 0x80) || (value2 & 0x80)) { errata.piix4.fdma = 1; + dev_dbg(&isa_dev->dev, + "Type-F DMA livelock erratum (C3 disabled)\n"); + } pci_dev_put(isa_dev); } break; } - if (ide_dev) - dev_dbg(&ide_dev->dev, "Bus master activity detection (BM-IDE) erratum enabled\n"); - - if (isa_dev) - dev_dbg(&isa_dev->dev, "Type-F DMA livelock erratum (C3 disabled)\n"); - return 0; } diff --git a/drivers/acpi/acpica/acpredef.h b/drivers/acpi/acpica/acpredef.h index 6c9b5bf7d392db..07d5790d09f8c4 100644 --- a/drivers/acpi/acpica/acpredef.h +++ b/drivers/acpi/acpica/acpredef.h @@ -451,7 +451,7 @@ const union acpi_predefined_info acpi_gbl_predefined_methods[] = { {{"_DSM", METHOD_4ARGS(ACPI_TYPE_BUFFER, ACPI_TYPE_INTEGER, ACPI_TYPE_INTEGER, - ACPI_TYPE_ANY | ACPI_TYPE_PACKAGE) | + ACPI_TYPE_PACKAGE | ACPI_TYPE_ANY) | ARG_COUNT_IS_MINIMUM, METHOD_RETURNS(ACPI_RTYPE_ALL)}}, /* Must return a value, but it can be of any type */ diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index f6707325f58210..2ec095e2009e40 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -818,9 +818,6 @@ const struct acpi_device *acpi_companion_match(const struct device *dev) if (list_empty(&adev->pnp.ids)) return NULL; - if (adev->pnp.type.backlight) - return adev; - return acpi_primary_dev_companion(adev, dev); } diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 5f63ed120a2c71..6f0065257a77c3 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1656,6 +1656,8 @@ static int acpi_ec_setup(struct acpi_ec *ec, struct acpi_device *device, bool ca ret = ec_install_handlers(ec, device, call_reg); if (ret) { + ec_remove_handlers(ec); + if (ec == first_ec) first_ec = NULL; diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 6c4e567b6582f7..374993031895b3 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4188,6 +4188,9 @@ static const struct ata_dev_quirks_entry __ata_dev_quirks[] = { { "ST3320[68]13AS", "SD1[5-9]", ATA_QUIRK_NONCQ | ATA_QUIRK_FIRMWARE_WARN }, + /* ADATA devices with LPM issues. */ + { "ADATA SU680", NULL, ATA_QUIRK_NOLPM }, + /* Seagate disks with LPM issues */ { "ST1000DM010-2EP102", NULL, ATA_QUIRK_NOLPM }, { "ST2000DM008-2FR102", NULL, ATA_QUIRK_NOLPM }, diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index ad798e5246b491..3b65df914ebbe7 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3600,7 +3600,7 @@ static unsigned int ata_scsiop_maint_in(struct ata_device *dev, if (cdb[2] != 1 && cdb[2] != 3) { ata_dev_warn(dev, "invalid command format %d\n", cdb[2]); - ata_scsi_set_invalid_field(dev, cmd, 1, 0xff); + ata_scsi_set_invalid_field(dev, cmd, 2, 0xff); return 0; } diff --git a/drivers/base/bus.c b/drivers/base/bus.c index bb61d8adbab12c..8b6722ff8590df 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -504,6 +504,36 @@ int bus_for_each_drv(const struct bus_type *bus, struct device_driver *start, } EXPORT_SYMBOL_GPL(bus_for_each_drv); +static ssize_t driver_override_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret; + + ret = __device_set_driver_override(dev, buf, count); + if (ret) + return ret; + + return count; +} + +static ssize_t driver_override_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + guard(spinlock)(&dev->driver_override.lock); + return sysfs_emit(buf, "%s\n", dev->driver_override.name); +} +static DEVICE_ATTR_RW(driver_override); + +static struct attribute *driver_override_dev_attrs[] = { + &dev_attr_driver_override.attr, + NULL, +}; + +static const struct attribute_group driver_override_dev_group = { + .attrs = driver_override_dev_attrs, +}; + /** * bus_add_device - add device to bus * @dev: device being added @@ -537,9 +567,15 @@ int bus_add_device(struct device *dev) if (error) goto out_put; + if (dev->bus->driver_override) { + error = device_add_group(dev, &driver_override_dev_group); + if (error) + goto out_groups; + } + error = sysfs_create_link(&sp->devices_kset->kobj, &dev->kobj, dev_name(dev)); if (error) - goto out_groups; + goto out_override; error = sysfs_create_link(&dev->kobj, &sp->subsys.kobj, "subsystem"); if (error) @@ -550,6 +586,9 @@ int bus_add_device(struct device *dev) out_subsys: sysfs_remove_link(&sp->devices_kset->kobj, dev_name(dev)); +out_override: + if (dev->bus->driver_override) + device_remove_group(dev, &driver_override_dev_group); out_groups: device_remove_groups(dev, sp->bus->dev_groups); out_put: @@ -607,6 +646,8 @@ void bus_remove_device(struct device *dev) sysfs_remove_link(&dev->kobj, "subsystem"); sysfs_remove_link(&sp->devices_kset->kobj, dev_name(dev)); + if (dev->bus->driver_override) + device_remove_group(dev, &driver_override_dev_group); device_remove_groups(dev, dev->bus->dev_groups); if (klist_node_attached(&dev->p->knode_bus)) klist_del(&dev->p->knode_bus); diff --git a/drivers/base/core.c b/drivers/base/core.c index 791f9e444df8d3..09b98f02f559d0 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2556,6 +2556,7 @@ static void device_release(struct kobject *kobj) devres_release_all(dev); kfree(dev->dma_range_map); + kfree(dev->driver_override.name); if (dev->release) dev->release(dev); @@ -3159,6 +3160,7 @@ void device_initialize(struct device *dev) kobject_init(&dev->kobj, &device_ktype); INIT_LIST_HEAD(&dev->dma_pools); mutex_init(&dev->mutex); + spin_lock_init(&dev->driver_override.lock); lockdep_set_novalidate_class(&dev->mutex); spin_lock_init(&dev->devres_lock); INIT_LIST_HEAD(&dev->devres_head); diff --git a/drivers/base/dd.c b/drivers/base/dd.c index bea8da5f8a3a92..37c7e54e0e4c74 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -381,6 +381,66 @@ static void __exit deferred_probe_exit(void) } __exitcall(deferred_probe_exit); +int __device_set_driver_override(struct device *dev, const char *s, size_t len) +{ + const char *new, *old; + char *cp; + + if (!s) + return -EINVAL; + + /* + * The stored value will be used in sysfs show callback (sysfs_emit()), + * which has a length limit of PAGE_SIZE and adds a trailing newline. + * Thus we can store one character less to avoid truncation during sysfs + * show. + */ + if (len >= (PAGE_SIZE - 1)) + return -EINVAL; + + /* + * Compute the real length of the string in case userspace sends us a + * bunch of \0 characters like python likes to do. + */ + len = strlen(s); + + if (!len) { + /* Empty string passed - clear override */ + spin_lock(&dev->driver_override.lock); + old = dev->driver_override.name; + dev->driver_override.name = NULL; + spin_unlock(&dev->driver_override.lock); + kfree(old); + + return 0; + } + + cp = strnchr(s, len, '\n'); + if (cp) + len = cp - s; + + new = kstrndup(s, len, GFP_KERNEL); + if (!new) + return -ENOMEM; + + spin_lock(&dev->driver_override.lock); + old = dev->driver_override.name; + if (cp != s) { + dev->driver_override.name = new; + spin_unlock(&dev->driver_override.lock); + } else { + /* "\n" passed - clear override */ + dev->driver_override.name = NULL; + spin_unlock(&dev->driver_override.lock); + + kfree(new); + } + kfree(old); + + return 0; +} +EXPORT_SYMBOL_GPL(__device_set_driver_override); + /** * device_is_bound() - Check if device is bound to a driver * @dev: device to check diff --git a/drivers/base/platform.c b/drivers/base/platform.c index b45d41b018ca6d..d44591d52e3633 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -603,7 +603,6 @@ static void platform_device_release(struct device *dev) kfree(pa->pdev.dev.platform_data); kfree(pa->pdev.mfd_cell); kfree(pa->pdev.resource); - kfree(pa->pdev.driver_override); kfree(pa); } @@ -1306,38 +1305,9 @@ static ssize_t numa_node_show(struct device *dev, } static DEVICE_ATTR_RO(numa_node); -static ssize_t driver_override_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct platform_device *pdev = to_platform_device(dev); - ssize_t len; - - device_lock(dev); - len = sysfs_emit(buf, "%s\n", pdev->driver_override); - device_unlock(dev); - - return len; -} - -static ssize_t driver_override_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct platform_device *pdev = to_platform_device(dev); - int ret; - - ret = driver_set_override(dev, &pdev->driver_override, buf, count); - if (ret) - return ret; - - return count; -} -static DEVICE_ATTR_RW(driver_override); - static struct attribute *platform_dev_attrs[] = { &dev_attr_modalias.attr, &dev_attr_numa_node.attr, - &dev_attr_driver_override.attr, NULL, }; @@ -1377,10 +1347,12 @@ static int platform_match(struct device *dev, const struct device_driver *drv) { struct platform_device *pdev = to_platform_device(dev); struct platform_driver *pdrv = to_platform_driver(drv); + int ret; /* When driver_override is set, only bind to the matching driver */ - if (pdev->driver_override) - return !strcmp(pdev->driver_override, drv->name); + ret = device_match_driver_override(dev, drv); + if (ret >= 0) + return ret; /* Attempt an OF style match first */ if (of_driver_match_device(dev, drv)) @@ -1516,6 +1488,7 @@ static const struct dev_pm_ops platform_dev_pm_ops = { const struct bus_type platform_bus_type = { .name = "platform", .dev_groups = platform_dev_groups, + .driver_override = true, .match = platform_match, .uevent = platform_uevent, .probe = platform_probe, diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 0ee8ea971aa468..335288e8b5b312 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1895,6 +1895,7 @@ void pm_runtime_reinit(struct device *dev) void pm_runtime_remove(struct device *dev) { __pm_runtime_disable(dev, false); + flush_work(&dev->power.work); pm_runtime_reinit(dev); } diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a324ede6206d3a..af679375b19359 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -917,9 +917,8 @@ static void zram_account_writeback_submit(struct zram *zram) static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req) { - u32 size, index = req->pps->index; - int err, prio; - bool huge; + u32 index = req->pps->index; + int err; err = blk_status_to_errno(req->bio.bi_status); if (err) { @@ -946,28 +945,13 @@ static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req) goto out; } - if (zram->compressed_wb) { - /* - * ZRAM_WB slots get freed, we need to preserve data required - * for read decompression. - */ - size = get_slot_size(zram, index); - prio = get_slot_comp_priority(zram, index); - huge = test_slot_flag(zram, index, ZRAM_HUGE); - } - - slot_free(zram, index); - set_slot_flag(zram, index, ZRAM_WB); + clear_slot_flag(zram, index, ZRAM_IDLE); + if (test_slot_flag(zram, index, ZRAM_HUGE)) + atomic64_dec(&zram->stats.huge_pages); + atomic64_sub(get_slot_size(zram, index), &zram->stats.compr_data_size); + zs_free(zram->mem_pool, get_slot_handle(zram, index)); set_slot_handle(zram, index, req->blk_idx); - - if (zram->compressed_wb) { - if (huge) - set_slot_flag(zram, index, ZRAM_HUGE); - set_slot_size(zram, index, size); - set_slot_comp_priority(zram, index, prio); - } - - atomic64_inc(&zram->stats.pages_stored); + set_slot_flag(zram, index, ZRAM_WB); out: slot_unlock(zram, index); @@ -2010,8 +1994,13 @@ static void slot_free(struct zram *zram, u32 index) set_slot_comp_priority(zram, index, 0); if (test_slot_flag(zram, index, ZRAM_HUGE)) { + /* + * Writeback completion decrements ->huge_pages but keeps + * ZRAM_HUGE flag for deferred decompression path. + */ + if (!test_slot_flag(zram, index, ZRAM_WB)) + atomic64_dec(&zram->stats.huge_pages); clear_slot_flag(zram, index, ZRAM_HUGE); - atomic64_dec(&zram->stats.huge_pages); } if (test_slot_flag(zram, index, ZRAM_WB)) { diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 246b6205c5e0e1..ab146894ba4e4e 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -251,11 +251,13 @@ void btintel_hw_error(struct hci_dev *hdev, u8 code) bt_dev_err(hdev, "Hardware error 0x%2.2x", code); + hci_req_sync_lock(hdev); + skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { bt_dev_err(hdev, "Reset after hardware error failed (%ld)", PTR_ERR(skb)); - return; + goto unlock; } kfree_skb(skb); @@ -263,18 +265,21 @@ void btintel_hw_error(struct hci_dev *hdev, u8 code) if (IS_ERR(skb)) { bt_dev_err(hdev, "Retrieving Intel exception info failed (%ld)", PTR_ERR(skb)); - return; + goto unlock; } if (skb->len != 13) { bt_dev_err(hdev, "Exception info size mismatch"); kfree_skb(skb); - return; + goto unlock; } bt_dev_err(hdev, "Exception info %s", (char *)(skb->data + 1)); kfree_skb(skb); + +unlock: + hci_req_sync_unlock(hdev); } EXPORT_SYMBOL_GPL(btintel_hw_error); diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 74f820e89655e0..3b062692019341 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -787,6 +787,8 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, */ if (soc_type == QCA_WCN3988) rom_ver = ((soc_ver & 0x00000f00) >> 0x05) | (soc_ver & 0x0000000f); + else if (soc_type == QCA_WCN3998) + rom_ver = ((soc_ver & 0x0000f000) >> 0x07) | (soc_ver & 0x0000000f); else rom_ver = ((soc_ver & 0x00000f00) >> 0x04) | (soc_ver & 0x0000000f); diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index a1c5eb993e478a..5c535f3ab72286 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2376,8 +2376,11 @@ static void btusb_work(struct work_struct *work) if (data->air_mode == HCI_NOTIFY_ENABLE_SCO_CVSD) { if (hdev->voice_setting & 0x0020) { static const int alts[3] = { 2, 4, 5 }; + unsigned int sco_idx; - new_alts = alts[data->sco_num - 1]; + sco_idx = min_t(unsigned int, data->sco_num - 1, + ARRAY_SIZE(alts) - 1); + new_alts = alts[sco_idx]; } else { new_alts = data->sco_num; } diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c index 91acf24f1ef544..91c96ad1234221 100644 --- a/drivers/bluetooth/hci_ll.c +++ b/drivers/bluetooth/hci_ll.c @@ -541,6 +541,8 @@ static int download_firmware(struct ll_device *lldev) if (err || !fw->data || !fw->size) { bt_dev_err(lldev->hu.hdev, "request_firmware failed(errno %d) for %s", err, bts_scr_name); + if (!err) + release_firmware(fw); return -EINVAL; } ptr = (void *)fw->data; diff --git a/drivers/bus/simple-pm-bus.c b/drivers/bus/simple-pm-bus.c index 3f00d953fb9a0e..c920bd6fbaafd4 100644 --- a/drivers/bus/simple-pm-bus.c +++ b/drivers/bus/simple-pm-bus.c @@ -36,7 +36,7 @@ static int simple_pm_bus_probe(struct platform_device *pdev) * that's not listed in simple_pm_bus_of_match. We don't want to do any * of the simple-pm-bus tasks for these devices, so return early. */ - if (pdev->driver_override) + if (device_has_driver_override(&pdev->dev)) return 0; match = of_match_device(dev->driver->of_match_table, dev); @@ -78,7 +78,7 @@ static void simple_pm_bus_remove(struct platform_device *pdev) { const void *data = of_device_get_match_data(&pdev->dev); - if (pdev->driver_override || data) + if (device_has_driver_override(&pdev->dev) || data) return; dev_dbg(&pdev->dev, "%s\n", __func__); diff --git a/drivers/cache/ax45mp_cache.c b/drivers/cache/ax45mp_cache.c index 1d7dd3d2c101cd..934c5087ec2bda 100644 --- a/drivers/cache/ax45mp_cache.c +++ b/drivers/cache/ax45mp_cache.c @@ -178,11 +178,11 @@ static const struct of_device_id ax45mp_cache_ids[] = { static int __init ax45mp_cache_init(void) { - struct device_node *np; struct resource res; int ret; - np = of_find_matching_node(NULL, ax45mp_cache_ids); + struct device_node *np __free(device_node) = + of_find_matching_node(NULL, ax45mp_cache_ids); if (!of_device_is_available(np)) return -ENODEV; diff --git a/drivers/cache/starfive_starlink_cache.c b/drivers/cache/starfive_starlink_cache.c index 24c7d078ca2272..3a25d2d7c70ca3 100644 --- a/drivers/cache/starfive_starlink_cache.c +++ b/drivers/cache/starfive_starlink_cache.c @@ -102,11 +102,11 @@ static const struct of_device_id starlink_cache_ids[] = { static int __init starlink_cache_init(void) { - struct device_node *np; u32 block_size; int ret; - np = of_find_matching_node(NULL, starlink_cache_ids); + struct device_node *np __free(device_node) = + of_find_matching_node(NULL, starlink_cache_ids); if (!of_device_is_available(np)) return -ENODEV; diff --git a/drivers/clk/imx/clk-scu.c b/drivers/clk/imx/clk-scu.c index a85ec48a798b58..9b33df9967ece4 100644 --- a/drivers/clk/imx/clk-scu.c +++ b/drivers/clk/imx/clk-scu.c @@ -706,8 +706,7 @@ struct clk_hw *imx_clk_scu_alloc_dev(const char *name, if (ret) goto put_device; - ret = driver_set_override(&pdev->dev, &pdev->driver_override, - "imx-scu-clk", strlen("imx-scu-clk")); + ret = device_set_driver_override(&pdev->dev, "imx-scu-clk"); if (ret) goto put_device; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 277884d91913c8..1f794524a1d923 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1427,12 +1427,9 @@ static int cpufreq_policy_online(struct cpufreq_policy *policy, * If there is a problem with its frequency table, take it * offline and drop it. */ - if (policy->freq_table_sorted != CPUFREQ_TABLE_SORTED_ASCENDING && - policy->freq_table_sorted != CPUFREQ_TABLE_SORTED_DESCENDING) { - ret = cpufreq_table_validate_and_sort(policy); - if (ret) - goto out_offline_policy; - } + ret = cpufreq_table_validate_and_sort(policy); + if (ret) + goto out_offline_policy; /* related_cpus should at least include policy->cpus. */ cpumask_copy(policy->related_cpus, policy->cpus); diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index e0e847764511e9..df01d33993d824 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -313,6 +313,17 @@ static void cs_start(struct cpufreq_policy *policy) dbs_info->requested_freq = policy->cur; } +static void cs_limits(struct cpufreq_policy *policy) +{ + struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); + + /* + * The limits have changed, so may have the current frequency. Reset + * requested_freq to avoid any unintended outcomes due to the mismatch. + */ + dbs_info->requested_freq = policy->cur; +} + static struct dbs_governor cs_governor = { .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("conservative"), .kobj_type = { .default_groups = cs_groups }, @@ -322,6 +333,7 @@ static struct dbs_governor cs_governor = { .init = cs_init, .exit = cs_exit, .start = cs_start, + .limits = cs_limits, }; #define CPU_FREQ_GOV_CONSERVATIVE (cs_governor.gov) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 36eb7aee4bcd36..acf10187873321 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -563,6 +563,7 @@ EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_stop); void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy) { + struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs; /* Protect gov->gdbs_data against cpufreq_dbs_governor_exit() */ @@ -574,6 +575,8 @@ void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy) mutex_lock(&policy_dbs->update_mutex); cpufreq_policy_apply_limits(policy); gov_update_sample_delay(policy_dbs, 0); + if (gov->limits) + gov->limits(policy); mutex_unlock(&policy_dbs->update_mutex); out: diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 168c23fd7fcac7..1462d59277bd12 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -138,6 +138,7 @@ struct dbs_governor { int (*init)(struct dbs_data *dbs_data); void (*exit)(struct dbs_data *dbs_data); void (*start)(struct cpufreq_policy *policy); + void (*limits)(struct cpufreq_policy *policy); }; static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy) diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 7f251daf03ce32..5b364d8da4f927 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -360,6 +360,10 @@ int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy) if (policy_has_boost_freq(policy)) policy->boost_supported = true; + if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING || + policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_DESCENDING) + return 0; + return set_freq_table_sorted(policy); } diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 8b2dfc11289bdc..aebf4dad545e2c 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -2408,10 +2408,8 @@ static int sev_ioctl_do_snp_platform_status(struct sev_issue_cmd *argp) * in Firmware state on failure. Use snp_reclaim_pages() to * transition either case back to Hypervisor-owned state. */ - if (snp_reclaim_pages(__pa(data), 1, true)) { - snp_leak_pages(__page_to_pfn(status_page), 1); + if (snp_reclaim_pages(__pa(data), 1, true)) return -EFAULT; - } } if (ret) diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c index 329f60ad422e6f..9214bbfc868f5d 100644 --- a/drivers/crypto/padlock-sha.c +++ b/drivers/crypto/padlock-sha.c @@ -332,6 +332,13 @@ static int __init padlock_init(void) if (!x86_match_cpu(padlock_sha_ids) || !boot_cpu_has(X86_FEATURE_PHE_EN)) return -ENODEV; + /* + * Skip family 0x07 and newer used by Zhaoxin processors, + * as the driver's self-tests fail on these CPUs. + */ + if (c->x86 >= 0x07) + return -ENODEV; + /* Register the newly added algorithm module if on * * VIA Nano processor, or else just do as before */ if (c->x86_model < 0x0f) { diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 4589bf11d3fe00..80aeb0d556bd76 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -59,6 +59,7 @@ config CXL_ACPI tristate "CXL ACPI: Platform Support" depends on ACPI depends on ACPI_NUMA + depends on CXL_PMEM || !CXL_PMEM default CXL_BUS select ACPI_TABLE_LIB select ACPI_HMAT diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index c222e98ae7364c..cb5d5a047a9da1 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -94,7 +94,6 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) struct cxl_hdm *cxlhdm; void __iomem *hdm; u32 ctrl; - int i; if (!info) return false; @@ -113,22 +112,16 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) return false; /* - * If any decoders are committed already, there should not be any - * emulated DVSEC decoders. + * If HDM decoders are globally enabled, do not fall back to DVSEC + * range emulation. Zeroed decoder registers after region teardown + * do not imply absence of HDM capability. + * + * Falling back to DVSEC here would treat the decoder as AUTO and + * may incorrectly latch default interleave settings. */ - for (i = 0; i < cxlhdm->decoder_count; i++) { - ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i)); - dev_dbg(&info->port->dev, - "decoder%d.%d: committed: %ld base: %#x_%.8x size: %#x_%.8x\n", - info->port->id, i, - FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl), - readl(hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i)), - readl(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(i)), - readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i)), - readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i))); - if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl)) - return false; - } + ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET); + if (ctrl & CXL_HDM_DECODER_ENABLE) + return false; return true; } diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index e7a6452bf5445b..12386d91270544 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1301,7 +1301,7 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd) * Require an endpoint to be safe otherwise the driver can not * be sure that the device is unmapped. */ - if (endpoint && cxl_num_decoders_committed(endpoint) == 0) + if (cxlmd->dev.driver && cxl_num_decoders_committed(endpoint) == 0) return __cxl_mem_sanitize(mds, cmd); return -EBUSY; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 0c5957d1d32941..c5aacd7054f1d2 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -552,10 +552,13 @@ static void cxl_port_release(struct device *dev) xa_destroy(&port->dports); xa_destroy(&port->regions); ida_free(&cxl_port_ida, port->id); - if (is_cxl_root(port)) + + if (is_cxl_root(port)) { kfree(to_cxl_root(port)); - else + } else { + put_device(dev->parent); kfree(port); + } } static ssize_t decoders_committed_show(struct device *dev, @@ -707,6 +710,7 @@ static struct cxl_port *cxl_port_alloc(struct device *uport_dev, struct cxl_port *iter; dev->parent = &parent_port->dev; + get_device(dev->parent); port->depth = parent_port->depth + 1; port->parent_dport = parent_dport; diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 42874948b589b9..c37ae0b28bbbc9 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3854,8 +3854,10 @@ static int __construct_region(struct cxl_region *cxlr, } rc = sysfs_update_group(&cxlr->dev.kobj, &cxl_region_group); - if (rc) + if (rc) { + kfree(res); return rc; + } rc = insert_resource(cxlrd->res, res); if (rc) { diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 082ec0f1c3a048..261dff7ced9f15 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -554,7 +554,7 @@ static __exit void cxl_pmem_exit(void) MODULE_DESCRIPTION("CXL PMEM: Persistent Memory Support"); MODULE_LICENSE("GPL v2"); -module_init(cxl_pmem_init); +subsys_initcall(cxl_pmem_init); module_exit(cxl_pmem_exit); MODULE_IMPORT_NS("CXL"); MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM_BRIDGE); diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index f1a2bee39bf113..82b3b6d9ed2df6 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -257,9 +257,10 @@ static void fwnet_header_cache_update(struct hh_cache *hh, memcpy((u8 *)hh->hh_data + HH_DATA_OFF(FWNET_HLEN), haddr, net->addr_len); } -static int fwnet_header_parse(const struct sk_buff *skb, unsigned char *haddr) +static int fwnet_header_parse(const struct sk_buff *skb, const struct net_device *dev, + unsigned char *haddr) { - memcpy(haddr, skb->dev->dev_addr, FWNET_ALEN); + memcpy(haddr, dev->dev_addr, FWNET_ALEN); return FWNET_ALEN; } diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 12a625387d6e6b..f2f94d4d533e8d 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -205,12 +205,12 @@ static int ffa_rxtx_map(phys_addr_t tx_buf, phys_addr_t rx_buf, u32 pg_cnt) return 0; } -static int ffa_rxtx_unmap(u16 vm_id) +static int ffa_rxtx_unmap(void) { ffa_value_t ret; invoke_ffa_fn((ffa_value_t){ - .a0 = FFA_RXTX_UNMAP, .a1 = PACK_TARGET_INFO(vm_id, 0), + .a0 = FFA_RXTX_UNMAP, }, &ret); if (ret.a0 == FFA_ERROR) @@ -2097,7 +2097,7 @@ static int __init ffa_init(void) pr_err("failed to setup partitions\n"); ffa_notifications_cleanup(); - ffa_rxtx_unmap(drv_info->vm_id); + ffa_rxtx_unmap(); free_pages: if (drv_info->tx_buffer) free_pages_exact(drv_info->tx_buffer, rxtx_bufsz); @@ -2112,7 +2112,7 @@ static void __exit ffa_exit(void) { ffa_notifications_cleanup(); ffa_partitions_cleanup(); - ffa_rxtx_unmap(drv_info->vm_id); + ffa_rxtx_unmap(); free_pages_exact(drv_info->tx_buffer, drv_info->rxtx_bufsz); free_pages_exact(drv_info->rx_buffer, drv_info->rxtx_bufsz); kfree(drv_info); diff --git a/drivers/firmware/arm_scmi/notify.c b/drivers/firmware/arm_scmi/notify.c index 9168794adae43b..40ec184eedaecc 100644 --- a/drivers/firmware/arm_scmi/notify.c +++ b/drivers/firmware/arm_scmi/notify.c @@ -1066,7 +1066,7 @@ static int scmi_register_event_handler(struct scmi_notify_instance *ni, * since at creation time we usually want to have all setup and ready before * events really start flowing. * - * Return: A properly refcounted handler on Success, NULL on Failure + * Return: A properly refcounted handler on Success, ERR_PTR on Failure */ static inline struct scmi_event_handler * __scmi_event_handler_get_ops(struct scmi_notify_instance *ni, @@ -1113,7 +1113,7 @@ __scmi_event_handler_get_ops(struct scmi_notify_instance *ni, } mutex_unlock(&ni->pending_mtx); - return hndl; + return hndl ?: ERR_PTR(-ENODEV); } static struct scmi_event_handler * diff --git a/drivers/firmware/arm_scmi/protocols.h b/drivers/firmware/arm_scmi/protocols.h index 4c75970326e655..f51245aca2594c 100644 --- a/drivers/firmware/arm_scmi/protocols.h +++ b/drivers/firmware/arm_scmi/protocols.h @@ -189,13 +189,13 @@ struct scmi_protocol_handle { /** * struct scmi_iterator_state - Iterator current state descriptor - * @desc_index: Starting index for the current mulit-part request. + * @desc_index: Starting index for the current multi-part request. * @num_returned: Number of returned items in the last multi-part reply. * @num_remaining: Number of remaining items in the multi-part message. * @max_resources: Maximum acceptable number of items, configured by the caller * depending on the underlying resources that it is querying. * @loop_idx: The iterator loop index in the current multi-part reply. - * @rx_len: Size in bytes of the currenly processed message; it can be used by + * @rx_len: Size in bytes of the currently processed message; it can be used by * the user of the iterator to verify a reply size. * @priv: Optional pointer to some additional state-related private data setup * by the caller during the iterations. diff --git a/drivers/firmware/arm_scpi.c b/drivers/firmware/arm_scpi.c index 00e74449ce09e5..2acad5fa5a2867 100644 --- a/drivers/firmware/arm_scpi.c +++ b/drivers/firmware/arm_scpi.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -940,13 +941,13 @@ static int scpi_probe(struct platform_device *pdev) int idx = scpi_drvinfo->num_chans; struct scpi_chan *pchan = scpi_drvinfo->channels + idx; struct mbox_client *cl = &pchan->cl; - struct device_node *shmem = of_parse_phandle(np, "shmem", idx); + struct device_node *shmem __free(device_node) = + of_parse_phandle(np, "shmem", idx); if (!of_match_node(shmem_of_match, shmem)) return -ENXIO; ret = of_address_to_resource(shmem, 0, &res); - of_node_put(shmem); if (ret) { dev_err(dev, "failed to get SCPI payload mem resource\n"); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 4662bfbe70b2d0..43864df8af0463 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -36,6 +36,7 @@ #define AMDGPU_BO_LIST_MAX_PRIORITY 32u #define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1) +#define AMDGPU_BO_LIST_MAX_ENTRIES (128 * 1024) static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu) { @@ -188,6 +189,9 @@ int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in, const uint32_t bo_number = in->bo_number; struct drm_amdgpu_bo_list_entry *info; + if (bo_number > AMDGPU_BO_LIST_MAX_ENTRIES) + return -EINVAL; + /* copy the handle array from userspace to a kernel buffer */ if (likely(info_size == bo_info_size)) { info = vmemdup_array_user(uptr, bo_number, info_size); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index f2beb980e3c3af..c60cbce356cfef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1069,7 +1069,10 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params, } /* Prepare a TLB flush fence to be attached to PTs */ - if (!params->unlocked) { + /* The check for need_tlb_fence should be dropped once we + * sort out the issues with KIQ/MES TLB invalidation timeouts. + */ + if (!params->unlocked && vm->need_tlb_fence) { amdgpu_vm_tlb_fence_create(params->adev, vm, fence); /* Makes sure no PD/PT is freed before the flush */ @@ -2602,6 +2605,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, ttm_lru_bulk_move_init(&vm->lru_bulk_move); vm->is_compute_context = false; + vm->need_tlb_fence = amdgpu_userq_enabled(&adev->ddev); vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_GFX); @@ -2739,6 +2743,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) dma_fence_put(vm->last_update); vm->last_update = dma_fence_get_stub(); vm->is_compute_context = true; + vm->need_tlb_fence = true; unreserve_bo: amdgpu_bo_unreserve(vm->root.bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 806d62ed61efff..bb276c0ad06dd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -441,6 +441,8 @@ struct amdgpu_vm { struct ttm_lru_bulk_move lru_bulk_move; /* Flag to indicate if VM is used for compute */ bool is_compute_context; + /* Flag to indicate if VM needs a TLB fence (KFD or KGD) */ + bool need_tlb_fence; /* Memory partition number, -1 means any partition */ int8_t mem_id; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index e35ed0cc2ec624..8eba99aa0f8fa0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -662,28 +662,35 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, } else { switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(9, 0, 0): - mmhub_cid = mmhub_client_ids_vega10[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_vega10) ? + mmhub_client_ids_vega10[cid][rw] : NULL; break; case IP_VERSION(9, 3, 0): - mmhub_cid = mmhub_client_ids_vega12[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_vega12) ? + mmhub_client_ids_vega12[cid][rw] : NULL; break; case IP_VERSION(9, 4, 0): - mmhub_cid = mmhub_client_ids_vega20[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_vega20) ? + mmhub_client_ids_vega20[cid][rw] : NULL; break; case IP_VERSION(9, 4, 1): - mmhub_cid = mmhub_client_ids_arcturus[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_arcturus) ? + mmhub_client_ids_arcturus[cid][rw] : NULL; break; case IP_VERSION(9, 1, 0): case IP_VERSION(9, 2, 0): - mmhub_cid = mmhub_client_ids_raven[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_raven) ? + mmhub_client_ids_raven[cid][rw] : NULL; break; case IP_VERSION(1, 5, 0): case IP_VERSION(2, 4, 0): - mmhub_cid = mmhub_client_ids_renoir[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_renoir) ? + mmhub_client_ids_renoir[cid][rw] : NULL; break; case IP_VERSION(1, 8, 0): case IP_VERSION(9, 4, 2): - mmhub_cid = mmhub_client_ids_aldebaran[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_aldebaran) ? + mmhub_client_ids_aldebaran[cid][rw] : NULL; break; default: mmhub_cid = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c index b3590b33cab9eb..485ecdec961843 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c @@ -129,7 +129,7 @@ static int isp_genpd_add_device(struct device *dev, void *data) if (!pdev) return -EINVAL; - if (!dev->type->name) { + if (!dev->type || !dev->type->name) { drm_dbg(&adev->ddev, "Invalid device type to add\n"); goto exit; } @@ -165,7 +165,7 @@ static int isp_genpd_remove_device(struct device *dev, void *data) if (!pdev) return -EINVAL; - if (!dev->type->name) { + if (!dev->type || !dev->type->name) { drm_dbg(&adev->ddev, "Invalid device type to remove\n"); goto exit; } diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index a0cc8e218ca1ea..534cb4c544dc45 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -154,14 +154,17 @@ mmhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device *adev, switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(2, 0, 0): case IP_VERSION(2, 0, 2): - mmhub_cid = mmhub_client_ids_navi1x[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_navi1x) ? + mmhub_client_ids_navi1x[cid][rw] : NULL; break; case IP_VERSION(2, 1, 0): case IP_VERSION(2, 1, 1): - mmhub_cid = mmhub_client_ids_sienna_cichlid[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_sienna_cichlid) ? + mmhub_client_ids_sienna_cichlid[cid][rw] : NULL; break; case IP_VERSION(2, 1, 2): - mmhub_cid = mmhub_client_ids_beige_goby[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_beige_goby) ? + mmhub_client_ids_beige_goby[cid][rw] : NULL; break; default: mmhub_cid = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index 5eb8122e27469c..ceb2f6b46de521 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -94,7 +94,8 @@ mmhub_v2_3_print_l2_protection_fault_status(struct amdgpu_device *adev, case IP_VERSION(2, 3, 0): case IP_VERSION(2, 4, 0): case IP_VERSION(2, 4, 1): - mmhub_cid = mmhub_client_ids_vangogh[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_vangogh) ? + mmhub_client_ids_vangogh[cid][rw] : NULL; break; default: mmhub_cid = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c index 7d5242df58a511..ab966e69a342a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c @@ -110,7 +110,8 @@ mmhub_v3_0_print_l2_protection_fault_status(struct amdgpu_device *adev, switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(3, 0, 0): case IP_VERSION(3, 0, 1): - mmhub_cid = mmhub_client_ids_v3_0_0[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_0_0) ? + mmhub_client_ids_v3_0_0[cid][rw] : NULL; break; default: mmhub_cid = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c index 910337dc28d105..14a742d3a99d78 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c @@ -117,7 +117,8 @@ mmhub_v3_0_1_print_l2_protection_fault_status(struct amdgpu_device *adev, switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(3, 0, 1): - mmhub_cid = mmhub_client_ids_v3_0_1[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_0_1) ? + mmhub_client_ids_v3_0_1[cid][rw] : NULL; break; default: mmhub_cid = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c index f0f182f033b988..e1f07f2a185272 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c @@ -108,7 +108,8 @@ mmhub_v3_0_2_print_l2_protection_fault_status(struct amdgpu_device *adev, "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); - mmhub_cid = mmhub_client_ids_v3_0_2[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_0_2) ? + mmhub_client_ids_v3_0_2[cid][rw] : NULL; dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", mmhub_cid ? mmhub_cid : "unknown", cid); dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c index 951998454b2572..88bfe321f83aaa 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c @@ -102,7 +102,8 @@ mmhub_v4_1_0_print_l2_protection_fault_status(struct amdgpu_device *adev, status); switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(4, 1, 0): - mmhub_cid = mmhub_client_ids_v4_1_0[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v4_1_0) ? + mmhub_client_ids_v4_1_0[cid][rw] : NULL; break; default: mmhub_cid = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c index a72770e3d0e997..2532ca80f73560 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c @@ -688,7 +688,8 @@ mmhub_v4_2_0_print_l2_protection_fault_status(struct amdgpu_device *adev, status); switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(4, 2, 0): - mmhub_cid = mmhub_client_ids_v4_2_0[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v4_2_0) ? + mmhub_client_ids_v4_2_0[cid][rw] : NULL; break; default: mmhub_cid = NULL; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b3d6f2cd8ab6f3..085cc98bd875ac 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2554,7 +2554,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) fw_meta_info_params.fw_inst_const = adev->dm.dmub_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes) + PSP_HEADER_BYTES_256; - fw_meta_info_params.fw_bss_data = region_params.bss_data_size ? adev->dm.dmub_fw->data + + fw_meta_info_params.fw_bss_data = fw_meta_info_params.bss_data_size ? adev->dm.dmub_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes) + le32_to_cpu(hdr->inst_const_bytes) : NULL; fw_meta_info_params.custom_psp_footer_size = 0; @@ -13119,7 +13119,7 @@ static void parse_edid_displayid_vrr(struct drm_connector *connector, u16 min_vfreq; u16 max_vfreq; - if (edid == NULL || edid->extensions == 0) + if (!edid || !edid->extensions) return; /* Find DisplayID extension */ @@ -13129,7 +13129,7 @@ static void parse_edid_displayid_vrr(struct drm_connector *connector, break; } - if (edid_ext == NULL) + if (i == edid->extensions) return; while (j < EDID_LENGTH) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c index d59ba82d3d7cac..aa4658867e5565 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c @@ -37,19 +37,19 @@ const u64 amdgpu_dm_supported_degam_tfs = BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF) | BIT(DRM_COLOROP_1D_CURVE_PQ_125_EOTF) | BIT(DRM_COLOROP_1D_CURVE_BT2020_INV_OETF) | - BIT(DRM_COLOROP_1D_CURVE_GAMMA22_INV); + BIT(DRM_COLOROP_1D_CURVE_GAMMA22); const u64 amdgpu_dm_supported_shaper_tfs = BIT(DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF) | BIT(DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF) | BIT(DRM_COLOROP_1D_CURVE_BT2020_OETF) | - BIT(DRM_COLOROP_1D_CURVE_GAMMA22); + BIT(DRM_COLOROP_1D_CURVE_GAMMA22_INV); const u64 amdgpu_dm_supported_blnd_tfs = BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF) | BIT(DRM_COLOROP_1D_CURVE_PQ_125_EOTF) | BIT(DRM_COLOROP_1D_CURVE_BT2020_INV_OETF) | - BIT(DRM_COLOROP_1D_CURVE_GAMMA22_INV); + BIT(DRM_COLOROP_1D_CURVE_GAMMA22); #define MAX_COLOR_PIPELINE_OPS 10 diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index 08d0e05a313ea0..d237d7b41dfd4a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -255,6 +255,10 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p BREAK_TO_DEBUGGER(); return NULL; } + if (ctx->dce_version == DCN_VERSION_2_01) { + dcn201_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); + return &clk_mgr->base; + } if (ASICREV_IS_SIENNA_CICHLID_P(asic_id.hw_internal_rev)) { dcn3_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); return &clk_mgr->base; @@ -267,10 +271,6 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p dcn3_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); return &clk_mgr->base; } - if (ctx->dce_version == DCN_VERSION_2_01) { - dcn201_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); - return &clk_mgr->base; - } dcn20_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); return &clk_mgr->base; } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 7ebb7d1193af2e..c7fd604024d645 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -1785,7 +1785,10 @@ static bool dml1_validate(struct dc *dc, struct dc_state *context, enum dc_valid dc->res_pool->funcs->calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel); + DC_FP_START(); dcn32_override_min_req_memclk(dc, context); + DC_FP_END(); + dcn32_override_min_req_dcfclk(dc, context); BW_VAL_TRACE_END_WATERMARKS(); diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index 61b1c5aa74cb51..36942467d4adda 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -3454,9 +3454,11 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, if (adev->asic_type == CHIP_HAINAN) { if ((adev->pdev->revision == 0x81) || (adev->pdev->revision == 0xC3) || + (adev->pdev->device == 0x6660) || (adev->pdev->device == 0x6664) || (adev->pdev->device == 0x6665) || - (adev->pdev->device == 0x6667)) { + (adev->pdev->device == 0x6667) || + (adev->pdev->device == 0x666F)) { max_sclk = 75000; } if ((adev->pdev->revision == 0xC3) || diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c index ab7fed6214e062..facfb7526928d8 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c @@ -848,7 +848,7 @@ static int dw_hdmi_qp_config_audio_infoframe(struct dw_hdmi_qp *hdmi, regmap_bulk_write(hdmi->regm, PKT_AUDI_CONTENTS0, &header_bytes, 1); regmap_bulk_write(hdmi->regm, PKT_AUDI_CONTENTS1, &buffer[3], 1); - regmap_bulk_write(hdmi->regm, PKT_AUDI_CONTENTS2, &buffer[4], 1); + regmap_bulk_write(hdmi->regm, PKT_AUDI_CONTENTS2, &buffer[7], 1); /* Enable ACR, AUDI, AMD */ dw_hdmi_qp_mod(hdmi, diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index ec820686b30216..f52141f842a1f4 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -233,6 +233,7 @@ static void drm_events_release(struct drm_file *file_priv) void drm_file_free(struct drm_file *file) { struct drm_device *dev; + int idx; if (!file) return; @@ -249,9 +250,11 @@ void drm_file_free(struct drm_file *file) drm_events_release(file); - if (drm_core_check_feature(dev, DRIVER_MODESET)) { + if (drm_core_check_feature(dev, DRIVER_MODESET) && + drm_dev_enter(dev, &idx)) { drm_fb_release(file); drm_property_destroy_user_blobs(dev, file); + drm_dev_exit(idx); } if (drm_core_check_feature(dev, DRIVER_SYNCOBJ)) diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c index d12db9b0bab810..802bc4608abf54 100644 --- a/drivers/gpu/drm/drm_mode_config.c +++ b/drivers/gpu/drm/drm_mode_config.c @@ -577,10 +577,13 @@ void drm_mode_config_cleanup(struct drm_device *dev) */ WARN_ON(!list_empty(&dev->mode_config.fb_list)); list_for_each_entry_safe(fb, fbt, &dev->mode_config.fb_list, head) { - struct drm_printer p = drm_dbg_printer(dev, DRM_UT_KMS, "[leaked fb]"); + if (list_empty(&fb->filp_head) || drm_framebuffer_read_refcount(fb) > 1) { + struct drm_printer p = drm_dbg_printer(dev, DRM_UT_KMS, "[leaked fb]"); - drm_printf(&p, "framebuffer[%u]:\n", fb->base.id); - drm_framebuffer_print_info(&p, 1, fb); + drm_printf(&p, "framebuffer[%u]:\n", fb->base.id); + drm_framebuffer_print_info(&p, 1, fb); + } + list_del_init(&fb->filp_head); drm_framebuffer_free(&fb->base.refcount); } diff --git a/drivers/gpu/drm/drm_pagemap_util.c b/drivers/gpu/drm/drm_pagemap_util.c index 14ddb948a32e9d..6111d90a38e245 100644 --- a/drivers/gpu/drm/drm_pagemap_util.c +++ b/drivers/gpu/drm/drm_pagemap_util.c @@ -65,18 +65,14 @@ static void drm_pagemap_cache_fini(void *arg) drm_dbg(cache->shrinker->drm, "Destroying dpagemap cache.\n"); spin_lock(&cache->lock); dpagemap = cache->dpagemap; - if (!dpagemap) { - spin_unlock(&cache->lock); - goto out; - } + cache->dpagemap = NULL; + if (dpagemap && !drm_pagemap_shrinker_cancel(dpagemap)) + dpagemap = NULL; + spin_unlock(&cache->lock); - if (drm_pagemap_shrinker_cancel(dpagemap)) { - cache->dpagemap = NULL; - spin_unlock(&cache->lock); + if (dpagemap) drm_pagemap_destroy(dpagemap, false); - } -out: mutex_destroy(&cache->lookup_mutex); kfree(cache); } diff --git a/drivers/gpu/drm/i915/display/intel_display_power_well.c b/drivers/gpu/drm/i915/display/intel_display_power_well.c index db185a8591339d..fba9fa41f827f0 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power_well.c +++ b/drivers/gpu/drm/i915/display/intel_display_power_well.c @@ -806,7 +806,7 @@ void gen9_set_dc_state(struct intel_display *display, u32 state) power_domains->dc_state, val & mask); enable_dc6 = state & DC_STATE_EN_UPTO_DC6; - dc6_was_enabled = val & DC_STATE_EN_UPTO_DC6; + dc6_was_enabled = power_domains->dc_state & DC_STATE_EN_UPTO_DC6; if (!dc6_was_enabled && enable_dc6) intel_dmc_update_dc6_allowed_count(display, true); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 6b92f333e18bc6..ced0e5a5989b85 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1186,6 +1186,7 @@ struct intel_crtc_state { u32 dc3co_exitline; u16 su_y_granularity; u8 active_non_psr_pipes; + u8 entry_setup_frames; const char *no_psr_reason; /* diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 1006b060c3f3ad..0b15cb764b1db5 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -1599,8 +1599,7 @@ static bool intel_dmc_get_dc6_allowed_count(struct intel_display *display, u32 * return false; mutex_lock(&power_domains->lock); - dc6_enabled = intel_de_read(display, DC_STATE_EN) & - DC_STATE_EN_UPTO_DC6; + dc6_enabled = power_domains->dc_state & DC_STATE_EN_UPTO_DC6; if (dc6_enabled) intel_dmc_update_dc6_allowed_count(display, false); diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index b7302a32ded46e..3791944389db0c 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1717,7 +1717,7 @@ static bool _psr_compute_config(struct intel_dp *intel_dp, entry_setup_frames = intel_psr_entry_setup_frames(intel_dp, conn_state, adjusted_mode); if (entry_setup_frames >= 0) { - intel_dp->psr.entry_setup_frames = entry_setup_frames; + crtc_state->entry_setup_frames = entry_setup_frames; } else { crtc_state->no_psr_reason = "PSR setup timing not met"; drm_dbg_kms(display->drm, @@ -1815,7 +1815,7 @@ static bool intel_psr_needs_wa_18037818876(struct intel_dp *intel_dp, { struct intel_display *display = to_intel_display(intel_dp); - return (DISPLAY_VER(display) == 20 && intel_dp->psr.entry_setup_frames > 0 && + return (DISPLAY_VER(display) == 20 && crtc_state->entry_setup_frames > 0 && !crtc_state->has_sel_update); } @@ -2189,6 +2189,7 @@ static void intel_psr_enable_locked(struct intel_dp *intel_dp, intel_dp->psr.pkg_c_latency_used = crtc_state->pkg_c_latency_used; intel_dp->psr.io_wake_lines = crtc_state->alpm_state.io_wake_lines; intel_dp->psr.fast_wake_lines = crtc_state->alpm_state.fast_wake_lines; + intel_dp->psr.entry_setup_frames = crtc_state->entry_setup_frames; if (!psr_interrupt_error_check(intel_dp)) return; @@ -3109,6 +3110,8 @@ void intel_psr_pre_plane_update(struct intel_atomic_state *state, * - Display WA #1136: skl, bxt */ if (intel_crtc_needs_modeset(new_crtc_state) || + new_crtc_state->update_m_n || + new_crtc_state->update_lrr || !new_crtc_state->has_psr || !new_crtc_state->active_planes || new_crtc_state->has_sel_update != psr->sel_update_enabled || diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index d37966ec7a9246..54c9571327e723 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1967,7 +1967,8 @@ void intel_engines_reset_default_submission(struct intel_gt *gt) if (engine->sanitize) engine->sanitize(engine); - engine->set_default_submission(engine); + if (engine->set_default_submission) + engine->set_default_submission(engine); } } diff --git a/drivers/gpu/drm/imagination/pvr_device.c b/drivers/gpu/drm/imagination/pvr_device.c index f58bb66a632755..dbb6f5a8ded12a 100644 --- a/drivers/gpu/drm/imagination/pvr_device.c +++ b/drivers/gpu/drm/imagination/pvr_device.c @@ -225,29 +225,12 @@ static irqreturn_t pvr_device_irq_thread_handler(int irq, void *data) } if (pvr_dev->has_safety_events) { - int err; - - /* - * Ensure the GPU is powered on since some safety events (such - * as ECC faults) can happen outside of job submissions, which - * are otherwise the only time a power reference is held. - */ - err = pvr_power_get(pvr_dev); - if (err) { - drm_err_ratelimited(drm_dev, - "%s: could not take power reference (%d)\n", - __func__, err); - return ret; - } - while (pvr_device_safety_irq_pending(pvr_dev)) { pvr_device_safety_irq_clear(pvr_dev); pvr_device_handle_safety_events(pvr_dev); ret = IRQ_HANDLED; } - - pvr_power_put(pvr_dev); } return ret; diff --git a/drivers/gpu/drm/imagination/pvr_power.c b/drivers/gpu/drm/imagination/pvr_power.c index 0cf7393f89c6b0..3ec4ec4276e4bf 100644 --- a/drivers/gpu/drm/imagination/pvr_power.c +++ b/drivers/gpu/drm/imagination/pvr_power.c @@ -90,11 +90,11 @@ pvr_power_request_pwr_off(struct pvr_device *pvr_dev) } static int -pvr_power_fw_disable(struct pvr_device *pvr_dev, bool hard_reset) +pvr_power_fw_disable(struct pvr_device *pvr_dev, bool hard_reset, bool rpm_suspend) { - if (!hard_reset) { - int err; + int err; + if (!hard_reset) { cancel_delayed_work_sync(&pvr_dev->watchdog.work); err = pvr_power_request_idle(pvr_dev); @@ -106,29 +106,47 @@ pvr_power_fw_disable(struct pvr_device *pvr_dev, bool hard_reset) return err; } - return pvr_fw_stop(pvr_dev); + if (rpm_suspend) { + /* This also waits for late processing of GPU or firmware IRQs in other cores */ + disable_irq(pvr_dev->irq); + } + + err = pvr_fw_stop(pvr_dev); + if (err && rpm_suspend) + enable_irq(pvr_dev->irq); + + return err; } static int -pvr_power_fw_enable(struct pvr_device *pvr_dev) +pvr_power_fw_enable(struct pvr_device *pvr_dev, bool rpm_resume) { int err; + if (rpm_resume) + enable_irq(pvr_dev->irq); + err = pvr_fw_start(pvr_dev); if (err) - return err; + goto out; err = pvr_wait_for_fw_boot(pvr_dev); if (err) { drm_err(from_pvr_device(pvr_dev), "Firmware failed to boot\n"); pvr_fw_stop(pvr_dev); - return err; + goto out; } queue_delayed_work(pvr_dev->sched_wq, &pvr_dev->watchdog.work, msecs_to_jiffies(WATCHDOG_TIME_MS)); return 0; + +out: + if (rpm_resume) + disable_irq(pvr_dev->irq); + + return err; } bool @@ -361,7 +379,7 @@ pvr_power_device_suspend(struct device *dev) return -EIO; if (pvr_dev->fw_dev.booted) { - err = pvr_power_fw_disable(pvr_dev, false); + err = pvr_power_fw_disable(pvr_dev, false, true); if (err) goto err_drm_dev_exit; } @@ -391,7 +409,7 @@ pvr_power_device_resume(struct device *dev) goto err_drm_dev_exit; if (pvr_dev->fw_dev.booted) { - err = pvr_power_fw_enable(pvr_dev); + err = pvr_power_fw_enable(pvr_dev, true); if (err) goto err_power_off; } @@ -510,7 +528,16 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset) } /* Disable IRQs for the duration of the reset. */ - disable_irq(pvr_dev->irq); + if (hard_reset) { + disable_irq(pvr_dev->irq); + } else { + /* + * Soft reset is triggered as a response to a FW command to the Host and is + * processed from the threaded IRQ handler. This code cannot (nor needs to) + * wait for any IRQ processing to complete. + */ + disable_irq_nosync(pvr_dev->irq); + } do { if (hard_reset) { @@ -518,7 +545,7 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset) queues_disabled = true; } - err = pvr_power_fw_disable(pvr_dev, hard_reset); + err = pvr_power_fw_disable(pvr_dev, hard_reset, false); if (!err) { if (hard_reset) { pvr_dev->fw_dev.booted = false; @@ -541,7 +568,7 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset) pvr_fw_irq_clear(pvr_dev); - err = pvr_power_fw_enable(pvr_dev); + err = pvr_power_fw_enable(pvr_dev, false); } if (err && hard_reset) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index b4aa49b1ac6349..4b10715f951c36 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2915,9 +2915,11 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, if (rdev->family == CHIP_HAINAN) { if ((rdev->pdev->revision == 0x81) || (rdev->pdev->revision == 0xC3) || + (rdev->pdev->device == 0x6660) || (rdev->pdev->device == 0x6664) || (rdev->pdev->device == 0x6665) || - (rdev->pdev->device == 0x6667)) { + (rdev->pdev->device == 0x6667) || + (rdev->pdev->device == 0x666F)) { max_sclk = 75000; } if ((rdev->pdev->revision == 0xC3) || diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index f2abaf1bda6a40..57465f69c68702 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -96,12 +96,17 @@ struct vmwgfx_hash_item { struct vmw_res_func; +struct vmw_bo; +struct vmw_bo; +struct vmw_resource_dirty; + /** - * struct vmw-resource - base class for hardware resources + * struct vmw_resource - base class for hardware resources * * @kref: For refcounting. * @dev_priv: Pointer to the device private for this resource. Immutable. * @id: Device id. Protected by @dev_priv::resource_lock. + * @used_prio: Priority for this resource. * @guest_memory_size: Guest memory buffer size. Immutable. * @res_dirty: Resource contains data not yet in the guest memory buffer. * Protected by resource reserved. @@ -117,18 +122,16 @@ struct vmw_res_func; * pin-count greater than zero. It is not on the resource LRU lists and its * guest memory buffer is pinned. Hence it can't be evicted. * @func: Method vtable for this resource. Immutable. - * @mob_node; Node for the MOB guest memory rbtree. Protected by + * @mob_node: Node for the MOB guest memory rbtree. Protected by * @guest_memory_bo reserved. * @lru_head: List head for the LRU list. Protected by @dev_priv::resource_lock. * @binding_head: List head for the context binding list. Protected by * the @dev_priv::binding_mutex + * @dirty: resource's dirty tracker * @res_free: The resource destructor. * @hw_destroy: Callback to destroy the resource on the device, as part of * resource destruction. */ -struct vmw_bo; -struct vmw_bo; -struct vmw_resource_dirty; struct vmw_resource { struct kref kref; struct vmw_private *dev_priv; @@ -196,8 +199,8 @@ struct vmw_surface_offset; * @quality_level: Quality level. * @autogen_filter: Filter for automatically generated mipmaps. * @array_size: Number of array elements for a 1D/2D texture. For cubemap - texture number of faces * array_size. This should be 0 for pre - SM4 device. + * texture number of faces * array_size. This should be 0 for pre + * SM4 device. * @buffer_byte_stride: Buffer byte stride. * @num_sizes: Size of @sizes. For GB surface this should always be 1. * @base_size: Surface dimension. @@ -265,18 +268,24 @@ struct vmw_fifo_state { struct vmw_res_cache_entry { uint32_t handle; struct vmw_resource *res; + /* private: */ void *private; + /* public: */ unsigned short valid_handle; unsigned short valid; }; /** * enum vmw_dma_map_mode - indicate how to perform TTM page dma mappings. + * @vmw_dma_alloc_coherent: Use TTM coherent pages + * @vmw_dma_map_populate: Unmap from DMA just after unpopulate + * @vmw_dma_map_bind: Unmap from DMA just before unbind */ enum vmw_dma_map_mode { - vmw_dma_alloc_coherent, /* Use TTM coherent pages */ - vmw_dma_map_populate, /* Unmap from DMA just after unpopulate */ - vmw_dma_map_bind, /* Unmap from DMA just before unbind */ + vmw_dma_alloc_coherent, + vmw_dma_map_populate, + vmw_dma_map_bind, + /* private: */ vmw_dma_map_max }; @@ -284,8 +293,11 @@ enum vmw_dma_map_mode { * struct vmw_sg_table - Scatter/gather table for binding, with additional * device-specific information. * + * @mode: which page mapping mode to use + * @pages: Array of page pointers to the pages. + * @addrs: DMA addresses to the pages if coherent pages are used. * @sgt: Pointer to a struct sg_table with binding information - * @num_regions: Number of regions with device-address contiguous pages + * @num_pages: Number of @pages */ struct vmw_sg_table { enum vmw_dma_map_mode mode; @@ -353,6 +365,7 @@ struct vmw_ctx_validation_info; * than from user-space * @fp: If @kernel is false, points to the file of the client. Otherwise * NULL + * @filp: DRM state for this file * @cmd_bounce: Command bounce buffer used for command validation before * copying to fifo space * @cmd_bounce_size: Current command bounce buffer size @@ -729,7 +742,7 @@ extern void vmw_svga_disable(struct vmw_private *dev_priv); bool vmwgfx_supported(struct vmw_private *vmw); -/** +/* * GMR utilities - vmwgfx_gmr.c */ @@ -739,7 +752,7 @@ extern int vmw_gmr_bind(struct vmw_private *dev_priv, int gmr_id); extern void vmw_gmr_unbind(struct vmw_private *dev_priv, int gmr_id); -/** +/* * User handles */ struct vmw_user_object { @@ -759,7 +772,7 @@ void *vmw_user_object_map_size(struct vmw_user_object *uo, size_t size); void vmw_user_object_unmap(struct vmw_user_object *uo); bool vmw_user_object_is_mapped(struct vmw_user_object *uo); -/** +/* * Resource utilities - vmwgfx_resource.c */ struct vmw_user_resource_conv; @@ -819,7 +832,7 @@ static inline bool vmw_resource_mob_attached(const struct vmw_resource *res) return !RB_EMPTY_NODE(&res->mob_node); } -/** +/* * GEM related functionality - vmwgfx_gem.c */ struct vmw_bo_params; @@ -833,7 +846,7 @@ extern int vmw_gem_object_create_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); extern void vmw_debugfs_gem_init(struct vmw_private *vdev); -/** +/* * Misc Ioctl functionality - vmwgfx_ioctl.c */ @@ -846,7 +859,7 @@ extern int vmw_present_ioctl(struct drm_device *dev, void *data, extern int vmw_present_readback_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -/** +/* * Fifo utilities - vmwgfx_fifo.c */ @@ -880,9 +893,11 @@ extern int vmw_cmd_flush(struct vmw_private *dev_priv, /** - * vmw_fifo_caps - Returns the capabilities of the FIFO command + * vmw_fifo_caps - Get the capabilities of the FIFO command * queue or 0 if fifo memory isn't present. * @dev_priv: The device private context + * + * Returns: capabilities of the FIFO command or %0 if fifo memory not present */ static inline uint32_t vmw_fifo_caps(const struct vmw_private *dev_priv) { @@ -893,9 +908,11 @@ static inline uint32_t vmw_fifo_caps(const struct vmw_private *dev_priv) /** - * vmw_is_cursor_bypass3_enabled - Returns TRUE iff Cursor Bypass 3 - * is enabled in the FIFO. + * vmw_is_cursor_bypass3_enabled - check Cursor Bypass 3 enabled setting + * in the FIFO. * @dev_priv: The device private context + * + * Returns: %true iff Cursor Bypass 3 is enabled in the FIFO */ static inline bool vmw_is_cursor_bypass3_enabled(const struct vmw_private *dev_priv) @@ -903,7 +920,7 @@ vmw_is_cursor_bypass3_enabled(const struct vmw_private *dev_priv) return (vmw_fifo_caps(dev_priv) & SVGA_FIFO_CAP_CURSOR_BYPASS_3) != 0; } -/** +/* * TTM buffer object driver - vmwgfx_ttm_buffer.c */ @@ -927,7 +944,7 @@ extern void vmw_piter_start(struct vmw_piter *viter, * * @viter: Pointer to the iterator to advance. * - * Returns false if past the list of pages, true otherwise. + * Returns: false if past the list of pages, true otherwise. */ static inline bool vmw_piter_next(struct vmw_piter *viter) { @@ -939,7 +956,7 @@ static inline bool vmw_piter_next(struct vmw_piter *viter) * * @viter: Pointer to the iterator * - * Returns the DMA address of the page pointed to by @viter. + * Returns: the DMA address of the page pointed to by @viter. */ static inline dma_addr_t vmw_piter_dma_addr(struct vmw_piter *viter) { @@ -951,14 +968,14 @@ static inline dma_addr_t vmw_piter_dma_addr(struct vmw_piter *viter) * * @viter: Pointer to the iterator * - * Returns the DMA address of the page pointed to by @viter. + * Returns: the DMA address of the page pointed to by @viter. */ static inline struct page *vmw_piter_page(struct vmw_piter *viter) { return viter->pages[viter->i]; } -/** +/* * Command submission - vmwgfx_execbuf.c */ @@ -993,7 +1010,7 @@ extern int vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv, int32_t out_fence_fd); bool vmw_cmd_describe(const void *buf, u32 *size, char const **cmd); -/** +/* * IRQs and wating - vmwgfx_irq.c */ @@ -1016,7 +1033,7 @@ bool vmw_generic_waiter_add(struct vmw_private *dev_priv, u32 flag, bool vmw_generic_waiter_remove(struct vmw_private *dev_priv, u32 flag, int *waiter_count); -/** +/* * Kernel modesetting - vmwgfx_kms.c */ @@ -1048,7 +1065,7 @@ extern int vmw_resource_pin(struct vmw_resource *res, bool interruptible); extern void vmw_resource_unpin(struct vmw_resource *res); extern enum vmw_res_type vmw_res_type(const struct vmw_resource *res); -/** +/* * Overlay control - vmwgfx_overlay.c */ @@ -1063,20 +1080,20 @@ int vmw_overlay_unref(struct vmw_private *dev_priv, uint32_t stream_id); int vmw_overlay_num_overlays(struct vmw_private *dev_priv); int vmw_overlay_num_free_overlays(struct vmw_private *dev_priv); -/** +/* * GMR Id manager */ int vmw_gmrid_man_init(struct vmw_private *dev_priv, int type); void vmw_gmrid_man_fini(struct vmw_private *dev_priv, int type); -/** +/* * System memory manager */ int vmw_sys_man_init(struct vmw_private *dev_priv); void vmw_sys_man_fini(struct vmw_private *dev_priv); -/** +/* * Prime - vmwgfx_prime.c */ @@ -1292,7 +1309,7 @@ extern void vmw_cmdbuf_irqthread(struct vmw_cmdbuf_man *man); * @line: The current line of the blit. * @line_offset: Offset of the current line segment. * @cpp: Bytes per pixel (granularity information). - * @memcpy: Which memcpy function to use. + * @do_cpy: Which memcpy function to use. */ struct vmw_diff_cpy { struct drm_rect rect; @@ -1380,13 +1397,14 @@ vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf); /** * VMW_DEBUG_KMS - Debug output for kernel mode-setting + * @fmt: format string for the args * * This macro is for debugging vmwgfx mode-setting code. */ #define VMW_DEBUG_KMS(fmt, ...) \ DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) -/** +/* * Inline helper functions */ @@ -1417,11 +1435,13 @@ static inline void vmw_fifo_resource_dec(struct vmw_private *dev_priv) /** * vmw_fifo_mem_read - Perform a MMIO read from the fifo memory - * + * @vmw: The device private structure * @fifo_reg: The fifo register to read from * * This function is intended to be equivalent to ioread32() on * memremap'd memory, but without byteswapping. + * + * Returns: the value read */ static inline u32 vmw_fifo_mem_read(struct vmw_private *vmw, uint32 fifo_reg) { @@ -1431,8 +1451,9 @@ static inline u32 vmw_fifo_mem_read(struct vmw_private *vmw, uint32 fifo_reg) /** * vmw_fifo_mem_write - Perform a MMIO write to volatile memory - * - * @addr: The fifo register to write to + * @vmw: The device private structure + * @fifo_reg: The fifo register to write to + * @value: The value to write * * This function is intended to be equivalent to iowrite32 on * memremap'd memory, but without byteswapping. diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 55730e29d3ae9c..e7bddf840a7951 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -771,7 +771,8 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, ret = vmw_bo_dirty_add(bo); if (!ret && surface && surface->res.func->dirty_alloc) { surface->res.coherent = true; - ret = surface->res.func->dirty_alloc(&surface->res); + if (surface->res.dirty == NULL) + ret = surface->res.func->dirty_alloc(&surface->res); } ttm_bo_unreserve(&bo->tbo); } diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 2bda426a6986af..d1561ebe4e56ca 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -313,6 +313,8 @@ static void dev_fini_ggtt(void *arg) { struct xe_ggtt *ggtt = arg; + scoped_guard(mutex, &ggtt->lock) + ggtt->flags &= ~XE_GGTT_FLAGS_ONLINE; drain_workqueue(ggtt->wq); } @@ -377,6 +379,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) if (err) return err; + ggtt->flags |= XE_GGTT_FLAGS_ONLINE; err = devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt); if (err) return err; @@ -410,13 +413,10 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) static void ggtt_node_remove(struct xe_ggtt_node *node) { struct xe_ggtt *ggtt = node->ggtt; - struct xe_device *xe = tile_to_xe(ggtt->tile); bool bound; - int idx; - - bound = drm_dev_enter(&xe->drm, &idx); mutex_lock(&ggtt->lock); + bound = ggtt->flags & XE_GGTT_FLAGS_ONLINE; if (bound) xe_ggtt_clear(ggtt, node->base.start, node->base.size); drm_mm_remove_node(&node->base); @@ -429,8 +429,6 @@ static void ggtt_node_remove(struct xe_ggtt_node *node) if (node->invalidate_on_remove) xe_ggtt_invalidate(ggtt); - drm_dev_exit(idx); - free_node: xe_ggtt_node_fini(node); } diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index d82b71a198bc2b..c002857bb76111 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -28,11 +28,14 @@ struct xe_ggtt { /** @size: Total usable size of this GGTT */ u64 size; -#define XE_GGTT_FLAGS_64K BIT(0) +#define XE_GGTT_FLAGS_64K BIT(0) +#define XE_GGTT_FLAGS_ONLINE BIT(1) /** * @flags: Flags for this GGTT * Acceptable flags: * - %XE_GGTT_FLAGS_64K - if PTE size is 64K. Otherwise, regular is 4K. + * - %XE_GGTT_FLAGS_ONLINE - is GGTT online, protected by ggtt->lock + * after init */ unsigned int flags; /** @scratch: Internal object allocation used as a scratch page */ diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index fe944687728cd8..03c1862ba497a6 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -12,6 +12,7 @@ #include "xe_gt_printk.h" #include "xe_gt_sysfs.h" #include "xe_mmio.h" +#include "xe_pm.h" #include "xe_sriov.h" static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines) @@ -150,6 +151,7 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr, xe_gt_info(gt, "Setting compute mode to %d\n", num_engines); gt->ccs_mode = num_engines; xe_gt_record_user_engines(gt); + guard(xe_pm_runtime)(xe); xe_gt_reset(gt); } diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 6df7c3f260e5bd..4ab65cae87433d 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -1124,14 +1124,14 @@ static int guc_wait_ucode(struct xe_guc *guc) struct xe_guc_pc *guc_pc = >->uc.guc.pc; u32 before_freq, act_freq, cur_freq; u32 status = 0, tries = 0; + int load_result, ret; ktime_t before; u64 delta_ms; - int ret; before_freq = xe_guc_pc_get_act_freq(guc_pc); before = ktime_get(); - ret = poll_timeout_us(ret = guc_load_done(gt, &status, &tries), ret, + ret = poll_timeout_us(load_result = guc_load_done(gt, &status, &tries), load_result, 10 * USEC_PER_MSEC, GUC_LOAD_TIMEOUT_SEC * USEC_PER_SEC, false); @@ -1139,7 +1139,7 @@ static int guc_wait_ucode(struct xe_guc *guc) act_freq = xe_guc_pc_get_act_freq(guc_pc); cur_freq = xe_guc_pc_get_cur_freq_fw(guc_pc); - if (ret) { + if (ret || load_result <= 0) { xe_gt_err(gt, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz (req %dMHz)\n", status, delta_ms, xe_guc_pc_get_act_freq(guc_pc), xe_guc_pc_get_cur_freq_fw(guc_pc)); @@ -1347,15 +1347,37 @@ int xe_guc_enable_communication(struct xe_guc *guc) return 0; } -int xe_guc_suspend(struct xe_guc *guc) +/** + * xe_guc_softreset() - Soft reset GuC + * @guc: The GuC object + * + * Send soft reset command to GuC through mmio send. + * + * Return: 0 if success, otherwise error code + */ +int xe_guc_softreset(struct xe_guc *guc) { - struct xe_gt *gt = guc_to_gt(guc); u32 action[] = { XE_GUC_ACTION_CLIENT_SOFT_RESET, }; int ret; + if (!xe_uc_fw_is_running(&guc->fw)) + return 0; + ret = xe_guc_mmio_send(guc, action, ARRAY_SIZE(action)); + if (ret) + return ret; + + return 0; +} + +int xe_guc_suspend(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + int ret; + + ret = xe_guc_softreset(guc); if (ret) { xe_gt_err(gt, "GuC suspend failed: %pe\n", ERR_PTR(ret)); return ret; diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index 66e7edc70ed9f4..02514914f40485 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -44,6 +44,7 @@ int xe_guc_opt_in_features_enable(struct xe_guc *guc); void xe_guc_runtime_suspend(struct xe_guc *guc); void xe_guc_runtime_resume(struct xe_guc *guc); int xe_guc_suspend(struct xe_guc *guc); +int xe_guc_softreset(struct xe_guc *guc); void xe_guc_notify(struct xe_guc *guc); int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr); int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index d04589140b776f..c80082b4c876c7 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -345,6 +345,7 @@ static void guc_action_disable_ct(void *arg) { struct xe_guc_ct *ct = arg; + xe_guc_ct_stop(ct); guc_ct_change_state(ct, XE_GUC_CT_STATE_DISABLED); } diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 799ef9f480036b..fc4f99d467635e 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -48,6 +48,8 @@ #define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN 6 +static int guc_submit_reset_prepare(struct xe_guc *guc); + static struct xe_guc * exec_queue_to_guc(struct xe_exec_queue *q) { @@ -239,7 +241,7 @@ static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) EXEC_QUEUE_STATE_BANNED)); } -static void guc_submit_fini(struct drm_device *drm, void *arg) +static void guc_submit_sw_fini(struct drm_device *drm, void *arg) { struct xe_guc *guc = arg; struct xe_device *xe = guc_to_xe(guc); @@ -257,6 +259,19 @@ static void guc_submit_fini(struct drm_device *drm, void *arg) xa_destroy(&guc->submission_state.exec_queue_lookup); } +static void guc_submit_fini(void *arg) +{ + struct xe_guc *guc = arg; + + /* Forcefully kill any remaining exec queues */ + xe_guc_ct_stop(&guc->ct); + guc_submit_reset_prepare(guc); + xe_guc_softreset(guc); + xe_guc_submit_stop(guc); + xe_uc_fw_sanitize(&guc->fw); + xe_guc_submit_pause_abort(guc); +} + static void guc_submit_wedged_fini(void *arg) { struct xe_guc *guc = arg; @@ -326,7 +341,11 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) guc->submission_state.initialized = true; - return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); + err = drmm_add_action_or_reset(&xe->drm, guc_submit_sw_fini, guc); + if (err) + return err; + + return devm_add_action_or_reset(xe->drm.dev, guc_submit_fini, guc); } /* @@ -1252,6 +1271,7 @@ static void disable_scheduling_deregister(struct xe_guc *guc, */ void xe_guc_submit_wedge(struct xe_guc *guc) { + struct xe_device *xe = guc_to_xe(guc); struct xe_gt *gt = guc_to_gt(guc); struct xe_exec_queue *q; unsigned long index; @@ -1266,20 +1286,28 @@ void xe_guc_submit_wedge(struct xe_guc *guc) if (!guc->submission_state.initialized) return; - err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, - guc_submit_wedged_fini, guc); - if (err) { - xe_gt_err(gt, "Failed to register clean-up in wedged.mode=%s; " - "Although device is wedged.\n", - xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET)); - return; - } + if (xe->wedged.mode == 2) { + err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, + guc_submit_wedged_fini, guc); + if (err) { + xe_gt_err(gt, "Failed to register clean-up on wedged.mode=2; " + "Although device is wedged.\n"); + return; + } - mutex_lock(&guc->submission_state.lock); - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - if (xe_exec_queue_get_unless_zero(q)) - set_exec_queue_wedged(q); - mutex_unlock(&guc->submission_state.lock); + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + if (xe_exec_queue_get_unless_zero(q)) + set_exec_queue_wedged(q); + mutex_unlock(&guc->submission_state.lock); + } else { + /* Forcefully kill any remaining exec queues, signal fences */ + guc_submit_reset_prepare(guc); + xe_guc_submit_stop(guc); + xe_guc_softreset(guc); + xe_uc_fw_sanitize(&guc->fw); + xe_guc_submit_pause_abort(guc); + } } static bool guc_submit_hint_wedged(struct xe_guc *guc) @@ -2230,6 +2258,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = { static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) { struct xe_gpu_scheduler *sched = &q->guc->sched; + bool do_destroy = false; /* Stop scheduling + flush any DRM scheduler operations */ xe_sched_submission_stop(sched); @@ -2237,7 +2266,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) /* Clean up lost G2H + reset engine state */ if (exec_queue_registered(q)) { if (exec_queue_destroyed(q)) - __guc_exec_queue_destroy(guc, q); + do_destroy = true; } if (q->guc->suspend_pending) { set_exec_queue_suspended(q); @@ -2273,18 +2302,15 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) xe_guc_exec_queue_trigger_cleanup(q); } } + + if (do_destroy) + __guc_exec_queue_destroy(guc, q); } -int xe_guc_submit_reset_prepare(struct xe_guc *guc) +static int guc_submit_reset_prepare(struct xe_guc *guc) { int ret; - if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc))) - return 0; - - if (!guc->submission_state.initialized) - return 0; - /* * Using an atomic here rather than submission_state.lock as this * function can be called while holding the CT lock (engine reset @@ -2299,6 +2325,17 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc) return ret; } +int xe_guc_submit_reset_prepare(struct xe_guc *guc) +{ + if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc))) + return 0; + + if (!guc->submission_state.initialized) + return 0; + + return guc_submit_reset_prepare(guc); +} + void xe_guc_submit_reset_wait(struct xe_guc *guc) { wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) || @@ -2695,8 +2732,7 @@ void xe_guc_submit_pause_abort(struct xe_guc *guc) continue; xe_sched_submission_start(sched); - if (exec_queue_killed_or_banned_or_wedged(q)) - xe_guc_exec_queue_trigger_cleanup(q); + guc_exec_queue_kill(q); } mutex_unlock(&guc->submission_state.lock); } diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index b0f037bc227ff3..7b70cc01fdb387 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -2413,14 +2413,14 @@ static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts) * @lrc: Pointer to the lrc. * * Return latest ctx timestamp. With support for active contexts, the - * calculation may bb slightly racy, so follow a read-again logic to ensure that + * calculation may be slightly racy, so follow a read-again logic to ensure that * the context is still active before returning the right timestamp. * * Returns: New ctx timestamp value */ u64 xe_lrc_timestamp(struct xe_lrc *lrc) { - u64 lrc_ts, reg_ts, new_ts; + u64 lrc_ts, reg_ts, new_ts = lrc->ctx_timestamp; u32 engine_id; lrc_ts = xe_lrc_ctx_timestamp(lrc); diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 4dd3f29933cf1a..fa90441d30529d 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -543,8 +543,7 @@ static ssize_t xe_oa_read(struct file *file, char __user *buf, size_t offset = 0; int ret; - /* Can't read from disabled streams */ - if (!stream->enabled || !stream->sample) + if (!stream->sample) return -EINVAL; if (!(file->f_flags & O_NONBLOCK)) { @@ -1460,6 +1459,10 @@ static void xe_oa_stream_disable(struct xe_oa_stream *stream) if (stream->sample) hrtimer_cancel(&stream->poll_check_timer); + + /* Update stream->oa_buffer.tail to allow any final reports to be read */ + if (xe_oa_buffer_check_unlocked(stream)) + wake_up(&stream->poll_wq); } static int xe_oa_enable_preempt_timeslice(struct xe_oa_stream *stream) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 13b355fadd581a..2d9ce2c4cb4fe7 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1655,14 +1655,35 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, XE_WARN_ON(!level); /* Check for leaf node */ if (xe_walk->prl && xe_page_reclaim_list_valid(xe_walk->prl) && - (!xe_child->base.children || !xe_child->base.children[first])) { + xe_child->level <= MAX_HUGEPTE_LEVEL) { struct iosys_map *leaf_map = &xe_child->bo->vmap; pgoff_t count = xe_pt_num_entries(addr, next, xe_child->level, walk); for (pgoff_t i = 0; i < count; i++) { - u64 pte = xe_map_rd(xe, leaf_map, (first + i) * sizeof(u64), u64); + u64 pte; int ret; + /* + * If not a leaf pt, skip unless non-leaf pt is interleaved between + * leaf ptes which causes the page walk to skip over the child leaves + */ + if (xe_child->base.children && xe_child->base.children[first + i]) { + u64 pt_size = 1ULL << walk->shifts[xe_child->level]; + bool edge_pt = (i == 0 && !IS_ALIGNED(addr, pt_size)) || + (i == count - 1 && !IS_ALIGNED(next, pt_size)); + + if (!edge_pt) { + xe_page_reclaim_list_abort(xe_walk->tile->primary_gt, + xe_walk->prl, + "PT is skipped by walk at level=%u offset=%lu", + xe_child->level, first + i); + break; + } + continue; + } + + pte = xe_map_rd(xe, leaf_map, (first + i) * sizeof(u64), u64); + /* * In rare scenarios, pte may not be written yet due to racy conditions. * In such cases, invalidate the PRL and fallback to full PPC invalidation. @@ -1674,9 +1695,8 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, } /* Ensure it is a defined page */ - xe_tile_assert(xe_walk->tile, - xe_child->level == 0 || - (pte & (XE_PTE_PS64 | XE_PDE_PS_2M | XE_PDPE_PS_1G))); + xe_tile_assert(xe_walk->tile, xe_child->level == 0 || + (pte & (XE_PDE_PS_2M | XE_PDPE_PS_1G))); /* An entry should be added for 64KB but contigious 4K have XE_PTE_PS64 */ if (pte & XE_PTE_PS64) @@ -1701,11 +1721,11 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, killed = xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); /* - * Verify PRL is active and if entry is not a leaf pte (base.children conditions), - * there is a potential need to invalidate the PRL if any PTE (num_live) are dropped. + * Verify if any PTE are potentially dropped at non-leaf levels, either from being + * killed or the page walk covers the region. */ - if (xe_walk->prl && level > 1 && xe_child->num_live && - xe_child->base.children && xe_child->base.children[first]) { + if (xe_walk->prl && xe_page_reclaim_list_valid(xe_walk->prl) && + xe_child->level > MAX_HUGEPTE_LEVEL && xe_child->num_live) { bool covered = xe_pt_covers(addr, next, xe_child->level, &xe_walk->base); /* diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index f3d15994ca1ed7..50c7b45c59e3fb 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -444,6 +444,8 @@ hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz, (u64)(long)ctx, true); /* prevent infinite recursions */ + if (ret > size) + ret = size; if (ret > 0) memcpy(buf, dma_data, ret); diff --git a/drivers/hid/hid-appletb-kbd.c b/drivers/hid/hid-appletb-kbd.c index a1db3b3d066769..0fdc0968b9ef26 100644 --- a/drivers/hid/hid-appletb-kbd.c +++ b/drivers/hid/hid-appletb-kbd.c @@ -476,7 +476,7 @@ static int appletb_kbd_suspend(struct hid_device *hdev, pm_message_t msg) return 0; } -static int appletb_kbd_reset_resume(struct hid_device *hdev) +static int appletb_kbd_resume(struct hid_device *hdev) { struct appletb_kbd *kbd = hid_get_drvdata(hdev); @@ -500,7 +500,8 @@ static struct hid_driver appletb_kbd_hid_driver = { .event = appletb_kbd_hid_event, .input_configured = appletb_kbd_input_configured, .suspend = pm_ptr(appletb_kbd_suspend), - .reset_resume = pm_ptr(appletb_kbd_reset_resume), + .resume = pm_ptr(appletb_kbd_resume), + .reset_resume = pm_ptr(appletb_kbd_resume), .driver.dev_groups = appletb_kbd_groups, }; module_hid_driver(appletb_kbd_hid_driver); diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index 687b785e2d0c13..bc93b27f9b1369 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -1497,6 +1497,9 @@ static const struct hid_device_id asus_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X), QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD | QUIRK_ROG_ALLY_XPAD }, + { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, + USB_DEVICE_ID_ASUSTEK_XGM_2022), + }, { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_XGM_2023), }, diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 840a601138688d..833df14ef68f15 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2057,9 +2057,10 @@ int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 * rsize = max_buffer_size; if (csize < rsize) { - dbg_hid("report %d is too short, (%d < %d)\n", report->id, - csize, rsize); - memset(cdata + csize, 0, rsize - csize); + hid_warn_ratelimited(hid, "Event data for report %d was too short (%d vs %d)\n", + report->id, rsize, csize); + ret = -EINVAL; + goto out; } if ((hid->claimed & HID_CLAIMED_HIDDEV) && hid->hiddev_report_event) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 4ab7640b119ac5..afcee13bad6139 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -229,6 +229,7 @@ #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X 0x1b4c #define USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD 0x196b #define USB_DEVICE_ID_ASUSTEK_FX503VD_KEYBOARD 0x1869 +#define USB_DEVICE_ID_ASUSTEK_XGM_2022 0x1970 #define USB_DEVICE_ID_ASUSTEK_XGM_2023 0x1a9a #define USB_VENDOR_ID_ATEN 0x0557 @@ -454,8 +455,6 @@ #define USB_DEVICE_ID_TOSHIBA_CLICK_L9W 0x0401 #define USB_DEVICE_ID_HP_X2 0x074d #define USB_DEVICE_ID_HP_X2_10_COVER 0x0755 -#define USB_DEVICE_ID_ASUS_UX550VE_TOUCHSCREEN 0x2544 -#define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN 0x2706 #define I2C_DEVICE_ID_CHROMEBOOK_TROGDOR_POMPOM 0x2F81 #define USB_VENDOR_ID_ELECOM 0x056e diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index d5308adb28948c..9475b7e9da43cc 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -354,6 +354,7 @@ static enum power_supply_property hidinput_battery_props[] = { #define HID_BATTERY_QUIRK_FEATURE (1 << 1) /* ask for feature report */ #define HID_BATTERY_QUIRK_IGNORE (1 << 2) /* completely ignore the battery */ #define HID_BATTERY_QUIRK_AVOID_QUERY (1 << 3) /* do not query the battery */ +#define HID_BATTERY_QUIRK_DYNAMIC (1 << 4) /* report present only after life signs */ static const struct hid_device_id hid_battery_quirks[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, @@ -386,10 +387,6 @@ static const struct hid_device_id hid_battery_quirks[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD), HID_BATTERY_QUIRK_IGNORE }, - { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN), - HID_BATTERY_QUIRK_IGNORE }, - { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550VE_TOUCHSCREEN), - HID_BATTERY_QUIRK_IGNORE }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_L), HID_BATTERY_QUIRK_AVOID_QUERY }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_PRO_MW), @@ -402,8 +399,8 @@ static const struct hid_device_id hid_battery_quirks[] = { * Elan HID touchscreens seem to all report a non present battery, * set HID_BATTERY_QUIRK_IGNORE for all Elan I2C and USB HID devices. */ - { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, HID_ANY_ID), HID_BATTERY_QUIRK_IGNORE }, - { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, HID_ANY_ID), HID_BATTERY_QUIRK_IGNORE }, + { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, HID_ANY_ID), HID_BATTERY_QUIRK_DYNAMIC }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, HID_ANY_ID), HID_BATTERY_QUIRK_DYNAMIC }, {} }; @@ -460,11 +457,14 @@ static int hidinput_get_battery_property(struct power_supply *psy, int ret = 0; switch (prop) { - case POWER_SUPPLY_PROP_PRESENT: case POWER_SUPPLY_PROP_ONLINE: val->intval = 1; break; + case POWER_SUPPLY_PROP_PRESENT: + val->intval = dev->battery_present; + break; + case POWER_SUPPLY_PROP_CAPACITY: if (dev->battery_status != HID_BATTERY_REPORTED && !dev->battery_avoid_query) { @@ -577,6 +577,8 @@ static int hidinput_setup_battery(struct hid_device *dev, unsigned report_type, if (quirks & HID_BATTERY_QUIRK_AVOID_QUERY) dev->battery_avoid_query = true; + dev->battery_present = (quirks & HID_BATTERY_QUIRK_DYNAMIC) ? false : true; + dev->battery = power_supply_register(&dev->dev, psy_desc, &psy_cfg); if (IS_ERR(dev->battery)) { error = PTR_ERR(dev->battery); @@ -632,6 +634,7 @@ static void hidinput_update_battery(struct hid_device *dev, unsigned int usage, return; if (hidinput_update_battery_charge_status(dev, usage, value)) { + dev->battery_present = true; power_supply_changed(dev->battery); return; } @@ -647,6 +650,7 @@ static void hidinput_update_battery(struct hid_device *dev, unsigned int usage, if (dev->battery_status != HID_BATTERY_REPORTED || capacity != dev->battery_capacity || ktime_after(ktime_get_coarse(), dev->battery_ratelimit_time)) { + dev->battery_present = true; dev->battery_capacity = capacity; dev->battery_status = HID_BATTERY_REPORTED; dev->battery_ratelimit_time = diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index d40932809ce1ab..d1dea7297712d5 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -4487,10 +4487,12 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id) if (!ret) ret = hidpp_ff_init(hidpp, &data); - if (ret) + if (ret) { hid_warn(hidpp->hid_dev, "Unable to initialize force feedback support, errno %d\n", ret); + ret = 0; + } } /* @@ -4668,6 +4670,8 @@ static const struct hid_device_id hidpp_devices[] = { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb038) }, { /* Slim Solar+ K980 Keyboard over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb391) }, + { /* MX Master 4 mouse over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb042) }, {} }; diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index b8a748bbf0fd8f..e82a3c4e5b44ef 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -526,12 +526,19 @@ static void mt_get_feature(struct hid_device *hdev, struct hid_report *report) dev_warn(&hdev->dev, "failed to fetch feature %d\n", report->id); } else { + /* The report ID in the request and the response should match */ + if (report->id != buf[0]) { + hid_err(hdev, "Returned feature report did not match the request\n"); + goto free; + } + ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, buf, size, 0); if (ret) dev_warn(&hdev->dev, "failed to report feature\n"); } +free: kfree(buf); } diff --git a/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-hid.c b/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-hid.c index f9fcb398673b20..8075992e8732ef 100644 --- a/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-hid.c +++ b/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-hid.c @@ -127,6 +127,7 @@ int quicki2c_hid_probe(struct quicki2c_device *qcdev) hid->product = le16_to_cpu(qcdev->dev_desc.product_id); snprintf(hid->name, sizeof(hid->name), "%s %04X:%04X", "quicki2c-hid", hid->vendor, hid->product); + strscpy(hid->phys, dev_name(qcdev->dev), sizeof(hid->phys)); ret = hid_add_device(hid); if (ret) { diff --git a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-hid.c b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-hid.c index 82c72bfa2795ea..91d5807b4a8302 100644 --- a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-hid.c +++ b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-hid.c @@ -118,6 +118,7 @@ int quickspi_hid_probe(struct quickspi_device *qsdev) hid->product = le16_to_cpu(qsdev->dev_desc.product_id); snprintf(hid->name, sizeof(hid->name), "%s %04X:%04X", "quickspi-hid", hid->vendor, hid->product); + strscpy(hid->phys, dev_name(qsdev->dev), sizeof(hid->phys)); ret = hid_add_device(hid); if (ret) { diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 9b2c710f8da182..da1f0ea85625dc 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1208,10 +1208,20 @@ static int wacom_intuos_bt_irq(struct wacom_wac *wacom, size_t len) switch (data[0]) { case 0x04: + if (len < 32) { + dev_warn(wacom->pen_input->dev.parent, + "Report 0x04 too short: %zu bytes\n", len); + break; + } wacom_intuos_bt_process_data(wacom, data + i); i += 10; fallthrough; case 0x03: + if (i == 1 && len < 22) { + dev_warn(wacom->pen_input->dev.parent, + "Report 0x03 too short: %zu bytes\n", len); + break; + } wacom_intuos_bt_process_data(wacom, data + i); i += 10; wacom_intuos_bt_process_data(wacom, data + i); diff --git a/drivers/hv/mshv_regions.c b/drivers/hv/mshv_regions.c index c28aac0726deb7..fdffd4f002f6fd 100644 --- a/drivers/hv/mshv_regions.c +++ b/drivers/hv/mshv_regions.c @@ -314,15 +314,17 @@ int mshv_region_pin(struct mshv_mem_region *region) ret = pin_user_pages_fast(userspace_addr, nr_pages, FOLL_WRITE | FOLL_LONGTERM, pages); - if (ret < 0) + if (ret != nr_pages) goto release_pages; } return 0; release_pages: + if (ret > 0) + done_count += ret; mshv_region_invalidate_pages(region, 0, done_count); - return ret; + return ret < 0 ? ret : -ENOMEM; } static int mshv_region_chunk_unmap(struct mshv_mem_region *region, diff --git a/drivers/hv/mshv_root.h b/drivers/hv/mshv_root.h index 04c2a1910a8af2..826798f1a8ecf1 100644 --- a/drivers/hv/mshv_root.h +++ b/drivers/hv/mshv_root.h @@ -190,7 +190,6 @@ struct hv_synic_pages { }; struct mshv_root { - struct hv_synic_pages __percpu *synic_pages; spinlock_t pt_ht_lock; DECLARE_HASHTABLE(pt_htable, MSHV_PARTITIONS_HASH_BITS); struct hv_partition_property_vmm_capabilities vmm_caps; @@ -249,8 +248,8 @@ int mshv_register_doorbell(u64 partition_id, doorbell_cb_t doorbell_cb, void mshv_unregister_doorbell(u64 partition_id, int doorbell_portid); void mshv_isr(void); -int mshv_synic_init(unsigned int cpu); -int mshv_synic_cleanup(unsigned int cpu); +int mshv_synic_init(struct device *dev); +void mshv_synic_exit(void); static inline bool mshv_partition_encrypted(struct mshv_partition *partition) { diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c index 82ff823ef0ca92..6f42423f7faa8c 100644 --- a/drivers/hv/mshv_root_main.c +++ b/drivers/hv/mshv_root_main.c @@ -120,7 +120,6 @@ static u16 mshv_passthru_hvcalls[] = { HVCALL_SET_VP_REGISTERS, HVCALL_TRANSLATE_VIRTUAL_ADDRESS, HVCALL_CLEAR_VIRTUAL_INTERRUPT, - HVCALL_SCRUB_PARTITION, HVCALL_REGISTER_INTERCEPT_RESULT, HVCALL_ASSERT_VIRTUAL_INTERRUPT, HVCALL_GET_GPA_PAGES_ACCESS_STATES, @@ -1289,7 +1288,7 @@ static int mshv_prepare_pinned_region(struct mshv_mem_region *region) */ static long mshv_map_user_memory(struct mshv_partition *partition, - struct mshv_user_mem_region mem) + struct mshv_user_mem_region *mem) { struct mshv_mem_region *region; struct vm_area_struct *vma; @@ -1297,12 +1296,12 @@ mshv_map_user_memory(struct mshv_partition *partition, ulong mmio_pfn; long ret; - if (mem.flags & BIT(MSHV_SET_MEM_BIT_UNMAP) || - !access_ok((const void __user *)mem.userspace_addr, mem.size)) + if (mem->flags & BIT(MSHV_SET_MEM_BIT_UNMAP) || + !access_ok((const void __user *)mem->userspace_addr, mem->size)) return -EINVAL; mmap_read_lock(current->mm); - vma = vma_lookup(current->mm, mem.userspace_addr); + vma = vma_lookup(current->mm, mem->userspace_addr); is_mmio = vma ? !!(vma->vm_flags & (VM_IO | VM_PFNMAP)) : 0; mmio_pfn = is_mmio ? vma->vm_pgoff : 0; mmap_read_unlock(current->mm); @@ -1310,7 +1309,7 @@ mshv_map_user_memory(struct mshv_partition *partition, if (!vma) return -EINVAL; - ret = mshv_partition_create_region(partition, &mem, ®ion, + ret = mshv_partition_create_region(partition, mem, ®ion, is_mmio); if (ret) return ret; @@ -1348,32 +1347,32 @@ mshv_map_user_memory(struct mshv_partition *partition, return 0; errout: - vfree(region); + mshv_region_put(region); return ret; } /* Called for unmapping both the guest ram and the mmio space */ static long mshv_unmap_user_memory(struct mshv_partition *partition, - struct mshv_user_mem_region mem) + struct mshv_user_mem_region *mem) { struct mshv_mem_region *region; - if (!(mem.flags & BIT(MSHV_SET_MEM_BIT_UNMAP))) + if (!(mem->flags & BIT(MSHV_SET_MEM_BIT_UNMAP))) return -EINVAL; spin_lock(&partition->pt_mem_regions_lock); - region = mshv_partition_region_by_gfn(partition, mem.guest_pfn); + region = mshv_partition_region_by_gfn(partition, mem->guest_pfn); if (!region) { spin_unlock(&partition->pt_mem_regions_lock); return -ENOENT; } /* Paranoia check */ - if (region->start_uaddr != mem.userspace_addr || - region->start_gfn != mem.guest_pfn || - region->nr_pages != HVPFN_DOWN(mem.size)) { + if (region->start_uaddr != mem->userspace_addr || + region->start_gfn != mem->guest_pfn || + region->nr_pages != HVPFN_DOWN(mem->size)) { spin_unlock(&partition->pt_mem_regions_lock); return -EINVAL; } @@ -1404,9 +1403,9 @@ mshv_partition_ioctl_set_memory(struct mshv_partition *partition, return -EINVAL; if (mem.flags & BIT(MSHV_SET_MEM_BIT_UNMAP)) - return mshv_unmap_user_memory(partition, mem); + return mshv_unmap_user_memory(partition, &mem); - return mshv_map_user_memory(partition, mem); + return mshv_map_user_memory(partition, &mem); } static long @@ -2064,7 +2063,6 @@ mshv_dev_release(struct inode *inode, struct file *filp) return 0; } -static int mshv_cpuhp_online; static int mshv_root_sched_online; static const char *scheduler_type_to_string(enum hv_scheduler_type type) @@ -2249,27 +2247,6 @@ root_scheduler_deinit(void) free_percpu(root_scheduler_output); } -static int mshv_reboot_notify(struct notifier_block *nb, - unsigned long code, void *unused) -{ - cpuhp_remove_state(mshv_cpuhp_online); - return 0; -} - -struct notifier_block mshv_reboot_nb = { - .notifier_call = mshv_reboot_notify, -}; - -static void mshv_root_partition_exit(void) -{ - unregister_reboot_notifier(&mshv_reboot_nb); -} - -static int __init mshv_root_partition_init(struct device *dev) -{ - return register_reboot_notifier(&mshv_reboot_nb); -} - static int __init mshv_init_vmm_caps(struct device *dev) { int ret; @@ -2314,39 +2291,21 @@ static int __init mshv_parent_partition_init(void) MSHV_HV_MAX_VERSION); } - mshv_root.synic_pages = alloc_percpu(struct hv_synic_pages); - if (!mshv_root.synic_pages) { - dev_err(dev, "Failed to allocate percpu synic page\n"); - ret = -ENOMEM; + ret = mshv_synic_init(dev); + if (ret) goto device_deregister; - } - - ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mshv_synic", - mshv_synic_init, - mshv_synic_cleanup); - if (ret < 0) { - dev_err(dev, "Failed to setup cpu hotplug state: %i\n", ret); - goto free_synic_pages; - } - - mshv_cpuhp_online = ret; ret = mshv_init_vmm_caps(dev); if (ret) - goto remove_cpu_state; + goto synic_cleanup; ret = mshv_retrieve_scheduler_type(dev); if (ret) - goto remove_cpu_state; - - if (hv_root_partition()) - ret = mshv_root_partition_init(dev); - if (ret) - goto remove_cpu_state; + goto synic_cleanup; ret = root_scheduler_init(dev); if (ret) - goto exit_partition; + goto synic_cleanup; ret = mshv_debugfs_init(); if (ret) @@ -2367,13 +2326,8 @@ static int __init mshv_parent_partition_init(void) mshv_debugfs_exit(); deinit_root_scheduler: root_scheduler_deinit(); -exit_partition: - if (hv_root_partition()) - mshv_root_partition_exit(); -remove_cpu_state: - cpuhp_remove_state(mshv_cpuhp_online); -free_synic_pages: - free_percpu(mshv_root.synic_pages); +synic_cleanup: + mshv_synic_exit(); device_deregister: misc_deregister(&mshv_dev); return ret; @@ -2387,10 +2341,7 @@ static void __exit mshv_parent_partition_exit(void) misc_deregister(&mshv_dev); mshv_irqfd_wq_cleanup(); root_scheduler_deinit(); - if (hv_root_partition()) - mshv_root_partition_exit(); - cpuhp_remove_state(mshv_cpuhp_online); - free_percpu(mshv_root.synic_pages); + mshv_synic_exit(); } module_init(mshv_parent_partition_init); diff --git a/drivers/hv/mshv_synic.c b/drivers/hv/mshv_synic.c index 216065e21d2801..43f1bcbbf2d34d 100644 --- a/drivers/hv/mshv_synic.c +++ b/drivers/hv/mshv_synic.c @@ -10,13 +10,22 @@ #include #include #include +#include #include #include +#include +#include #include +#include #include "mshv_eventfd.h" #include "mshv.h" +static int synic_cpuhp_online; +static struct hv_synic_pages __percpu *synic_pages; +static int mshv_sint_vector = -1; /* hwirq for the SynIC SINTs */ +static int mshv_sint_irq = -1; /* Linux IRQ for mshv_sint_vector */ + static u32 synic_event_ring_get_queued_port(u32 sint_index) { struct hv_synic_event_ring_page **event_ring_page; @@ -26,7 +35,7 @@ static u32 synic_event_ring_get_queued_port(u32 sint_index) u32 message; u8 tail; - spages = this_cpu_ptr(mshv_root.synic_pages); + spages = this_cpu_ptr(synic_pages); event_ring_page = &spages->synic_event_ring_page; synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail); @@ -393,7 +402,7 @@ mshv_intercept_isr(struct hv_message *msg) void mshv_isr(void) { - struct hv_synic_pages *spages = this_cpu_ptr(mshv_root.synic_pages); + struct hv_synic_pages *spages = this_cpu_ptr(synic_pages); struct hv_message_page **msg_page = &spages->hyp_synic_message_page; struct hv_message *msg; bool handled; @@ -437,25 +446,21 @@ void mshv_isr(void) if (msg->header.message_flags.msg_pending) hv_set_non_nested_msr(HV_MSR_EOM, 0); -#ifdef HYPERVISOR_CALLBACK_VECTOR - add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR); -#endif + add_interrupt_randomness(mshv_sint_vector); } else { pr_warn_once("%s: unknown message type 0x%x\n", __func__, msg->header.message_type); } } -int mshv_synic_init(unsigned int cpu) +static int mshv_synic_cpu_init(unsigned int cpu) { union hv_synic_simp simp; union hv_synic_siefp siefp; union hv_synic_sirbp sirbp; -#ifdef HYPERVISOR_CALLBACK_VECTOR union hv_synic_sint sint; -#endif union hv_synic_scontrol sctrl; - struct hv_synic_pages *spages = this_cpu_ptr(mshv_root.synic_pages); + struct hv_synic_pages *spages = this_cpu_ptr(synic_pages); struct hv_message_page **msg_page = &spages->hyp_synic_message_page; struct hv_synic_event_flags_page **event_flags_page = &spages->synic_event_flags_page; @@ -496,10 +501,12 @@ int mshv_synic_init(unsigned int cpu) hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64); -#ifdef HYPERVISOR_CALLBACK_VECTOR + if (mshv_sint_irq != -1) + enable_percpu_irq(mshv_sint_irq, 0); + /* Enable intercepts */ sint.as_uint64 = 0; - sint.vector = HYPERVISOR_CALLBACK_VECTOR; + sint.vector = mshv_sint_vector; sint.masked = false; sint.auto_eoi = hv_recommend_using_aeoi(); hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX, @@ -507,13 +514,12 @@ int mshv_synic_init(unsigned int cpu) /* Doorbell SINT */ sint.as_uint64 = 0; - sint.vector = HYPERVISOR_CALLBACK_VECTOR; + sint.vector = mshv_sint_vector; sint.masked = false; sint.as_intercept = 1; sint.auto_eoi = hv_recommend_using_aeoi(); hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_DOORBELL_SINT_INDEX, sint.as_uint64); -#endif /* Enable global synic bit */ sctrl.as_uint64 = hv_get_non_nested_msr(HV_MSR_SCONTROL); @@ -542,14 +548,14 @@ int mshv_synic_init(unsigned int cpu) return -EFAULT; } -int mshv_synic_cleanup(unsigned int cpu) +static int mshv_synic_cpu_exit(unsigned int cpu) { union hv_synic_sint sint; union hv_synic_simp simp; union hv_synic_siefp siefp; union hv_synic_sirbp sirbp; union hv_synic_scontrol sctrl; - struct hv_synic_pages *spages = this_cpu_ptr(mshv_root.synic_pages); + struct hv_synic_pages *spages = this_cpu_ptr(synic_pages); struct hv_message_page **msg_page = &spages->hyp_synic_message_page; struct hv_synic_event_flags_page **event_flags_page = &spages->synic_event_flags_page; @@ -568,6 +574,9 @@ int mshv_synic_cleanup(unsigned int cpu) hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_DOORBELL_SINT_INDEX, sint.as_uint64); + if (mshv_sint_irq != -1) + disable_percpu_irq(mshv_sint_irq); + /* Disable Synic's event ring page */ sirbp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIRBP); sirbp.sirbp_enabled = false; @@ -663,3 +672,152 @@ mshv_unregister_doorbell(u64 partition_id, int doorbell_portid) mshv_portid_free(doorbell_portid); } + +static int mshv_synic_reboot_notify(struct notifier_block *nb, + unsigned long code, void *unused) +{ + if (!hv_root_partition()) + return 0; + + cpuhp_remove_state(synic_cpuhp_online); + return 0; +} + +static struct notifier_block mshv_synic_reboot_nb = { + .notifier_call = mshv_synic_reboot_notify, +}; + +#ifndef HYPERVISOR_CALLBACK_VECTOR +static DEFINE_PER_CPU(long, mshv_evt); + +static irqreturn_t mshv_percpu_isr(int irq, void *dev_id) +{ + mshv_isr(); + return IRQ_HANDLED; +} + +#ifdef CONFIG_ACPI +static int __init mshv_acpi_setup_sint_irq(void) +{ + return acpi_register_gsi(NULL, mshv_sint_vector, ACPI_EDGE_SENSITIVE, + ACPI_ACTIVE_HIGH); +} + +static void mshv_acpi_cleanup_sint_irq(void) +{ + acpi_unregister_gsi(mshv_sint_vector); +} +#else +static int __init mshv_acpi_setup_sint_irq(void) +{ + return -ENODEV; +} + +static void mshv_acpi_cleanup_sint_irq(void) +{ +} +#endif + +static int __init mshv_sint_vector_setup(void) +{ + int ret; + struct hv_register_assoc reg = { + .name = HV_ARM64_REGISTER_SINT_RESERVED_INTERRUPT_ID, + }; + union hv_input_vtl input_vtl = { 0 }; + + if (acpi_disabled) + return -ENODEV; + + ret = hv_call_get_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF, + 1, input_vtl, ®); + if (ret || !reg.value.reg64) + return -ENODEV; + + mshv_sint_vector = reg.value.reg64; + ret = mshv_acpi_setup_sint_irq(); + if (ret < 0) { + pr_err("Failed to setup IRQ for MSHV SINT vector %d: %d\n", + mshv_sint_vector, ret); + goto out_fail; + } + + mshv_sint_irq = ret; + + ret = request_percpu_irq(mshv_sint_irq, mshv_percpu_isr, "MSHV", + &mshv_evt); + if (ret) + goto out_unregister; + + return 0; + +out_unregister: + mshv_acpi_cleanup_sint_irq(); +out_fail: + return ret; +} + +static void mshv_sint_vector_cleanup(void) +{ + free_percpu_irq(mshv_sint_irq, &mshv_evt); + mshv_acpi_cleanup_sint_irq(); +} +#else /* !HYPERVISOR_CALLBACK_VECTOR */ +static int __init mshv_sint_vector_setup(void) +{ + mshv_sint_vector = HYPERVISOR_CALLBACK_VECTOR; + return 0; +} + +static void mshv_sint_vector_cleanup(void) +{ +} +#endif /* HYPERVISOR_CALLBACK_VECTOR */ + +int __init mshv_synic_init(struct device *dev) +{ + int ret = 0; + + ret = mshv_sint_vector_setup(); + if (ret) + return ret; + + synic_pages = alloc_percpu(struct hv_synic_pages); + if (!synic_pages) { + dev_err(dev, "Failed to allocate percpu synic page\n"); + ret = -ENOMEM; + goto sint_vector_cleanup; + } + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mshv_synic", + mshv_synic_cpu_init, + mshv_synic_cpu_exit); + if (ret < 0) { + dev_err(dev, "Failed to setup cpu hotplug state: %i\n", ret); + goto free_synic_pages; + } + + synic_cpuhp_online = ret; + + ret = register_reboot_notifier(&mshv_synic_reboot_nb); + if (ret) + goto remove_cpuhp_state; + + return 0; + +remove_cpuhp_state: + cpuhp_remove_state(synic_cpuhp_online); +free_synic_pages: + free_percpu(synic_pages); +sint_vector_cleanup: + mshv_sint_vector_cleanup(); + return ret; +} + +void mshv_synic_exit(void) +{ + unregister_reboot_notifier(&mshv_synic_reboot_nb); + cpuhp_remove_state(synic_cpuhp_online); + free_percpu(synic_pages); + mshv_sint_vector_cleanup(); +} diff --git a/drivers/hwmon/axi-fan-control.c b/drivers/hwmon/axi-fan-control.c index b7bb325c3ad966..01590dfa55e60d 100644 --- a/drivers/hwmon/axi-fan-control.c +++ b/drivers/hwmon/axi-fan-control.c @@ -507,7 +507,7 @@ static int axi_fan_control_probe(struct platform_device *pdev) ret = devm_request_threaded_irq(&pdev->dev, ctl->irq, NULL, axi_fan_control_irq_handler, IRQF_ONESHOT | IRQF_TRIGGER_HIGH, - pdev->driver_override, ctl); + NULL, ctl); if (ret) return dev_err_probe(&pdev->dev, ret, "failed to request an irq\n"); diff --git a/drivers/hwmon/max6639.c b/drivers/hwmon/max6639.c index 9a3c515efe2ed3..163d31f17bd49a 100644 --- a/drivers/hwmon/max6639.c +++ b/drivers/hwmon/max6639.c @@ -232,7 +232,7 @@ static int max6639_read_fan(struct device *dev, u32 attr, int channel, static int max6639_set_ppr(struct max6639_data *data, int channel, u8 ppr) { /* Decrement the PPR value and shift left by 6 to match the register format */ - return regmap_write(data->regmap, MAX6639_REG_FAN_PPR(channel), ppr-- << 6); + return regmap_write(data->regmap, MAX6639_REG_FAN_PPR(channel), --ppr << 6); } static int max6639_write_fan(struct device *dev, u32 attr, int channel, @@ -524,8 +524,8 @@ static int max6639_probe_child_from_dt(struct i2c_client *client, { struct device *dev = &client->dev; - u32 i; - int err, val; + u32 i, val; + int err; err = of_property_read_u32(child, "reg", &i); if (err) { @@ -540,8 +540,8 @@ static int max6639_probe_child_from_dt(struct i2c_client *client, err = of_property_read_u32(child, "pulses-per-revolution", &val); if (!err) { - if (val < 1 || val > 5) { - dev_err(dev, "invalid pulses-per-revolution %d of %pOFn\n", val, child); + if (val < 1 || val > 4) { + dev_err(dev, "invalid pulses-per-revolution %u of %pOFn\n", val, child); return -EINVAL; } data->ppr[i] = val; diff --git a/drivers/hwmon/pmbus/hac300s.c b/drivers/hwmon/pmbus/hac300s.c index 0a1d52cae91ed4..a073db1cfe2e49 100644 --- a/drivers/hwmon/pmbus/hac300s.c +++ b/drivers/hwmon/pmbus/hac300s.c @@ -58,6 +58,8 @@ static int hac300s_read_word_data(struct i2c_client *client, int page, case PMBUS_MFR_VOUT_MIN: case PMBUS_READ_VOUT: rv = pmbus_read_word_data(client, page, phase, reg); + if (rv < 0) + return rv; return FIELD_GET(LINEAR11_MANTISSA_MASK, rv); default: return -ENODATA; diff --git a/drivers/hwmon/pmbus/ina233.c b/drivers/hwmon/pmbus/ina233.c index dde1e16783943a..2d8b5a5347edc3 100644 --- a/drivers/hwmon/pmbus/ina233.c +++ b/drivers/hwmon/pmbus/ina233.c @@ -67,6 +67,8 @@ static int ina233_read_word_data(struct i2c_client *client, int page, switch (reg) { case PMBUS_VIRT_READ_VMON: ret = pmbus_read_word_data(client, 0, 0xff, MFR_READ_VSHUNT); + if (ret < 0) + return ret; /* Adjust returned value to match VIN coefficients */ /* VIN: 1.25 mV VSHUNT: 2.5 uV LSB */ diff --git a/drivers/hwmon/pmbus/isl68137.c b/drivers/hwmon/pmbus/isl68137.c index 97b61836f53a45..e7dac26b5be615 100644 --- a/drivers/hwmon/pmbus/isl68137.c +++ b/drivers/hwmon/pmbus/isl68137.c @@ -98,8 +98,11 @@ static ssize_t isl68137_avs_enable_show_page(struct i2c_client *client, { int val = pmbus_read_byte_data(client, page, PMBUS_OPERATION); - return sprintf(buf, "%d\n", - (val & ISL68137_VOUT_AVS) == ISL68137_VOUT_AVS ? 1 : 0); + if (val < 0) + return val; + + return sysfs_emit(buf, "%d\n", + (val & ISL68137_VOUT_AVS) == ISL68137_VOUT_AVS); } static ssize_t isl68137_avs_enable_store_page(struct i2c_client *client, diff --git a/drivers/hwmon/pmbus/mp2869.c b/drivers/hwmon/pmbus/mp2869.c index cc69a1e91dfe8a..4647892e511214 100644 --- a/drivers/hwmon/pmbus/mp2869.c +++ b/drivers/hwmon/pmbus/mp2869.c @@ -165,7 +165,7 @@ static int mp2869_read_byte_data(struct i2c_client *client, int page, int reg) { const struct pmbus_driver_info *info = pmbus_get_driver_info(client); struct mp2869_data *data = to_mp2869_data(info); - int ret; + int ret, mfr; switch (reg) { case PMBUS_VOUT_MODE: @@ -188,11 +188,14 @@ static int mp2869_read_byte_data(struct i2c_client *client, int page, int reg) if (ret < 0) return ret; + mfr = pmbus_read_byte_data(client, page, + PMBUS_STATUS_MFR_SPECIFIC); + if (mfr < 0) + return mfr; + ret = (ret & ~GENMASK(2, 2)) | FIELD_PREP(GENMASK(2, 2), - FIELD_GET(GENMASK(1, 1), - pmbus_read_byte_data(client, page, - PMBUS_STATUS_MFR_SPECIFIC))); + FIELD_GET(GENMASK(1, 1), mfr)); break; case PMBUS_STATUS_TEMPERATURE: /* @@ -207,15 +210,16 @@ static int mp2869_read_byte_data(struct i2c_client *client, int page, int reg) if (ret < 0) return ret; + mfr = pmbus_read_byte_data(client, page, + PMBUS_STATUS_MFR_SPECIFIC); + if (mfr < 0) + return mfr; + ret = (ret & ~GENMASK(7, 6)) | FIELD_PREP(GENMASK(6, 6), - FIELD_GET(GENMASK(1, 1), - pmbus_read_byte_data(client, page, - PMBUS_STATUS_MFR_SPECIFIC))) | + FIELD_GET(GENMASK(1, 1), mfr)) | FIELD_PREP(GENMASK(7, 7), - FIELD_GET(GENMASK(1, 1), - pmbus_read_byte_data(client, page, - PMBUS_STATUS_MFR_SPECIFIC))); + FIELD_GET(GENMASK(1, 1), mfr)); break; default: ret = -ENODATA; @@ -230,7 +234,7 @@ static int mp2869_read_word_data(struct i2c_client *client, int page, int phase, { const struct pmbus_driver_info *info = pmbus_get_driver_info(client); struct mp2869_data *data = to_mp2869_data(info); - int ret; + int ret, mfr; switch (reg) { case PMBUS_STATUS_WORD: @@ -246,11 +250,14 @@ static int mp2869_read_word_data(struct i2c_client *client, int page, int phase, if (ret < 0) return ret; + mfr = pmbus_read_byte_data(client, page, + PMBUS_STATUS_MFR_SPECIFIC); + if (mfr < 0) + return mfr; + ret = (ret & ~GENMASK(2, 2)) | FIELD_PREP(GENMASK(2, 2), - FIELD_GET(GENMASK(1, 1), - pmbus_read_byte_data(client, page, - PMBUS_STATUS_MFR_SPECIFIC))); + FIELD_GET(GENMASK(1, 1), mfr)); break; case PMBUS_READ_VIN: /* diff --git a/drivers/hwmon/pmbus/mp2975.c b/drivers/hwmon/pmbus/mp2975.c index c31982d8519623..d0bc47b12cb07d 100644 --- a/drivers/hwmon/pmbus/mp2975.c +++ b/drivers/hwmon/pmbus/mp2975.c @@ -313,6 +313,8 @@ static int mp2973_read_word_data(struct i2c_client *client, int page, case PMBUS_STATUS_WORD: /* MP2973 & MP2971 return PGOOD instead of PB_STATUS_POWER_GOOD_N. */ ret = pmbus_read_word_data(client, page, phase, reg); + if (ret < 0) + return ret; ret ^= PB_STATUS_POWER_GOOD_N; break; case PMBUS_OT_FAULT_LIMIT: diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index e11d50750e635d..7cb6b9b864a74e 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -1213,6 +1213,8 @@ config I2C_TEGRA tristate "NVIDIA Tegra internal I2C controller" depends on ARCH_TEGRA || (COMPILE_TEST && (ARC || ARM || ARM64 || M68K || RISCV || SUPERH || SPARC)) # COMPILE_TEST needs architectures with readsX()/writesX() primitives + depends on PINCTRL + # ARCH_TEGRA implies PINCTRL, but the COMPILE_TEST side doesn't. help If you say yes to this option, support will be included for the I2C controller embedded in NVIDIA Tegra SOCs diff --git a/drivers/i2c/busses/i2c-cp2615.c b/drivers/i2c/busses/i2c-cp2615.c index e2d7cd2390fc4a..8212875700e15d 100644 --- a/drivers/i2c/busses/i2c-cp2615.c +++ b/drivers/i2c/busses/i2c-cp2615.c @@ -298,6 +298,9 @@ cp2615_i2c_probe(struct usb_interface *usbif, const struct usb_device_id *id) if (!adap) return -ENOMEM; + if (!usbdev->serial) + return -EINVAL; + strscpy(adap->name, usbdev->serial, sizeof(adap->name)); adap->owner = THIS_MODULE; adap->dev.parent = &usbif->dev; diff --git a/drivers/i2c/busses/i2c-fsi.c b/drivers/i2c/busses/i2c-fsi.c index 82c87e04ac6fee..b2dc5ae1d0e4b8 100644 --- a/drivers/i2c/busses/i2c-fsi.c +++ b/drivers/i2c/busses/i2c-fsi.c @@ -729,6 +729,7 @@ static int fsi_i2c_probe(struct fsi_device *fsi_dev) rc = i2c_add_adapter(&port->adapter); if (rc < 0) { dev_err(dev, "Failed to register adapter: %d\n", rc); + of_node_put(np); kfree(port); continue; } diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c index 09af3b3625f110..f55840b2eb9ab7 100644 --- a/drivers/i2c/busses/i2c-pxa.c +++ b/drivers/i2c/busses/i2c-pxa.c @@ -268,6 +268,7 @@ struct pxa_i2c { struct pinctrl *pinctrl; struct pinctrl_state *pinctrl_default; struct pinctrl_state *pinctrl_recovery; + bool reset_before_xfer; }; #define _IBMR(i2c) ((i2c)->reg_ibmr) @@ -1144,6 +1145,11 @@ static int i2c_pxa_xfer(struct i2c_adapter *adap, { struct pxa_i2c *i2c = adap->algo_data; + if (i2c->reset_before_xfer) { + i2c_pxa_reset(i2c); + i2c->reset_before_xfer = false; + } + return i2c_pxa_internal_xfer(i2c, msgs, num, i2c_pxa_do_xfer); } @@ -1521,7 +1527,16 @@ static int i2c_pxa_probe(struct platform_device *dev) } } - i2c_pxa_reset(i2c); + /* + * Skip reset on Armada 3700 when recovery is used to avoid + * controller hang due to the pinctrl state changes done by + * the generic recovery initialization code. The reset will + * be performed later, prior to the first transfer. + */ + if (i2c_type == REGS_A3700 && i2c->adap.bus_recovery_info) + i2c->reset_before_xfer = true; + else + i2c_pxa_reset(i2c); ret = i2c_add_numbered_adapter(&i2c->adap); if (ret < 0) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index bec619b9af4e33..4eaeb395d5db19 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -2047,8 +2047,11 @@ static int tegra_i2c_probe(struct platform_device *pdev) * * VI I2C device shouldn't be marked as IRQ-safe because VI I2C won't * be used for atomic transfers. ACPI device is not IRQ safe also. + * + * Devices with pinctrl states cannot be marked IRQ-safe as the pinctrl + * state transitions during runtime PM require mutexes. */ - if (!IS_VI(i2c_dev) && !has_acpi_companion(i2c_dev->dev)) + if (!IS_VI(i2c_dev) && !has_acpi_companion(i2c_dev->dev) && !i2c_dev->dev->pins) pm_runtime_irq_safe(i2c_dev->dev); pm_runtime_enable(i2c_dev->dev); diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index cff4fcca2c345a..edc34c69f0f235 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -55,7 +55,8 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d if (dirty) ib_dma_unmap_sgtable_attrs(dev, &umem->sgt_append.sgt, - DMA_BIDIRECTIONAL, 0); + DMA_BIDIRECTIONAL, + DMA_ATTR_REQUIRE_COHERENT); for_each_sgtable_sg(&umem->sgt_append.sgt, sg, i) { unpin_user_page_range_dirty_lock(sg_page(sg), @@ -169,7 +170,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, unsigned long lock_limit; unsigned long new_pinned; unsigned long cur_base; - unsigned long dma_attr = 0; + unsigned long dma_attr = DMA_ATTR_REQUIRE_COHERENT; struct mm_struct *mm; unsigned long npages; int pinned, ret; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 81c4d77338722e..760d5f4623b553 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2909,8 +2909,21 @@ static struct iommu_domain blocked_domain = { static struct protection_domain identity_domain; +static int amd_iommu_identity_attach(struct iommu_domain *dom, struct device *dev, + struct iommu_domain *old) +{ + /* + * Don't allow attaching a device to the identity domain if SNP is + * enabled. + */ + if (amd_iommu_snp_en) + return -EINVAL; + + return amd_iommu_attach_device(dom, dev, old); +} + static const struct iommu_domain_ops identity_domain_ops = { - .attach_dev = amd_iommu_attach_device, + .attach_dev = amd_iommu_identity_attach, }; void amd_iommu_init_identity_domain(void) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 5dac64be61bb27..94d5141696424d 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1211,7 +1211,7 @@ dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size, */ if (dev_use_swiotlb(dev, size, dir) && iova_unaligned(iovad, phys, size)) { - if (attrs & DMA_ATTR_MMIO) + if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) return DMA_MAPPING_ERROR; phys = iommu_dma_map_swiotlb(dev, phys, size, dir, attrs); @@ -1223,7 +1223,8 @@ dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size, arch_sync_dma_for_device(phys, size, dir); iova = __iommu_dma_map(dev, phys, size, prot, dma_mask); - if (iova == DMA_MAPPING_ERROR && !(attrs & DMA_ATTR_MMIO)) + if (iova == DMA_MAPPING_ERROR && + !(attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))) swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); return iova; } @@ -1233,7 +1234,7 @@ void iommu_dma_unmap_phys(struct device *dev, dma_addr_t dma_handle, { phys_addr_t phys; - if (attrs & DMA_ATTR_MMIO) { + if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) { __iommu_dma_unmap(dev, dma_handle, size); return; } @@ -1945,9 +1946,21 @@ int dma_iova_link(struct device *dev, struct dma_iova_state *state, if (WARN_ON_ONCE(iova_start_pad && offset > 0)) return -EIO; + /* + * DMA_IOVA_USE_SWIOTLB is set on state after some entry + * took SWIOTLB path, which we were supposed to prevent + * for DMA_ATTR_REQUIRE_COHERENT attribute. + */ + if (WARN_ON_ONCE((state->__size & DMA_IOVA_USE_SWIOTLB) && + (attrs & DMA_ATTR_REQUIRE_COHERENT))) + return -EOPNOTSUPP; + + if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT)) + return -EOPNOTSUPP; + if (dev_use_swiotlb(dev, size, dir) && iova_unaligned(iovad, phys, size)) { - if (attrs & DMA_ATTR_MMIO) + if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) return -EPERM; return iommu_dma_iova_link_swiotlb(dev, state, phys, offset, diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index d68c06025cac2f..69222dbd2af0ea 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1314,7 +1314,6 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) if (fault & DMA_FSTS_ITE) { head = readl(iommu->reg + DMAR_IQH_REG); head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH; - head |= 1; tail = readl(iommu->reg + DMAR_IQT_REG); tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH; @@ -1331,7 +1330,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) do { if (qi->desc_status[head] == QI_IN_USE) qi->desc_status[head] = QI_ABORT; - head = (head - 2 + QI_LENGTH) % QI_LENGTH; + head = (head - 1 + QI_LENGTH) % QI_LENGTH; } while (head != tail); /* diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index fea10acd4f021f..57cd1db7207ab9 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -164,9 +164,12 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain, if (IS_ERR(dev_pasid)) return PTR_ERR(dev_pasid); - ret = iopf_for_domain_replace(domain, old, dev); - if (ret) - goto out_remove_dev_pasid; + /* SVA with non-IOMMU/PRI IOPF handling is allowed. */ + if (info->pri_supported) { + ret = iopf_for_domain_replace(domain, old, dev); + if (ret) + goto out_remove_dev_pasid; + } /* Setup the pasid table: */ sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0; @@ -181,7 +184,8 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain, return 0; out_unwind_iopf: - iopf_for_domain_replace(old, domain, dev); + if (info->pri_supported) + iopf_for_domain_replace(old, domain, dev); out_remove_dev_pasid: domain_remove_dev_pasid(domain, dev, pasid); return ret; diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 07d64908a05fd7..bc7c7232a43e2d 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -182,13 +182,13 @@ void iommu_sva_unbind_device(struct iommu_sva *handle) iommu_detach_device_pasid(domain, dev, iommu_mm->pasid); if (--domain->users == 0) { list_del(&domain->next); - iommu_domain_free(domain); - } + if (list_empty(&iommu_mm->sva_domains)) { + list_del(&iommu_mm->mm_list_elm); + if (list_empty(&iommu_sva_mms)) + iommu_sva_present = false; + } - if (list_empty(&iommu_mm->sva_domains)) { - list_del(&iommu_mm->mm_list_elm); - if (list_empty(&iommu_sva_mms)) - iommu_sva_present = false; + iommu_domain_free(domain); } mutex_unlock(&iommu_sva_lock); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 35db5178095404..50718ab810a413 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1213,7 +1213,11 @@ static int iommu_create_device_direct_mappings(struct iommu_domain *domain, if (addr == end) goto map_end; - phys_addr = iommu_iova_to_phys(domain, addr); + /* + * Return address by iommu_iova_to_phys for 0 is + * ambiguous. Offset to address 1 if addr is 0. + */ + phys_addr = iommu_iova_to_phys(domain, addr ? addr : 1); if (!phys_addr) { map_size += pg_size; continue; diff --git a/drivers/irqchip/irq-riscv-rpmi-sysmsi.c b/drivers/irqchip/irq-riscv-rpmi-sysmsi.c index 5c74c561ce3161..612f3972f7af03 100644 --- a/drivers/irqchip/irq-riscv-rpmi-sysmsi.c +++ b/drivers/irqchip/irq-riscv-rpmi-sysmsi.c @@ -250,6 +250,7 @@ static int rpmi_sysmsi_probe(struct platform_device *pdev) rc = riscv_acpi_get_gsi_info(fwnode, &priv->gsi_base, &id, &nr_irqs, NULL); if (rc) { + mbox_free_channel(priv->chan); dev_err(dev, "failed to find GSI mapping\n"); return rc; } diff --git a/drivers/media/mc/mc-request.c b/drivers/media/mc/mc-request.c index c7c7d4a86c6bc5..13e77648807c29 100644 --- a/drivers/media/mc/mc-request.c +++ b/drivers/media/mc/mc-request.c @@ -192,6 +192,8 @@ static long media_request_ioctl_reinit(struct media_request *req) struct media_device *mdev = req->mdev; unsigned long flags; + mutex_lock(&mdev->req_queue_mutex); + spin_lock_irqsave(&req->lock, flags); if (req->state != MEDIA_REQUEST_STATE_IDLE && req->state != MEDIA_REQUEST_STATE_COMPLETE) { @@ -199,6 +201,7 @@ static long media_request_ioctl_reinit(struct media_request *req) "request: %s not in idle or complete state, cannot reinit\n", req->debug_str); spin_unlock_irqrestore(&req->lock, flags); + mutex_unlock(&mdev->req_queue_mutex); return -EBUSY; } if (req->access_count) { @@ -206,6 +209,7 @@ static long media_request_ioctl_reinit(struct media_request *req) "request: %s is being accessed, cannot reinit\n", req->debug_str); spin_unlock_irqrestore(&req->lock, flags); + mutex_unlock(&mdev->req_queue_mutex); return -EBUSY; } req->state = MEDIA_REQUEST_STATE_CLEANING; @@ -216,6 +220,7 @@ static long media_request_ioctl_reinit(struct media_request *req) spin_lock_irqsave(&req->lock, flags); req->state = MEDIA_REQUEST_STATE_IDLE; spin_unlock_irqrestore(&req->lock, flags); + mutex_unlock(&mdev->req_queue_mutex); return 0; } diff --git a/drivers/media/platform/rockchip/rkvdec/rkvdec-hevc-common.c b/drivers/media/platform/rockchip/rkvdec/rkvdec-hevc-common.c index 28267ee3019030..3119f3bc9f98bd 100644 --- a/drivers/media/platform/rockchip/rkvdec/rkvdec-hevc-common.c +++ b/drivers/media/platform/rockchip/rkvdec/rkvdec-hevc-common.c @@ -500,11 +500,15 @@ void rkvdec_hevc_run_preamble(struct rkvdec_ctx *ctx, ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS); run->ext_sps_st_rps = ctrl ? ctrl->p_cur.p : NULL; + } else { + run->ext_sps_st_rps = NULL; } if (ctx->has_sps_lt_rps) { ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, V4L2_CID_STATELESS_HEVC_EXT_SPS_LT_RPS); run->ext_sps_lt_rps = ctrl ? ctrl->p_cur.p : NULL; + } else { + run->ext_sps_lt_rps = NULL; } rkvdec_run_preamble(ctx, &run->base); diff --git a/drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c b/drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c index 97f1efde2e47e6..fb4f849d736684 100644 --- a/drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c +++ b/drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c @@ -130,7 +130,7 @@ struct rkvdec_h264_ctx { struct vdpu383_regs_h26x regs; }; -static void set_field_order_cnt(struct rkvdec_pps *pps, const struct v4l2_h264_dpb_entry *dpb) +static noinline_for_stack void set_field_order_cnt(struct rkvdec_pps *pps, const struct v4l2_h264_dpb_entry *dpb) { pps->top_field_order_cnt0 = dpb[0].top_field_order_cnt; pps->bot_field_order_cnt0 = dpb[0].bottom_field_order_cnt; @@ -166,6 +166,31 @@ static void set_field_order_cnt(struct rkvdec_pps *pps, const struct v4l2_h264_d pps->bot_field_order_cnt15 = dpb[15].bottom_field_order_cnt; } +static noinline_for_stack void set_dec_params(struct rkvdec_pps *pps, const struct v4l2_ctrl_h264_decode_params *dec_params) +{ + const struct v4l2_h264_dpb_entry *dpb = dec_params->dpb; + + for (int i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) { + if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) + pps->is_longterm |= (1 << i); + pps->ref_field_flags |= + (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD)) << i; + pps->ref_colmv_use_flag |= + (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) << i; + pps->ref_topfield_used |= + (!!(dpb[i].fields & V4L2_H264_TOP_FIELD_REF)) << i; + pps->ref_botfield_used |= + (!!(dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF)) << i; + } + pps->pic_field_flag = + !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC); + pps->pic_associated_flag = + !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD); + + pps->cur_top_field = dec_params->top_field_order_cnt; + pps->cur_bot_field = dec_params->bottom_field_order_cnt; +} + static void assemble_hw_pps(struct rkvdec_ctx *ctx, struct rkvdec_h264_run *run) { @@ -177,7 +202,6 @@ static void assemble_hw_pps(struct rkvdec_ctx *ctx, struct rkvdec_h264_priv_tbl *priv_tbl = h264_ctx->priv_tbl.cpu; struct rkvdec_sps_pps *hw_ps; u32 pic_width, pic_height; - u32 i; /* * HW read the SPS/PPS information from PPS packet index by PPS id. @@ -261,28 +285,8 @@ static void assemble_hw_pps(struct rkvdec_ctx *ctx, !!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT); set_field_order_cnt(&hw_ps->pps, dpb); + set_dec_params(&hw_ps->pps, dec_params); - for (i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) { - if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) - hw_ps->pps.is_longterm |= (1 << i); - - hw_ps->pps.ref_field_flags |= - (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD)) << i; - hw_ps->pps.ref_colmv_use_flag |= - (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) << i; - hw_ps->pps.ref_topfield_used |= - (!!(dpb[i].fields & V4L2_H264_TOP_FIELD_REF)) << i; - hw_ps->pps.ref_botfield_used |= - (!!(dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF)) << i; - } - - hw_ps->pps.pic_field_flag = - !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC); - hw_ps->pps.pic_associated_flag = - !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD); - - hw_ps->pps.cur_top_field = dec_params->top_field_order_cnt; - hw_ps->pps.cur_bot_field = dec_params->bottom_field_order_cnt; } static void rkvdec_write_regs(struct rkvdec_ctx *ctx) diff --git a/drivers/media/platform/rockchip/rkvdec/rkvdec-vp9.c b/drivers/media/platform/rockchip/rkvdec/rkvdec-vp9.c index e4cdd2122873e3..2751f5396ee801 100644 --- a/drivers/media/platform/rockchip/rkvdec/rkvdec-vp9.c +++ b/drivers/media/platform/rockchip/rkvdec/rkvdec-vp9.c @@ -893,7 +893,8 @@ static void rkvdec_vp9_done(struct rkvdec_ctx *ctx, update_ctx_last_info(vp9_ctx); } -static void rkvdec_init_v4l2_vp9_count_tbl(struct rkvdec_ctx *ctx) +static noinline_for_stack void +rkvdec_init_v4l2_vp9_count_tbl(struct rkvdec_ctx *ctx) { struct rkvdec_vp9_ctx *vp9_ctx = ctx->priv; struct rkvdec_vp9_intra_frame_symbol_counts *intra_cnts = vp9_ctx->count_tbl.cpu; diff --git a/drivers/media/platform/synopsys/Kconfig b/drivers/media/platform/synopsys/Kconfig index e798ec00b18975..bf2ac092fbb39d 100644 --- a/drivers/media/platform/synopsys/Kconfig +++ b/drivers/media/platform/synopsys/Kconfig @@ -7,6 +7,7 @@ config VIDEO_DW_MIPI_CSI2RX depends on VIDEO_DEV depends on V4L_PLATFORM_DRIVERS depends on PM && COMMON_CLK + select GENERIC_PHY_MIPI_DPHY select MEDIA_CONTROLLER select V4L2_FWNODE select VIDEO_V4L2_SUBDEV_API diff --git a/drivers/media/platform/synopsys/dw-mipi-csi2rx.c b/drivers/media/platform/synopsys/dw-mipi-csi2rx.c index 170346ae1a599e..4d96171a650b6d 100644 --- a/drivers/media/platform/synopsys/dw-mipi-csi2rx.c +++ b/drivers/media/platform/synopsys/dw-mipi-csi2rx.c @@ -301,7 +301,7 @@ dw_mipi_csi2rx_enum_mbus_code(struct v4l2_subdev *sd, return 0; case DW_MIPI_CSI2RX_PAD_SINK: - if (code->index > csi2->formats_num) + if (code->index >= csi2->formats_num) return -EINVAL; code->code = csi2->formats[code->index].code; diff --git a/drivers/media/platform/verisilicon/imx8m_vpu_hw.c b/drivers/media/platform/verisilicon/imx8m_vpu_hw.c index 6f8e43b7f1575b..fa4224de4b996a 100644 --- a/drivers/media/platform/verisilicon/imx8m_vpu_hw.c +++ b/drivers/media/platform/verisilicon/imx8m_vpu_hw.c @@ -343,7 +343,7 @@ const struct hantro_variant imx8mq_vpu_variant = { .num_regs = ARRAY_SIZE(imx8mq_reg_names) }; -static const struct of_device_id imx8mq_vpu_shared_resources[] __initconst = { +static const struct of_device_id imx8mq_vpu_shared_resources[] = { { .compatible = "nxp,imx8mq-vpu-g1", }, { .compatible = "nxp,imx8mq-vpu-g2", }, { /* sentinel */ } diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 37d33d4a363d7e..a2b650f4ec3c32 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -3082,13 +3082,14 @@ static long __video_do_ioctl(struct file *file, } /* - * We need to serialize streamon/off with queueing new requests. + * We need to serialize streamon/off/reqbufs with queueing new requests. * These ioctls may trigger the cancellation of a streaming * operation, and that should not be mixed with queueing a new * request at the same time. */ if (v4l2_device_supports_requests(vfd->v4l2_dev) && - (cmd == VIDIOC_STREAMON || cmd == VIDIOC_STREAMOFF)) { + (cmd == VIDIOC_STREAMON || cmd == VIDIOC_STREAMOFF || + cmd == VIDIOC_REQBUFS)) { req_queue_lock = &vfd->v4l2_dev->mdev->req_queue_mutex; if (mutex_lock_interruptible(req_queue_lock)) diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index b0f91cc9e40e43..6e4084407662a1 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -68,6 +68,9 @@ #define GLI_9750_MISC_TX1_DLY_VALUE 0x5 #define SDHCI_GLI_9750_MISC_SSC_OFF BIT(26) +#define SDHCI_GLI_9750_GM_BURST_SIZE 0x510 +#define SDHCI_GLI_9750_GM_BURST_SIZE_R_OSRC_LMT GENMASK(17, 16) + #define SDHCI_GLI_9750_TUNING_CONTROL 0x540 #define SDHCI_GLI_9750_TUNING_CONTROL_EN BIT(4) #define GLI_9750_TUNING_CONTROL_EN_ON 0x1 @@ -345,10 +348,16 @@ static void gli_set_9750(struct sdhci_host *host) u32 misc_value; u32 parameter_value; u32 control_value; + u32 burst_value; u16 ctrl2; gl9750_wt_on(host); + /* clear R_OSRC_Lmt to avoid DMA write corruption */ + burst_value = sdhci_readl(host, SDHCI_GLI_9750_GM_BURST_SIZE); + burst_value &= ~SDHCI_GLI_9750_GM_BURST_SIZE_R_OSRC_LMT; + sdhci_writel(host, burst_value, SDHCI_GLI_9750_GM_BURST_SIZE); + driving_value = sdhci_readl(host, SDHCI_GLI_9750_DRIVING); pll_value = sdhci_readl(host, SDHCI_GLI_9750_PLL); sw_ctrl_value = sdhci_readl(host, SDHCI_GLI_9750_SW_CTRL); diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index ac7e11f37af71f..fec9329e1edbed 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -4532,8 +4532,15 @@ int sdhci_setup_host(struct sdhci_host *host) * their platform code before calling sdhci_add_host(), and we * won't assume 8-bit width for hosts without that CAP. */ - if (!(host->quirks & SDHCI_QUIRK_FORCE_1_BIT_DATA)) + if (host->quirks & SDHCI_QUIRK_FORCE_1_BIT_DATA) { + host->caps1 &= ~(SDHCI_SUPPORT_SDR104 | SDHCI_SUPPORT_SDR50 | SDHCI_SUPPORT_DDR50); + if (host->quirks2 & SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400) + host->caps1 &= ~SDHCI_SUPPORT_HS400; + mmc->caps2 &= ~(MMC_CAP2_HS200 | MMC_CAP2_HS400 | MMC_CAP2_HS400_ES); + mmc->caps &= ~(MMC_CAP_DDR | MMC_CAP_UHS); + } else { mmc->caps |= MMC_CAP_4_BIT_DATA; + } if (host->quirks2 & SDHCI_QUIRK2_HOST_NO_CMD23) mmc->caps &= ~MMC_CAP_CMD23; diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c index 0427d76f45d0a6..5b9dadd5405e3b 100644 --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c @@ -2350,14 +2350,12 @@ static int brcmnand_write(struct mtd_info *mtd, struct nand_chip *chip, for (i = 0; i < ctrl->max_oob; i += 4) oob_reg_write(ctrl, i, 0xffffffff); - if (mtd->oops_panic_write) + if (mtd->oops_panic_write) { /* switch to interrupt polling and PIO mode */ disable_ctrl_irqs(ctrl); - - if (use_dma(ctrl) && (has_edu(ctrl) || !oob) && flash_dma_buf_ok(buf)) { + } else if (use_dma(ctrl) && (has_edu(ctrl) || !oob) && flash_dma_buf_ok(buf)) { if (ctrl->dma_trans(host, addr, (u32 *)buf, oob, mtd->writesize, CMD_PROGRAM_PAGE)) - ret = -EIO; goto out; diff --git a/drivers/mtd/nand/raw/cadence-nand-controller.c b/drivers/mtd/nand/raw/cadence-nand-controller.c index 99135ec230105e..d53b35a8b3cb24 100644 --- a/drivers/mtd/nand/raw/cadence-nand-controller.c +++ b/drivers/mtd/nand/raw/cadence-nand-controller.c @@ -3133,7 +3133,7 @@ static int cadence_nand_init(struct cdns_nand_ctrl *cdns_ctrl) sizeof(*cdns_ctrl->cdma_desc), &cdns_ctrl->dma_cdma_desc, GFP_KERNEL); - if (!cdns_ctrl->dma_cdma_desc) + if (!cdns_ctrl->cdma_desc) return -ENOMEM; cdns_ctrl->buf_size = SZ_16K; diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 38429363251ca7..dfd8361bdd3626 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -4737,11 +4737,16 @@ static void nand_shutdown(struct mtd_info *mtd) static int nand_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) { struct nand_chip *chip = mtd_to_nand(mtd); + int ret; if (!chip->ops.lock_area) return -ENOTSUPP; - return chip->ops.lock_area(chip, ofs, len); + nand_get_device(chip); + ret = chip->ops.lock_area(chip, ofs, len); + nand_release_device(chip); + + return ret; } /** @@ -4753,11 +4758,16 @@ static int nand_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) static int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) { struct nand_chip *chip = mtd_to_nand(mtd); + int ret; if (!chip->ops.unlock_area) return -ENOTSUPP; - return chip->ops.unlock_area(chip, ofs, len); + nand_get_device(chip); + ret = chip->ops.unlock_area(chip, ofs, len); + nand_release_device(chip); + + return ret; } /* Set default functions */ diff --git a/drivers/mtd/nand/raw/pl35x-nand-controller.c b/drivers/mtd/nand/raw/pl35x-nand-controller.c index 947fd86ac5fa2b..f2c65eb7a8d9aa 100644 --- a/drivers/mtd/nand/raw/pl35x-nand-controller.c +++ b/drivers/mtd/nand/raw/pl35x-nand-controller.c @@ -862,6 +862,9 @@ static int pl35x_nfc_setup_interface(struct nand_chip *chip, int cs, PL35X_SMC_NAND_TAR_CYCLES(tmgs.t_ar) | PL35X_SMC_NAND_TRR_CYCLES(tmgs.t_rr); + writel(plnand->timings, nfc->conf_regs + PL35X_SMC_CYCLES); + pl35x_smc_update_regs(nfc); + return 0; } diff --git a/drivers/mtd/parsers/redboot.c b/drivers/mtd/parsers/redboot.c index 558905160ddbaa..bf162c44eafe0c 100644 --- a/drivers/mtd/parsers/redboot.c +++ b/drivers/mtd/parsers/redboot.c @@ -270,9 +270,9 @@ static int parse_redboot_partitions(struct mtd_info *master, strcpy(names, fl->img->name); #ifdef CONFIG_MTD_REDBOOT_PARTS_READONLY - if (!memcmp(names, "RedBoot", 8) || - !memcmp(names, "RedBoot config", 15) || - !memcmp(names, "FIS directory", 14)) { + if (!strcmp(names, "RedBoot") || + !strcmp(names, "RedBoot config") || + !strcmp(names, "FIS directory")) { parts[i].mask_flags = MTD_WRITEABLE; } #endif diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index 8ffeb41c3e08a6..1eee519c01e5ca 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -2345,15 +2345,15 @@ int spi_nor_hwcaps_pp2cmd(u32 hwcaps) } /** - * spi_nor_spimem_check_op - check if the operation is supported - * by controller + * spi_nor_spimem_check_read_pp_op - check if a read or a page program operation is + * supported by controller *@nor: pointer to a 'struct spi_nor' *@op: pointer to op template to be checked * * Returns 0 if operation is supported, -EOPNOTSUPP otherwise. */ -static int spi_nor_spimem_check_op(struct spi_nor *nor, - struct spi_mem_op *op) +static int spi_nor_spimem_check_read_pp_op(struct spi_nor *nor, + struct spi_mem_op *op) { /* * First test with 4 address bytes. The opcode itself might @@ -2396,7 +2396,7 @@ static int spi_nor_spimem_check_readop(struct spi_nor *nor, if (spi_nor_protocol_is_dtr(nor->read_proto)) op.dummy.nbytes *= 2; - return spi_nor_spimem_check_op(nor, &op); + return spi_nor_spimem_check_read_pp_op(nor, &op); } /** @@ -2414,7 +2414,7 @@ static int spi_nor_spimem_check_pp(struct spi_nor *nor, spi_nor_spimem_setup_op(nor, &op, pp->proto); - return spi_nor_spimem_check_op(nor, &op); + return spi_nor_spimem_check_read_pp_op(nor, &op); } /** @@ -2466,7 +2466,7 @@ spi_nor_spimem_adjust_hwcaps(struct spi_nor *nor, u32 *hwcaps) spi_nor_spimem_setup_op(nor, &op, nor->reg_proto); - if (spi_nor_spimem_check_op(nor, &op)) + if (!spi_mem_supports_op(nor->spimem, &op)) nor->flags |= SNOR_F_NO_READ_CR; } } diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c index 8adbec7c5084aa..8967b65f6d8408 100644 --- a/drivers/net/bonding/bond_debugfs.c +++ b/drivers/net/bonding/bond_debugfs.c @@ -34,11 +34,17 @@ static int bond_debug_rlb_hash_show(struct seq_file *m, void *v) for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->used_next) { client_info = &(bond_info->rx_hashtbl[hash_index]); - seq_printf(m, "%-15pI4 %-15pI4 %-17pM %s\n", - &client_info->ip_src, - &client_info->ip_dst, - &client_info->mac_dst, - client_info->slave->dev->name); + if (client_info->slave) + seq_printf(m, "%-15pI4 %-15pI4 %-17pM %s\n", + &client_info->ip_src, + &client_info->ip_dst, + &client_info->mac_dst, + client_info->slave->dev->name); + else + seq_printf(m, "%-15pI4 %-15pI4 %-17pM (none)\n", + &client_info->ip_src, + &client_info->ip_dst, + &client_info->mac_dst); } spin_unlock_bh(&bond->mode_lock); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 707419270ebf21..33f414d03ab913 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1530,9 +1530,11 @@ static int bond_header_create(struct sk_buff *skb, struct net_device *bond_dev, return ret; } -static int bond_header_parse(const struct sk_buff *skb, unsigned char *haddr) +static int bond_header_parse(const struct sk_buff *skb, + const struct net_device *dev, + unsigned char *haddr) { - struct bonding *bond = netdev_priv(skb->dev); + struct bonding *bond = netdev_priv(dev); const struct header_ops *slave_ops; struct slave *slave; int ret = 0; @@ -1542,7 +1544,7 @@ static int bond_header_parse(const struct sk_buff *skb, unsigned char *haddr) if (slave) { slave_ops = READ_ONCE(slave->dev->header_ops); if (slave_ops && slave_ops->parse) - ret = slave_ops->parse(skb, haddr); + ret = slave_ops->parse(skb, slave->dev, haddr); } rcu_read_unlock(); return ret; diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c index 0498198a469658..766d455950f55d 100644 --- a/drivers/net/can/dev/netlink.c +++ b/drivers/net/can/dev/netlink.c @@ -601,7 +601,9 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], /* We need synchronization with dev->stop() */ ASSERT_RTNL(); - can_ctrlmode_changelink(dev, data, extack); + err = can_ctrlmode_changelink(dev, data, extack); + if (err) + return err; if (data[IFLA_CAN_BITTIMING]) { struct can_bittiming bt; diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index bb7782582f4014..0d0190ae094a1b 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -1225,7 +1225,11 @@ static int mcp251x_open(struct net_device *net) } mutex_lock(&priv->mcp_lock); - mcp251x_power_enable(priv->transceiver, 1); + ret = mcp251x_power_enable(priv->transceiver, 1); + if (ret) { + dev_err(&spi->dev, "failed to enable transceiver power: %pe\n", ERR_PTR(ret)); + goto out_close_candev; + } priv->force_quit = 0; priv->tx_skb = NULL; @@ -1272,6 +1276,7 @@ static int mcp251x_open(struct net_device *net) mcp251x_hw_sleep(spi); out_close: mcp251x_power_enable(priv->transceiver, 0); +out_close_candev: close_candev(net); mutex_unlock(&priv->mcp_lock); if (release_irq) @@ -1516,11 +1521,25 @@ static int __maybe_unused mcp251x_can_resume(struct device *dev) { struct spi_device *spi = to_spi_device(dev); struct mcp251x_priv *priv = spi_get_drvdata(spi); + int ret = 0; - if (priv->after_suspend & AFTER_SUSPEND_POWER) - mcp251x_power_enable(priv->power, 1); - if (priv->after_suspend & AFTER_SUSPEND_UP) - mcp251x_power_enable(priv->transceiver, 1); + if (priv->after_suspend & AFTER_SUSPEND_POWER) { + ret = mcp251x_power_enable(priv->power, 1); + if (ret) { + dev_err(dev, "failed to restore power: %pe\n", ERR_PTR(ret)); + return ret; + } + } + + if (priv->after_suspend & AFTER_SUSPEND_UP) { + ret = mcp251x_power_enable(priv->transceiver, 1); + if (ret) { + dev_err(dev, "failed to restore transceiver power: %pe\n", ERR_PTR(ret)); + if (priv->after_suspend & AFTER_SUSPEND_POWER) + mcp251x_power_enable(priv->power, 0); + return ret; + } + } if (priv->after_suspend & (AFTER_SUSPEND_POWER | AFTER_SUSPEND_UP)) queue_work(priv->wq, &priv->restart_work); diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 960685596093b6..de3efa3ce9a75f 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -980,15 +980,19 @@ static int bcm_sf2_sw_resume(struct dsa_switch *ds) ret = bcm_sf2_sw_rst(priv); if (ret) { pr_err("%s: failed to software reset switch\n", __func__); + if (!priv->wol_ports_mask) + clk_disable_unprepare(priv->clk); return ret; } bcm_sf2_crossbar_setup(priv); ret = bcm_sf2_cfp_resume(ds); - if (ret) + if (ret) { + if (!priv->wol_ports_mask) + clk_disable_unprepare(priv->clk); return ret; - + } if (priv->hw_params.num_gphy == 1) bcm_sf2_gphy_enable_set(ds, true); diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 62bcbbbe2a9565..56cf9a926a83da 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -3083,7 +3083,6 @@ static void airoha_remove(struct platform_device *pdev) if (!port) continue; - airoha_dev_stop(port->dev); unregister_netdev(port->dev); airoha_metadata_dst_free(port); } diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 42dbe8f93231e8..5724f8f2defd51 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -227,7 +227,9 @@ static int airoha_ppe_get_wdma_info(struct net_device *dev, const u8 *addr, if (!dev) return -ENODEV; + rcu_read_lock(); err = dev_fill_forward_path(dev, addr, &stack); + rcu_read_unlock(); if (err) return err; diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index cd7dddeb91dd67..9787c1857e13bf 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -25,7 +25,7 @@ config B44 select SSB select MII select PHYLIB - select FIXED_PHY if BCM47XX + select FIXED_PHY help If you have a network (Ethernet) controller of this type, say Y or M here. diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c index aa6d8606849f0f..972474893a6bcb 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c @@ -1152,12 +1152,6 @@ void bcmasp_enable_wol(struct bcmasp_intf *intf, bool en) } } -static void bcmasp_wol_irq_destroy(struct bcmasp_priv *priv) -{ - if (priv->wol_irq > 0) - free_irq(priv->wol_irq, priv); -} - static void bcmasp_eee_fixup(struct bcmasp_intf *intf, bool en) { u32 reg, phy_lpi_overwrite; @@ -1255,7 +1249,7 @@ static int bcmasp_probe(struct platform_device *pdev) if (priv->irq <= 0) return -EINVAL; - priv->clk = devm_clk_get_optional_enabled(dev, "sw_asp"); + priv->clk = devm_clk_get_optional(dev, "sw_asp"); if (IS_ERR(priv->clk)) return dev_err_probe(dev, PTR_ERR(priv->clk), "failed to request clock\n"); @@ -1283,6 +1277,10 @@ static int bcmasp_probe(struct platform_device *pdev) bcmasp_set_pdata(priv, pdata); + ret = clk_prepare_enable(priv->clk); + if (ret) + return dev_err_probe(dev, ret, "failed to start clock\n"); + /* Enable all clocks to ensure successful probing */ bcmasp_core_clock_set(priv, ASP_CTRL_CLOCK_CTRL_ASP_ALL_DISABLE, 0); @@ -1294,8 +1292,10 @@ static int bcmasp_probe(struct platform_device *pdev) ret = devm_request_irq(&pdev->dev, priv->irq, bcmasp_isr, 0, pdev->name, priv); - if (ret) - return dev_err_probe(dev, ret, "failed to request ASP interrupt: %d", ret); + if (ret) { + dev_err(dev, "Failed to request ASP interrupt: %d", ret); + goto err_clock_disable; + } /* Register mdio child nodes */ of_platform_populate(dev->of_node, bcmasp_mdio_of_match, NULL, dev); @@ -1307,13 +1307,17 @@ static int bcmasp_probe(struct platform_device *pdev) priv->mda_filters = devm_kcalloc(dev, priv->num_mda_filters, sizeof(*priv->mda_filters), GFP_KERNEL); - if (!priv->mda_filters) - return -ENOMEM; + if (!priv->mda_filters) { + ret = -ENOMEM; + goto err_clock_disable; + } priv->net_filters = devm_kcalloc(dev, priv->num_net_filters, sizeof(*priv->net_filters), GFP_KERNEL); - if (!priv->net_filters) - return -ENOMEM; + if (!priv->net_filters) { + ret = -ENOMEM; + goto err_clock_disable; + } bcmasp_core_init_filters(priv); @@ -1322,7 +1326,8 @@ static int bcmasp_probe(struct platform_device *pdev) ports_node = of_find_node_by_name(dev->of_node, "ethernet-ports"); if (!ports_node) { dev_warn(dev, "No ports found\n"); - return -EINVAL; + ret = -EINVAL; + goto err_clock_disable; } i = 0; @@ -1344,8 +1349,6 @@ static int bcmasp_probe(struct platform_device *pdev) */ bcmasp_core_clock_set(priv, 0, ASP_CTRL_CLOCK_CTRL_ASP_ALL_DISABLE); - clk_disable_unprepare(priv->clk); - /* Now do the registration of the network ports which will take care * of managing the clock properly. */ @@ -1358,13 +1361,16 @@ static int bcmasp_probe(struct platform_device *pdev) count++; } + clk_disable_unprepare(priv->clk); + dev_info(dev, "Initialized %d port(s)\n", count); return ret; err_cleanup: - bcmasp_wol_irq_destroy(priv); bcmasp_remove_intfs(priv); +err_clock_disable: + clk_disable_unprepare(priv->clk); return ret; } @@ -1376,7 +1382,6 @@ static void bcmasp_remove(struct platform_device *pdev) if (!priv) return; - bcmasp_wol_irq_destroy(priv); bcmasp_remove_intfs(priv); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c426a41c366391..0751c0e4581a21 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2929,6 +2929,8 @@ static int bnxt_async_event_process(struct bnxt *bp, u16 type = (u16)BNXT_EVENT_BUF_PRODUCER_TYPE(data1); u32 offset = BNXT_EVENT_BUF_PRODUCER_OFFSET(data2); + if (type >= ARRAY_SIZE(bp->bs_trace)) + goto async_event_process_exit; bnxt_bs_trace_check_wrap(&bp->bs_trace[type], offset); goto async_event_process_exit; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 9a41b9e0423c75..a97d651130dfb8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2146,7 +2146,7 @@ enum board_idx { }; #define BNXT_TRACE_BUF_MAGIC_BYTE ((u8)0xbc) -#define BNXT_TRACE_MAX 11 +#define BNXT_TRACE_MAX (DBG_LOG_BUFFER_FLUSH_REQ_TYPE_ERR_QPC_TRACE + 1) struct bnxt_bs_trace_info { u8 *magic_byte; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index 8fb55128829806..96d5d4f7f51fe8 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -123,7 +123,7 @@ static int bcmgenet_poll_wol_status(struct bcmgenet_priv *priv) while (!(bcmgenet_rbuf_readl(priv, RBUF_STATUS) & RBUF_STATUS_WOL)) { retries++; - if (retries > 5) { + if (retries > 50) { netdev_crit(dev, "polling wol mode timeout\n"); return -ETIMEDOUT; } diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 2328fce336447e..21a5dd34272442 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -17029,6 +17029,13 @@ static int tg3_get_invariants(struct tg3 *tp, const struct pci_device_id *ent) return err; } +static int tg3_is_default_mac_address(u8 *addr) +{ + static const u8 default_mac_address[ETH_ALEN] = { 0x00, 0x10, 0x18, 0x00, 0x00, 0x00 }; + + return ether_addr_equal(default_mac_address, addr); +} + static int tg3_get_device_address(struct tg3 *tp, u8 *addr) { u32 hi, lo, mac_offset; @@ -17102,6 +17109,10 @@ static int tg3_get_device_address(struct tg3 *tp, u8 *addr) if (!is_valid_ether_addr(addr)) return -EINVAL; + + if (tg3_is_default_mac_address(addr)) + return device_get_mac_address(&tp->pdev->dev, addr); + return 0; } diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index f290d608b4094c..99e7d5cf3786ff 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1071,7 +1071,7 @@ static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb, int budge } if (tx_skb->skb) { - napi_consume_skb(tx_skb->skb, budget); + dev_consume_skb_any(tx_skb->skb); tx_skb->skb = NULL; } } @@ -2669,6 +2669,14 @@ static void macb_init_tieoff(struct macb *bp) desc->ctrl = 0; } +static void gem_init_rx_ring(struct macb_queue *queue) +{ + queue->rx_tail = 0; + queue->rx_prepared_head = 0; + + gem_rx_refill(queue); +} + static void gem_init_rings(struct macb *bp) { struct macb_queue *queue; @@ -2686,10 +2694,7 @@ static void gem_init_rings(struct macb *bp) queue->tx_head = 0; queue->tx_tail = 0; - queue->rx_tail = 0; - queue->rx_prepared_head = 0; - - gem_rx_refill(queue); + gem_init_rx_ring(queue); } macb_init_tieoff(bp); @@ -3219,7 +3224,7 @@ static void gem_get_ethtool_stats(struct net_device *dev, spin_lock_irq(&bp->stats_lock); gem_update_stats(bp); memcpy(data, &bp->ethtool_stats, sizeof(u64) - * (GEM_STATS_LEN + QUEUE_STATS_LEN * MACB_MAX_QUEUES)); + * (GEM_STATS_LEN + QUEUE_STATS_LEN * bp->num_queues)); spin_unlock_irq(&bp->stats_lock); } @@ -3978,6 +3983,9 @@ static int gem_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) struct macb *bp = netdev_priv(netdev); int ret; + if (!(netdev->hw_features & NETIF_F_NTUPLE)) + return -EOPNOTSUPP; + switch (cmd->cmd) { case ETHTOOL_SRXCLSRLINS: if ((cmd->fs.location >= bp->max_tuples) @@ -5768,9 +5776,9 @@ static int __maybe_unused macb_suspend(struct device *dev) struct macb_queue *queue; struct in_device *idev; unsigned long flags; + u32 tmp, ifa_local; unsigned int q; int err; - u32 tmp; if (!device_may_wakeup(&bp->dev->dev)) phy_exit(bp->phy); @@ -5779,14 +5787,21 @@ static int __maybe_unused macb_suspend(struct device *dev) return 0; if (bp->wol & MACB_WOL_ENABLED) { - /* Check for IP address in WOL ARP mode */ - idev = __in_dev_get_rcu(bp->dev); - if (idev) - ifa = rcu_dereference(idev->ifa_list); - if ((bp->wolopts & WAKE_ARP) && !ifa) { - netdev_err(netdev, "IP address not assigned as required by WoL walk ARP\n"); - return -EOPNOTSUPP; + if (bp->wolopts & WAKE_ARP) { + /* Check for IP address in WOL ARP mode */ + rcu_read_lock(); + idev = __in_dev_get_rcu(bp->dev); + if (idev) + ifa = rcu_dereference(idev->ifa_list); + if (!ifa) { + rcu_read_unlock(); + netdev_err(netdev, "IP address not assigned as required by WoL walk ARP\n"); + return -EOPNOTSUPP; + } + ifa_local = be32_to_cpu(ifa->ifa_local); + rcu_read_unlock(); } + spin_lock_irqsave(&bp->lock, flags); /* Disable Tx and Rx engines before disabling the queues, @@ -5825,8 +5840,9 @@ static int __maybe_unused macb_suspend(struct device *dev) if (bp->wolopts & WAKE_ARP) { tmp |= MACB_BIT(ARP); /* write IP address into register */ - tmp |= MACB_BFEXT(IP, be32_to_cpu(ifa->ifa_local)); + tmp |= MACB_BFEXT(IP, ifa_local); } + spin_unlock_irqrestore(&bp->lock, flags); /* Change interrupt handler and * Enable WoL IRQ on queue 0 @@ -5839,11 +5855,12 @@ static int __maybe_unused macb_suspend(struct device *dev) dev_err(dev, "Unable to request IRQ %d (error %d)\n", bp->queues[0].irq, err); - spin_unlock_irqrestore(&bp->lock, flags); return err; } + spin_lock_irqsave(&bp->lock, flags); queue_writel(bp->queues, IER, GEM_BIT(WOL)); gem_writel(bp, WOL, tmp); + spin_unlock_irqrestore(&bp->lock, flags); } else { err = devm_request_irq(dev, bp->queues[0].irq, macb_wol_interrupt, IRQF_SHARED, netdev->name, bp->queues); @@ -5851,13 +5868,13 @@ static int __maybe_unused macb_suspend(struct device *dev) dev_err(dev, "Unable to request IRQ %d (error %d)\n", bp->queues[0].irq, err); - spin_unlock_irqrestore(&bp->lock, flags); return err; } + spin_lock_irqsave(&bp->lock, flags); queue_writel(bp->queues, IER, MACB_BIT(WOL)); macb_writel(bp, WOL, tmp); + spin_unlock_irqrestore(&bp->lock, flags); } - spin_unlock_irqrestore(&bp->lock, flags); enable_irq_wake(bp->queues[0].irq); } @@ -5924,6 +5941,8 @@ static int __maybe_unused macb_resume(struct device *dev) queue_readl(bp->queues, ISR); if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) queue_writel(bp->queues, ISR, -1); + spin_unlock_irqrestore(&bp->lock, flags); + /* Replace interrupt handler on queue 0 */ devm_free_irq(dev, bp->queues[0].irq, bp->queues); err = devm_request_irq(dev, bp->queues[0].irq, macb_interrupt, @@ -5932,10 +5951,8 @@ static int __maybe_unused macb_resume(struct device *dev) dev_err(dev, "Unable to request IRQ %d (error %d)\n", bp->queues[0].irq, err); - spin_unlock_irqrestore(&bp->lock, flags); return err; } - spin_unlock_irqrestore(&bp->lock, flags); disable_irq_wake(bp->queues[0].irq); @@ -5947,8 +5964,18 @@ static int __maybe_unused macb_resume(struct device *dev) rtnl_unlock(); } + if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) + macb_init_buffers(bp); + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { + if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) { + if (macb_is_gem(bp)) + gem_init_rx_ring(queue); + else + macb_init_rx_ring(queue); + } + napi_enable(&queue->napi_rx); napi_enable(&queue->napi_tx); } diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c index c9e77819196e17..d91f7b1aa39caa 100644 --- a/drivers/net/ethernet/cadence/macb_ptp.c +++ b/drivers/net/ethernet/cadence/macb_ptp.c @@ -357,8 +357,10 @@ void gem_ptp_remove(struct net_device *ndev) { struct macb *bp = netdev_priv(ndev); - if (bp->ptp_clock) + if (bp->ptp_clock) { ptp_clock_unregister(bp->ptp_clock); + bp->ptp_clock = NULL; + } gem_ptp_clear_timer(bp); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index fed89d4f1e1dc4..2fe140ddebb23b 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -813,6 +813,8 @@ static void enetc_get_ringparam(struct net_device *ndev, { struct enetc_ndev_priv *priv = netdev_priv(ndev); + ring->rx_max_pending = priv->rx_bd_count; + ring->tx_max_pending = priv->tx_bd_count; ring->rx_pending = priv->rx_bd_count; ring->tx_pending = priv->tx_bd_count; diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index ab67c709d5a0b7..1cd1f3f2930a07 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -313,14 +313,13 @@ static int iavf_get_sset_count(struct net_device *netdev, int sset) { /* Report the maximum number queues, even if not every queue is * currently configured. Since allocation of queues is in pairs, - * use netdev->real_num_tx_queues * 2. The real_num_tx_queues is set - * at device creation and never changes. + * use netdev->num_tx_queues * 2. The num_tx_queues is set at + * device creation and never changes. */ if (sset == ETH_SS_STATS) return IAVF_STATS_LEN + - (IAVF_QUEUE_STATS_LEN * 2 * - netdev->real_num_tx_queues); + (IAVF_QUEUE_STATS_LEN * 2 * netdev->num_tx_queues); else return -EINVAL; } @@ -345,19 +344,19 @@ static void iavf_get_ethtool_stats(struct net_device *netdev, iavf_add_ethtool_stats(&data, adapter, iavf_gstrings_stats); rcu_read_lock(); - /* As num_active_queues describe both tx and rx queues, we can use - * it to iterate over rings' stats. + /* Use num_tx_queues to report stats for the maximum number of queues. + * Queues beyond num_active_queues will report zero. */ - for (i = 0; i < adapter->num_active_queues; i++) { - struct iavf_ring *ring; + for (i = 0; i < netdev->num_tx_queues; i++) { + struct iavf_ring *tx_ring = NULL, *rx_ring = NULL; - /* Tx rings stats */ - ring = &adapter->tx_rings[i]; - iavf_add_queue_stats(&data, ring); + if (i < adapter->num_active_queues) { + tx_ring = &adapter->tx_rings[i]; + rx_ring = &adapter->rx_rings[i]; + } - /* Rx rings stats */ - ring = &adapter->rx_rings[i]; - iavf_add_queue_stats(&data, ring); + iavf_add_queue_stats(&data, tx_ring); + iavf_add_queue_stats(&data, rx_ring); } rcu_read_unlock(); } @@ -376,9 +375,9 @@ static void iavf_get_stat_strings(struct net_device *netdev, u8 *data) iavf_add_stat_strings(&data, iavf_gstrings_stats); /* Queues are always allocated in pairs, so we just use - * real_num_tx_queues for both Tx and Rx queues. + * num_tx_queues for both Tx and Rx queues. */ - for (i = 0; i < netdev->real_num_tx_queues; i++) { + for (i = 0; i < netdev->num_tx_queues; i++) { iavf_add_stat_strings(&data, iavf_gstrings_queue_stats, "tx", i); iavf_add_stat_strings(&data, iavf_gstrings_queue_stats, diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 7925ee152c760a..dad001abc9086b 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -757,10 +757,13 @@ iavf_vlan_filter *iavf_add_vlan(struct iavf_adapter *adapter, adapter->num_vlan_filters++; iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_ADD_VLAN_FILTER); } else if (f->state == IAVF_VLAN_REMOVE) { - /* IAVF_VLAN_REMOVE means that VLAN wasn't yet removed. - * We can safely only change the state here. + /* Re-add the filter since we cannot tell whether the + * pending delete has already been processed by the PF. + * A duplicate add is harmless. */ - f->state = IAVF_VLAN_ACTIVE; + f->state = IAVF_VLAN_ADD; + iavf_schedule_aq_request(adapter, + IAVF_FLAG_AQ_ADD_VLAN_FILTER); } clearout: diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 2b2b22af42beeb..eb3a48330cc155 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -839,6 +839,28 @@ static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid) WRITE_ONCE(ring->xsk_pool, ice_get_xp_from_qid(vsi, qid)); } +/** + * ice_get_max_txq - return the maximum number of Tx queues for in a PF + * @pf: PF structure + * + * Return: maximum number of Tx queues + */ +static inline int ice_get_max_txq(struct ice_pf *pf) +{ + return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_txq); +} + +/** + * ice_get_max_rxq - return the maximum number of Rx queues for in a PF + * @pf: PF structure + * + * Return: maximum number of Rx queues + */ +static inline int ice_get_max_rxq(struct ice_pf *pf) +{ + return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_rxq); +} + /** * ice_get_main_vsi - Get the PF VSI * @pf: PF instance diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 301947d53ede5b..e6a20af6f63de5 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -1930,6 +1930,17 @@ __ice_get_ethtool_stats(struct net_device *netdev, int i = 0; char *p; + if (ice_is_port_repr_netdev(netdev)) { + ice_update_eth_stats(vsi); + + for (j = 0; j < ICE_VSI_STATS_LEN; j++) { + p = (char *)vsi + ice_gstrings_vsi_stats[j].stat_offset; + data[i++] = (ice_gstrings_vsi_stats[j].sizeof_stat == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } + return; + } + ice_update_pf_stats(pf); ice_update_vsi_stats(vsi); @@ -1939,9 +1950,6 @@ __ice_get_ethtool_stats(struct net_device *netdev, sizeof(u64)) ? *(u64 *)p : *(u32 *)p; } - if (ice_is_port_repr_netdev(netdev)) - return; - /* populate per queue stats */ rcu_read_lock(); @@ -3773,24 +3781,6 @@ ice_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) return 0; } -/** - * ice_get_max_txq - return the maximum number of Tx queues for in a PF - * @pf: PF structure - */ -static int ice_get_max_txq(struct ice_pf *pf) -{ - return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_txq); -} - -/** - * ice_get_max_rxq - return the maximum number of Rx queues for in a PF - * @pf: PF structure - */ -static int ice_get_max_rxq(struct ice_pf *pf) -{ - return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_rxq); -} - /** * ice_get_combined_cnt - return the current number of combined channels * @vsi: PF VSI pointer diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index e7308e381e2f06..3c36e3641b9e99 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4699,8 +4699,8 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) struct net_device *netdev; u8 mac_addr[ETH_ALEN]; - netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq, - vsi->alloc_rxq); + netdev = alloc_etherdev_mqs(sizeof(*np), ice_get_max_txq(vsi->back), + ice_get_max_rxq(vsi->back)); if (!netdev) return -ENOMEM; diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index 90f99443a922cd..096566c697f444 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -2,6 +2,7 @@ /* Copyright (C) 2019-2021, Intel Corporation. */ #include "ice.h" +#include "ice_lib.h" #include "ice_eswitch.h" #include "devlink/devlink.h" #include "devlink/port.h" @@ -67,7 +68,7 @@ ice_repr_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) return; vsi = repr->src_vsi; - ice_update_vsi_stats(vsi); + ice_update_eth_stats(vsi); eth_stats = &vsi->eth_stats; stats->tx_packets = eth_stats->tx_unicast + eth_stats->tx_broadcast + @@ -315,7 +316,7 @@ ice_repr_reg_netdev(struct net_device *netdev, const struct net_device_ops *ops) static int ice_repr_ready_vf(struct ice_repr *repr) { - return !ice_check_vf_ready_for_cfg(repr->vf); + return ice_check_vf_ready_for_cfg(repr->vf); } static int ice_repr_ready_sf(struct ice_repr *repr) diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index b206fba092c8fe..ec1b75f039bb25 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -1066,7 +1066,7 @@ bool idpf_vport_set_hsplit(const struct idpf_vport *vport, u8 val); int idpf_idc_init(struct idpf_adapter *adapter); int idpf_idc_init_aux_core_dev(struct idpf_adapter *adapter, enum iidc_function_type ftype); -void idpf_idc_deinit_core_aux_device(struct iidc_rdma_core_dev_info *cdev_info); +void idpf_idc_deinit_core_aux_device(struct idpf_adapter *adapter); void idpf_idc_deinit_vport_aux_device(struct iidc_rdma_vport_dev_info *vdev_info); void idpf_idc_issue_reset_event(struct iidc_rdma_core_dev_info *cdev_info); void idpf_idc_vdev_mtu_event(struct iidc_rdma_vport_dev_info *vdev_info, diff --git a/drivers/net/ethernet/intel/idpf/idpf_idc.c b/drivers/net/ethernet/intel/idpf/idpf_idc.c index bd4785fb8d3e6e..7e4f4ac9265377 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_idc.c +++ b/drivers/net/ethernet/intel/idpf/idpf_idc.c @@ -470,10 +470,11 @@ int idpf_idc_init_aux_core_dev(struct idpf_adapter *adapter, /** * idpf_idc_deinit_core_aux_device - de-initialize Auxiliary Device(s) - * @cdev_info: IDC core device info pointer + * @adapter: driver private data structure */ -void idpf_idc_deinit_core_aux_device(struct iidc_rdma_core_dev_info *cdev_info) +void idpf_idc_deinit_core_aux_device(struct idpf_adapter *adapter) { + struct iidc_rdma_core_dev_info *cdev_info = adapter->cdev_info; struct iidc_rdma_priv_dev_info *privd; if (!cdev_info) @@ -485,6 +486,7 @@ void idpf_idc_deinit_core_aux_device(struct iidc_rdma_core_dev_info *cdev_info) kfree(privd->mapped_mem_regions); kfree(privd); kfree(cdev_info); + adapter->cdev_info = NULL; } /** diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 2522599930223f..f6b3b15364ff68 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -1860,13 +1860,13 @@ static int idpf_rxq_group_alloc(struct idpf_vport *vport, idpf_queue_assign(HSPLIT_EN, q, hs); idpf_queue_assign(RSC_EN, q, rsc); - bufq_set->num_refillqs = num_rxq; bufq_set->refillqs = kcalloc(num_rxq, swq_size, GFP_KERNEL); if (!bufq_set->refillqs) { err = -ENOMEM; goto err_alloc; } + bufq_set->num_refillqs = num_rxq; for (unsigned int k = 0; k < bufq_set->num_refillqs; k++) { struct idpf_sw_queue *refillq = &bufq_set->refillqs[k]; diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index d5a877e1fef8b2..113ecfc16dd722 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -3668,7 +3668,7 @@ void idpf_vc_core_deinit(struct idpf_adapter *adapter) idpf_ptp_release(adapter); idpf_deinit_task(adapter); - idpf_idc_deinit_core_aux_device(adapter->cdev_info); + idpf_idc_deinit_core_aux_device(adapter); idpf_rel_rx_pt_lkup(adapter); idpf_intr_rel(adapter); diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index a427f05814c1ae..17236813965d33 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -781,6 +781,8 @@ int igc_ptp_hwtstamp_set(struct net_device *netdev, struct kernel_hwtstamp_config *config, struct netlink_ext_ack *extack); void igc_ptp_tx_hang(struct igc_adapter *adapter); +void igc_ptp_clear_xsk_tx_tstamp_queue(struct igc_adapter *adapter, + u16 queue_id); void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts); void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index b2e8d0c0f827f1..72bc5128d8b882 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -264,6 +264,13 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring) /* reset next_to_use and next_to_clean */ tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; + + /* Clear any lingering XSK TX timestamp requests */ + if (test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags)) { + struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); + + igc_ptp_clear_xsk_tx_tstamp_queue(adapter, tx_ring->queue_index); + } } /** @@ -1730,11 +1737,8 @@ static netdev_tx_t igc_xmit_frame(struct sk_buff *skb, /* The minimum packet size with TCTL.PSP set is 17 so pad the skb * in order to meet this minimum size requirement. */ - if (skb->len < 17) { - if (skb_padto(skb, 17)) - return NETDEV_TX_OK; - skb->len = 17; - } + if (skb_put_padto(skb, 17)) + return NETDEV_TX_OK; return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb)); } diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 44ee1938676612..3d6b2264164af8 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -577,6 +577,39 @@ static void igc_ptp_clear_tx_tstamp(struct igc_adapter *adapter) spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); } +/** + * igc_ptp_clear_xsk_tx_tstamp_queue - Clear pending XSK TX timestamps for a queue + * @adapter: Board private structure + * @queue_id: TX queue index to clear timestamps for + * + * Iterates over all TX timestamp registers and releases any pending + * timestamp requests associated with the given TX queue. This is + * called when an XDP pool is being disabled to ensure no stale + * timestamp references remain. + */ +void igc_ptp_clear_xsk_tx_tstamp_queue(struct igc_adapter *adapter, u16 queue_id) +{ + unsigned long flags; + int i; + + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); + + for (i = 0; i < IGC_MAX_TX_TSTAMP_REGS; i++) { + struct igc_tx_timestamp_request *tstamp = &adapter->tx_tstamp[i]; + + if (tstamp->buffer_type != IGC_TX_BUFFER_TYPE_XSK) + continue; + if (tstamp->xsk_queue_index != queue_id) + continue; + if (!tstamp->xsk_tx_buffer) + continue; + + igc_ptp_free_tx_buffer(adapter, tstamp); + } + + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); +} + static void igc_ptp_disable_tx_timestamp(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; diff --git a/drivers/net/ethernet/intel/libie/fwlog.c b/drivers/net/ethernet/intel/libie/fwlog.c index 4d0c8370386bc7..96bba57c8a5b90 100644 --- a/drivers/net/ethernet/intel/libie/fwlog.c +++ b/drivers/net/ethernet/intel/libie/fwlog.c @@ -433,17 +433,21 @@ libie_debugfs_module_write(struct file *filp, const char __user *buf, module = libie_find_module_by_dentry(fwlog->debugfs_modules, dentry); if (module < 0) { dev_info(dev, "unknown module\n"); - return -EINVAL; + count = -EINVAL; + goto free_cmd_buf; } cnt = sscanf(cmd_buf, "%s", user_val); - if (cnt != 1) - return -EINVAL; + if (cnt != 1) { + count = -EINVAL; + goto free_cmd_buf; + } log_level = sysfs_match_string(libie_fwlog_level_string, user_val); if (log_level < 0) { dev_info(dev, "unknown log level '%s'\n", user_val); - return -EINVAL; + count = -EINVAL; + goto free_cmd_buf; } if (module != LIBIE_AQC_FW_LOG_ID_MAX) { @@ -458,6 +462,9 @@ libie_debugfs_module_write(struct file *filp, const char __user *buf, fwlog->cfg.module_entries[i].log_level = log_level; } +free_cmd_buf: + kfree(cmd_buf); + return count; } @@ -515,23 +522,31 @@ libie_debugfs_nr_messages_write(struct file *filp, const char __user *buf, return PTR_ERR(cmd_buf); ret = sscanf(cmd_buf, "%s", user_val); - if (ret != 1) - return -EINVAL; + if (ret != 1) { + count = -EINVAL; + goto free_cmd_buf; + } ret = kstrtos16(user_val, 0, &nr_messages); - if (ret) - return ret; + if (ret) { + count = ret; + goto free_cmd_buf; + } if (nr_messages < LIBIE_AQC_FW_LOG_MIN_RESOLUTION || nr_messages > LIBIE_AQC_FW_LOG_MAX_RESOLUTION) { dev_err(dev, "Invalid FW log number of messages %d, value must be between %d - %d\n", nr_messages, LIBIE_AQC_FW_LOG_MIN_RESOLUTION, LIBIE_AQC_FW_LOG_MAX_RESOLUTION); - return -EINVAL; + count = -EINVAL; + goto free_cmd_buf; } fwlog->cfg.log_resolution = nr_messages; +free_cmd_buf: + kfree(cmd_buf); + return count; } @@ -588,8 +603,10 @@ libie_debugfs_enable_write(struct file *filp, const char __user *buf, return PTR_ERR(cmd_buf); ret = sscanf(cmd_buf, "%s", user_val); - if (ret != 1) - return -EINVAL; + if (ret != 1) { + ret = -EINVAL; + goto free_cmd_buf; + } ret = kstrtobool(user_val, &enable); if (ret) @@ -624,6 +641,8 @@ libie_debugfs_enable_write(struct file *filp, const char __user *buf, */ if (WARN_ON(ret != (ssize_t)count && ret >= 0)) ret = -EIO; +free_cmd_buf: + kfree(cmd_buf); return ret; } @@ -682,8 +701,10 @@ libie_debugfs_log_size_write(struct file *filp, const char __user *buf, return PTR_ERR(cmd_buf); ret = sscanf(cmd_buf, "%s", user_val); - if (ret != 1) - return -EINVAL; + if (ret != 1) { + ret = -EINVAL; + goto free_cmd_buf; + } index = sysfs_match_string(libie_fwlog_log_size, user_val); if (index < 0) { @@ -712,6 +733,8 @@ libie_debugfs_log_size_write(struct file *filp, const char __user *buf, */ if (WARN_ON(ret != (ssize_t)count && ret >= 0)) ret = -EIO; +free_cmd_buf: + kfree(cmd_buf); return ret; } diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index d1b8650cb4b488..f442b874bb5933 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -5016,7 +5016,7 @@ static int mvpp2_bm_switch_buffers(struct mvpp2 *priv, bool percpu) if (priv->percpu_pools) numbufs = port->nrxqs * 2; - if (change_percpu) + if (change_percpu && priv->global_tx_fc) mvpp2_bm_pool_update_priv_fc(priv, false); for (i = 0; i < numbufs; i++) @@ -5041,7 +5041,7 @@ static int mvpp2_bm_switch_buffers(struct mvpp2 *priv, bool percpu) mvpp2_open(port->dev); } - if (change_percpu) + if (change_percpu && priv->global_tx_fc) mvpp2_bm_pool_update_priv_fc(priv, true); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index f8eaaf37963b11..abcbd38db9dbbe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -287,6 +287,7 @@ struct mlx5e_ipsec_sa_entry { struct mlx5e_ipsec_dwork *dwork; struct mlx5e_ipsec_limits limits; u32 rx_mapped_id; + u8 ctx[MLX5_ST_SZ_BYTES(ipsec_aso)]; }; struct mlx5_accel_pol_xfrm_attrs { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 33344e00719b81..05faad5083d9d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -310,10 +310,11 @@ static void mlx5e_ipsec_aso_update(struct mlx5e_ipsec_sa_entry *sa_entry, mlx5e_ipsec_aso_query(sa_entry, data); } -static void mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry, - u32 mode_param) +static void +mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry, + u32 mode_param, + struct mlx5_accel_esp_xfrm_attrs *attrs) { - struct mlx5_accel_esp_xfrm_attrs attrs = {}; struct mlx5_wqe_aso_ctrl_seg data = {}; if (mode_param < MLX5E_IPSEC_ESN_SCOPE_MID) { @@ -323,18 +324,7 @@ static void mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry, sa_entry->esn_state.overlap = 1; } - mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs); - - /* It is safe to execute the modify below unlocked since the only flows - * that could affect this HW object, are create, destroy and this work. - * - * Creation flow can't co-exist with this modify work, the destruction - * flow would cancel this work, and this work is a single entity that - * can't conflict with it self. - */ - spin_unlock_bh(&sa_entry->x->lock); - mlx5_accel_esp_modify_xfrm(sa_entry, &attrs); - spin_lock_bh(&sa_entry->x->lock); + mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, attrs); data.data_offset_condition_operand = MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET; @@ -370,20 +360,18 @@ static void mlx5e_ipsec_aso_update_soft(struct mlx5e_ipsec_sa_entry *sa_entry, static void mlx5e_ipsec_handle_limits(struct mlx5e_ipsec_sa_entry *sa_entry) { struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; - struct mlx5e_ipsec *ipsec = sa_entry->ipsec; - struct mlx5e_ipsec_aso *aso = ipsec->aso; bool soft_arm, hard_arm; u64 hard_cnt; lockdep_assert_held(&sa_entry->x->lock); - soft_arm = !MLX5_GET(ipsec_aso, aso->ctx, soft_lft_arm); - hard_arm = !MLX5_GET(ipsec_aso, aso->ctx, hard_lft_arm); + soft_arm = !MLX5_GET(ipsec_aso, sa_entry->ctx, soft_lft_arm); + hard_arm = !MLX5_GET(ipsec_aso, sa_entry->ctx, hard_lft_arm); if (!soft_arm && !hard_arm) /* It is not lifetime event */ return; - hard_cnt = MLX5_GET(ipsec_aso, aso->ctx, remove_flow_pkt_cnt); + hard_cnt = MLX5_GET(ipsec_aso, sa_entry->ctx, remove_flow_pkt_cnt); if (!hard_cnt || hard_arm) { /* It is possible to see packet counter equal to zero without * hard limit event armed. Such situation can be if packet @@ -453,11 +441,11 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work) struct mlx5e_ipsec_work *work = container_of(_work, struct mlx5e_ipsec_work, work); struct mlx5e_ipsec_sa_entry *sa_entry = work->data; + struct mlx5_accel_esp_xfrm_attrs tmp = {}; struct mlx5_accel_esp_xfrm_attrs *attrs; - struct mlx5e_ipsec_aso *aso; + bool need_modify = false; int ret; - aso = sa_entry->ipsec->aso; attrs = &sa_entry->attrs; spin_lock_bh(&sa_entry->x->lock); @@ -465,18 +453,22 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work) if (ret) goto unlock; + if (attrs->lft.soft_packet_limit != XFRM_INF) + mlx5e_ipsec_handle_limits(sa_entry); + if (attrs->replay_esn.trigger && - !MLX5_GET(ipsec_aso, aso->ctx, esn_event_arm)) { - u32 mode_param = MLX5_GET(ipsec_aso, aso->ctx, mode_parameter); + !MLX5_GET(ipsec_aso, sa_entry->ctx, esn_event_arm)) { + u32 mode_param = MLX5_GET(ipsec_aso, sa_entry->ctx, + mode_parameter); - mlx5e_ipsec_update_esn_state(sa_entry, mode_param); + mlx5e_ipsec_update_esn_state(sa_entry, mode_param, &tmp); + need_modify = true; } - if (attrs->lft.soft_packet_limit != XFRM_INF) - mlx5e_ipsec_handle_limits(sa_entry); - unlock: spin_unlock_bh(&sa_entry->x->lock); + if (need_modify) + mlx5_accel_esp_modify_xfrm(sa_entry, &tmp); kfree(work); } @@ -629,6 +621,8 @@ int mlx5e_ipsec_aso_query(struct mlx5e_ipsec_sa_entry *sa_entry, /* We are in atomic context */ udelay(10); } while (ret && time_is_after_jiffies(expires)); + if (!ret) + memcpy(sa_entry->ctx, aso->ctx, MLX5_ST_SZ_BYTES(ipsec_aso)); spin_unlock_bh(&aso->lock); return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 26178d0bac925a..faccc60fc93a8b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -1489,24 +1489,24 @@ static int esw_qos_node_enable_tc_arbitration(struct mlx5_esw_sched_node *node, return err; } -static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev) +static u32 mlx5_esw_qos_lag_link_speed_get(struct mlx5_core_dev *mdev, + bool take_rtnl) { struct ethtool_link_ksettings lksettings; struct net_device *slave, *master; u32 speed = SPEED_UNKNOWN; - /* Lock ensures a stable reference to master and slave netdevice - * while port speed of master is queried. - */ - ASSERT_RTNL(); - slave = mlx5_uplink_netdev_get(mdev); if (!slave) goto out; + if (take_rtnl) + rtnl_lock(); master = netdev_master_upper_dev_get(slave); if (master && !__ethtool_get_link_ksettings(master, &lksettings)) speed = lksettings.base.speed; + if (take_rtnl) + rtnl_unlock(); out: mlx5_uplink_netdev_put(mdev, slave); @@ -1514,20 +1514,15 @@ static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev) } static int mlx5_esw_qos_max_link_speed_get(struct mlx5_core_dev *mdev, u32 *link_speed_max, - bool hold_rtnl_lock, struct netlink_ext_ack *extack) + bool take_rtnl, + struct netlink_ext_ack *extack) { int err; if (!mlx5_lag_is_active(mdev)) goto skip_lag; - if (hold_rtnl_lock) - rtnl_lock(); - - *link_speed_max = mlx5_esw_qos_lag_link_speed_get_locked(mdev); - - if (hold_rtnl_lock) - rtnl_unlock(); + *link_speed_max = mlx5_esw_qos_lag_link_speed_get(mdev, take_rtnl); if (*link_speed_max != (u32)SPEED_UNKNOWN) return 0; diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index a3845edf0e48fa..f0b5dd752f084f 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -3053,6 +3053,11 @@ static void lan743x_phylink_mac_link_up(struct phylink_config *config, else if (speed == SPEED_100) mac_cr |= MAC_CR_CFG_L_; + if (duplex == DUPLEX_FULL) + mac_cr |= MAC_CR_DPX_; + else + mac_cr &= ~MAC_CR_DPX_; + lan743x_csr_write(adapter, MAC_CR, mac_cr); lan743x_ptp_update_latency(adapter, speed); diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index ba3467f1e2eaef..48a9acea4ab6c1 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -814,9 +814,6 @@ void mana_hwc_destroy_channel(struct gdma_context *gc) gc->max_num_cqs = 0; } - kfree(hwc->caller_ctx); - hwc->caller_ctx = NULL; - if (hwc->txq) mana_hwc_destroy_wq(hwc, hwc->txq); @@ -826,6 +823,9 @@ void mana_hwc_destroy_channel(struct gdma_context *gc) if (hwc->cq) mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc->cq); + kfree(hwc->caller_ctx); + hwc->caller_ctx = NULL; + mana_gd_free_res_map(&hwc->inflight_msg_res); hwc->num_inflight_msg = 0; diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 9017e806ecdaa9..dca62fb9a3a9ef 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -3425,6 +3425,7 @@ static int add_adev(struct gdma_dev *gd, const char *name) struct auxiliary_device *adev; struct mana_adev *madev; int ret; + int id; madev = kzalloc_obj(*madev); if (!madev) @@ -3434,7 +3435,8 @@ static int add_adev(struct gdma_dev *gd, const char *name) ret = mana_adev_idx_alloc(); if (ret < 0) goto idx_fail; - adev->id = ret; + id = ret; + adev->id = id; adev->name = name; adev->dev.parent = gd->gdma_context->dev; @@ -3460,7 +3462,7 @@ static int add_adev(struct gdma_dev *gd, const char *name) auxiliary_device_uninit(adev); init_fail: - mana_adev_idx_free(adev->id); + mana_adev_idx_free(id); idx_fail: kfree(madev); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 8d040e611d5aaa..637e635bbf03a4 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1719,13 +1719,18 @@ static int ionic_set_mac_address(struct net_device *netdev, void *sa) if (ether_addr_equal(netdev->dev_addr, mac)) return 0; - err = ionic_program_mac(lif, mac); - if (err < 0) - return err; + /* Only program macs for virtual functions to avoid losing the permanent + * Mac across warm reset/reboot. + */ + if (lif->ionic->pdev->is_virtfn) { + err = ionic_program_mac(lif, mac); + if (err < 0) + return err; - if (err > 0) - netdev_dbg(netdev, "%s: SET and GET ATTR Mac are not equal-due to old FW running\n", - __func__); + if (err > 0) + netdev_dbg(netdev, "%s: SET and GET ATTR Mac are not equal-due to old FW running\n", + __func__); + } err = eth_prepare_mac_addr_change(netdev, addr); if (err) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 0cf9dfe0fa362b..fd4e7622f1231b 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -962,7 +962,6 @@ static int emac_rx_packet_zc(struct prueth_emac *emac, u32 flow_id, pkt_len -= 4; cppi5_desc_get_tags_ids(&desc_rx->hdr, &port_id, NULL); psdata = cppi5_hdesc_get_psdata(desc_rx); - k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); count++; xsk_buff_set_size(xdp, pkt_len); xsk_buff_dma_sync_for_cpu(xdp); @@ -988,6 +987,7 @@ static int emac_rx_packet_zc(struct prueth_emac *emac, u32 flow_id, emac_dispatch_skb_zc(emac, xdp, psdata); xsk_buff_free(xdp); } + k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); } if (xdp_status & ICSSG_XDP_REDIR) @@ -1057,7 +1057,6 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) /* firmware adds 4 CRC bytes, strip them */ pkt_len -= 4; cppi5_desc_get_tags_ids(&desc_rx->hdr, &port_id, NULL); - k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); /* if allocation fails we drop the packet but push the * descriptor back to the ring with old page to prevent a stall @@ -1075,6 +1074,11 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) xdp_prepare_buff(&xdp, pa, PRUETH_HEADROOM, pkt_len, false); *xdp_state = emac_run_xdp(emac, &xdp, &pkt_len); + if (*xdp_state == ICSSG_XDP_CONSUMED) { + page_pool_recycle_direct(pool, page); + goto requeue; + } + if (*xdp_state != ICSSG_XDP_PASS) goto requeue; headroom = xdp.data - xdp.data_hard_start; @@ -1110,6 +1114,7 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) ndev->stats.rx_packets++; requeue: + k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); /* queue another RX DMA */ ret = prueth_dma_rx_push_mapped(emac, &emac->rx_chns, new_page, PRUETH_MAX_PKT_SIZE); diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 5ec028a00c6200..3645ebde049a02 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -109,8 +109,11 @@ static int nsim_forward_skb(struct net_device *tx_dev, int ret; ret = __dev_forward_skb(rx_dev, skb); - if (ret) + if (ret) { + if (psp_ext) + __skb_ext_put(psp_ext); return ret; + } nsim_psp_handle_ext(skb, psp_ext); diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index b7282f5c96328a..120aeb539d9f2a 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -2058,6 +2058,68 @@ static const struct ethtool_ops team_ethtool_ops = { * rt netlink interface ***********************/ +/* For tx path we need a linkup && enabled port and for parse any port + * suffices. + */ +static struct team_port *team_header_port_get_rcu(struct team *team, + bool txable) +{ + struct team_port *port; + + list_for_each_entry_rcu(port, &team->port_list, list) { + if (!txable || team_port_txable(port)) + return port; + } + + return NULL; +} + +static int team_header_create(struct sk_buff *skb, struct net_device *team_dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned int len) +{ + struct team *team = netdev_priv(team_dev); + const struct header_ops *port_ops; + struct team_port *port; + int ret = 0; + + rcu_read_lock(); + port = team_header_port_get_rcu(team, true); + if (port) { + port_ops = READ_ONCE(port->dev->header_ops); + if (port_ops && port_ops->create) + ret = port_ops->create(skb, port->dev, + type, daddr, saddr, len); + } + rcu_read_unlock(); + return ret; +} + +static int team_header_parse(const struct sk_buff *skb, + const struct net_device *team_dev, + unsigned char *haddr) +{ + struct team *team = netdev_priv(team_dev); + const struct header_ops *port_ops; + struct team_port *port; + int ret = 0; + + rcu_read_lock(); + port = team_header_port_get_rcu(team, false); + if (port) { + port_ops = READ_ONCE(port->dev->header_ops); + if (port_ops && port_ops->parse) + ret = port_ops->parse(skb, port->dev, haddr); + } + rcu_read_unlock(); + return ret; +} + +static const struct header_ops team_header_ops = { + .create = team_header_create, + .parse = team_header_parse, +}; + static void team_setup_by_port(struct net_device *dev, struct net_device *port_dev) { @@ -2066,7 +2128,8 @@ static void team_setup_by_port(struct net_device *dev, if (port_dev->type == ARPHRD_ETHER) dev->header_ops = team->header_ops_cache; else - dev->header_ops = port_dev->header_ops; + dev->header_ops = port_dev->header_ops ? + &team_header_ops : NULL; dev->type = port_dev->type; dev->hard_header_len = port_dev->hard_header_len; dev->needed_headroom = port_dev->needed_headroom; diff --git a/drivers/net/tun_vnet.h b/drivers/net/tun_vnet.h index a5f93b6c4482c3..fa5cab9d3e55c5 100644 --- a/drivers/net/tun_vnet.h +++ b/drivers/net/tun_vnet.h @@ -244,7 +244,7 @@ tun_vnet_hdr_tnl_from_skb(unsigned int flags, if (virtio_net_hdr_tnl_from_skb(skb, tnl_hdr, has_tnl_offload, tun_vnet_is_little_endian(flags), - vlan_hlen, true)) { + vlan_hlen, true, false)) { struct virtio_net_hdr_v1 *hdr = &tnl_hdr->hash_hdr.hdr; struct skb_shared_info *sinfo = skb_shinfo(skb); diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c index cbffa9ae1bb6ba..dd53f413c38f60 100644 --- a/drivers/net/usb/aqc111.c +++ b/drivers/net/usb/aqc111.c @@ -1395,14 +1395,14 @@ static int aqc111_suspend(struct usb_interface *intf, pm_message_t message) aqc111_write16_cmd_nopm(dev, AQ_ACCESS_MAC, SFR_MEDIUM_STATUS_MODE, 2, ®16); - aqc111_write_cmd(dev, AQ_WOL_CFG, 0, 0, - WOL_CFG_SIZE, &wol_cfg); - aqc111_write32_cmd(dev, AQ_PHY_OPS, 0, 0, - &aqc111_data->phy_cfg); + aqc111_write_cmd_nopm(dev, AQ_WOL_CFG, 0, 0, + WOL_CFG_SIZE, &wol_cfg); + aqc111_write32_cmd_nopm(dev, AQ_PHY_OPS, 0, 0, + &aqc111_data->phy_cfg); } else { aqc111_data->phy_cfg |= AQ_LOW_POWER; - aqc111_write32_cmd(dev, AQ_PHY_OPS, 0, 0, - &aqc111_data->phy_cfg); + aqc111_write32_cmd_nopm(dev, AQ_PHY_OPS, 0, 0, + &aqc111_data->phy_cfg); /* Disable RX path */ aqc111_read16_cmd_nopm(dev, AQ_ACCESS_MAC, diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 7057c6c0cfc6b7..bb9929727eb932 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1656,6 +1656,7 @@ int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset) struct usbnet *dev = netdev_priv(skb_in->dev); struct usb_cdc_ncm_ndp16 *ndp16; int ret = -EINVAL; + size_t ndp_len; if ((ndpoffset + sizeof(struct usb_cdc_ncm_ndp16)) > skb_in->len) { netif_dbg(dev, rx_err, dev->net, "invalid NDP offset <%u>\n", @@ -1675,8 +1676,8 @@ int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset) sizeof(struct usb_cdc_ncm_dpe16)); ret--; /* we process NDP entries except for the last one */ - if ((sizeof(struct usb_cdc_ncm_ndp16) + - ret * (sizeof(struct usb_cdc_ncm_dpe16))) > skb_in->len) { + ndp_len = struct_size_t(struct usb_cdc_ncm_ndp16, dpe16, ret); + if (ndpoffset + ndp_len > skb_in->len) { netif_dbg(dev, rx_err, dev->net, "Invalid nframes = %d\n", ret); ret = -EINVAL; } @@ -1692,6 +1693,7 @@ int cdc_ncm_rx_verify_ndp32(struct sk_buff *skb_in, int ndpoffset) struct usbnet *dev = netdev_priv(skb_in->dev); struct usb_cdc_ncm_ndp32 *ndp32; int ret = -EINVAL; + size_t ndp_len; if ((ndpoffset + sizeof(struct usb_cdc_ncm_ndp32)) > skb_in->len) { netif_dbg(dev, rx_err, dev->net, "invalid NDP offset <%u>\n", @@ -1711,8 +1713,8 @@ int cdc_ncm_rx_verify_ndp32(struct sk_buff *skb_in, int ndpoffset) sizeof(struct usb_cdc_ncm_dpe32)); ret--; /* we process NDP entries except for the last one */ - if ((sizeof(struct usb_cdc_ncm_ndp32) + - ret * (sizeof(struct usb_cdc_ncm_dpe32))) > skb_in->len) { + ndp_len = struct_size_t(struct usb_cdc_ncm_ndp32, dpe32, ret); + if (ndpoffset + ndp_len > skb_in->len) { netif_dbg(dev, rx_err, dev->net, "Invalid nframes = %d\n", ret); ret = -EINVAL; } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 72d6a9c6a5a24f..ab2108ee206a91 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3267,8 +3267,12 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) struct virtio_net_hdr_v1_hash_tunnel *hdr; int num_sg; unsigned hdr_len = vi->hdr_len; + bool feature_hdrlen; bool can_push; + feature_hdrlen = virtio_has_feature(vi->vdev, + VIRTIO_NET_F_GUEST_HDRLEN); + pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); /* Make sure it's safe to cast between formats */ @@ -3288,7 +3292,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) if (virtio_net_hdr_tnl_from_skb(skb, hdr, vi->tx_tnl, virtio_is_little_endian(vi->vdev), 0, - false)) + false, feature_hdrlen)) return -EPROTO; if (vi->mergeable_rx_bufs) @@ -3351,6 +3355,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) /* Don't wait up for transmitted skbs to be freed. */ if (!use_napi) { skb_orphan(skb); + skb_dst_drop(skb); nf_reset_ct(skb); } diff --git a/drivers/net/wireless/ath/ath9k/channel.c b/drivers/net/wireless/ath/ath9k/channel.c index 121e51ce1bc0ea..8b27d8cc086ab7 100644 --- a/drivers/net/wireless/ath/ath9k/channel.c +++ b/drivers/net/wireless/ath/ath9k/channel.c @@ -1006,7 +1006,7 @@ static void ath_scan_send_probe(struct ath_softc *sc, skb_set_queue_mapping(skb, IEEE80211_AC_VO); if (!ieee80211_tx_prepare_skb(sc->hw, vif, skb, band, NULL)) - goto error; + return; txctl.txq = sc->tx.txq_map[IEEE80211_AC_VO]; if (ath_tx_start(sc->hw, skb, &txctl)) @@ -1119,10 +1119,8 @@ ath_chanctx_send_vif_ps_frame(struct ath_softc *sc, struct ath_vif *avp, skb->priority = 7; skb_set_queue_mapping(skb, IEEE80211_AC_VO); - if (!ieee80211_tx_prepare_skb(sc->hw, vif, skb, band, &sta)) { - dev_kfree_skb_any(skb); + if (!ieee80211_tx_prepare_skb(sc->hw, vif, skb, band, &sta)) return false; - } break; default: return false; diff --git a/drivers/net/wireless/mediatek/mt76/scan.c b/drivers/net/wireless/mediatek/mt76/scan.c index ff9176cdee3dee..63b0447e55c15b 100644 --- a/drivers/net/wireless/mediatek/mt76/scan.c +++ b/drivers/net/wireless/mediatek/mt76/scan.c @@ -63,10 +63,8 @@ mt76_scan_send_probe(struct mt76_dev *dev, struct cfg80211_ssid *ssid) rcu_read_lock(); - if (!ieee80211_tx_prepare_skb(phy->hw, vif, skb, band, NULL)) { - ieee80211_free_txskb(phy->hw, skb); + if (!ieee80211_tx_prepare_skb(phy->hw, vif, skb, band, NULL)) goto out; - } info = IEEE80211_SKB_CB(skb); if (req->no_cck) diff --git a/drivers/net/wireless/ti/wlcore/tx.c b/drivers/net/wireless/ti/wlcore/tx.c index 6241866d39df6d..75cfbcfb7626dc 100644 --- a/drivers/net/wireless/ti/wlcore/tx.c +++ b/drivers/net/wireless/ti/wlcore/tx.c @@ -210,7 +210,7 @@ static int wl1271_tx_allocate(struct wl1271 *wl, struct wl12xx_vif *wlvif, if (skb_headroom(skb) < (total_len - skb->len) && pskb_expand_head(skb, (total_len - skb->len), 0, GFP_ATOMIC)) { wl1271_free_tx_id(wl, id); - return -EAGAIN; + return -ENOMEM; } desc = skb_push(skb, total_len - skb->len); diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index e89173f9163774..1b6e55eb81a28b 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -3021,7 +3021,6 @@ static void hw_scan_work(struct work_struct *work) hwsim->tmp_chan->band, NULL)) { rcu_read_unlock(); - kfree_skb(probe); continue; } @@ -6489,7 +6488,7 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) if (info->attrs[HWSIM_ATTR_PMSR_SUPPORT]) { struct cfg80211_pmsr_capabilities *pmsr_capa; - pmsr_capa = kmalloc_obj(*pmsr_capa); + pmsr_capa = kzalloc_obj(*pmsr_capa); if (!pmsr_capa) { ret = -ENOMEM; goto out_free; diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c index 6a5ce8ff91f0b5..b3d34433bd14a0 100644 --- a/drivers/nfc/nxp-nci/i2c.c +++ b/drivers/nfc/nxp-nci/i2c.c @@ -47,8 +47,8 @@ static int nxp_nci_i2c_set_mode(void *phy_id, { struct nxp_nci_i2c_phy *phy = (struct nxp_nci_i2c_phy *) phy_id; - gpiod_set_value(phy->gpiod_fw, (mode == NXP_NCI_MODE_FW) ? 1 : 0); - gpiod_set_value(phy->gpiod_en, (mode != NXP_NCI_MODE_COLD) ? 1 : 0); + gpiod_set_value_cansleep(phy->gpiod_fw, (mode == NXP_NCI_MODE_FW) ? 1 : 0); + gpiod_set_value_cansleep(phy->gpiod_en, (mode != NXP_NCI_MODE_COLD) ? 1 : 0); usleep_range(10000, 15000); if (mode == NXP_NCI_MODE_COLD) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index bd9621d3f73cfc..45b7d756e39a04 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -486,14 +486,15 @@ EXPORT_SYMBOL_GPL(nd_synchronize); static void nd_async_device_register(void *d, async_cookie_t cookie) { struct device *dev = d; + struct device *parent = dev->parent; if (device_add(dev) != 0) { dev_err(dev, "%s: failed\n", __func__); put_device(dev); } put_device(dev); - if (dev->parent) - put_device(dev->parent); + if (parent) + put_device(parent); } static void nd_async_device_unregister(void *d, async_cookie_t cookie) diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c index 582938b7b4f1fb..33548935765e33 100644 --- a/drivers/pci/endpoint/functions/pci-epf-test.c +++ b/drivers/pci/endpoint/functions/pci-epf-test.c @@ -894,6 +894,11 @@ static void pci_epf_test_bar_subrange_setup(struct pci_epf_test *epf_test, dev_err(&epf->dev, "pci_epc_set_bar() failed: %d\n", ret); bar->submap = old_submap; bar->num_submap = old_nsub; + ret = pci_epc_set_bar(epc, epf->func_no, epf->vfunc_no, bar); + if (ret) + dev_warn(&epf->dev, "Failed to restore the original BAR mapping: %d\n", + ret); + kfree(submap); goto err; } diff --git a/drivers/pci/pwrctrl/core.c b/drivers/pci/pwrctrl/core.c index 6f7dea6746e0e0..7754baed67f229 100644 --- a/drivers/pci/pwrctrl/core.c +++ b/drivers/pci/pwrctrl/core.c @@ -268,6 +268,46 @@ int pci_pwrctrl_power_on_devices(struct device *parent) } EXPORT_SYMBOL_GPL(pci_pwrctrl_power_on_devices); +/* + * Check whether the pwrctrl device really needs to be created or not. The + * pwrctrl device will only be created if the node satisfies below requirements: + * + * 1. Presence of compatible property with "pci" prefix to match against the + * pwrctrl driver (AND) + * 2. At least one of the power supplies defined in the devicetree node of the + * device (OR) in the remote endpoint parent node to indicate pwrctrl + * requirement. + */ +static bool pci_pwrctrl_is_required(struct device_node *np) +{ + struct device_node *endpoint; + const char *compat; + int ret; + + ret = of_property_read_string(np, "compatible", &compat); + if (ret < 0) + return false; + + if (!strstarts(compat, "pci")) + return false; + + if (of_pci_supply_present(np)) + return true; + + if (of_graph_is_present(np)) { + for_each_endpoint_of_node(np, endpoint) { + struct device_node *remote __free(device_node) = + of_graph_get_remote_port_parent(endpoint); + if (remote) { + if (of_pci_supply_present(remote)) + return true; + } + } + } + + return false; +} + static int pci_pwrctrl_create_device(struct device_node *np, struct device *parent) { @@ -287,19 +327,7 @@ static int pci_pwrctrl_create_device(struct device_node *np, return 0; } - /* - * Sanity check to make sure that the node has the compatible property - * to allow driver binding. - */ - if (!of_property_present(np, "compatible")) - return 0; - - /* - * Check whether the pwrctrl device really needs to be created or not. - * This is decided based on at least one of the power supplies defined - * in the devicetree node of the device or the graph property. - */ - if (!of_pci_supply_present(np) && !of_graph_is_present(np)) { + if (!pci_pwrctrl_is_required(np)) { dev_dbg(parent, "Skipping OF node: %s\n", np->name); return 0; } diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index d6a46fe0cda891..3f518dce6d23f1 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -1135,9 +1135,12 @@ int mtk_pctrl_init(struct platform_device *pdev, goto chip_error; } - ret = mtk_eint_init(pctl, pdev); - if (ret) - goto chip_error; + /* Only initialize EINT if we have EINT pins */ + if (data->eint_hw.ap_num > 0) { + ret = mtk_eint_init(pctl, pdev); + if (ret) + goto chip_error; + } return 0; diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c index 83f940fe30b26a..d02d42513ebbca 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c @@ -723,6 +723,21 @@ static const struct pinconf_ops pmic_gpio_pinconf_ops = { .pin_config_group_dbg_show = pmic_gpio_config_dbg_show, }; +static int pmic_gpio_get_direction(struct gpio_chip *chip, unsigned pin) +{ + struct pmic_gpio_state *state = gpiochip_get_data(chip); + struct pmic_gpio_pad *pad; + + pad = state->ctrl->desc->pins[pin].drv_data; + + if (!pad->is_enabled || pad->analog_pass || + (!pad->input_enabled && !pad->output_enabled)) + return -EINVAL; + + /* Make sure the state is aligned on what pmic_gpio_get() returns */ + return pad->input_enabled ? GPIO_LINE_DIRECTION_IN : GPIO_LINE_DIRECTION_OUT; +} + static int pmic_gpio_direction_input(struct gpio_chip *chip, unsigned pin) { struct pmic_gpio_state *state = gpiochip_get_data(chip); @@ -801,6 +816,7 @@ static void pmic_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) } static const struct gpio_chip pmic_gpio_gpio_template = { + .get_direction = pmic_gpio_get_direction, .direction_input = pmic_gpio_direction_input, .direction_output = pmic_gpio_direction_output, .get = pmic_gpio_get, diff --git a/drivers/pinctrl/renesas/pinctrl-rza1.c b/drivers/pinctrl/renesas/pinctrl-rza1.c index bc8b9b9ad05b99..d2949e4dbaf73c 100644 --- a/drivers/pinctrl/renesas/pinctrl-rza1.c +++ b/drivers/pinctrl/renesas/pinctrl-rza1.c @@ -589,7 +589,7 @@ static inline unsigned int rza1_get_bit(struct rza1_port *port, { void __iomem *mem = RZA1_ADDR(port->base, reg, port->id); - return ioread16(mem) & BIT(bit); + return !!(ioread16(mem) & BIT(bit)); } /** diff --git a/drivers/pinctrl/renesas/pinctrl-rzt2h.c b/drivers/pinctrl/renesas/pinctrl-rzt2h.c index 9949108a35bbc3..5927744c7a966c 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzt2h.c +++ b/drivers/pinctrl/renesas/pinctrl-rzt2h.c @@ -85,7 +85,7 @@ struct rzt2h_pinctrl { struct gpio_chip gpio_chip; struct pinctrl_gpio_range gpio_range; DECLARE_BITMAP(used_irqs, RZT2H_INTERRUPTS_NUM); - spinlock_t lock; /* lock read/write registers */ + raw_spinlock_t lock; /* lock read/write registers */ struct mutex mutex; /* serialize adding groups and functions */ bool safety_port_enabled; atomic_t wakeup_path; @@ -145,7 +145,7 @@ static void rzt2h_pinctrl_set_pfc_mode(struct rzt2h_pinctrl *pctrl, u64 reg64; u16 reg16; - guard(spinlock_irqsave)(&pctrl->lock); + guard(raw_spinlock_irqsave)(&pctrl->lock); /* Set pin to 'Non-use (Hi-Z input protection)' */ reg16 = rzt2h_pinctrl_readw(pctrl, port, PM(port)); @@ -474,7 +474,7 @@ static int rzt2h_gpio_request(struct gpio_chip *chip, unsigned int offset) if (ret) return ret; - guard(spinlock_irqsave)(&pctrl->lock); + guard(raw_spinlock_irqsave)(&pctrl->lock); /* Select GPIO mode in PMC Register */ rzt2h_pinctrl_set_gpio_en(pctrl, port, bit, true); @@ -487,7 +487,7 @@ static void rzt2h_gpio_set_direction(struct rzt2h_pinctrl *pctrl, u32 port, { u16 reg; - guard(spinlock_irqsave)(&pctrl->lock); + guard(raw_spinlock_irqsave)(&pctrl->lock); reg = rzt2h_pinctrl_readw(pctrl, port, PM(port)); reg &= ~PM_PIN_MASK(bit); @@ -509,7 +509,7 @@ static int rzt2h_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) if (ret) return ret; - guard(spinlock_irqsave)(&pctrl->lock); + guard(raw_spinlock_irqsave)(&pctrl->lock); if (rzt2h_pinctrl_readb(pctrl, port, PMC(port)) & BIT(bit)) { /* @@ -547,7 +547,7 @@ static int rzt2h_gpio_set(struct gpio_chip *chip, unsigned int offset, u8 bit = RZT2H_PIN_ID_TO_PIN(offset); u8 reg; - guard(spinlock_irqsave)(&pctrl->lock); + guard(raw_spinlock_irqsave)(&pctrl->lock); reg = rzt2h_pinctrl_readb(pctrl, port, P(port)); if (value) @@ -833,6 +833,7 @@ static int rzt2h_gpio_register(struct rzt2h_pinctrl *pctrl) if (ret) return dev_err_probe(dev, ret, "Unable to parse gpio-ranges\n"); + of_node_put(of_args.np); if (of_args.args[0] != 0 || of_args.args[1] != 0 || of_args.args[2] != pctrl->data->n_port_pins) return dev_err_probe(dev, -EINVAL, @@ -964,7 +965,7 @@ static int rzt2h_pinctrl_probe(struct platform_device *pdev) if (ret) return ret; - spin_lock_init(&pctrl->lock); + raw_spin_lock_init(&pctrl->lock); mutex_init(&pctrl->mutex); platform_set_drvdata(pdev, pctrl); diff --git a/drivers/pinctrl/stm32/Kconfig b/drivers/pinctrl/stm32/Kconfig index 5f67e1ee66dd96..d6a17152301216 100644 --- a/drivers/pinctrl/stm32/Kconfig +++ b/drivers/pinctrl/stm32/Kconfig @@ -65,6 +65,7 @@ config PINCTRL_STM32_HDP select PINMUX select GENERIC_PINCONF select GPIOLIB + select GPIO_GENERIC help The Hardware Debug Port allows the observation of internal signals. It uses configurable multiplexer to route signals in a dedicated observation register. diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index c990b611817292..d3042e0c9712e8 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -157,6 +157,7 @@ sunxi_pinctrl_desc_find_function_by_name(struct sunxi_pinctrl *pctl, const char *pin_name, const char *func_name) { + unsigned long variant = pctl->flags & SUNXI_PINCTRL_VARIANT_MASK; int i; for (i = 0; i < pctl->desc->npins; i++) { @@ -168,7 +169,7 @@ sunxi_pinctrl_desc_find_function_by_name(struct sunxi_pinctrl *pctl, while (func->name) { if (!strcmp(func->name, func_name) && (!func->variant || - func->variant & pctl->variant)) + func->variant & variant)) return func; func++; @@ -209,6 +210,8 @@ sunxi_pinctrl_desc_find_function_by_pin_and_mux(struct sunxi_pinctrl *pctl, const u16 pin_num, const u8 muxval) { + unsigned long variant = pctl->flags & SUNXI_PINCTRL_VARIANT_MASK; + for (unsigned int i = 0; i < pctl->desc->npins; i++) { const struct sunxi_desc_pin *pin = pctl->desc->pins + i; struct sunxi_desc_function *func = pin->functions; @@ -216,7 +219,7 @@ sunxi_pinctrl_desc_find_function_by_pin_and_mux(struct sunxi_pinctrl *pctl, if (pin->pin.number != pin_num) continue; - if (pin->variant && !(pctl->variant & pin->variant)) + if (pin->variant && !(variant & pin->variant)) continue; while (func->name) { @@ -1089,6 +1092,9 @@ static int sunxi_pinctrl_irq_request_resources(struct irq_data *d) { struct sunxi_pinctrl *pctl = irq_data_get_irq_chip_data(d); struct sunxi_desc_function *func; + unsigned int offset; + u32 reg, shift, mask; + u8 disabled_mux, muxval; int ret; func = sunxi_pinctrl_desc_find_function_by_pin(pctl, @@ -1096,8 +1102,21 @@ static int sunxi_pinctrl_irq_request_resources(struct irq_data *d) if (!func) return -EINVAL; - ret = gpiochip_lock_as_irq(pctl->chip, - pctl->irq_array[d->hwirq] - pctl->desc->pin_base); + offset = pctl->irq_array[d->hwirq] - pctl->desc->pin_base; + sunxi_mux_reg(pctl, offset, ®, &shift, &mask); + muxval = (readl(pctl->membase + reg) & mask) >> shift; + + /* Change muxing to GPIO INPUT mode if at reset value */ + if (pctl->flags & SUNXI_PINCTRL_NEW_REG_LAYOUT) + disabled_mux = SUN4I_FUNC_DISABLED_NEW; + else + disabled_mux = SUN4I_FUNC_DISABLED_OLD; + + if (muxval == disabled_mux) + sunxi_pmx_set(pctl->pctl_dev, pctl->irq_array[d->hwirq], + SUN4I_FUNC_INPUT); + + ret = gpiochip_lock_as_irq(pctl->chip, offset); if (ret) { dev_err(pctl->dev, "unable to lock HW IRQ %lu for IRQ\n", irqd_to_hwirq(d)); @@ -1338,6 +1357,7 @@ static int sunxi_pinctrl_add_function(struct sunxi_pinctrl *pctl, static int sunxi_pinctrl_build_state(struct platform_device *pdev) { struct sunxi_pinctrl *pctl = platform_get_drvdata(pdev); + unsigned long variant = pctl->flags & SUNXI_PINCTRL_VARIANT_MASK; void *ptr; int i; @@ -1362,7 +1382,7 @@ static int sunxi_pinctrl_build_state(struct platform_device *pdev) const struct sunxi_desc_pin *pin = pctl->desc->pins + i; struct sunxi_pinctrl_group *group = pctl->groups + pctl->ngroups; - if (pin->variant && !(pctl->variant & pin->variant)) + if (pin->variant && !(variant & pin->variant)) continue; group->name = pin->pin.name; @@ -1387,11 +1407,11 @@ static int sunxi_pinctrl_build_state(struct platform_device *pdev) const struct sunxi_desc_pin *pin = pctl->desc->pins + i; struct sunxi_desc_function *func; - if (pin->variant && !(pctl->variant & pin->variant)) + if (pin->variant && !(variant & pin->variant)) continue; for (func = pin->functions; func->name; func++) { - if (func->variant && !(pctl->variant & func->variant)) + if (func->variant && !(variant & func->variant)) continue; /* Create interrupt mapping while we're at it */ @@ -1419,14 +1439,14 @@ static int sunxi_pinctrl_build_state(struct platform_device *pdev) const struct sunxi_desc_pin *pin = pctl->desc->pins + i; struct sunxi_desc_function *func; - if (pin->variant && !(pctl->variant & pin->variant)) + if (pin->variant && !(variant & pin->variant)) continue; for (func = pin->functions; func->name; func++) { struct sunxi_pinctrl_function *func_item; const char **func_grp; - if (func->variant && !(pctl->variant & func->variant)) + if (func->variant && !(variant & func->variant)) continue; func_item = sunxi_pinctrl_find_function_by_name(pctl, @@ -1568,7 +1588,7 @@ int sunxi_pinctrl_init_with_flags(struct platform_device *pdev, pctl->dev = &pdev->dev; pctl->desc = desc; - pctl->variant = flags & SUNXI_PINCTRL_VARIANT_MASK; + pctl->flags = flags; if (flags & SUNXI_PINCTRL_NEW_REG_LAYOUT) { pctl->bank_mem_size = D1_BANK_MEM_SIZE; pctl->pull_regs_offset = D1_PULL_REGS_OFFSET; @@ -1604,8 +1624,9 @@ int sunxi_pinctrl_init_with_flags(struct platform_device *pdev, for (i = 0, pin_idx = 0; i < pctl->desc->npins; i++) { const struct sunxi_desc_pin *pin = pctl->desc->pins + i; + unsigned long variant = pctl->flags & SUNXI_PINCTRL_VARIANT_MASK; - if (pin->variant && !(pctl->variant & pin->variant)) + if (pin->variant && !(variant & pin->variant)) continue; pins[pin_idx++] = pin->pin; diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.h b/drivers/pinctrl/sunxi/pinctrl-sunxi.h index ad26e4de16a85e..0daf7600e2fb01 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.h +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.h @@ -86,6 +86,8 @@ #define SUN4I_FUNC_INPUT 0 #define SUN4I_FUNC_IRQ 6 +#define SUN4I_FUNC_DISABLED_OLD 7 +#define SUN4I_FUNC_DISABLED_NEW 15 #define SUNXI_PINCTRL_VARIANT_MASK GENMASK(7, 0) #define SUNXI_PINCTRL_NEW_REG_LAYOUT BIT(8) @@ -174,7 +176,7 @@ struct sunxi_pinctrl { unsigned *irq_array; raw_spinlock_t lock; struct pinctrl_dev *pctl_dev; - unsigned long variant; + unsigned long flags; u32 bank_mem_size; u32 pull_regs_offset; u32 dlevel_field_width; diff --git a/drivers/platform/olpc/olpc-xo175-ec.c b/drivers/platform/olpc/olpc-xo175-ec.c index fa7b3bda688a63..bee271a4fda1a9 100644 --- a/drivers/platform/olpc/olpc-xo175-ec.c +++ b/drivers/platform/olpc/olpc-xo175-ec.c @@ -482,7 +482,7 @@ static int olpc_xo175_ec_cmd(u8 cmd, u8 *inbuf, size_t inlen, u8 *resp, dev_dbg(dev, "CMD %x, %zd bytes expected\n", cmd, resp_len); if (inlen > 5) { - dev_err(dev, "command len %zd too big!\n", resp_len); + dev_err(dev, "command len %zd too big!\n", inlen); return -EOVERFLOW; } diff --git a/drivers/platform/x86/amd/hsmp/hsmp.c b/drivers/platform/x86/amd/hsmp/hsmp.c index 19f82c1d309059..631ffc0978d16d 100644 --- a/drivers/platform/x86/amd/hsmp/hsmp.c +++ b/drivers/platform/x86/amd/hsmp/hsmp.c @@ -117,7 +117,7 @@ static int __hsmp_send_message(struct hsmp_socket *sock, struct hsmp_message *ms } if (unlikely(mbox_status == HSMP_STATUS_NOT_READY)) { - dev_err(sock->dev, "Message ID 0x%X failure : SMU tmeout (status = 0x%X)\n", + dev_err(sock->dev, "Message ID 0x%X failure : SMU timeout (status = 0x%X)\n", msg->msg_id, mbox_status); return -ETIMEDOUT; } else if (unlikely(mbox_status == HSMP_ERR_INVALID_MSG)) { diff --git a/drivers/platform/x86/asus-armoury.h b/drivers/platform/x86/asus-armoury.h index 208f6fe16168c2..569743746347cd 100644 --- a/drivers/platform/x86/asus-armoury.h +++ b/drivers/platform/x86/asus-armoury.h @@ -1080,6 +1080,20 @@ static const struct dmi_system_id power_limits[] = { .requires_fan_curve = true, }, }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GA503QM"), + }, + .driver_data = &(struct power_data) { + .ac_data = &(struct power_limits) { + .ppt_pl1_spl_min = 15, + .ppt_pl1_spl_def = 35, + .ppt_pl1_spl_max = 80, + .ppt_pl2_sppt_min = 65, + .ppt_pl2_sppt_max = 80, + }, + }, + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "GA503QR"), @@ -1518,6 +1532,35 @@ static const struct dmi_system_id power_limits[] = { }, }, }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GZ302EA"), + }, + .driver_data = &(struct power_data) { + .ac_data = &(struct power_limits) { + .ppt_pl1_spl_min = 28, + .ppt_pl1_spl_def = 60, + .ppt_pl1_spl_max = 80, + .ppt_pl2_sppt_min = 32, + .ppt_pl2_sppt_def = 75, + .ppt_pl2_sppt_max = 92, + .ppt_pl3_fppt_min = 45, + .ppt_pl3_fppt_def = 86, + .ppt_pl3_fppt_max = 93, + }, + .dc_data = &(struct power_limits) { + .ppt_pl1_spl_min = 28, + .ppt_pl1_spl_def = 45, + .ppt_pl1_spl_max = 80, + .ppt_pl2_sppt_min = 32, + .ppt_pl2_sppt_def = 52, + .ppt_pl2_sppt_max = 92, + .ppt_pl3_fppt_min = 45, + .ppt_pl3_fppt_def = 71, + .ppt_pl3_fppt_max = 93, + }, + }, + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "G513I"), @@ -1596,6 +1639,40 @@ static const struct dmi_system_id power_limits[] = { .requires_fan_curve = true, }, }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "G614FP"), + }, + .driver_data = &(struct power_data) { + .ac_data = &(struct power_limits) { + .ppt_pl1_spl_min = 30, + .ppt_pl1_spl_max = 120, + .ppt_pl2_sppt_min = 65, + .ppt_pl2_sppt_def = 140, + .ppt_pl2_sppt_max = 165, + .ppt_pl3_fppt_min = 65, + .ppt_pl3_fppt_def = 140, + .ppt_pl3_fppt_max = 165, + .nv_temp_target_min = 75, + .nv_temp_target_max = 87, + .nv_dynamic_boost_min = 5, + .nv_dynamic_boost_max = 15, + .nv_tgp_min = 50, + .nv_tgp_max = 100, + }, + .dc_data = &(struct power_limits) { + .ppt_pl1_spl_min = 25, + .ppt_pl1_spl_max = 65, + .ppt_pl2_sppt_min = 25, + .ppt_pl2_sppt_max = 65, + .ppt_pl3_fppt_min = 35, + .ppt_pl3_fppt_max = 75, + .nv_temp_target_min = 75, + .nv_temp_target_max = 87, + }, + .requires_fan_curve = true, + }, + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "G614J"), diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index a38a65f5c550d0..b4677c5bba5b44 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -548,7 +548,7 @@ static const struct dmi_system_id asus_quirks[] = { .callback = dmi_matched, .ident = "ASUS ROG Z13", .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_SYS_VENDOR, "ASUS"), DMI_MATCH(DMI_PRODUCT_NAME, "ROG Flow Z13"), }, .driver_data = &quirk_asus_z13, diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c index 68ede7e5757a91..988a0acc9622b8 100644 --- a/drivers/platform/x86/hp/hp-wmi.c +++ b/drivers/platform/x86/hp/hp-wmi.c @@ -120,6 +120,13 @@ static const struct thermal_profile_params omen_v1_thermal_params = { .ec_tp_offset = HP_VICTUS_S_EC_THERMAL_PROFILE_OFFSET, }; +static const struct thermal_profile_params omen_v1_legacy_thermal_params = { + .performance = HP_OMEN_V1_THERMAL_PROFILE_PERFORMANCE, + .balanced = HP_OMEN_V1_THERMAL_PROFILE_DEFAULT, + .low_power = HP_OMEN_V1_THERMAL_PROFILE_DEFAULT, + .ec_tp_offset = HP_OMEN_EC_THERMAL_PROFILE_OFFSET, +}; + /* * A generic pointer for the currently-active board's thermal profile * parameters. @@ -175,6 +182,10 @@ static const char * const victus_thermal_profile_boards[] = { /* DMI Board names of Victus 16-r and Victus 16-s laptops */ static const struct dmi_system_id victus_s_thermal_profile_boards[] __initconst = { + { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "8A4D") }, + .driver_data = (void *)&omen_v1_legacy_thermal_params, + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BAB") }, .driver_data = (void *)&omen_v1_thermal_params, @@ -183,6 +194,10 @@ static const struct dmi_system_id victus_s_thermal_profile_boards[] __initconst .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BBE") }, .driver_data = (void *)&victus_s_thermal_params, }, + { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BCA") }, + .driver_data = (void *)&omen_v1_thermal_params, + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BCD") }, .driver_data = (void *)&omen_v1_thermal_params, @@ -195,6 +210,10 @@ static const struct dmi_system_id victus_s_thermal_profile_boards[] __initconst .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BD5") }, .driver_data = (void *)&victus_s_thermal_params, }, + { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "8C76") }, + .driver_data = (void *)&omen_v1_thermal_params, + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "8C78") }, .driver_data = (void *)&omen_v1_thermal_params, diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index 95c405c8bac02a..2ddd8af8c1ce9e 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -438,6 +438,14 @@ static int intel_hid_pl_suspend_handler(struct device *device) return 0; } +static int intel_hid_pl_freeze_handler(struct device *device) +{ + struct intel_hid_priv *priv = dev_get_drvdata(device); + + priv->wakeup_mode = false; + return intel_hid_pl_suspend_handler(device); +} + static int intel_hid_pl_resume_handler(struct device *device) { intel_hid_pm_complete(device); @@ -452,7 +460,7 @@ static int intel_hid_pl_resume_handler(struct device *device) static const struct dev_pm_ops intel_hid_pl_pm_ops = { .prepare = intel_hid_pm_prepare, .complete = intel_hid_pm_complete, - .freeze = intel_hid_pl_suspend_handler, + .freeze = intel_hid_pl_freeze_handler, .thaw = intel_hid_pl_resume_handler, .restore = intel_hid_pl_resume_handler, .suspend = intel_hid_pl_suspend_handler, diff --git a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c index b8cdaa233ea9ce..e238c3105c78e7 100644 --- a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c +++ b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c @@ -558,6 +558,9 @@ static bool disable_dynamic_sst_features(void) { u64 value; + if (!static_cpu_has(X86_FEATURE_HWP)) + return true; + rdmsrq(MSR_PM_ENABLE, value); return !(value & 0x1); } @@ -869,7 +872,7 @@ static int isst_if_get_perf_level(void __user *argp) _read_pp_info("current_level", perf_level.current_level, SST_PP_STATUS_OFFSET, SST_PP_LEVEL_START, SST_PP_LEVEL_WIDTH, SST_MUL_FACTOR_NONE) _read_pp_info("locked", perf_level.locked, SST_PP_STATUS_OFFSET, - SST_PP_LOCK_START, SST_PP_LEVEL_WIDTH, SST_MUL_FACTOR_NONE) + SST_PP_LOCK_START, SST_PP_LOCK_WIDTH, SST_MUL_FACTOR_NONE) _read_pp_info("feature_state", perf_level.feature_state, SST_PP_STATUS_OFFSET, SST_PP_FEATURE_STATE_START, SST_PP_FEATURE_STATE_WIDTH, SST_MUL_FACTOR_NONE) perf_level.enabled = !!(power_domain_info->sst_header.cap_mask & BIT(1)); diff --git a/drivers/platform/x86/lenovo/wmi-gamezone.c b/drivers/platform/x86/lenovo/wmi-gamezone.c index 381836d29a9646..c7fe7e3c9f1791 100644 --- a/drivers/platform/x86/lenovo/wmi-gamezone.c +++ b/drivers/platform/x86/lenovo/wmi-gamezone.c @@ -31,8 +31,6 @@ #define LWMI_GZ_METHOD_ID_SMARTFAN_SET 44 #define LWMI_GZ_METHOD_ID_SMARTFAN_GET 45 -static BLOCKING_NOTIFIER_HEAD(gz_chain_head); - struct lwmi_gz_priv { enum thermal_mode current_mode; struct notifier_block event_nb; diff --git a/drivers/pmdomain/bcm/bcm2835-power.c b/drivers/pmdomain/bcm/bcm2835-power.c index 0450202bbee251..eee87a30053258 100644 --- a/drivers/pmdomain/bcm/bcm2835-power.c +++ b/drivers/pmdomain/bcm/bcm2835-power.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -153,7 +154,6 @@ struct bcm2835_power { static int bcm2835_asb_control(struct bcm2835_power *power, u32 reg, bool enable) { void __iomem *base = power->asb; - u64 start; u32 val; switch (reg) { @@ -166,8 +166,6 @@ static int bcm2835_asb_control(struct bcm2835_power *power, u32 reg, bool enable break; } - start = ktime_get_ns(); - /* Enable the module's async AXI bridges. */ if (enable) { val = readl(base + reg) & ~ASB_REQ_STOP; @@ -176,11 +174,9 @@ static int bcm2835_asb_control(struct bcm2835_power *power, u32 reg, bool enable } writel(PM_PASSWORD | val, base + reg); - while (!!(readl(base + reg) & ASB_ACK) == enable) { - cpu_relax(); - if (ktime_get_ns() - start >= 1000) - return -ETIMEDOUT; - } + if (readl_poll_timeout_atomic(base + reg, val, + !!(val & ASB_ACK) != enable, 0, 5)) + return -ETIMEDOUT; return 0; } diff --git a/drivers/pmdomain/mediatek/mtk-pm-domains.c b/drivers/pmdomain/mediatek/mtk-pm-domains.c index f64f24d520ddd7..e2800aa1bc597f 100644 --- a/drivers/pmdomain/mediatek/mtk-pm-domains.c +++ b/drivers/pmdomain/mediatek/mtk-pm-domains.c @@ -1203,7 +1203,7 @@ static int scpsys_probe(struct platform_device *pdev) scpsys->soc_data = soc; scpsys->pd_data.domains = scpsys->domains; - scpsys->pd_data.num_domains = soc->num_domains; + scpsys->pd_data.num_domains = num_domains; parent = dev->parent; if (!parent) { diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 1eebc26021871d..0666be6b0e88d3 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -1428,6 +1428,7 @@ static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, static int mpam_restore_mbwu_state(void *_ris) { int i; + u64 val; struct mon_read mwbu_arg; struct mpam_msc_ris *ris = _ris; struct mpam_class *class = ris->vmsc->comp->class; @@ -1437,6 +1438,7 @@ static int mpam_restore_mbwu_state(void *_ris) mwbu_arg.ris = ris; mwbu_arg.ctx = &ris->mbwu_state[i].cfg; mwbu_arg.type = mpam_msmon_choose_counter(class); + mwbu_arg.val = &val; __ris_msmon_read(&mwbu_arg); } diff --git a/drivers/resctrl/test_mpam_devices.c b/drivers/resctrl/test_mpam_devices.c index 3e8d564a0c6474..31871f51972999 100644 --- a/drivers/resctrl/test_mpam_devices.c +++ b/drivers/resctrl/test_mpam_devices.c @@ -322,9 +322,17 @@ static void test_mpam_enable_merge_features(struct kunit *test) mutex_unlock(&mpam_list_lock); } +static void __test_mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd) +{ + /* Avoid warnings when running with CONFIG_DEBUG_PREEMPT */ + guard(preempt)(); + + mpam_reset_msc_bitmap(msc, reg, wd); +} + static void test_mpam_reset_msc_bitmap(struct kunit *test) { - char __iomem *buf = kunit_kzalloc(test, SZ_16K, GFP_KERNEL); + char __iomem *buf = (__force char __iomem *)kunit_kzalloc(test, SZ_16K, GFP_KERNEL); struct mpam_msc fake_msc = {}; u32 *test_result; @@ -339,33 +347,33 @@ static void test_mpam_reset_msc_bitmap(struct kunit *test) mutex_init(&fake_msc.part_sel_lock); mutex_lock(&fake_msc.part_sel_lock); - test_result = (u32 *)(buf + MPAMCFG_CPBM); + test_result = (__force u32 *)(buf + MPAMCFG_CPBM); - mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 0); + __test_mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 0); KUNIT_EXPECT_EQ(test, test_result[0], 0); KUNIT_EXPECT_EQ(test, test_result[1], 0); test_result[0] = 0; test_result[1] = 0; - mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 1); + __test_mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 1); KUNIT_EXPECT_EQ(test, test_result[0], 1); KUNIT_EXPECT_EQ(test, test_result[1], 0); test_result[0] = 0; test_result[1] = 0; - mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 16); + __test_mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 16); KUNIT_EXPECT_EQ(test, test_result[0], 0xffff); KUNIT_EXPECT_EQ(test, test_result[1], 0); test_result[0] = 0; test_result[1] = 0; - mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 32); + __test_mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 32); KUNIT_EXPECT_EQ(test, test_result[0], 0xffffffff); KUNIT_EXPECT_EQ(test, test_result[1], 0); test_result[0] = 0; test_result[1] = 0; - mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 33); + __test_mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 33); KUNIT_EXPECT_EQ(test, test_result[0], 0xffffffff); KUNIT_EXPECT_EQ(test, test_result[1], 1); test_result[0] = 0; diff --git a/drivers/reset/reset-rzg2l-usbphy-ctrl.c b/drivers/reset/reset-rzg2l-usbphy-ctrl.c index 32bc268c9149e3..05dd9b4a02df5c 100644 --- a/drivers/reset/reset-rzg2l-usbphy-ctrl.c +++ b/drivers/reset/reset-rzg2l-usbphy-ctrl.c @@ -136,6 +136,9 @@ static int rzg2l_usbphy_ctrl_set_pwrrdy(struct regmap_field *pwrrdy, { u32 val = power_on ? 0 : 1; + if (!pwrrdy) + return 0; + /* The initialization path guarantees that the mask is 1 bit long. */ return regmap_field_update_bits(pwrrdy, 1, val); } diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c index 9aa7218b4e8d2b..1ed6be6e85d2cf 100644 --- a/drivers/slimbus/qcom-ngd-ctrl.c +++ b/drivers/slimbus/qcom-ngd-ctrl.c @@ -1535,10 +1535,8 @@ static int of_qcom_slim_ngd_register(struct device *parent, ngd->id = id; ngd->pdev->dev.parent = parent; - ret = driver_set_override(&ngd->pdev->dev, - &ngd->pdev->driver_override, - QCOM_SLIM_NGD_DRV_NAME, - strlen(QCOM_SLIM_NGD_DRV_NAME)); + ret = device_set_driver_override(&ngd->pdev->dev, + QCOM_SLIM_NGD_DRV_NAME); if (ret) { platform_device_put(ngd->pdev); kfree(ngd); diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 411381f1a1c4b5..9ddafcb18f1c73 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -1827,6 +1827,8 @@ EXPORT_SYMBOL(qman_create_fq); void qman_destroy_fq(struct qman_fq *fq) { + int leaked; + /* * We don't need to lock the FQ as it is a pre-condition that the FQ be * quiesced. Instead, run some checks. @@ -1834,11 +1836,29 @@ void qman_destroy_fq(struct qman_fq *fq) switch (fq->state) { case qman_fq_state_parked: case qman_fq_state_oos: - if (fq_isset(fq, QMAN_FQ_FLAG_DYNAMIC_FQID)) - qman_release_fqid(fq->fqid); + /* + * There's a race condition here on releasing the fqid, + * setting the fq_table to NULL, and freeing the fqid. + * To prevent it, this order should be respected: + */ + if (fq_isset(fq, QMAN_FQ_FLAG_DYNAMIC_FQID)) { + leaked = qman_shutdown_fq(fq->fqid); + if (leaked) + pr_debug("FQID %d leaked\n", fq->fqid); + } DPAA_ASSERT(fq_table[fq->idx]); fq_table[fq->idx] = NULL; + + if (fq_isset(fq, QMAN_FQ_FLAG_DYNAMIC_FQID) && !leaked) { + /* + * fq_table[fq->idx] should be set to null before + * freeing fq->fqid otherwise it could by allocated by + * qman_alloc_fqid() while still being !NULL + */ + smp_wmb(); + gen_pool_free(qm_fqalloc, fq->fqid | DPAA_GENALLOC_OFF, 1); + } return; default: break; diff --git a/drivers/soc/fsl/qe/qmc.c b/drivers/soc/fsl/qe/qmc.c index c4587b32a59bfa..672adff8e35f4c 100644 --- a/drivers/soc/fsl/qe/qmc.c +++ b/drivers/soc/fsl/qe/qmc.c @@ -1790,8 +1790,8 @@ static int qmc_qe_init_resources(struct qmc *qmc, struct platform_device *pdev) return -EINVAL; qmc->dpram_offset = res->start - qe_muram_dma(qe_muram_addr(0)); qmc->dpram = devm_ioremap_resource(qmc->dev, res); - if (IS_ERR(qmc->scc_pram)) - return PTR_ERR(qmc->scc_pram); + if (IS_ERR(qmc->dpram)) + return PTR_ERR(qmc->dpram); return 0; } diff --git a/drivers/soc/microchip/mpfs-sys-controller.c b/drivers/soc/microchip/mpfs-sys-controller.c index 8e7ae3cb92fff9..10b2fc39da66cf 100644 --- a/drivers/soc/microchip/mpfs-sys-controller.c +++ b/drivers/soc/microchip/mpfs-sys-controller.c @@ -142,8 +142,10 @@ static int mpfs_sys_controller_probe(struct platform_device *pdev) sys_controller->flash = of_get_mtd_device_by_node(np); of_node_put(np); - if (IS_ERR(sys_controller->flash)) - return dev_err_probe(dev, PTR_ERR(sys_controller->flash), "Failed to get flash\n"); + if (IS_ERR(sys_controller->flash)) { + ret = dev_err_probe(dev, PTR_ERR(sys_controller->flash), "Failed to get flash\n"); + goto out_free; + } no_flash: sys_controller->client.dev = dev; @@ -155,8 +157,7 @@ static int mpfs_sys_controller_probe(struct platform_device *pdev) if (IS_ERR(sys_controller->chan)) { ret = dev_err_probe(dev, PTR_ERR(sys_controller->chan), "Failed to get mbox channel\n"); - kfree(sys_controller); - return ret; + goto out_free; } init_completion(&sys_controller->c); @@ -174,6 +175,10 @@ static int mpfs_sys_controller_probe(struct platform_device *pdev) dev_info(&pdev->dev, "Registered MPFS system controller\n"); return 0; + +out_free: + kfree(sys_controller); + return ret; } static void mpfs_sys_controller_remove(struct platform_device *pdev) diff --git a/drivers/soc/rockchip/grf.c b/drivers/soc/rockchip/grf.c index 04937c40da471e..b459607c118aa0 100644 --- a/drivers/soc/rockchip/grf.c +++ b/drivers/soc/rockchip/grf.c @@ -231,6 +231,7 @@ static int __init rockchip_grf_init(void) grf = syscon_node_to_regmap(np); if (IS_ERR(grf)) { pr_err("%s: could not get grf syscon\n", __func__); + of_node_put(np); return PTR_ERR(grf); } diff --git a/drivers/spi/spi-amlogic-spifc-a4.c b/drivers/spi/spi-amlogic-spifc-a4.c index 3956869cfec119..1aabafa36e48ed 100644 --- a/drivers/spi/spi-amlogic-spifc-a4.c +++ b/drivers/spi/spi-amlogic-spifc-a4.c @@ -1083,14 +1083,6 @@ static int aml_sfc_clk_init(struct aml_sfc *sfc) return clk_set_rate(sfc->core_clk, SFC_BUS_DEFAULT_CLK); } -static int aml_sfc_disable_clk(struct aml_sfc *sfc) -{ - clk_disable_unprepare(sfc->core_clk); - clk_disable_unprepare(sfc->gate_clk); - - return 0; -} - static int aml_sfc_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -1141,16 +1133,12 @@ static int aml_sfc_probe(struct platform_device *pdev) /* Enable Amlogic flash controller spi mode */ ret = regmap_write(sfc->regmap_base, SFC_SPI_CFG, SPI_MODE_EN); - if (ret) { - dev_err(dev, "failed to enable SPI mode\n"); - goto err_out; - } + if (ret) + return dev_err_probe(dev, ret, "failed to enable SPI mode\n"); ret = dma_set_mask(sfc->dev, DMA_BIT_MASK(32)); - if (ret) { - dev_err(sfc->dev, "failed to set dma mask\n"); - goto err_out; - } + if (ret) + return dev_err_probe(sfc->dev, ret, "failed to set dma mask\n"); sfc->ecc_eng.dev = &pdev->dev; sfc->ecc_eng.integration = NAND_ECC_ENGINE_INTEGRATION_PIPELINED; @@ -1158,10 +1146,8 @@ static int aml_sfc_probe(struct platform_device *pdev) sfc->ecc_eng.priv = sfc; ret = nand_ecc_register_on_host_hw_engine(&sfc->ecc_eng); - if (ret) { - dev_err(&pdev->dev, "failed to register Aml host ecc engine.\n"); - goto err_out; - } + if (ret) + return dev_err_probe(&pdev->dev, ret, "failed to register Aml host ecc engine.\n"); ret = of_property_read_u32(np, "amlogic,rx-adj", &val); if (!ret) @@ -1177,24 +1163,7 @@ static int aml_sfc_probe(struct platform_device *pdev) ctrl->min_speed_hz = SFC_MIN_FREQUENCY; ctrl->num_chipselect = SFC_MAX_CS_NUM; - ret = devm_spi_register_controller(dev, ctrl); - if (ret) - goto err_out; - - return 0; - -err_out: - aml_sfc_disable_clk(sfc); - - return ret; -} - -static void aml_sfc_remove(struct platform_device *pdev) -{ - struct spi_controller *ctlr = platform_get_drvdata(pdev); - struct aml_sfc *sfc = spi_controller_get_devdata(ctlr); - - aml_sfc_disable_clk(sfc); + return devm_spi_register_controller(dev, ctrl); } static const struct of_device_id aml_sfc_of_match[] = { @@ -1212,7 +1181,6 @@ static struct platform_driver aml_sfc_driver = { .of_match_table = aml_sfc_of_match, }, .probe = aml_sfc_probe, - .remove = aml_sfc_remove, }; module_platform_driver(aml_sfc_driver); diff --git a/drivers/spi/spi-amlogic-spisg.c b/drivers/spi/spi-amlogic-spisg.c index 1509df2b17aed4..9d568e385f052a 100644 --- a/drivers/spi/spi-amlogic-spisg.c +++ b/drivers/spi/spi-amlogic-spisg.c @@ -729,9 +729,9 @@ static int aml_spisg_probe(struct platform_device *pdev) }; if (of_property_read_bool(dev->of_node, "spi-slave")) - ctlr = spi_alloc_target(dev, sizeof(*spisg)); + ctlr = devm_spi_alloc_target(dev, sizeof(*spisg)); else - ctlr = spi_alloc_host(dev, sizeof(*spisg)); + ctlr = devm_spi_alloc_host(dev, sizeof(*spisg)); if (!ctlr) return -ENOMEM; @@ -750,10 +750,8 @@ static int aml_spisg_probe(struct platform_device *pdev) return dev_err_probe(dev, PTR_ERR(spisg->map), "regmap init failed\n"); irq = platform_get_irq(pdev, 0); - if (irq < 0) { - ret = irq; - goto out_controller; - } + if (irq < 0) + return irq; ret = device_reset_optional(dev); if (ret) @@ -817,8 +815,6 @@ static int aml_spisg_probe(struct platform_device *pdev) if (spisg->core) clk_disable_unprepare(spisg->core); clk_disable_unprepare(spisg->pclk); -out_controller: - spi_controller_put(ctlr); return ret; } diff --git a/drivers/spi/spi-axiado.c b/drivers/spi/spi-axiado.c index 8cea81432c5bac..8ddcd27def22b8 100644 --- a/drivers/spi/spi-axiado.c +++ b/drivers/spi/spi-axiado.c @@ -765,30 +765,22 @@ static int ax_spi_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ctlr); xspi->regs = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(xspi->regs)) { - ret = PTR_ERR(xspi->regs); - goto remove_ctlr; - } + if (IS_ERR(xspi->regs)) + return PTR_ERR(xspi->regs); xspi->pclk = devm_clk_get(&pdev->dev, "pclk"); - if (IS_ERR(xspi->pclk)) { - dev_err(&pdev->dev, "pclk clock not found.\n"); - ret = PTR_ERR(xspi->pclk); - goto remove_ctlr; - } + if (IS_ERR(xspi->pclk)) + return dev_err_probe(&pdev->dev, PTR_ERR(xspi->pclk), + "pclk clock not found.\n"); xspi->ref_clk = devm_clk_get(&pdev->dev, "ref"); - if (IS_ERR(xspi->ref_clk)) { - dev_err(&pdev->dev, "ref clock not found.\n"); - ret = PTR_ERR(xspi->ref_clk); - goto remove_ctlr; - } + if (IS_ERR(xspi->ref_clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(xspi->ref_clk), + "ref clock not found.\n"); ret = clk_prepare_enable(xspi->pclk); - if (ret) { - dev_err(&pdev->dev, "Unable to enable APB clock.\n"); - goto remove_ctlr; - } + if (ret) + return dev_err_probe(&pdev->dev, ret, "Unable to enable APB clock.\n"); ret = clk_prepare_enable(xspi->ref_clk); if (ret) { @@ -869,8 +861,7 @@ static int ax_spi_probe(struct platform_device *pdev) clk_disable_unprepare(xspi->ref_clk); clk_dis_apb: clk_disable_unprepare(xspi->pclk); -remove_ctlr: - spi_controller_put(ctlr); + return ret; } diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index 43ce47f2454cf2..d5fb0edc8e0c8b 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c @@ -359,9 +359,9 @@ static int setup_fifo_params(struct spi_device *spi_slv, writel((spi_slv->mode & SPI_LOOP) ? LOOPBACK_ENABLE : 0, se->base + SE_SPI_LOOPBACK); if (cs_changed) writel(chipselect, se->base + SE_SPI_DEMUX_SEL); - if (mode_changed & SE_SPI_CPHA) + if (mode_changed & SPI_CPHA) writel((spi_slv->mode & SPI_CPHA) ? CPHA : 0, se->base + SE_SPI_CPHA); - if (mode_changed & SE_SPI_CPOL) + if (mode_changed & SPI_CPOL) writel((spi_slv->mode & SPI_CPOL) ? CPOL : 0, se->base + SE_SPI_CPOL); if ((mode_changed & SPI_CS_HIGH) || (cs_changed && (spi_slv->mode & SPI_CS_HIGH))) writel((spi_slv->mode & SPI_CS_HIGH) ? BIT(chipselect) : 0, se->base + SE_SPI_DEMUX_OUTPUT_INV); @@ -906,10 +906,13 @@ static irqreturn_t geni_spi_isr(int irq, void *data) struct spi_controller *spi = data; struct spi_geni_master *mas = spi_controller_get_devdata(spi); struct geni_se *se = &mas->se; - u32 m_irq; + u32 m_irq, dma_tx_status, dma_rx_status; m_irq = readl(se->base + SE_GENI_M_IRQ_STATUS); - if (!m_irq) + dma_tx_status = readl_relaxed(se->base + SE_DMA_TX_IRQ_STAT); + dma_rx_status = readl_relaxed(se->base + SE_DMA_RX_IRQ_STAT); + + if (!m_irq && !dma_tx_status && !dma_rx_status) return IRQ_NONE; if (m_irq & (M_CMD_OVERRUN_EN | M_ILLEGAL_CMD_EN | M_CMD_FAILURE_EN | @@ -957,8 +960,6 @@ static irqreturn_t geni_spi_isr(int irq, void *data) } } else if (mas->cur_xfer_mode == GENI_SE_DMA) { const struct spi_transfer *xfer = mas->cur_xfer; - u32 dma_tx_status = readl_relaxed(se->base + SE_DMA_TX_IRQ_STAT); - u32 dma_rx_status = readl_relaxed(se->base + SE_DMA_RX_IRQ_STAT); if (dma_tx_status) writel(dma_tx_status, se->base + SE_DMA_TX_IRQ_CLR); diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 61f7bde8c7fbbc..53dee314d76ae9 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -3049,6 +3049,8 @@ static void spi_controller_release(struct device *dev) struct spi_controller *ctlr; ctlr = container_of(dev, struct spi_controller, dev); + + free_percpu(ctlr->pcpu_statistics); kfree(ctlr); } @@ -3192,6 +3194,12 @@ struct spi_controller *__spi_alloc_controller(struct device *dev, if (!ctlr) return NULL; + ctlr->pcpu_statistics = spi_alloc_pcpu_stats(NULL); + if (!ctlr->pcpu_statistics) { + kfree(ctlr); + return NULL; + } + device_initialize(&ctlr->dev); INIT_LIST_HEAD(&ctlr->queue); spin_lock_init(&ctlr->queue_lock); @@ -3480,17 +3488,8 @@ int spi_register_controller(struct spi_controller *ctlr) dev_info(dev, "controller is unqueued, this is deprecated\n"); } else if (ctlr->transfer_one || ctlr->transfer_one_message) { status = spi_controller_initialize_queue(ctlr); - if (status) { - device_del(&ctlr->dev); - goto free_bus_id; - } - } - /* Add statistics */ - ctlr->pcpu_statistics = spi_alloc_pcpu_stats(dev); - if (!ctlr->pcpu_statistics) { - dev_err(dev, "Error allocating per-cpu statistics\n"); - status = -ENOMEM; - goto destroy_queue; + if (status) + goto del_ctrl; } mutex_lock(&board_lock); @@ -3504,8 +3503,8 @@ int spi_register_controller(struct spi_controller *ctlr) acpi_register_spi_devices(ctlr); return status; -destroy_queue: - spi_destroy_queue(ctlr); +del_ctrl: + device_del(&ctlr->dev); free_bus_id: mutex_lock(&board_lock); idr_remove(&spi_controller_idr, ctlr->bus_num); diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index 6c5b9e352e5e01..e9ea9f80cfd9ac 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -23,29 +23,11 @@ struct tee_shm_dma_mem { struct page *page; }; -static void shm_put_kernel_pages(struct page **pages, size_t page_count) -{ - size_t n; - - for (n = 0; n < page_count; n++) - put_page(pages[n]); -} - -static void shm_get_kernel_pages(struct page **pages, size_t page_count) -{ - size_t n; - - for (n = 0; n < page_count; n++) - get_page(pages[n]); -} - static void release_registered_pages(struct tee_shm *shm) { if (shm->pages) { if (shm->flags & TEE_SHM_USER_MAPPED) unpin_user_pages(shm->pages, shm->num_pages); - else - shm_put_kernel_pages(shm->pages, shm->num_pages); kfree(shm->pages); } @@ -477,13 +459,6 @@ register_shm_helper(struct tee_context *ctx, struct iov_iter *iter, u32 flags, goto err_put_shm_pages; } - /* - * iov_iter_extract_kvec_pages does not get reference on the pages, - * get a reference on them. - */ - if (iov_iter_is_kvec(iter)) - shm_get_kernel_pages(shm->pages, num_pages); - shm->offset = off; shm->size = len; shm->num_pages = num_pages; @@ -499,8 +474,6 @@ register_shm_helper(struct tee_context *ctx, struct iov_iter *iter, u32 flags, err_put_shm_pages: if (!iov_iter_is_kvec(iter)) unpin_user_pages(shm->pages, shm->num_pages); - else - shm_put_kernel_pages(shm->pages, shm->num_pages); err_free_shm_pages: kfree(shm->pages); err_free_shm: diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_soc_slider.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_soc_slider.c index 49ff3bae727109..91f291627132aa 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_soc_slider.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_soc_slider.c @@ -176,15 +176,21 @@ static inline void write_soc_slider(struct proc_thermal_device *proc_priv, u64 v static void set_soc_power_profile(struct proc_thermal_device *proc_priv, int slider) { + u8 offset; u64 val; val = read_soc_slider(proc_priv); val &= ~SLIDER_MASK; val |= FIELD_PREP(SLIDER_MASK, slider) | BIT(SLIDER_ENABLE_BIT); + if (slider == SOC_SLIDER_VALUE_MINIMUM || slider == SOC_SLIDER_VALUE_MAXIMUM) + offset = 0; + else + offset = slider_offset; + /* Set the slider offset from module params */ val &= ~SLIDER_OFFSET_MASK; - val |= FIELD_PREP(SLIDER_OFFSET_MASK, slider_offset); + val |= FIELD_PREP(SLIDER_OFFSET_MASK, offset); write_soc_slider(proc_priv, val); } diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index 8caecfc85d933c..77fe0588fd6bd5 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -175,7 +175,9 @@ static unsigned int __maybe_unused serial_icr_read(struct uart_8250_port *up, return value; } +void serial8250_clear_fifos(struct uart_8250_port *p); void serial8250_clear_and_reinit_fifos(struct uart_8250_port *p); +void serial8250_fifo_wait_for_lsr_thre(struct uart_8250_port *up, unsigned int count); void serial8250_rpm_get(struct uart_8250_port *p); void serial8250_rpm_put(struct uart_8250_port *p); @@ -400,6 +402,26 @@ static inline bool serial8250_tx_dma_running(struct uart_8250_port *p) return dma && dma->tx_running; } + +static inline void serial8250_tx_dma_pause(struct uart_8250_port *p) +{ + struct uart_8250_dma *dma = p->dma; + + if (!dma->tx_running) + return; + + dmaengine_pause(dma->txchan); +} + +static inline void serial8250_tx_dma_resume(struct uart_8250_port *p) +{ + struct uart_8250_dma *dma = p->dma; + + if (!dma->tx_running) + return; + + dmaengine_resume(dma->txchan); +} #else static inline int serial8250_tx_dma(struct uart_8250_port *p) { @@ -421,6 +443,9 @@ static inline bool serial8250_tx_dma_running(struct uart_8250_port *p) { return false; } + +static inline void serial8250_tx_dma_pause(struct uart_8250_port *p) { } +static inline void serial8250_tx_dma_resume(struct uart_8250_port *p) { } #endif static inline int ns16550a_goto_highspeed(struct uart_8250_port *up) diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c index bdd26c9f34bdf2..3b6452e759d5b5 100644 --- a/drivers/tty/serial/8250/8250_dma.c +++ b/drivers/tty/serial/8250/8250_dma.c @@ -162,7 +162,22 @@ void serial8250_tx_dma_flush(struct uart_8250_port *p) */ dma->tx_size = 0; + /* + * We can't use `dmaengine_terminate_sync` because `uart_flush_buffer` is + * holding the uart port spinlock. + */ dmaengine_terminate_async(dma->txchan); + + /* + * The callback might or might not run. If it doesn't run, we need to ensure + * that `tx_running` is cleared so that we can schedule new transactions. + * If it does run, then the zombie callback will clear `tx_running` again + * and perform a no-op since `tx_size` was cleared above. + * + * In either case, we ASSUME the DMA transaction will terminate before we + * issue a new `serial8250_tx_dma`. + */ + dma->tx_running = 0; } int serial8250_rx_dma(struct uart_8250_port *p) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index db73b2ae17fa33..94beadb4024df3 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -9,10 +9,14 @@ * LCR is written whilst busy. If it is, then a busy detect interrupt is * raised, the LCR needs to be rewritten and the uart status register read. */ +#include +#include +#include #include #include #include #include +#include #include #include #include @@ -40,8 +44,12 @@ #define RZN1_UART_RDMACR 0x110 /* DMA Control Register Receive Mode */ /* DesignWare specific register fields */ +#define DW_UART_IIR_IID GENMASK(3, 0) + #define DW_UART_MCR_SIRE BIT(6) +#define DW_UART_USR_BUSY BIT(0) + /* Renesas specific register fields */ #define RZN1_UART_xDMACR_DMA_EN BIT(0) #define RZN1_UART_xDMACR_1_WORD_BURST (0 << 1) @@ -56,6 +64,13 @@ #define DW_UART_QUIRK_IS_DMA_FC BIT(3) #define DW_UART_QUIRK_APMC0D08 BIT(4) #define DW_UART_QUIRK_CPR_VALUE BIT(5) +#define DW_UART_QUIRK_IER_KICK BIT(6) + +/* + * Number of consecutive IIR_NO_INT interrupts required to trigger interrupt + * storm prevention code. + */ +#define DW_UART_QUIRK_IER_KICK_THRES 4 struct dw8250_platform_data { u8 usr_reg; @@ -77,6 +92,9 @@ struct dw8250_data { unsigned int skip_autocfg:1; unsigned int uart_16550_compatible:1; + unsigned int in_idle:1; + + u8 no_int_count; }; static inline struct dw8250_data *to_dw8250_data(struct dw8250_port_data *data) @@ -107,78 +125,167 @@ static inline u32 dw8250_modify_msr(struct uart_port *p, unsigned int offset, u3 return value; } +static void dw8250_idle_exit(struct uart_port *p) +{ + struct dw8250_data *d = to_dw8250_data(p->private_data); + struct uart_8250_port *up = up_to_u8250p(p); + + if (d->uart_16550_compatible) + return; + + if (up->capabilities & UART_CAP_FIFO) + serial_port_out(p, UART_FCR, up->fcr); + serial_port_out(p, UART_MCR, up->mcr); + serial_port_out(p, UART_IER, up->ier); + + /* DMA Rx is restarted by IRQ handler as needed. */ + if (up->dma) + serial8250_tx_dma_resume(up); + + d->in_idle = 0; +} + /* - * This function is being called as part of the uart_port::serial_out() - * routine. Hence, it must not call serial_port_out() or serial_out() - * against the modified registers here, i.e. LCR. + * Ensure BUSY is not asserted. If DW UART is configured with + * !uart_16550_compatible, the writes to LCR, DLL, and DLH fail while + * BUSY is asserted. + * + * Context: port's lock must be held */ -static void dw8250_force_idle(struct uart_port *p) +static int dw8250_idle_enter(struct uart_port *p) { + struct dw8250_data *d = to_dw8250_data(p->private_data); + unsigned int usr_reg = d->pdata ? d->pdata->usr_reg : DW_UART_USR; struct uart_8250_port *up = up_to_u8250p(p); - unsigned int lsr; + int retries; + u32 lsr; - /* - * The following call currently performs serial_out() - * against the FCR register. Because it differs to LCR - * there will be no infinite loop, but if it ever gets - * modified, we might need a new custom version of it - * that avoids infinite recursion. - */ - serial8250_clear_and_reinit_fifos(up); + lockdep_assert_held_once(&p->lock); + + if (d->uart_16550_compatible) + return 0; + + d->in_idle = 1; + + /* Prevent triggering interrupt from RBR filling */ + serial_port_out(p, UART_IER, 0); + + if (up->dma) { + serial8250_rx_dma_flush(up); + if (serial8250_tx_dma_running(up)) + serial8250_tx_dma_pause(up); + } /* - * With PSLVERR_RESP_EN parameter set to 1, the device generates an - * error response when an attempt to read an empty RBR with FIFO - * enabled. + * Wait until Tx becomes empty + one extra frame time to ensure all bits + * have been sent on the wire. + * + * FIXME: frame_time delay is too long with very low baudrates. */ - if (up->fcr & UART_FCR_ENABLE_FIFO) { - lsr = serial_port_in(p, UART_LSR); - if (!(lsr & UART_LSR_DR)) - return; + serial8250_fifo_wait_for_lsr_thre(up, p->fifosize); + ndelay(p->frame_time); + + serial_port_out(p, UART_MCR, up->mcr | UART_MCR_LOOP); + + retries = 4; /* Arbitrary limit, 2 was always enough in tests */ + do { + serial8250_clear_fifos(up); + if (!(serial_port_in(p, usr_reg) & DW_UART_USR_BUSY)) + break; + /* FIXME: frame_time delay is too long with very low baudrates. */ + ndelay(p->frame_time); + } while (--retries); + + lsr = serial_lsr_in(up); + if (lsr & UART_LSR_DR) { + serial_port_in(p, UART_RX); + up->lsr_saved_flags = 0; + } + + /* Now guaranteed to have BUSY deasserted? Just sanity check */ + if (serial_port_in(p, usr_reg) & DW_UART_USR_BUSY) { + dw8250_idle_exit(p); + return -EBUSY; } - serial_port_in(p, UART_RX); + return 0; +} + +static void dw8250_set_divisor(struct uart_port *p, unsigned int baud, + unsigned int quot, unsigned int quot_frac) +{ + struct uart_8250_port *up = up_to_u8250p(p); + int ret; + + ret = dw8250_idle_enter(p); + if (ret < 0) + return; + + serial_port_out(p, UART_LCR, up->lcr | UART_LCR_DLAB); + if (!(serial_port_in(p, UART_LCR) & UART_LCR_DLAB)) + goto idle_failed; + + serial_dl_write(up, quot); + serial_port_out(p, UART_LCR, up->lcr); + +idle_failed: + dw8250_idle_exit(p); } /* * This function is being called as part of the uart_port::serial_out() - * routine. Hence, it must not call serial_port_out() or serial_out() - * against the modified registers here, i.e. LCR. + * routine. Hence, special care must be taken when serial_port_out() or + * serial_out() against the modified registers here, i.e. LCR (d->in_idle is + * used to break recursion loop). */ static void dw8250_check_lcr(struct uart_port *p, unsigned int offset, u32 value) { struct dw8250_data *d = to_dw8250_data(p->private_data); - void __iomem *addr = p->membase + (offset << p->regshift); - int tries = 1000; + u32 lcr; + int ret; if (offset != UART_LCR || d->uart_16550_compatible) return; + lcr = serial_port_in(p, UART_LCR); + /* Make sure LCR write wasn't ignored */ - while (tries--) { - u32 lcr = serial_port_in(p, offset); + if ((value & ~UART_LCR_SPAR) == (lcr & ~UART_LCR_SPAR)) + return; - if ((value & ~UART_LCR_SPAR) == (lcr & ~UART_LCR_SPAR)) - return; + if (d->in_idle) + goto write_err; - dw8250_force_idle(p); + ret = dw8250_idle_enter(p); + if (ret < 0) + goto write_err; -#ifdef CONFIG_64BIT - if (p->type == PORT_OCTEON) - __raw_writeq(value & 0xff, addr); - else -#endif - if (p->iotype == UPIO_MEM32) - writel(value, addr); - else if (p->iotype == UPIO_MEM32BE) - iowrite32be(value, addr); - else - writeb(value, addr); - } + serial_port_out(p, UART_LCR, value); + dw8250_idle_exit(p); + return; + +write_err: /* * FIXME: this deadlocks if port->lock is already held * dev_err(p->dev, "Couldn't set LCR to %d\n", value); */ + return; /* Silences "label at the end of compound statement" */ +} + +/* + * With BUSY, LCR writes can be very expensive (IRQ + complex retry logic). + * If the write does not change the value of the LCR register, skip it entirely. + */ +static bool dw8250_can_skip_reg_write(struct uart_port *p, unsigned int offset, u32 value) +{ + struct dw8250_data *d = to_dw8250_data(p->private_data); + u32 lcr; + + if (offset != UART_LCR || d->uart_16550_compatible) + return false; + + lcr = serial_port_in(p, offset); + return lcr == value; } /* Returns once the transmitter is empty or we run out of retries */ @@ -207,12 +314,18 @@ static void dw8250_tx_wait_empty(struct uart_port *p) static void dw8250_serial_out(struct uart_port *p, unsigned int offset, u32 value) { + if (dw8250_can_skip_reg_write(p, offset, value)) + return; + writeb(value, p->membase + (offset << p->regshift)); dw8250_check_lcr(p, offset, value); } static void dw8250_serial_out38x(struct uart_port *p, unsigned int offset, u32 value) { + if (dw8250_can_skip_reg_write(p, offset, value)) + return; + /* Allow the TX to drain before we reconfigure */ if (offset == UART_LCR) dw8250_tx_wait_empty(p); @@ -237,6 +350,9 @@ static u32 dw8250_serial_inq(struct uart_port *p, unsigned int offset) static void dw8250_serial_outq(struct uart_port *p, unsigned int offset, u32 value) { + if (dw8250_can_skip_reg_write(p, offset, value)) + return; + value &= 0xff; __raw_writeq(value, p->membase + (offset << p->regshift)); /* Read back to ensure register write ordering. */ @@ -248,6 +364,9 @@ static void dw8250_serial_outq(struct uart_port *p, unsigned int offset, u32 val static void dw8250_serial_out32(struct uart_port *p, unsigned int offset, u32 value) { + if (dw8250_can_skip_reg_write(p, offset, value)) + return; + writel(value, p->membase + (offset << p->regshift)); dw8250_check_lcr(p, offset, value); } @@ -261,6 +380,9 @@ static u32 dw8250_serial_in32(struct uart_port *p, unsigned int offset) static void dw8250_serial_out32be(struct uart_port *p, unsigned int offset, u32 value) { + if (dw8250_can_skip_reg_write(p, offset, value)) + return; + iowrite32be(value, p->membase + (offset << p->regshift)); dw8250_check_lcr(p, offset, value); } @@ -272,6 +394,29 @@ static u32 dw8250_serial_in32be(struct uart_port *p, unsigned int offset) return dw8250_modify_msr(p, offset, value); } +/* + * INTC10EE UART can IRQ storm while reporting IIR_NO_INT. Inducing IIR value + * change has been observed to break the storm. + * + * If Tx is empty (THRE asserted), we use here IER_THRI to cause IIR_NO_INT -> + * IIR_THRI transition. + */ +static void dw8250_quirk_ier_kick(struct uart_port *p) +{ + struct uart_8250_port *up = up_to_u8250p(p); + u32 lsr; + + if (up->ier & UART_IER_THRI) + return; + + lsr = serial_lsr_in(up); + if (!(lsr & UART_LSR_THRE)) + return; + + serial_port_out(p, UART_IER, up->ier | UART_IER_THRI); + serial_port_in(p, UART_LCR); /* safe, no side-effects */ + serial_port_out(p, UART_IER, up->ier); +} static int dw8250_handle_irq(struct uart_port *p) { @@ -281,7 +426,31 @@ static int dw8250_handle_irq(struct uart_port *p) bool rx_timeout = (iir & 0x3f) == UART_IIR_RX_TIMEOUT; unsigned int quirks = d->pdata->quirks; unsigned int status; - unsigned long flags; + + guard(uart_port_lock_irqsave)(p); + + switch (FIELD_GET(DW_UART_IIR_IID, iir)) { + case UART_IIR_NO_INT: + if (d->uart_16550_compatible || up->dma) + return 0; + + if (quirks & DW_UART_QUIRK_IER_KICK && + d->no_int_count == (DW_UART_QUIRK_IER_KICK_THRES - 1)) + dw8250_quirk_ier_kick(p); + d->no_int_count = (d->no_int_count + 1) % DW_UART_QUIRK_IER_KICK_THRES; + + return 0; + + case UART_IIR_BUSY: + /* Clear the USR */ + serial_port_in(p, d->pdata->usr_reg); + + d->no_int_count = 0; + + return 1; + } + + d->no_int_count = 0; /* * There are ways to get Designware-based UARTs into a state where @@ -294,20 +463,15 @@ static int dw8250_handle_irq(struct uart_port *p) * so we limit the workaround only to non-DMA mode. */ if (!up->dma && rx_timeout) { - uart_port_lock_irqsave(p, &flags); status = serial_lsr_in(up); if (!(status & (UART_LSR_DR | UART_LSR_BI))) serial_port_in(p, UART_RX); - - uart_port_unlock_irqrestore(p, flags); } /* Manually stop the Rx DMA transfer when acting as flow controller */ if (quirks & DW_UART_QUIRK_IS_DMA_FC && up->dma && up->dma->rx_running && rx_timeout) { - uart_port_lock_irqsave(p, &flags); status = serial_lsr_in(up); - uart_port_unlock_irqrestore(p, flags); if (status & (UART_LSR_DR | UART_LSR_BI)) { dw8250_writel_ext(p, RZN1_UART_RDMACR, 0); @@ -315,17 +479,9 @@ static int dw8250_handle_irq(struct uart_port *p) } } - if (serial8250_handle_irq(p, iir)) - return 1; - - if ((iir & UART_IIR_BUSY) == UART_IIR_BUSY) { - /* Clear the USR */ - serial_port_in(p, d->pdata->usr_reg); - - return 1; - } + serial8250_handle_irq_locked(p, iir); - return 0; + return 1; } static void dw8250_clk_work_cb(struct work_struct *work) @@ -527,6 +683,14 @@ static void dw8250_reset_control_assert(void *data) reset_control_assert(data); } +static void dw8250_shutdown(struct uart_port *port) +{ + struct dw8250_data *d = to_dw8250_data(port->private_data); + + serial8250_do_shutdown(port); + d->no_int_count = 0; +} + static int dw8250_probe(struct platform_device *pdev) { struct uart_8250_port uart = {}, *up = &uart; @@ -545,8 +709,10 @@ static int dw8250_probe(struct platform_device *pdev) p->type = PORT_8250; p->flags = UPF_FIXED_PORT; p->dev = dev; + p->set_ldisc = dw8250_set_ldisc; p->set_termios = dw8250_set_termios; + p->set_divisor = dw8250_set_divisor; data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!data) @@ -654,10 +820,12 @@ static int dw8250_probe(struct platform_device *pdev) dw8250_quirks(p, data); /* If the Busy Functionality is not implemented, don't handle it */ - if (data->uart_16550_compatible) + if (data->uart_16550_compatible) { p->handle_irq = NULL; - else if (data->pdata) + } else if (data->pdata) { p->handle_irq = dw8250_handle_irq; + p->shutdown = dw8250_shutdown; + } dw8250_setup_dma_filter(p, data); @@ -789,6 +957,11 @@ static const struct dw8250_platform_data dw8250_skip_set_rate_data = { .quirks = DW_UART_QUIRK_SKIP_SET_RATE, }; +static const struct dw8250_platform_data dw8250_intc10ee = { + .usr_reg = DW_UART_USR, + .quirks = DW_UART_QUIRK_IER_KICK, +}; + static const struct of_device_id dw8250_of_match[] = { { .compatible = "snps,dw-apb-uart", .data = &dw8250_dw_apb }, { .compatible = "cavium,octeon-3860-uart", .data = &dw8250_octeon_3860_data }, @@ -818,7 +991,7 @@ static const struct acpi_device_id dw8250_acpi_match[] = { { "INT33C5", (kernel_ulong_t)&dw8250_dw_apb }, { "INT3434", (kernel_ulong_t)&dw8250_dw_apb }, { "INT3435", (kernel_ulong_t)&dw8250_dw_apb }, - { "INTC10EE", (kernel_ulong_t)&dw8250_dw_apb }, + { "INTC10EE", (kernel_ulong_t)&dw8250_intc10ee }, { }, }; MODULE_DEVICE_TABLE(acpi, dw8250_acpi_match); @@ -836,6 +1009,7 @@ static struct platform_driver dw8250_platform_driver = { module_platform_driver(dw8250_platform_driver); +MODULE_IMPORT_NS("SERIAL_8250"); MODULE_AUTHOR("Jamie Iles"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Synopsys DesignWare 8250 serial port driver"); diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index aa1ab4da9ff1b5..6cfd1b2af5b770 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -137,6 +137,8 @@ struct serial_private { }; #define PCI_DEVICE_ID_HPE_PCI_SERIAL 0x37e +#define PCIE_VENDOR_ID_ASIX 0x125B +#define PCIE_DEVICE_ID_AX99100 0x9100 static const struct pci_device_id pci_use_msi[] = { { PCI_DEVICE_SUB(PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9900, @@ -149,6 +151,8 @@ static const struct pci_device_id pci_use_msi[] = { 0xA000, 0x1000) }, { PCI_DEVICE_SUB(PCI_VENDOR_ID_HP_3PAR, PCI_DEVICE_ID_HPE_PCI_SERIAL, PCI_ANY_ID, PCI_ANY_ID) }, + { PCI_DEVICE_SUB(PCIE_VENDOR_ID_ASIX, PCIE_DEVICE_ID_AX99100, + 0xA000, 0x1000) }, { } }; @@ -920,6 +924,7 @@ static int pci_netmos_init(struct pci_dev *dev) case PCI_DEVICE_ID_NETMOS_9912: case PCI_DEVICE_ID_NETMOS_9922: case PCI_DEVICE_ID_NETMOS_9900: + case PCIE_DEVICE_ID_AX99100: num_serial = pci_netmos_9900_numports(dev); break; @@ -2544,6 +2549,14 @@ static struct pci_serial_quirk pci_serial_quirks[] = { .init = pci_netmos_init, .setup = pci_netmos_9900_setup, }, + { + .vendor = PCIE_VENDOR_ID_ASIX, + .device = PCI_ANY_ID, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .init = pci_netmos_init, + .setup = pci_netmos_9900_setup, + }, /* * EndRun Technologies */ @@ -6065,6 +6078,10 @@ static const struct pci_device_id serial_pci_tbl[] = { 0xA000, 0x3002, 0, 0, pbn_NETMOS9900_2s_115200 }, + { PCIE_VENDOR_ID_ASIX, PCIE_DEVICE_ID_AX99100, + 0xA000, 0x1000, + 0, 0, pbn_b0_1_115200 }, + /* * Best Connectivity and Rosewill PCI Multi I/O cards */ diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index cc94af2d578a64..328711b5df1a89 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -488,7 +489,7 @@ serial_port_out_sync(struct uart_port *p, int offset, int value) /* * FIFO support. */ -static void serial8250_clear_fifos(struct uart_8250_port *p) +void serial8250_clear_fifos(struct uart_8250_port *p) { if (p->capabilities & UART_CAP_FIFO) { serial_out(p, UART_FCR, UART_FCR_ENABLE_FIFO); @@ -497,6 +498,7 @@ static void serial8250_clear_fifos(struct uart_8250_port *p) serial_out(p, UART_FCR, 0); } } +EXPORT_SYMBOL_NS_GPL(serial8250_clear_fifos, "SERIAL_8250"); static enum hrtimer_restart serial8250_em485_handle_start_tx(struct hrtimer *t); static enum hrtimer_restart serial8250_em485_handle_stop_tx(struct hrtimer *t); @@ -1782,20 +1784,16 @@ static bool handle_rx_dma(struct uart_8250_port *up, unsigned int iir) } /* - * This handles the interrupt from one port. + * Context: port's lock must be held by the caller. */ -int serial8250_handle_irq(struct uart_port *port, unsigned int iir) +void serial8250_handle_irq_locked(struct uart_port *port, unsigned int iir) { struct uart_8250_port *up = up_to_u8250p(port); struct tty_port *tport = &port->state->port; bool skip_rx = false; - unsigned long flags; u16 status; - if (iir & UART_IIR_NO_INT) - return 0; - - uart_port_lock_irqsave(port, &flags); + lockdep_assert_held_once(&port->lock); status = serial_lsr_in(up); @@ -1828,8 +1826,19 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) else if (!up->dma->tx_running) __stop_tx(up); } +} +EXPORT_SYMBOL_NS_GPL(serial8250_handle_irq_locked, "SERIAL_8250"); + +/* + * This handles the interrupt from one port. + */ +int serial8250_handle_irq(struct uart_port *port, unsigned int iir) +{ + if (iir & UART_IIR_NO_INT) + return 0; - uart_unlock_and_check_sysrq_irqrestore(port, flags); + guard(uart_port_lock_irqsave)(port); + serial8250_handle_irq_locked(port, iir); return 1; } @@ -2147,8 +2156,7 @@ static void serial8250_THRE_test(struct uart_port *port) if (up->port.flags & UPF_NO_THRE_TEST) return; - if (port->irqflags & IRQF_SHARED) - disable_irq_nosync(port->irq); + disable_irq(port->irq); /* * Test for UARTs that do not reassert THRE when the transmitter is idle and the interrupt @@ -2170,8 +2178,7 @@ static void serial8250_THRE_test(struct uart_port *port) serial_port_out(port, UART_IER, 0); } - if (port->irqflags & IRQF_SHARED) - enable_irq(port->irq); + enable_irq(port->irq); /* * If the interrupt is not reasserted, or we otherwise don't trust the iir, setup a timer to @@ -2350,6 +2357,7 @@ static int serial8250_startup(struct uart_port *port) void serial8250_do_shutdown(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); + u32 lcr; serial8250_rpm_get(up); /* @@ -2376,13 +2384,13 @@ void serial8250_do_shutdown(struct uart_port *port) port->mctrl &= ~TIOCM_OUT2; serial8250_set_mctrl(port, port->mctrl); + + /* Disable break condition */ + lcr = serial_port_in(port, UART_LCR); + lcr &= ~UART_LCR_SBC; + serial_port_out(port, UART_LCR, lcr); } - /* - * Disable break condition and FIFOs - */ - serial_port_out(port, UART_LCR, - serial_port_in(port, UART_LCR) & ~UART_LCR_SBC); serial8250_clear_fifos(up); rsa_disable(up); @@ -2392,6 +2400,12 @@ void serial8250_do_shutdown(struct uart_port *port) * the IRQ chain. */ serial_port_in(port, UART_RX); + /* + * LCR writes on DW UART can trigger late (unmaskable) IRQs. + * Handle them before releasing the handler. + */ + synchronize_irq(port->irq); + serial8250_rpm_put(up); up->ops->release_irq(up); @@ -3185,6 +3199,17 @@ void serial8250_set_defaults(struct uart_8250_port *up) } EXPORT_SYMBOL_GPL(serial8250_set_defaults); +void serial8250_fifo_wait_for_lsr_thre(struct uart_8250_port *up, unsigned int count) +{ + unsigned int i; + + for (i = 0; i < count; i++) { + if (wait_for_lsr(up, UART_LSR_THRE)) + return; + } +} +EXPORT_SYMBOL_NS_GPL(serial8250_fifo_wait_for_lsr_thre, "SERIAL_8250"); + #ifdef CONFIG_SERIAL_8250_CONSOLE static void serial8250_console_putchar(struct uart_port *port, unsigned char ch) @@ -3226,16 +3251,6 @@ static void serial8250_console_restore(struct uart_8250_port *up) serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS); } -static void fifo_wait_for_lsr(struct uart_8250_port *up, unsigned int count) -{ - unsigned int i; - - for (i = 0; i < count; i++) { - if (wait_for_lsr(up, UART_LSR_THRE)) - return; - } -} - /* * Print a string to the serial port using the device FIFO * @@ -3254,7 +3269,7 @@ static void serial8250_console_fifo_write(struct uart_8250_port *up, while (s != end) { /* Allow timeout for each byte of a possibly full FIFO */ - fifo_wait_for_lsr(up, fifosize); + serial8250_fifo_wait_for_lsr_thre(up, fifosize); for (i = 0; i < fifosize && s != end; ++i) { if (*s == '\n' && !cr_sent) { @@ -3272,7 +3287,7 @@ static void serial8250_console_fifo_write(struct uart_8250_port *up, * Allow timeout for each byte written since the caller will only wait * for UART_LSR_BOTH_EMPTY using the timeout of a single character */ - fifo_wait_for_lsr(up, tx_count); + serial8250_fifo_wait_for_lsr_thre(up, tx_count); } /* diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 487756947a9629..89cebdd278410a 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -643,7 +643,10 @@ static unsigned int uart_write_room(struct tty_struct *tty) unsigned int ret; port = uart_port_ref_lock(state, &flags); - ret = kfifo_avail(&state->port.xmit_fifo); + if (!state->port.xmit_buf) + ret = 0; + else + ret = kfifo_avail(&state->port.xmit_fifo); uart_port_unlock_deref(port, flags); return ret; } diff --git a/drivers/tty/serial/uartlite.c b/drivers/tty/serial/uartlite.c index 39c1fd1ff9cedd..6240c3d4dfd798 100644 --- a/drivers/tty/serial/uartlite.c +++ b/drivers/tty/serial/uartlite.c @@ -878,6 +878,7 @@ static int ulite_probe(struct platform_device *pdev) pm_runtime_use_autosuspend(&pdev->dev); pm_runtime_set_autosuspend_delay(&pdev->dev, UART_AUTOSUSPEND_TIMEOUT); pm_runtime_set_active(&pdev->dev); + pm_runtime_get_noresume(&pdev->dev); pm_runtime_enable(&pdev->dev); ret = ulite_assign(&pdev->dev, id, res->start, irq, pdata); diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index c1f152d8b03b6a..e2df99e3d45809 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1339,6 +1339,8 @@ struct vc_data *vc_deallocate(unsigned int currcons) kfree(vc->vc_saved_screen); vc->vc_saved_screen = NULL; } + vc_uniscr_free(vc->vc_saved_uni_lines); + vc->vc_saved_uni_lines = NULL; } return vc; } @@ -1884,6 +1886,8 @@ static void enter_alt_screen(struct vc_data *vc) vc->vc_saved_screen = kmemdup((u16 *)vc->vc_origin, size, GFP_KERNEL); if (vc->vc_saved_screen == NULL) return; + vc->vc_saved_uni_lines = vc->vc_uni_lines; + vc->vc_uni_lines = NULL; vc->vc_saved_rows = vc->vc_rows; vc->vc_saved_cols = vc->vc_cols; save_cur(vc); @@ -1905,6 +1909,8 @@ static void leave_alt_screen(struct vc_data *vc) dest = ((u16 *)vc->vc_origin) + r * vc->vc_cols; memcpy(dest, src, 2 * cols); } + vc_uniscr_set(vc, vc->vc_saved_uni_lines); + vc->vc_saved_uni_lines = NULL; restore_cur(vc); /* Update the entire screen */ if (con_should_update(vc)) @@ -2227,6 +2233,8 @@ static void reset_terminal(struct vc_data *vc, int do_clear) if (vc->vc_saved_screen != NULL) { kfree(vc->vc_saved_screen); vc->vc_saved_screen = NULL; + vc_uniscr_free(vc->vc_saved_uni_lines); + vc->vc_saved_uni_lines = NULL; vc->vc_saved_rows = 0; vc->vc_saved_cols = 0; } diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 335692d41617a2..fbca7ce1c6bf0f 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2912,10 +2912,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); * @data: the token identifying the buffer. * @gfp: how to do memory allocations (if necessary). * - * Same as virtqueue_add_inbuf but passes DMA_ATTR_CPU_CACHE_CLEAN to indicate - * that the CPU will not dirty any cacheline overlapping this buffer while it - * is available, and to suppress overlapping cacheline warnings in DMA debug - * builds. + * Same as virtqueue_add_inbuf but passes DMA_ATTR_DEBUGGING_IGNORE_CACHELINES + * to indicate that the CPU will not dirty any cacheline overlapping this buffer + * while it is available, and to suppress overlapping cacheline warnings in DMA + * debug builds. * * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). @@ -2928,7 +2928,7 @@ int virtqueue_add_inbuf_cache_clean(struct virtqueue *vq, gfp_t gfp) { return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, false, gfp, - DMA_ATTR_CPU_CACHE_CLEAN); + DMA_ATTR_DEBUGGING_IGNORE_CACHELINES); } EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_cache_clean); diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 1759cc18753ff8..bbf9ee21306cde 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -30,7 +31,10 @@ #include #include #include +#include +#include #include +#include #include #include @@ -46,6 +50,7 @@ #include #include #include +#include #ifdef CONFIG_XEN_ACPI #include #endif @@ -68,10 +73,20 @@ module_param_named(dm_op_buf_max_size, privcmd_dm_op_buf_max_size, uint, MODULE_PARM_DESC(dm_op_buf_max_size, "Maximum size of a dm_op hypercall buffer"); +static bool unrestricted; +module_param(unrestricted, bool, 0); +MODULE_PARM_DESC(unrestricted, + "Don't restrict hypercalls to target domain if running in a domU"); + struct privcmd_data { domid_t domid; }; +/* DOMID_INVALID implies no restriction */ +static domid_t target_domain = DOMID_INVALID; +static bool restrict_wait; +static DECLARE_WAIT_QUEUE_HEAD(restrict_wait_wq); + static int privcmd_vma_range_is_mapped( struct vm_area_struct *vma, unsigned long addr, @@ -1563,13 +1578,16 @@ static long privcmd_ioctl(struct file *file, static int privcmd_open(struct inode *ino, struct file *file) { - struct privcmd_data *data = kzalloc_obj(*data); + struct privcmd_data *data; + + if (wait_event_interruptible(restrict_wait_wq, !restrict_wait) < 0) + return -EINTR; + data = kzalloc_obj(*data); if (!data) return -ENOMEM; - /* DOMID_INVALID implies no restriction */ - data->domid = DOMID_INVALID; + data->domid = target_domain; file->private_data = data; return 0; @@ -1662,6 +1680,52 @@ static struct miscdevice privcmd_dev = { .fops = &xen_privcmd_fops, }; +static int init_restrict(struct notifier_block *notifier, + unsigned long event, + void *data) +{ + char *target; + unsigned int domid; + + /* Default to an guaranteed unused domain-id. */ + target_domain = DOMID_IDLE; + + target = xenbus_read(XBT_NIL, "target", "", NULL); + if (IS_ERR(target) || kstrtouint(target, 10, &domid)) { + pr_err("No target domain found, blocking all hypercalls\n"); + goto out; + } + + target_domain = domid; + + out: + if (!IS_ERR(target)) + kfree(target); + + restrict_wait = false; + wake_up_all(&restrict_wait_wq); + + return NOTIFY_DONE; +} + +static struct notifier_block xenstore_notifier = { + .notifier_call = init_restrict, +}; + +static void __init restrict_driver(void) +{ + if (unrestricted) { + if (security_locked_down(LOCKDOWN_XEN_USER_ACTIONS)) + pr_warn("Kernel is locked down, parameter \"unrestricted\" ignored\n"); + else + return; + } + + restrict_wait = true; + + register_xenstore_notifier(&xenstore_notifier); +} + static int __init privcmd_init(void) { int err; @@ -1669,6 +1733,9 @@ static int __init privcmd_init(void) if (!xen_domain()) return -ENODEV; + if (!xen_initial_domain()) + restrict_driver(); + err = misc_register(&privcmd_dev); if (err != 0) { pr_err("Could not register Xen privcmd device\n"); diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 95b1d085213542..95b65aab7daa97 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -595,6 +595,12 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, #ifdef ELF_HWCAP2 nitems++; #endif +#ifdef ELF_HWCAP3 + nitems++; +#endif +#ifdef ELF_HWCAP4 + nitems++; +#endif csp = sp; sp -= nitems * 2 * sizeof(unsigned long); diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index af98421e79227d..0428557fd77bff 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1393,6 +1393,13 @@ static int find_parent_nodes(struct btrfs_backref_walk_ctx *ctx, .indirect_missing_keys = PREFTREE_INIT }; + if (unlikely(!root)) { + btrfs_err(ctx->fs_info, + "missing extent root for extent at bytenr %llu", + ctx->bytenr); + return -EUCLEAN; + } + /* Roots ulist is not needed when using a sharedness check context. */ if (sc) ASSERT(ctx->roots == NULL); @@ -2204,6 +2211,13 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, struct btrfs_extent_item *ei; struct btrfs_key key; + if (unlikely(!extent_root)) { + btrfs_err(fs_info, + "missing extent root for extent at bytenr %llu", + logical); + return -EUCLEAN; + } + key.objectid = logical; if (btrfs_fs_incompat(fs_info, SKINNY_METADATA)) key.type = BTRFS_METADATA_ITEM_KEY; @@ -2851,6 +2865,13 @@ int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr) struct btrfs_key key; int ret; + if (unlikely(!extent_root)) { + btrfs_err(fs_info, + "missing extent root for extent at bytenr %llu", + bytenr); + return -EUCLEAN; + } + key.objectid = bytenr; key.type = BTRFS_METADATA_ITEM_KEY; key.offset = (u64)-1; @@ -2987,6 +3008,13 @@ int btrfs_backref_iter_next(struct btrfs_backref_iter *iter) /* We're at keyed items, there is no inline item, go to the next one */ extent_root = btrfs_extent_root(iter->fs_info, iter->bytenr); + if (unlikely(!extent_root)) { + btrfs_err(iter->fs_info, + "missing extent root for extent at bytenr %llu", + iter->bytenr); + return -EUCLEAN; + } + ret = btrfs_next_item(extent_root, iter->path); if (ret) return ret; diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 2a886bece8105a..ebf5079096af98 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -739,6 +739,12 @@ static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl) last = max_t(u64, block_group->start, BTRFS_SUPER_INFO_OFFSET); extent_root = btrfs_extent_root(fs_info, last); + if (unlikely(!extent_root)) { + btrfs_err(fs_info, + "missing extent root for block group at offset %llu", + block_group->start); + return -EUCLEAN; + } #ifdef CONFIG_BTRFS_DEBUG /* @@ -1061,6 +1067,11 @@ static int remove_block_group_item(struct btrfs_trans_handle *trans, int ret; root = btrfs_block_group_root(fs_info); + if (unlikely(!root)) { + btrfs_err(fs_info, "missing block group root"); + return -EUCLEAN; + } + key.objectid = block_group->start; key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; key.offset = block_group->length; @@ -1349,6 +1360,11 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( struct btrfs_chunk_map *map; unsigned int num_items; + if (unlikely(!root)) { + btrfs_err(fs_info, "missing block group root"); + return ERR_PTR(-EUCLEAN); + } + map = btrfs_find_chunk_map(fs_info, chunk_offset, 1); ASSERT(map != NULL); ASSERT(map->start == chunk_offset); @@ -2140,6 +2156,11 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info, int ret; struct btrfs_key found_key; + if (unlikely(!root)) { + btrfs_err(fs_info, "missing block group root"); + return -EUCLEAN; + } + btrfs_for_each_slot(root, key, &found_key, path, ret) { if (found_key.objectid >= key->objectid && found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { @@ -2713,6 +2734,11 @@ static int insert_block_group_item(struct btrfs_trans_handle *trans, size_t size; int ret; + if (unlikely(!root)) { + btrfs_err(fs_info, "missing block group root"); + return -EUCLEAN; + } + spin_lock(&block_group->lock); btrfs_set_stack_block_group_v2_used(&bgi, block_group->used); btrfs_set_stack_block_group_v2_chunk_objectid(&bgi, block_group->global_root_id); @@ -3048,6 +3074,11 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache, int ret; bool dirty_bg_running; + if (unlikely(!root)) { + btrfs_err(fs_info, "missing block group root"); + return -EUCLEAN; + } + /* * This can only happen when we are doing read-only scrub on read-only * mount. @@ -3192,6 +3223,11 @@ static int update_block_group_item(struct btrfs_trans_handle *trans, u64 used, remap_bytes; u32 identity_remap_count; + if (unlikely(!root)) { + btrfs_err(fs_info, "missing block group root"); + return -EUCLEAN; + } + /* * Block group items update can be triggered out of commit transaction * critical section, thus we need a consistent view of used bytes. diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 790518a8c80334..85199944c1ebe5 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -320,10 +320,16 @@ void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered, ASSERT(IS_ALIGNED(ordered->file_offset, fs_info->sectorsize)); ASSERT(IS_ALIGNED(ordered->num_bytes, fs_info->sectorsize)); - ASSERT(cb->writeback); + /* + * This flag determines if we should clear the writeback flag from the + * page cache. But this function is only utilized by encoded writes, it + * never goes through the page cache. + */ + ASSERT(!cb->writeback); cb->start = ordered->file_offset; cb->len = ordered->num_bytes; + ASSERT(cb->bbio.bio.bi_iter.bi_size == ordered->disk_num_bytes); cb->compressed_len = ordered->disk_num_bytes; cb->bbio.bio.bi_iter.bi_sector = ordered->disk_bytenr >> SECTOR_SHIFT; cb->bbio.ordered = ordered; @@ -345,8 +351,7 @@ struct compressed_bio *btrfs_alloc_compressed_write(struct btrfs_inode *inode, cb = alloc_compressed_bio(inode, start, REQ_OP_WRITE, end_bbio_compressed_write); cb->start = start; cb->len = len; - cb->writeback = true; - + cb->writeback = false; return cb; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 407830d86d0dac..01f2dbb698326d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1591,7 +1591,7 @@ static int find_newest_super_backup(struct btrfs_fs_info *info) * this will bump the backup pointer by one when it is * done */ -static void backup_super_roots(struct btrfs_fs_info *info) +static int backup_super_roots(struct btrfs_fs_info *info) { const int next_backup = info->backup_root_index; struct btrfs_root_backup *root_backup; @@ -1623,6 +1623,15 @@ static void backup_super_roots(struct btrfs_fs_info *info) struct btrfs_root *extent_root = btrfs_extent_root(info, 0); struct btrfs_root *csum_root = btrfs_csum_root(info, 0); + if (unlikely(!extent_root)) { + btrfs_err(info, "missing extent root for extent at bytenr 0"); + return -EUCLEAN; + } + if (unlikely(!csum_root)) { + btrfs_err(info, "missing csum root for extent at bytenr 0"); + return -EUCLEAN; + } + btrfs_set_backup_extent_root(root_backup, extent_root->node->start); btrfs_set_backup_extent_root_gen(root_backup, @@ -1670,6 +1679,8 @@ static void backup_super_roots(struct btrfs_fs_info *info) memcpy(&info->super_copy->super_roots, &info->super_for_commit->super_roots, sizeof(*root_backup) * BTRFS_NUM_BACKUP_ROOTS); + + return 0; } /* @@ -4051,8 +4062,11 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) * not from fsync where the tree roots in fs_info have not * been consistent on disk. */ - if (max_mirrors == 0) - backup_super_roots(fs_info); + if (max_mirrors == 0) { + ret = backup_super_roots(fs_info); + if (ret < 0) + return ret; + } sb = fs_info->super_for_commit; dev_item = &sb->dev_item; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b0d9baf5b41288..85ee5c79759d8f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -75,6 +75,12 @@ int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len) struct btrfs_key key; BTRFS_PATH_AUTO_FREE(path); + if (unlikely(!root)) { + btrfs_err(fs_info, + "missing extent root for extent at bytenr %llu", start); + return -EUCLEAN; + } + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -131,6 +137,12 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, key.offset = offset; extent_root = btrfs_extent_root(fs_info, bytenr); + if (unlikely(!extent_root)) { + btrfs_err(fs_info, + "missing extent root for extent at bytenr %llu", bytenr); + return -EUCLEAN; + } + ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); if (ret < 0) return ret; @@ -436,6 +448,12 @@ static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans, int recow; int ret; + if (unlikely(!root)) { + btrfs_err(trans->fs_info, + "missing extent root for extent at bytenr %llu", bytenr); + return -EUCLEAN; + } + key.objectid = bytenr; if (parent) { key.type = BTRFS_SHARED_DATA_REF_KEY; @@ -510,6 +528,12 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, u32 num_refs; int ret; + if (unlikely(!root)) { + btrfs_err(trans->fs_info, + "missing extent root for extent at bytenr %llu", bytenr); + return -EUCLEAN; + } + key.objectid = bytenr; if (node->parent) { key.type = BTRFS_SHARED_DATA_REF_KEY; @@ -668,6 +692,12 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, struct btrfs_key key; int ret; + if (unlikely(!root)) { + btrfs_err(trans->fs_info, + "missing extent root for extent at bytenr %llu", bytenr); + return -EUCLEAN; + } + key.objectid = bytenr; if (parent) { key.type = BTRFS_SHARED_BLOCK_REF_KEY; @@ -692,6 +722,12 @@ static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, struct btrfs_key key; int ret; + if (unlikely(!root)) { + btrfs_err(trans->fs_info, + "missing extent root for extent at bytenr %llu", bytenr); + return -EUCLEAN; + } + key.objectid = bytenr; if (node->parent) { key.type = BTRFS_SHARED_BLOCK_REF_KEY; @@ -782,6 +818,12 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA); int needed; + if (unlikely(!root)) { + btrfs_err(fs_info, + "missing extent root for extent at bytenr %llu", bytenr); + return -EUCLEAN; + } + key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = num_bytes; @@ -1680,6 +1722,12 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans, } root = btrfs_extent_root(fs_info, key.objectid); + if (unlikely(!root)) { + btrfs_err(fs_info, + "missing extent root for extent at bytenr %llu", + key.objectid); + return -EUCLEAN; + } again: ret = btrfs_search_slot(trans, root, &key, path, 0, 1); if (ret < 0) { @@ -1926,8 +1974,15 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans, struct btrfs_root *csum_root; csum_root = btrfs_csum_root(fs_info, head->bytenr); - ret = btrfs_del_csums(trans, csum_root, head->bytenr, - head->num_bytes); + if (unlikely(!csum_root)) { + btrfs_err(fs_info, + "missing csum root for extent at bytenr %llu", + head->bytenr); + ret = -EUCLEAN; + } else { + ret = btrfs_del_csums(trans, csum_root, head->bytenr, + head->num_bytes); + } } } @@ -2379,6 +2434,12 @@ static noinline int check_committed_ref(struct btrfs_inode *inode, int type; int ret; + if (unlikely(!extent_root)) { + btrfs_err(fs_info, + "missing extent root for extent at bytenr %llu", bytenr); + return -EUCLEAN; + } + key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = (u64)-1; @@ -3093,6 +3154,15 @@ static int do_free_extent_accounting(struct btrfs_trans_handle *trans, struct btrfs_root *csum_root; csum_root = btrfs_csum_root(trans->fs_info, bytenr); + if (unlikely(!csum_root)) { + ret = -EUCLEAN; + btrfs_abort_transaction(trans, ret); + btrfs_err(trans->fs_info, + "missing csum root for extent at bytenr %llu", + bytenr); + return ret; + } + ret = btrfs_del_csums(trans, csum_root, bytenr, num_bytes); if (unlikely(ret)) { btrfs_abort_transaction(trans, ret); @@ -3222,7 +3292,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, u64 delayed_ref_root = href->owning_root; extent_root = btrfs_extent_root(info, bytenr); - ASSERT(extent_root); + if (unlikely(!extent_root)) { + btrfs_err(info, + "missing extent root for extent at bytenr %llu", bytenr); + return -EUCLEAN; + } path = btrfs_alloc_path(); if (!path) @@ -4939,11 +5013,18 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, size += btrfs_extent_inline_ref_size(BTRFS_EXTENT_OWNER_REF_KEY); size += btrfs_extent_inline_ref_size(type); + extent_root = btrfs_extent_root(fs_info, ins->objectid); + if (unlikely(!extent_root)) { + btrfs_err(fs_info, + "missing extent root for extent at bytenr %llu", + ins->objectid); + return -EUCLEAN; + } + path = btrfs_alloc_path(); if (!path) return -ENOMEM; - extent_root = btrfs_extent_root(fs_info, ins->objectid); ret = btrfs_insert_empty_item(trans, extent_root, path, ins, size); if (ret) { btrfs_free_path(path); @@ -5019,11 +5100,18 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, size += sizeof(*block_info); } + extent_root = btrfs_extent_root(fs_info, extent_key.objectid); + if (unlikely(!extent_root)) { + btrfs_err(fs_info, + "missing extent root for extent at bytenr %llu", + extent_key.objectid); + return -EUCLEAN; + } + path = btrfs_alloc_path(); if (!path) return -ENOMEM; - extent_root = btrfs_extent_root(fs_info, extent_key.objectid); ret = btrfs_insert_empty_item(trans, extent_root, path, &extent_key, size); if (ret) { diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 927324a2175aab..ed8ecf44fbd0d7 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -308,6 +308,13 @@ static int search_csum_tree(struct btrfs_fs_info *fs_info, /* Current item doesn't contain the desired range, search again */ btrfs_release_path(path); csum_root = btrfs_csum_root(fs_info, disk_bytenr); + if (unlikely(!csum_root)) { + btrfs_err(fs_info, + "missing csum root for extent at bytenr %llu", + disk_bytenr); + return -EUCLEAN; + } + item = btrfs_lookup_csum(NULL, csum_root, path, disk_bytenr, 0); if (IS_ERR(item)) { ret = PTR_ERR(item); diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index ecddfca92b2b53..9efd1ec90f031f 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1073,6 +1073,14 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, if (ret) return ret; + extent_root = btrfs_extent_root(trans->fs_info, block_group->start); + if (unlikely(!extent_root)) { + btrfs_err(trans->fs_info, + "missing extent root for block group at offset %llu", + block_group->start); + return -EUCLEAN; + } + mutex_lock(&block_group->free_space_lock); /* @@ -1086,7 +1094,6 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = 0; - extent_root = btrfs_extent_root(trans->fs_info, key.objectid); ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0); if (ret < 0) goto out_locked; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a6da98435ef7cb..f643a05208720d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2012,6 +2012,13 @@ static int can_nocow_file_extent(struct btrfs_path *path, */ csum_root = btrfs_csum_root(root->fs_info, io_start); + if (unlikely(!csum_root)) { + btrfs_err(root->fs_info, + "missing csum root for extent at bytenr %llu", io_start); + ret = -EUCLEAN; + goto out; + } + ret = btrfs_lookup_csums_list(csum_root, io_start, io_start + args->file_extent.num_bytes - 1, NULL, nowait); @@ -2749,10 +2756,17 @@ static int add_pending_csums(struct btrfs_trans_handle *trans, int ret; list_for_each_entry(sum, list, list) { - trans->adding_csums = true; - if (!csum_root) + if (!csum_root) { csum_root = btrfs_csum_root(trans->fs_info, sum->logical); + if (unlikely(!csum_root)) { + btrfs_err(trans->fs_info, + "missing csum root for extent at bytenr %llu", + sum->logical); + return -EUCLEAN; + } + } + trans->adding_csums = true; ret = btrfs_csum_file_blocks(trans, csum_root, sum); trans->adding_csums = false; if (ret) @@ -9874,6 +9888,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, int compression; size_t orig_count; const u32 min_folio_size = btrfs_min_folio_size(fs_info); + const u32 blocksize = fs_info->sectorsize; u64 start, end; u64 num_bytes, ram_bytes, disk_num_bytes; struct btrfs_key ins; @@ -9984,9 +9999,9 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, ret = -EFAULT; goto out_cb; } - if (bytes < min_folio_size) - folio_zero_range(folio, bytes, min_folio_size - bytes); - ret = bio_add_folio(&cb->bbio.bio, folio, folio_size(folio), 0); + if (!IS_ALIGNED(bytes, blocksize)) + folio_zero_range(folio, bytes, round_up(bytes, blocksize) - bytes); + ret = bio_add_folio(&cb->bbio.bio, folio, round_up(bytes, blocksize), 0); if (unlikely(!ret)) { folio_put(folio); ret = -EINVAL; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b805dd9227efa2..d75d31b606e499 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3617,7 +3617,8 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) } } - trans = btrfs_join_transaction(root); + /* 2 BTRFS_QGROUP_RELATION_KEY items. */ + trans = btrfs_start_transaction(root, 2); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out; @@ -3689,7 +3690,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) goto out; } - trans = btrfs_join_transaction(root); + /* + * 1 BTRFS_QGROUP_INFO_KEY item. + * 1 BTRFS_QGROUP_LIMIT_KEY item. + */ + trans = btrfs_start_transaction(root, 2); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out; @@ -3738,7 +3743,8 @@ static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg) goto drop_write; } - trans = btrfs_join_transaction(root); + /* 1 BTRFS_QGROUP_LIMIT_KEY item. */ + trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out; diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index 049a940ba4499f..79642e02181b93 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -429,7 +429,7 @@ static void copy_compressed_segment(struct compressed_bio *cb, int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb) { struct workspace *workspace = list_entry(ws, struct workspace, list); - const struct btrfs_fs_info *fs_info = cb->bbio.inode->root->fs_info; + struct btrfs_fs_info *fs_info = cb->bbio.inode->root->fs_info; const u32 sectorsize = fs_info->sectorsize; struct folio_iter fi; char *kaddr; @@ -447,7 +447,7 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb) /* There must be a compressed folio and matches the sectorsize. */ if (unlikely(!fi.folio)) return -EINVAL; - ASSERT(folio_size(fi.folio) == sectorsize); + ASSERT(folio_size(fi.folio) == btrfs_min_folio_size(fs_info)); kaddr = kmap_local_folio(fi.folio, 0); len_in = read_compress_length(kaddr); kunmap_local(kaddr); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 3b2a6517d0b537..41589ce663718a 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3739,6 +3739,14 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans, mutex_lock(&fs_info->qgroup_rescan_lock); extent_root = btrfs_extent_root(fs_info, fs_info->qgroup_rescan_progress.objectid); + if (unlikely(!extent_root)) { + btrfs_err(fs_info, + "missing extent root for extent at bytenr %llu", + fs_info->qgroup_rescan_progress.objectid); + mutex_unlock(&fs_info->qgroup_rescan_lock); + return -EUCLEAN; + } + ret = btrfs_search_slot_for_read(extent_root, &fs_info->qgroup_rescan_progress, path, 1, 0); diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index b4511f560e9292..02105d68accb94 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -2297,8 +2297,7 @@ void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc, static void fill_data_csums(struct btrfs_raid_bio *rbio) { struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; - struct btrfs_root *csum_root = btrfs_csum_root(fs_info, - rbio->bioc->full_stripe_logical); + struct btrfs_root *csum_root; const u64 start = rbio->bioc->full_stripe_logical; const u32 len = (rbio->nr_data * rbio->stripe_nsectors) << fs_info->sectorsize_bits; @@ -2331,6 +2330,15 @@ static void fill_data_csums(struct btrfs_raid_bio *rbio) goto error; } + csum_root = btrfs_csum_root(fs_info, rbio->bioc->full_stripe_logical); + if (unlikely(!csum_root)) { + btrfs_err(fs_info, + "missing csum root for extent at bytenr %llu", + rbio->bioc->full_stripe_logical); + ret = -EUCLEAN; + goto error; + } + ret = btrfs_lookup_csums_bitmap(csum_root, NULL, start, start + len - 1, rbio->csum_buf, rbio->csum_bitmap); if (ret < 0) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b2343aed7a5d0d..033f74fd6225cd 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4185,6 +4185,8 @@ static int move_existing_remap(struct btrfs_fs_info *fs_info, dest_addr = ins.objectid; dest_length = ins.offset; + dest_bg = btrfs_lookup_block_group(fs_info, dest_addr); + if (!is_data && !IS_ALIGNED(dest_length, fs_info->nodesize)) { u64 new_length = ALIGN_DOWN(dest_length, fs_info->nodesize); @@ -4295,15 +4297,12 @@ static int move_existing_remap(struct btrfs_fs_info *fs_info, if (unlikely(ret)) goto end; - dest_bg = btrfs_lookup_block_group(fs_info, dest_addr); - adjust_block_group_remap_bytes(trans, dest_bg, dest_length); mutex_lock(&dest_bg->free_space_lock); bg_needs_free_space = test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &dest_bg->runtime_flags); mutex_unlock(&dest_bg->free_space_lock); - btrfs_put_block_group(dest_bg); if (bg_needs_free_space) { ret = btrfs_add_block_group_free_space(trans, dest_bg); @@ -4333,13 +4332,13 @@ static int move_existing_remap(struct btrfs_fs_info *fs_info, btrfs_end_transaction(trans); } } else { - dest_bg = btrfs_lookup_block_group(fs_info, dest_addr); btrfs_free_reserved_bytes(dest_bg, dest_length, 0); - btrfs_put_block_group(dest_bg); ret = btrfs_commit_transaction(trans); } + btrfs_put_block_group(dest_bg); + return ret; } @@ -4954,6 +4953,12 @@ static int do_remap_reloc_trans(struct btrfs_fs_info *fs_info, struct btrfs_space_info *sinfo = src_bg->space_info; extent_root = btrfs_extent_root(fs_info, src_bg->start); + if (unlikely(!extent_root)) { + btrfs_err(fs_info, + "missing extent root for block group at offset %llu", + src_bg->start); + return -EUCLEAN; + } trans = btrfs_start_transaction(extent_root, 0); if (IS_ERR(trans)) @@ -5306,6 +5311,13 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start, int ret; bool bg_is_ro = false; + if (unlikely(!extent_root)) { + btrfs_err(fs_info, + "missing extent root for block group at offset %llu", + group_start); + return -EUCLEAN; + } + /* * This only gets set if we had a half-deleted snapshot on mount. We * cannot allow relocation to start while we're still trying to clean up @@ -5536,12 +5548,17 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info) goto out; } + rc->extent_root = btrfs_extent_root(fs_info, 0); + if (unlikely(!rc->extent_root)) { + btrfs_err(fs_info, "missing extent root for extent at bytenr 0"); + ret = -EUCLEAN; + goto out; + } + ret = reloc_chunk_start(fs_info); if (ret < 0) goto out_end; - rc->extent_root = btrfs_extent_root(fs_info, 0); - set_reloc_control(rc); trans = btrfs_join_transaction(rc->extent_root); @@ -5636,6 +5653,14 @@ int btrfs_reloc_clone_csums(struct btrfs_ordered_extent *ordered) LIST_HEAD(list); int ret; + if (unlikely(!csum_root)) { + btrfs_mark_ordered_extent_error(ordered); + btrfs_err(fs_info, + "missing csum root for extent at bytenr %llu", + disk_bytenr); + return -EUCLEAN; + } + ret = btrfs_lookup_csums_list(csum_root, disk_bytenr, disk_bytenr + ordered->num_bytes - 1, &list, false); diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 516ef62c8f43de..b4e114efff4562 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1288,6 +1288,23 @@ static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key, btrfs_root_drop_level(&ri), BTRFS_MAX_LEVEL - 1); return -EUCLEAN; } + /* + * If drop_progress.objectid is non-zero, a btrfs_drop_snapshot() was + * interrupted and the resume point was recorded in drop_progress and + * drop_level. In that case drop_level must be >= 1: level 0 is the + * leaf level and drop_snapshot never saves a checkpoint there (it + * only records checkpoints at internal node levels in DROP_REFERENCE + * stage). A zero drop_level combined with a non-zero drop_progress + * objectid indicates on-disk corruption and would cause a BUG_ON in + * merge_reloc_root() and btrfs_drop_snapshot() at mount time. + */ + if (unlikely(btrfs_disk_key_objectid(&ri.drop_progress) != 0 && + btrfs_root_drop_level(&ri) == 0)) { + generic_err(leaf, slot, + "invalid root drop_level 0 with non-zero drop_progress objectid %llu", + btrfs_disk_key_objectid(&ri.drop_progress)); + return -EUCLEAN; + } /* Flags check */ if (unlikely(btrfs_root_flags(&ri) & ~valid_root_flags)) { diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 552fef3c385a5f..ab0d460c713950 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -984,6 +984,13 @@ static noinline int replay_one_extent(struct walk_control *wc) sums = list_first_entry(&ordered_sums, struct btrfs_ordered_sum, list); csum_root = btrfs_csum_root(fs_info, sums->logical); + if (unlikely(!csum_root)) { + btrfs_err(fs_info, + "missing csum root for extent at bytenr %llu", + sums->logical); + ret = -EUCLEAN; + } + if (!ret) { ret = btrfs_del_csums(trans, csum_root, sums->logical, sums->len); @@ -4890,6 +4897,13 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, } csum_root = btrfs_csum_root(trans->fs_info, disk_bytenr); + if (unlikely(!csum_root)) { + btrfs_err(trans->fs_info, + "missing csum root for extent at bytenr %llu", + disk_bytenr); + return -EUCLEAN; + } + disk_bytenr += extent_offset; ret = btrfs_lookup_csums_list(csum_root, disk_bytenr, disk_bytenr + extent_num_bytes - 1, @@ -5086,6 +5100,13 @@ static int log_extent_csums(struct btrfs_trans_handle *trans, /* block start is already adjusted for the file extent offset. */ block_start = btrfs_extent_map_block_start(em); csum_root = btrfs_csum_root(trans->fs_info, block_start); + if (unlikely(!csum_root)) { + btrfs_err(trans->fs_info, + "missing csum root for extent at bytenr %llu", + block_start); + return -EUCLEAN; + } + ret = btrfs_lookup_csums_list(csum_root, block_start + csum_offset, block_start + csum_offset + csum_len - 1, &ordered_sums, false); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index be8975ef8b249d..ab51e6ecfdefa5 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4277,20 +4277,29 @@ static int balance_remap_chunks(struct btrfs_fs_info *fs_info, struct btrfs_path end: while (!list_empty(chunks)) { bool is_unused; + struct btrfs_block_group *bg; rci = list_first_entry(chunks, struct remap_chunk_info, list); - spin_lock(&rci->bg->lock); - is_unused = !btrfs_is_block_group_used(rci->bg); - spin_unlock(&rci->bg->lock); + bg = rci->bg; + if (bg) { + /* + * This is a bit racy and the 'used' status can change + * but this is not a problem as later functions will + * verify it again. + */ + spin_lock(&bg->lock); + is_unused = !btrfs_is_block_group_used(bg); + spin_unlock(&bg->lock); - if (is_unused) - btrfs_mark_bg_unused(rci->bg); + if (is_unused) + btrfs_mark_bg_unused(bg); - if (rci->made_ro) - btrfs_dec_block_group_ro(rci->bg); + if (rci->made_ro) + btrfs_dec_block_group_ro(bg); - btrfs_put_block_group(rci->bg); + btrfs_put_block_group(bg); + } list_del(&rci->list); kfree(rci); diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 817ca4fb9efa9e..0cd7fd3fcfa3af 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1261,6 +1261,13 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache, key.offset = 0; root = btrfs_extent_root(fs_info, key.objectid); + if (unlikely(!root)) { + btrfs_err(fs_info, + "missing extent root for extent at bytenr %llu", + key.objectid); + return -EUCLEAN; + } + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); /* We should not find the exact match */ if (unlikely(!ret)) diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c index 6f076fa8f3f9ab..3e847b91dae3fb 100644 --- a/fs/btrfs/zstd.c +++ b/fs/btrfs/zstd.c @@ -600,7 +600,7 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb) bio_first_folio(&fi, &cb->bbio.bio, 0); if (unlikely(!fi.folio)) return -EINVAL; - ASSERT(folio_size(fi.folio) == blocksize); + ASSERT(folio_size(fi.folio) == min_folio_size); stream = zstd_init_dstream( ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size); diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig index a9f645f57bb23f..97c48ebe84584f 100644 --- a/fs/erofs/Kconfig +++ b/fs/erofs/Kconfig @@ -16,22 +16,36 @@ config EROFS_FS select ZLIB_INFLATE if EROFS_FS_ZIP_DEFLATE select ZSTD_DECOMPRESS if EROFS_FS_ZIP_ZSTD help - EROFS (Enhanced Read-Only File System) is a lightweight read-only - file system with modern designs (e.g. no buffer heads, inline - xattrs/data, chunk-based deduplication, multiple devices, etc.) for - scenarios which need high-performance read-only solutions, e.g. - smartphones with Android OS, LiveCDs and high-density hosts with - numerous containers; - - It also provides transparent compression and deduplication support to - improve storage density and maintain relatively high compression - ratios, and it implements in-place decompression to temporarily reuse - page cache for compressed data using proper strategies, which is - quite useful for ensuring guaranteed end-to-end runtime decompression + EROFS (Enhanced Read-Only File System) is a modern, lightweight, + secure read-only filesystem for various use cases, such as immutable + system images, container images, application sandboxes, and datasets. + + EROFS uses a flexible, hierarchical on-disk design so that features + can be enabled on demand: the core on-disk format is block-aligned in + order to perform optimally on all kinds of devices, including block + and memory-backed devices; the format is easy to parse and has zero + metadata redundancy, unlike generic filesystems, making it ideal for + filesystem auditing and remote access; inline data, random-access + friendly directory data, inline/shared extended attributes and + chunk-based deduplication ensure space efficiency while maintaining + high performance. + + Optionally, it supports multiple devices to reference external data, + enabling data sharing for container images. + + It also has advanced encoded on-disk layouts, particularly for data + compression and fine-grained deduplication. It utilizes fixed-size + output compression to improve storage density while keeping relatively + high compression ratios. Furthermore, it implements in-place + decompression to reuse file pages to keep compressed data temporarily + with proper strategies, which ensures guaranteed end-to-end runtime performance under extreme memory pressure without extra cost. - See the documentation at - and the web pages at for more details. + For more details, see the web pages at + and the documentation at . + + To compile EROFS filesystem support as a module, choose M here. The + module will be called erofs. If unsure, say N. @@ -105,7 +119,8 @@ config EROFS_FS_ZIP depends on EROFS_FS default y help - Enable transparent compression support for EROFS file systems. + Enable EROFS compression layouts so that filesystems containing + compressed files can be parsed by the kernel. If you don't want to enable compression feature, say N. diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c index abe873f01297b8..98cdaa1cd1a7ca 100644 --- a/fs/erofs/fileio.c +++ b/fs/erofs/fileio.c @@ -25,10 +25,8 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret) container_of(iocb, struct erofs_fileio_rq, iocb); struct folio_iter fi; - if (ret >= 0 && ret != rq->bio.bi_iter.bi_size) { - bio_advance(&rq->bio, ret); - zero_fill_bio(&rq->bio); - } + if (ret >= 0 && ret != rq->bio.bi_iter.bi_size) + ret = -EIO; if (!rq->bio.bi_end_io) { bio_for_each_folio_all(fi, &rq->bio) { DBG_BUGON(folio_test_uptodate(fi.folio)); diff --git a/fs/erofs/ishare.c b/fs/erofs/ishare.c index 829d50d5c717df..ec433bacc59252 100644 --- a/fs/erofs/ishare.c +++ b/fs/erofs/ishare.c @@ -200,8 +200,19 @@ struct inode *erofs_real_inode(struct inode *inode, bool *need_iput) int __init erofs_init_ishare(void) { - erofs_ishare_mnt = kern_mount(&erofs_anon_fs_type); - return PTR_ERR_OR_ZERO(erofs_ishare_mnt); + struct vfsmount *mnt; + int ret; + + mnt = kern_mount(&erofs_anon_fs_type); + if (IS_ERR(mnt)) + return PTR_ERR(mnt); + /* generic_fadvise() doesn't work if s_bdi == &noop_backing_dev_info */ + ret = super_setup_bdi(mnt->mnt_sb); + if (ret) + kern_unmount(mnt); + else + erofs_ishare_mnt = mnt; + return ret; } void erofs_exit_ishare(void) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 3977e42b951686..fe8121df9ef2f2 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1445,6 +1445,7 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, int bios) { struct erofs_sb_info *const sbi = EROFS_SB(io->sb); + int gfp_flag; /* wake up the caller thread for sync decompression */ if (io->sync) { @@ -1477,7 +1478,9 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, sbi->sync_decompress = EROFS_SYNC_DECOMPRESS_FORCE_ON; return; } + gfp_flag = memalloc_noio_save(); z_erofs_decompressqueue_work(&io->u.work); + memalloc_noio_restore(gfp_flag); } static void z_erofs_fill_bio_vec(struct bio_vec *bvec, diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 8fdbba7cad9644..8e8a76a44ff0b1 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -36,19 +36,30 @@ * second map contains a reference to the entry in the first map. */ +static struct workqueue_struct *nfsd_export_wq; + #define EXPKEY_HASHBITS 8 #define EXPKEY_HASHMAX (1 << EXPKEY_HASHBITS) #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) -static void expkey_put(struct kref *ref) +static void expkey_release(struct work_struct *work) { - struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); + struct svc_expkey *key = container_of(to_rcu_work(work), + struct svc_expkey, ek_rwork); if (test_bit(CACHE_VALID, &key->h.flags) && !test_bit(CACHE_NEGATIVE, &key->h.flags)) path_put(&key->ek_path); auth_domain_put(key->ek_client); - kfree_rcu(key, ek_rcu); + kfree(key); +} + +static void expkey_put(struct kref *ref) +{ + struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); + + INIT_RCU_WORK(&key->ek_rwork, expkey_release); + queue_rcu_work(nfsd_export_wq, &key->ek_rwork); } static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) @@ -353,11 +364,13 @@ static void export_stats_destroy(struct export_stats *stats) EXP_STATS_COUNTERS_NUM); } -static void svc_export_release(struct rcu_head *rcu_head) +static void svc_export_release(struct work_struct *work) { - struct svc_export *exp = container_of(rcu_head, struct svc_export, - ex_rcu); + struct svc_export *exp = container_of(to_rcu_work(work), + struct svc_export, ex_rwork); + path_put(&exp->ex_path); + auth_domain_put(exp->ex_client); nfsd4_fslocs_free(&exp->ex_fslocs); export_stats_destroy(exp->ex_stats); kfree(exp->ex_stats); @@ -369,9 +382,8 @@ static void svc_export_put(struct kref *ref) { struct svc_export *exp = container_of(ref, struct svc_export, h.ref); - path_put(&exp->ex_path); - auth_domain_put(exp->ex_client); - call_rcu(&exp->ex_rcu, svc_export_release); + INIT_RCU_WORK(&exp->ex_rwork, svc_export_release); + queue_rcu_work(nfsd_export_wq, &exp->ex_rwork); } static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) @@ -1479,6 +1491,36 @@ const struct seq_operations nfs_exports_op = { .show = e_show, }; +/** + * nfsd_export_wq_init - allocate the export release workqueue + * + * Called once at module load. The workqueue runs deferred svc_export and + * svc_expkey release work scheduled by queue_rcu_work() in the cache put + * callbacks. + * + * Return values: + * %0: workqueue allocated + * %-ENOMEM: allocation failed + */ +int nfsd_export_wq_init(void) +{ + nfsd_export_wq = alloc_workqueue("nfsd_export", WQ_UNBOUND, 0); + if (!nfsd_export_wq) + return -ENOMEM; + return 0; +} + +/** + * nfsd_export_wq_shutdown - drain and free the export release workqueue + * + * Called once at module unload. Per-namespace teardown in + * nfsd_export_shutdown() has already drained all deferred work. + */ +void nfsd_export_wq_shutdown(void) +{ + destroy_workqueue(nfsd_export_wq); +} + /* * Initialize the exports module. */ @@ -1540,6 +1582,9 @@ nfsd_export_shutdown(struct net *net) cache_unregister_net(nn->svc_expkey_cache, net); cache_unregister_net(nn->svc_export_cache, net); + /* Drain deferred export and expkey release work. */ + rcu_barrier(); + flush_workqueue(nfsd_export_wq); cache_destroy_net(nn->svc_expkey_cache, net); cache_destroy_net(nn->svc_export_cache, net); svcauth_unix_purge(net); diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index d2b09cd761453d..b053993745749e 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -75,7 +76,7 @@ struct svc_export { u32 ex_layout_types; struct nfsd4_deviceid_map *ex_devid_map; struct cache_detail *cd; - struct rcu_head ex_rcu; + struct rcu_work ex_rwork; unsigned long ex_xprtsec_modes; struct export_stats *ex_stats; }; @@ -92,7 +93,7 @@ struct svc_expkey { u32 ek_fsid[6]; struct path ek_path; - struct rcu_head ek_rcu; + struct rcu_work ek_rwork; }; #define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC)) @@ -110,6 +111,8 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp, /* * Function declarations */ +int nfsd_export_wq_init(void); +void nfsd_export_wq_shutdown(void); int nfsd_export_init(struct net *); void nfsd_export_shutdown(struct net *); void nfsd_export_flush(struct net *); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 41dfba5ab8b81a..9d234913100b9b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -6281,9 +6281,14 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) int len = xdr->buf->len - (op_status_offset + XDR_UNIT); so->so_replay.rp_status = op->status; - so->so_replay.rp_buflen = len; - read_bytes_from_xdr_buf(xdr->buf, op_status_offset + XDR_UNIT, + if (len <= NFSD4_REPLAY_ISIZE) { + so->so_replay.rp_buflen = len; + read_bytes_from_xdr_buf(xdr->buf, + op_status_offset + XDR_UNIT, so->so_replay.rp_buf, len); + } else { + so->so_replay.rp_buflen = 0; + } } status: op->status = nfsd4_map_status(op->status, diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 4cc8a58fa56a0d..71aabdaa1d1594 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -149,9 +149,19 @@ static int exports_net_open(struct net *net, struct file *file) seq = file->private_data; seq->private = nn->svc_export_cache; + get_net(net); return 0; } +static int exports_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct cache_detail *cd = seq->private; + + put_net(cd->net); + return seq_release(inode, file); +} + static int exports_nfsd_open(struct inode *inode, struct file *file) { return exports_net_open(inode->i_sb->s_fs_info, file); @@ -161,7 +171,7 @@ static const struct file_operations exports_nfsd_operations = { .open = exports_nfsd_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = exports_release, }; static int export_features_show(struct seq_file *m, void *v) @@ -1376,7 +1386,7 @@ static const struct proc_ops exports_proc_ops = { .proc_open = exports_proc_open, .proc_read = seq_read, .proc_lseek = seq_lseek, - .proc_release = seq_release, + .proc_release = exports_release, }; static int create_proc_exports_entry(void) @@ -2259,9 +2269,12 @@ static int __init init_nfsd(void) if (retval) goto out_free_pnfs; nfsd_lockd_init(); /* lockd->nfsd callbacks */ + retval = nfsd_export_wq_init(); + if (retval) + goto out_free_lockd; retval = register_pernet_subsys(&nfsd_net_ops); if (retval < 0) - goto out_free_lockd; + goto out_free_export_wq; retval = register_cld_notifier(); if (retval) goto out_free_subsys; @@ -2290,6 +2303,8 @@ static int __init init_nfsd(void) unregister_cld_notifier(); out_free_subsys: unregister_pernet_subsys(&nfsd_net_ops); +out_free_export_wq: + nfsd_export_wq_shutdown(); out_free_lockd: nfsd_lockd_shutdown(); nfsd_drc_slab_free(); @@ -2310,6 +2325,7 @@ static void __exit exit_nfsd(void) nfsd4_destroy_laundry_wq(); unregister_cld_notifier(); unregister_pernet_subsys(&nfsd_net_ops); + nfsd_export_wq_shutdown(); nfsd_drc_slab_free(); nfsd_lockd_shutdown(); nfsd4_free_slabs(); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 6fcbf1e427d4d5..c0ca115c3b74bb 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -541,11 +541,18 @@ struct nfs4_client_reclaim { struct xdr_netobj cr_princhash; }; -/* A reasonable value for REPLAY_ISIZE was estimated as follows: - * The OPEN response, typically the largest, requires - * 4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) + 8(verifier) + - * 4(deleg. type) + 8(deleg. stateid) + 4(deleg. recall flag) + - * 20(deleg. space limit) + ~32(deleg. ace) = 112 bytes +/* + * REPLAY_ISIZE is sized for an OPEN response with delegation: + * 4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) + + * 8(verifier) + 4(deleg. type) + 8(deleg. stateid) + + * 4(deleg. recall flag) + 20(deleg. space limit) + + * ~32(deleg. ace) = 112 bytes + * + * Some responses can exceed this. A LOCK denial includes the conflicting + * lock owner, which can be up to 1024 bytes (NFS4_OPAQUE_LIMIT). Responses + * larger than REPLAY_ISIZE are not cached in rp_ibuf; only rp_status is + * saved. Enlarging this constant increases the size of every + * nfs4_stateowner. */ #define NFSD4_REPLAY_ISIZE 112 diff --git a/fs/smb/client/Makefile b/fs/smb/client/Makefile index 26b6105f04d1bc..1a6e1e1c9764dd 100644 --- a/fs/smb/client/Makefile +++ b/fs/smb/client/Makefile @@ -48,8 +48,8 @@ cifs-$(CONFIG_CIFS_COMPRESSION) += compress.o compress/lz77.o # Build the SMB2 error mapping table from smb2status.h # $(obj)/smb2_mapping_table.c: $(src)/../common/smb2status.h \ - $(src)/gen_smb2_mapping - $(call cmd,gen_smb2_mapping) + $(src)/gen_smb2_mapping FORCE + $(call if_changed,gen_smb2_mapping) $(obj)/smb2maperror.o: $(obj)/smb2_mapping_table.c @@ -58,4 +58,5 @@ quiet_cmd_gen_smb2_mapping = GEN $@ obj-$(CONFIG_SMB_KUNIT_TESTS) += smb2maperror_test.o -clean-files += smb2_mapping_table.c +# Let Kbuild handle tracking and cleaning +targets += smb2_mapping_table.c diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 7877d327dbb030..709e96e077916d 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -2386,4 +2386,10 @@ static inline int cifs_open_create_options(unsigned int oflags, int opts) return opts; } +/* + * The number of blocks is not related to (i_size / i_blksize), but instead + * 512 byte (2**9) size is required for calculating num blocks. + */ +#define CIFS_INO_BLOCKS(size) DIV_ROUND_UP_ULL((u64)(size), 512) + #endif /* _CIFS_GLOB_H */ diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 3bad2c5c523dbd..69b38f0ccf2b26 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -1955,6 +1955,10 @@ static int match_session(struct cifs_ses *ses, case Kerberos: if (!uid_eq(ctx->cred_uid, ses->cred_uid)) return 0; + if (strncmp(ses->user_name ?: "", + ctx->username ?: "", + CIFS_MAX_USERNAME_LEN)) + return 0; break; case NTLMv2: case RawNTLMSSP: diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 27f61fe7e4e280..a69e05f86d7e2f 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -993,7 +993,6 @@ static int cifs_do_truncate(const unsigned int xid, struct dentry *dentry) if (!rc) { netfs_resize_file(&cinode->netfs, 0, true); cifs_setsize(inode, 0); - inode->i_blocks = 0; } } if (cfile) diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 143fa2e665ed4c..888f9e35f14b8a 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -219,13 +219,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr, */ if (is_size_safe_to_change(cifs_i, fattr->cf_eof, from_readdir)) { i_size_write(inode, fattr->cf_eof); - - /* - * i_blocks is not related to (i_size / i_blksize), - * but instead 512 byte (2**9) size is required for - * calculating num blocks. - */ - inode->i_blocks = (512 - 1 + fattr->cf_bytes) >> 9; + inode->i_blocks = CIFS_INO_BLOCKS(fattr->cf_bytes); } if (S_ISLNK(fattr->cf_mode) && fattr->cf_symlink_target) { @@ -3015,6 +3009,11 @@ void cifs_setsize(struct inode *inode, loff_t offset) { spin_lock(&inode->i_lock); i_size_write(inode, offset); + /* + * Until we can query the server for actual allocation size, + * this is best estimate we have for blocks allocated for a file. + */ + inode->i_blocks = CIFS_INO_BLOCKS(offset); spin_unlock(&inode->i_lock); inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); truncate_pagecache(inode, offset); @@ -3087,14 +3086,6 @@ int cifs_file_set_size(const unsigned int xid, struct dentry *dentry, if (rc == 0) { netfs_resize_file(&cifsInode->netfs, size, true); cifs_setsize(inode, size); - /* - * i_blocks is not related to (i_size / i_blksize), but instead - * 512 byte (2**9) size is required for calculating num blocks. - * Until we can query the server for actual allocation size, - * this is best estimate we have for blocks allocated for a file - * Number of blocks must be rounded up so size 1 is not 0 blocks - */ - inode->i_blocks = (512 - 1 + size) >> 9; } return rc; diff --git a/fs/smb/client/smb1transport.c b/fs/smb/client/smb1transport.c index 38d6d5538b96b5..53abb29fe71bdd 100644 --- a/fs/smb/client/smb1transport.c +++ b/fs/smb/client/smb1transport.c @@ -460,7 +460,7 @@ check_smb_hdr(struct smb_hdr *smb) return 0; /* - * Windows NT server returns error resposne (e.g. STATUS_DELETE_PENDING + * Windows NT server returns error response (e.g. STATUS_DELETE_PENDING * or STATUS_OBJECT_NAME_NOT_FOUND or ERRDOS/ERRbadfile or any other) * for some TRANS2 requests without the RESPONSE flag set in header. */ diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 98ac4e86bf997c..509fcea28a429d 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -1497,6 +1497,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, { struct smb2_file_network_open_info file_inf; struct inode *inode; + u64 asize; int rc; rc = __SMB2_close(xid, tcon, cfile->fid.persistent_fid, @@ -1520,14 +1521,9 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, inode_set_atime_to_ts(inode, cifs_NTtimeToUnix(file_inf.LastAccessTime)); - /* - * i_blocks is not related to (i_size / i_blksize), - * but instead 512 byte (2**9) size is required for - * calculating num blocks. - */ - if (le64_to_cpu(file_inf.AllocationSize) > 4096) - inode->i_blocks = - (512 - 1 + le64_to_cpu(file_inf.AllocationSize)) >> 9; + asize = le64_to_cpu(file_inf.AllocationSize); + if (asize > 4096) + inode->i_blocks = CIFS_INO_BLOCKS(asize); /* End of file and Attributes should not have to be updated on close */ spin_unlock(&inode->i_lock); @@ -2204,14 +2200,6 @@ smb2_duplicate_extents(const unsigned int xid, rc = smb2_set_file_size(xid, tcon, trgtfile, dest_off + len, false); if (rc) goto duplicate_extents_out; - - /* - * Although also could set plausible allocation size (i_blocks) - * here in addition to setting the file size, in reflink - * it is likely that the target file is sparse. Its allocation - * size will be queried on next revalidate, but it is important - * to make sure that file's cached size is updated immediately - */ netfs_resize_file(netfs_inode(inode), dest_off + len, true); cifs_setsize(inode, dest_off + len); } diff --git a/fs/smb/server/mgmt/tree_connect.c b/fs/smb/server/mgmt/tree_connect.c index a72d7e42a6c278..58e5b8592da46f 100644 --- a/fs/smb/server/mgmt/tree_connect.c +++ b/fs/smb/server/mgmt/tree_connect.c @@ -102,8 +102,10 @@ ksmbd_tree_conn_connect(struct ksmbd_work *work, const char *share_name) void ksmbd_tree_connect_put(struct ksmbd_tree_connect *tcon) { - if (atomic_dec_and_test(&tcon->refcount)) + if (atomic_dec_and_test(&tcon->refcount)) { + ksmbd_share_config_put(tcon->share_conf); kfree(tcon); + } } static int __ksmbd_tree_conn_disconnect(struct ksmbd_session *sess, @@ -113,10 +115,11 @@ static int __ksmbd_tree_conn_disconnect(struct ksmbd_session *sess, ret = ksmbd_ipc_tree_disconnect_request(sess->id, tree_conn->id); ksmbd_release_tree_conn_id(sess, tree_conn->id); - ksmbd_share_config_put(tree_conn->share_conf); ksmbd_counter_dec(KSMBD_COUNTER_TREE_CONNS); - if (atomic_dec_and_test(&tree_conn->refcount)) + if (atomic_dec_and_test(&tree_conn->refcount)) { + ksmbd_share_config_put(tree_conn->share_conf); kfree(tree_conn); + } return ret; } diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 9f7ff7491e9a88..9c44e71e3c3bac 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -126,6 +126,8 @@ int smb2_get_ksmbd_tcon(struct ksmbd_work *work) pr_err("The first operation in the compound does not have tcon\n"); return -EINVAL; } + if (work->tcon->t_state != TREE_CONNECTED) + return -ENOENT; if (tree_id != UINT_MAX && work->tcon->id != tree_id) { pr_err("tree id(%u) is different with id(%u) in first operation\n", tree_id, work->tcon->id); @@ -1948,6 +1950,7 @@ int smb2_sess_setup(struct ksmbd_work *work) } } smb2_set_err_rsp(work); + conn->binding = false; } else { unsigned int iov_len; @@ -2828,7 +2831,11 @@ static int parse_durable_handle_context(struct ksmbd_work *work, goto out; } - dh_info->fp->conn = conn; + if (dh_info->fp->conn) { + ksmbd_put_durable_fd(dh_info->fp); + err = -EBADF; + goto out; + } dh_info->reconnected = true; goto out; } @@ -5452,7 +5459,6 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, struct smb2_query_info_req *req, struct smb2_query_info_rsp *rsp) { - struct ksmbd_session *sess = work->sess; struct ksmbd_conn *conn = work->conn; struct ksmbd_share_config *share = work->tcon->share_conf; int fsinfoclass = 0; @@ -5589,10 +5595,11 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, info = (struct object_id_info *)(rsp->Buffer); - if (!user_guest(sess->user)) - memcpy(info->objid, user_passkey(sess->user), 16); + if (path.mnt->mnt_sb->s_uuid_len == 16) + memcpy(info->objid, path.mnt->mnt_sb->s_uuid.b, + path.mnt->mnt_sb->s_uuid_len); else - memset(info->objid, 0, 16); + memcpy(info->objid, &stfs.f_fsid, sizeof(stfs.f_fsid)); info->extended_info.magic = cpu_to_le32(EXTENDED_INFO_MAGIC); info->extended_info.version = cpu_to_le32(1); diff --git a/fs/tests/exec_kunit.c b/fs/tests/exec_kunit.c index f412d1a0f6bba9..1c32cac098cf51 100644 --- a/fs/tests/exec_kunit.c +++ b/fs/tests/exec_kunit.c @@ -94,9 +94,6 @@ static const struct bprm_stack_limits_result bprm_stack_limits_results[] = { { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * (_STK_LIM / 4 * 3 + sizeof(void *)), .argc = 0, .envc = 0 }, .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) }, - { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * (_STK_LIM / 4 * + sizeof(void *)), - .argc = 0, .envc = 0 }, - .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) }, { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * _STK_LIM, .argc = 0, .envc = 0 }, .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) }, diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h index 056ef7b6b36002..1823a290a7b7a7 100644 --- a/include/hyperv/hvgdk_mini.h +++ b/include/hyperv/hvgdk_mini.h @@ -477,7 +477,6 @@ union hv_vp_assist_msr_contents { /* HV_REGISTER_VP_ASSIST_PAGE */ #define HVCALL_NOTIFY_PARTITION_EVENT 0x0087 #define HVCALL_ENTER_SLEEP_STATE 0x0084 #define HVCALL_NOTIFY_PORT_RING_EMPTY 0x008b -#define HVCALL_SCRUB_PARTITION 0x008d #define HVCALL_REGISTER_INTERCEPT_RESULT 0x0091 #define HVCALL_ASSERT_VIRTUAL_INTERRUPT 0x0094 #define HVCALL_CREATE_PORT 0x0095 @@ -1121,6 +1120,8 @@ enum hv_register_name { HV_X64_REGISTER_MSR_MTRR_FIX4KF8000 = 0x0008007A, HV_X64_REGISTER_REG_PAGE = 0x0009001C, +#elif defined(CONFIG_ARM64) + HV_ARM64_REGISTER_SINT_RESERVED_INTERRUPT_ID = 0x00070001, #endif }; diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h index 407f7005e6d60a..8bcb9b72626286 100644 --- a/include/linux/auxvec.h +++ b/include/linux/auxvec.h @@ -4,6 +4,6 @@ #include -#define AT_VECTOR_SIZE_BASE 22 /* NEW_AUX_ENT entries in auxiliary table */ +#define AT_VECTOR_SIZE_BASE 24 /* NEW_AUX_ENT entries in auxiliary table */ /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */ #endif /* _LINUX_AUXVEC_H */ diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h index 2cfbb4c65c784a..d3dc5dc5f916ff 100644 --- a/include/linux/build_bug.h +++ b/include/linux/build_bug.h @@ -32,7 +32,8 @@ /** * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied * error message. - * @condition: the condition which the compiler should know is false. + * @cond: the condition which the compiler should know is false. + * @msg: build-time error message * * See BUILD_BUG_ON for description. */ @@ -60,6 +61,7 @@ /** * static_assert - check integer constant expression at build time + * @expr: expression to be checked * * static_assert() is a wrapper for the C11 _Static_assert, with a * little macro magic to make the message optional (defaulting to the diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index 13b35637bd5aee..d5ca855116df45 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -160,6 +160,7 @@ struct vc_data { struct uni_pagedict **uni_pagedict_loc; /* [!] Location of uni_pagedict variable for this console */ u32 **vc_uni_lines; /* unicode screen content */ u16 *vc_saved_screen; + u32 **vc_saved_uni_lines; unsigned int vc_saved_cols; unsigned int vc_saved_rows; /* additional information is in vt_kern.h */ diff --git a/include/linux/damon.h b/include/linux/damon.h index a4fea23da8576a..be3d198043ff9f 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -810,6 +810,12 @@ struct damon_ctx { struct damos_walk_control *walk_control; struct mutex walk_control_lock; + /* + * indicate if this may be corrupted. Currentonly this is set only for + * damon_commit_ctx() failure. + */ + bool maybe_corrupted; + /* Working thread of the given DAMON context */ struct task_struct *kdamond; /* Protects @kdamond field access */ diff --git a/include/linux/device.h b/include/linux/device.h index 0be95294b6e615..e65d564f01cd7c 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -483,6 +483,8 @@ struct device_physical_location { * on. This shrinks the "Board Support Packages" (BSPs) and * minimizes board-specific #ifdefs in drivers. * @driver_data: Private pointer for driver specific info. + * @driver_override: Driver name to force a match. Do not touch directly; use + * device_set_driver_override() instead. * @links: Links to suppliers and consumers of this device. * @power: For device power management. * See Documentation/driver-api/pm/devices.rst for details. @@ -576,6 +578,10 @@ struct device { core doesn't touch it */ void *driver_data; /* Driver data, set and get with dev_set_drvdata/dev_get_drvdata */ + struct { + const char *name; + spinlock_t lock; + } driver_override; struct mutex mutex; /* mutex to synchronize calls to * its driver. */ @@ -701,6 +707,54 @@ struct device_link { #define kobj_to_dev(__kobj) container_of_const(__kobj, struct device, kobj) +int __device_set_driver_override(struct device *dev, const char *s, size_t len); + +/** + * device_set_driver_override() - Helper to set or clear driver override. + * @dev: Device to change + * @s: NUL-terminated string, new driver name to force a match, pass empty + * string to clear it ("" or "\n", where the latter is only for sysfs + * interface). + * + * Helper to set or clear driver override of a device. + * + * Returns: 0 on success or a negative error code on failure. + */ +static inline int device_set_driver_override(struct device *dev, const char *s) +{ + return __device_set_driver_override(dev, s, s ? strlen(s) : 0); +} + +/** + * device_has_driver_override() - Check if a driver override has been set. + * @dev: device to check + * + * Returns true if a driver override has been set for this device. + */ +static inline bool device_has_driver_override(struct device *dev) +{ + guard(spinlock)(&dev->driver_override.lock); + return !!dev->driver_override.name; +} + +/** + * device_match_driver_override() - Match a driver against the device's driver_override. + * @dev: device to check + * @drv: driver to match against + * + * Returns > 0 if a driver override is set and matches the given driver, 0 if a + * driver override is set but does not match, or < 0 if a driver override is not + * set at all. + */ +static inline int device_match_driver_override(struct device *dev, + const struct device_driver *drv) +{ + guard(spinlock)(&dev->driver_override.lock); + if (dev->driver_override.name) + return !strcmp(dev->driver_override.name, drv->name); + return -1; +} + /** * device_iommu_mapped - Returns true when the device DMA is translated * by an IOMMU diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 63de5f053c331c..c1b463cd6464d8 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -65,6 +65,9 @@ struct fwnode_handle; * this bus. * @pm: Power management operations of this bus, callback the specific * device driver's pm-ops. + * @driver_override: Set to true if this bus supports the driver_override + * mechanism, which allows userspace to force a specific + * driver to bind to a device via a sysfs attribute. * @need_parent_lock: When probing or removing a device on this bus, the * device core should lock the device's parent. * @@ -106,6 +109,7 @@ struct bus_type { const struct dev_pm_ops *pm; + bool driver_override; bool need_parent_lock; }; diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 29973baa058168..99ef042ecdb45b 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -80,11 +80,18 @@ #define DMA_ATTR_MMIO (1UL << 10) /* - * DMA_ATTR_CPU_CACHE_CLEAN: Indicates the CPU will not dirty any cacheline - * overlapping this buffer while it is mapped for DMA. All mappings sharing - * a cacheline must have this attribute for this to be considered safe. + * DMA_ATTR_DEBUGGING_IGNORE_CACHELINES: Indicates the CPU cache line can be + * overlapped. All mappings sharing a cacheline must have this attribute for + * this to be considered safe. */ -#define DMA_ATTR_CPU_CACHE_CLEAN (1UL << 11) +#define DMA_ATTR_DEBUGGING_IGNORE_CACHELINES (1UL << 11) + +/* + * DMA_ATTR_REQUIRE_COHERENT: Indicates that DMA coherency is required. + * All mappings that carry this attribute can't work with SWIOTLB and cache + * flushing. + */ +#define DMA_ATTR_REQUIRE_COHERENT (1UL << 12) /* * A dma_addr_t can hold any valid DMA or bus address for the platform. It can @@ -248,8 +255,8 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size, { return NULL; } -static void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_handle, unsigned long attrs) +static inline void dma_free_attrs(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) { } static inline void *dmam_alloc_attrs(struct device *dev, size_t size, diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 9a1eacf35d3708..df8f88f63a7063 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -42,7 +42,8 @@ extern const struct header_ops eth_header_ops; int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned len); -int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr); +int eth_header_parse(const struct sk_buff *skb, const struct net_device *dev, + unsigned char *haddr); int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type); void eth_header_cache_update(struct hh_cache *hh, const struct net_device *dev, diff --git a/include/linux/hid.h b/include/linux/hid.h index 2990b9f94cb57d..31324609af4df1 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -682,6 +682,7 @@ struct hid_device { __s32 battery_charge_status; enum hid_battery_status battery_status; bool battery_avoid_query; + bool battery_present; ktime_t battery_ratelimit_time; #endif diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 61b7335aa037c7..ca9afa824aa4fa 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -40,7 +40,8 @@ static inline struct ethhdr *inner_eth_hdr(const struct sk_buff *skb) return (struct ethhdr *)skb_inner_mac_header(skb); } -int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr); +int eth_header_parse(const struct sk_buff *skb, const struct net_device *dev, + unsigned char *haddr); extern ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len); diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 7a1516011ccf72..e19872e37e067f 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -53,7 +53,7 @@ struct iommu_flush_ops { * tables. * @ias: Input address (iova) size, in bits. * @oas: Output address (paddr) size, in bits. - * @coherent_walk A flag to indicate whether or not page table walks made + * @coherent_walk: A flag to indicate whether or not page table walks made * by the IOMMU are coherent with the CPU caches. * @tlb: TLB management callbacks for this set of tables. * @iommu_dev: The device representing the DMA configuration for the @@ -136,6 +136,7 @@ struct io_pgtable_cfg { void (*free)(void *cookie, void *pages, size_t size); /* Low-level data specific to the table format */ + /* private: */ union { struct { u64 ttbr; @@ -203,6 +204,9 @@ struct arm_lpae_io_pgtable_walk_data { * @unmap_pages: Unmap a range of virtually contiguous pages of the same size. * @iova_to_phys: Translate iova to physical address. * @pgtable_walk: (optional) Perform a page table walk for a given iova. + * @read_and_clear_dirty: Record dirty info per IOVA. If an IOVA is dirty, + * clear its dirty state from the PTE unless the + * IOMMU_DIRTY_NO_CLEAR flag is passed in. * * These functions map directly onto the iommu_ops member functions with * the same names. @@ -231,7 +235,9 @@ struct io_pgtable_ops { * the configuration actually provided by the allocator (e.g. the * pgsize_bitmap may be restricted). * @cookie: An opaque token provided by the IOMMU driver and passed back to - * the callback routines in cfg->tlb. + * the callback routines. + * + * Returns: Pointer to the &struct io_pgtable_ops for this set of page tables. */ struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt, struct io_pgtable_cfg *cfg, diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index dd1420bfcb7350..214fdbd490522d 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -541,6 +541,7 @@ enum { REQ_F_BL_NO_RECYCLE_BIT, REQ_F_BUFFERS_COMMIT_BIT, REQ_F_BUF_NODE_BIT, + REQ_F_BUF_MORE_BIT, REQ_F_HAS_METADATA_BIT, REQ_F_IMPORT_BUFFER_BIT, REQ_F_SQE_COPIED_BIT, @@ -626,6 +627,8 @@ enum { REQ_F_BUFFERS_COMMIT = IO_REQ_FLAG(REQ_F_BUFFERS_COMMIT_BIT), /* buf node is valid */ REQ_F_BUF_NODE = IO_REQ_FLAG(REQ_F_BUF_NODE_BIT), + /* incremental buffer consumption, more space available */ + REQ_F_BUF_MORE = IO_REQ_FLAG(REQ_F_BUF_MORE_BIT), /* request has read/write metadata assigned */ REQ_F_HAS_METADATA = IO_REQ_FLAG(REQ_F_HAS_METADATA_BIT), /* diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index eff711bf973f54..234be7f12c15e5 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -315,7 +315,7 @@ do { \ #endif /* CONFIG_PREEMPT_RT */ -#if defined(WARN_CONTEXT_ANALYSIS) +#if defined(WARN_CONTEXT_ANALYSIS) && !defined(__CHECKER__) /* * Because the compiler only knows about the base per-CPU variable, use this * helper function to make the compiler think we lock/unlock the @base variable, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ae269a2e7f4dd2..7ca01eb3f7d2b2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -311,7 +311,9 @@ struct header_ops { int (*create) (struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len); - int (*parse)(const struct sk_buff *skb, unsigned char *haddr); + int (*parse)(const struct sk_buff *skb, + const struct net_device *dev, + unsigned char *haddr); int (*cache)(const struct neighbour *neigh, struct hh_cache *hh, __be16 type); void (*cache_update)(struct hh_cache *hh, const struct net_device *dev, @@ -2155,6 +2157,7 @@ struct net_device { unsigned long state; unsigned int flags; unsigned short hard_header_len; + enum netdev_stat_type pcpu_stat_type:8; netdev_features_t features; struct inet6_dev __rcu *ip6_ptr; __cacheline_group_end(net_device_read_txrx); @@ -2404,8 +2407,6 @@ struct net_device { void *ml_priv; enum netdev_ml_priv_type ml_priv_type; - enum netdev_stat_type pcpu_stat_type:8; - #if IS_ENABLED(CONFIG_GARP) struct garp_port __rcu *garp_port; #endif @@ -3446,7 +3447,7 @@ static inline int dev_parse_header(const struct sk_buff *skb, if (!dev->header_ops || !dev->header_ops->parse) return 0; - return dev->header_ops->parse(skb, haddr); + return dev->header_ops->parse(skb, dev, haddr); } static inline __be16 dev_parse_header_protocol(const struct sk_buff *skb) diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 813da101b5bf8e..ed1d50d1c3c15c 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -31,11 +31,6 @@ struct platform_device { struct resource *resource; const struct platform_device_id *id_entry; - /* - * Driver name to force a match. Do not set directly, because core - * frees it. Use driver_set_override() to set or clear it. - */ - const char *driver_override; /* MFD cell pointer */ struct mfd_cell *mfd_cell; diff --git a/include/linux/security.h b/include/linux/security.h index 83a646d72f6f8f..ee88dd2d2d1f71 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -145,6 +145,7 @@ enum lockdown_reason { LOCKDOWN_BPF_WRITE_USER, LOCKDOWN_DBG_WRITE_KERNEL, LOCKDOWN_RTAS_ERROR_INJECTION, + LOCKDOWN_XEN_USER_ACTIONS, LOCKDOWN_INTEGRITY_MAX, LOCKDOWN_KCORE, LOCKDOWN_KPROBES, diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 01efdce0fda078..a95b2d143d2489 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -195,6 +195,7 @@ void serial8250_do_set_mctrl(struct uart_port *port, unsigned int mctrl); void serial8250_do_set_divisor(struct uart_port *port, unsigned int baud, unsigned int quot); int fsl8250_handle_irq(struct uart_port *port); +void serial8250_handle_irq_locked(struct uart_port *port, unsigned int iir); int serial8250_handle_irq(struct uart_port *port, unsigned int iir); u16 serial8250_rx_chars(struct uart_8250_port *up, u16 lsr); void serial8250_read_char(struct uart_8250_port *up, u16 lsr); diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index dec7cbe015aa7c..905b629e8fa38e 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -11,6 +11,7 @@ #ifndef _LINUX_SRCU_TINY_H #define _LINUX_SRCU_TINY_H +#include #include struct srcu_struct { @@ -24,18 +25,21 @@ struct srcu_struct { struct rcu_head *srcu_cb_head; /* Pending callbacks: Head. */ struct rcu_head **srcu_cb_tail; /* Pending callbacks: Tail. */ struct work_struct srcu_work; /* For driving grace periods. */ + struct irq_work srcu_irq_work; /* Defer schedule_work() to irq work. */ #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ }; void srcu_drive_gp(struct work_struct *wp); +void srcu_tiny_irq_work(struct irq_work *irq_work); #define __SRCU_STRUCT_INIT(name, __ignored, ___ignored, ____ignored) \ { \ .srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq), \ .srcu_cb_tail = &name.srcu_cb_head, \ .srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp), \ + .srcu_irq_work = { .func = srcu_tiny_irq_work }, \ __SRCU_DEP_MAP_INIT(name) \ } diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 958cb7ef41cbb3..be76fa4fc1700c 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -34,7 +34,7 @@ struct srcu_data { /* Values: SRCU_READ_FLAVOR_.* */ /* Update-side state. */ - spinlock_t __private lock ____cacheline_internodealigned_in_smp; + raw_spinlock_t __private lock ____cacheline_internodealigned_in_smp; struct rcu_segcblist srcu_cblist; /* List of callbacks.*/ unsigned long srcu_gp_seq_needed; /* Furthest future GP needed. */ unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ @@ -55,7 +55,7 @@ struct srcu_data { * Node in SRCU combining tree, similar in function to rcu_data. */ struct srcu_node { - spinlock_t __private lock; + raw_spinlock_t __private lock; unsigned long srcu_have_cbs[4]; /* GP seq for children having CBs, but only */ /* if greater than ->srcu_gp_seq. */ unsigned long srcu_data_have_cbs[4]; /* Which srcu_data structs have CBs for given GP? */ @@ -74,7 +74,7 @@ struct srcu_usage { /* First node at each level. */ int srcu_size_state; /* Small-to-big transition state. */ struct mutex srcu_cb_mutex; /* Serialize CB preparation. */ - spinlock_t __private lock; /* Protect counters and size state. */ + raw_spinlock_t __private lock; /* Protect counters and size state. */ struct mutex srcu_gp_mutex; /* Serialize GP work. */ unsigned long srcu_gp_seq; /* Grace-period seq #. */ unsigned long srcu_gp_seq_needed; /* Latest gp_seq needed. */ @@ -95,6 +95,7 @@ struct srcu_usage { unsigned long reschedule_jiffies; unsigned long reschedule_count; struct delayed_work work; + struct irq_work irq_work; struct srcu_struct *srcu_ssp; }; @@ -156,7 +157,7 @@ struct srcu_struct { #define __SRCU_USAGE_INIT(name) \ { \ - .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ .srcu_gp_seq = SRCU_GP_SEQ_INITIAL_VAL, \ .srcu_gp_seq_needed = SRCU_GP_SEQ_INITIAL_VAL_WITH_STATE, \ .srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL, \ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 75dabb763c6504..f36d21b5bc19e2 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -207,6 +207,39 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, return __virtio_net_hdr_to_skb(skb, hdr, little_endian, hdr->gso_type); } +/* This function must be called after virtio_net_hdr_from_skb(). */ +static inline void __virtio_net_set_hdrlen(const struct sk_buff *skb, + struct virtio_net_hdr *hdr, + bool little_endian) +{ + u16 hdr_len; + + hdr_len = skb_transport_offset(skb); + + if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP_L4) + hdr_len += sizeof(struct udphdr); + else + hdr_len += tcp_hdrlen(skb); + + hdr->hdr_len = __cpu_to_virtio16(little_endian, hdr_len); +} + +/* This function must be called after virtio_net_hdr_from_skb(). */ +static inline void __virtio_net_set_tnl_hdrlen(const struct sk_buff *skb, + struct virtio_net_hdr *hdr) +{ + u16 hdr_len; + + hdr_len = skb_inner_transport_offset(skb); + + if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP_L4) + hdr_len += sizeof(struct udphdr); + else + hdr_len += inner_tcp_hdrlen(skb); + + hdr->hdr_len = __cpu_to_virtio16(true, hdr_len); +} + static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, struct virtio_net_hdr *hdr, bool little_endian, @@ -385,7 +418,8 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, bool tnl_hdr_negotiated, bool little_endian, int vlan_hlen, - bool has_data_valid) + bool has_data_valid, + bool feature_hdrlen) { struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)vhdr; unsigned int inner_nh, outer_th; @@ -394,9 +428,17 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, tnl_gso_type = skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM); - if (!tnl_gso_type) - return virtio_net_hdr_from_skb(skb, hdr, little_endian, - has_data_valid, vlan_hlen); + if (!tnl_gso_type) { + ret = virtio_net_hdr_from_skb(skb, hdr, little_endian, + has_data_valid, vlan_hlen); + if (ret) + return ret; + + if (feature_hdrlen && hdr->hdr_len) + __virtio_net_set_hdrlen(skb, hdr, little_endian); + + return ret; + } /* Tunnel support not negotiated but skb ask for it. */ if (!tnl_hdr_negotiated) @@ -414,6 +456,9 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, if (ret) return ret; + if (feature_hdrlen && hdr->hdr_len) + __virtio_net_set_tnl_hdrlen(skb, hdr); + if (skb->protocol == htons(ETH_P_IPV6)) hdr->gso_type |= VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6; else diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 010f1a8fd15f82..5172afee549433 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -658,6 +658,7 @@ struct l2cap_conn { struct sk_buff *rx_skb; __u32 rx_len; struct ida tx_ida; + __u8 tx_ident; struct sk_buff_head pending_rx; struct work_struct pending_rx_work; diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h index 78a27ac730700b..b2c359c6dd1b84 100644 --- a/include/net/codel_impl.h +++ b/include/net/codel_impl.h @@ -158,6 +158,7 @@ static struct sk_buff *codel_dequeue(void *ctx, bool drop; if (!skb) { + vars->first_above_time = 0; vars->dropping = false; return skb; } diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 5a979dcab53839..6d936e9f2fd32c 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -264,6 +264,20 @@ inet_bhashfn_portaddr(const struct inet_hashinfo *hinfo, const struct sock *sk, return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)]; } +static inline bool inet_use_hash2_on_bind(const struct sock *sk) +{ +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) { + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) + return false; + + if (!ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) + return true; + } +#endif + return sk->sk_rcv_saddr != htonl(INADDR_ANY); +} + struct inet_bind_hashbucket * inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port); diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 88b0dd4d8e094d..9f8b6814a96a04 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -507,12 +507,14 @@ void fib6_rt_update(struct net *net, struct fib6_info *rt, void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int flags); +void fib6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args, + unsigned long now); void fib6_run_gc(unsigned long expires, struct net *net, bool force); - void fib6_gc_cleanup(void); int fib6_init(void); +#if IS_ENABLED(CONFIG_IPV6) /* Add the route to the gc list if it is not already there * * The callers should hold f6i->fib6_table->tb6_lock. @@ -545,6 +547,23 @@ static inline void fib6_remove_gc_list(struct fib6_info *f6i) hlist_del_init(&f6i->gc_link); } +static inline void fib6_may_remove_gc_list(struct net *net, + struct fib6_info *f6i) +{ + struct fib6_gc_args gc_args; + + if (hlist_unhashed(&f6i->gc_link)) + return; + + gc_args.timeout = READ_ONCE(net->ipv6.sysctl.ip6_rt_gc_interval); + gc_args.more = 0; + + rcu_read_lock(); + fib6_age_exceptions(f6i, &gc_args, jiffies); + rcu_read_unlock(); +} +#endif + struct ipv6_route_iter { struct seq_net_private p; struct fib6_walker w; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 80662f81208039..1f577a4f8ce9b1 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -665,13 +665,29 @@ static inline int iptunnel_pull_offloads(struct sk_buff *skb) static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) { if (pkt_len > 0) { - struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); - - u64_stats_update_begin(&tstats->syncp); - u64_stats_add(&tstats->tx_bytes, pkt_len); - u64_stats_inc(&tstats->tx_packets); - u64_stats_update_end(&tstats->syncp); - put_cpu_ptr(tstats); + if (dev->pcpu_stat_type == NETDEV_PCPU_STAT_DSTATS) { + struct pcpu_dstats *dstats = get_cpu_ptr(dev->dstats); + + u64_stats_update_begin(&dstats->syncp); + u64_stats_add(&dstats->tx_bytes, pkt_len); + u64_stats_inc(&dstats->tx_packets); + u64_stats_update_end(&dstats->syncp); + put_cpu_ptr(dstats); + return; + } + if (dev->pcpu_stat_type == NETDEV_PCPU_STAT_TSTATS) { + struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); + + u64_stats_update_begin(&tstats->syncp); + u64_stats_add(&tstats->tx_bytes, pkt_len); + u64_stats_inc(&tstats->tx_packets); + u64_stats_update_end(&tstats->syncp); + put_cpu_ptr(tstats); + return; + } + pr_err_once("iptunnel_xmit_stats pcpu_stat_type=%d\n", + dev->pcpu_stat_type); + WARN_ON_ONCE(1); return; } diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7f9d96939a4ea7..adce2144a6788a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7407,7 +7407,9 @@ void ieee80211_report_wowlan_wakeup(struct ieee80211_vif *vif, * @band: the band to transmit on * @sta: optional pointer to get the station to send the frame to * - * Return: %true if the skb was prepared, %false otherwise + * Return: %true if the skb was prepared, %false otherwise. + * On failure, the skb is freed by this function; callers must not + * free it again. * * Note: must be called under RCU lock */ diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 3384859a892101..8883575adcc1e7 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -83,6 +83,11 @@ void nf_conntrack_lock(spinlock_t *lock); extern spinlock_t nf_conntrack_expect_lock; +static inline void lockdep_nfct_expect_lock_held(void) +{ + lockdep_assert_held(&nf_conntrack_expect_lock); +} + /* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */ static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout) diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 165e7a03b8e9dc..e9a8350e7ccfb0 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -22,10 +22,16 @@ struct nf_conntrack_expect { /* Hash member */ struct hlist_node hnode; + /* Network namespace */ + possible_net_t net; + /* We expect this tuple, with the following mask */ struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_mask mask; +#ifdef CONFIG_NF_CONNTRACK_ZONES + struct nf_conntrack_zone zone; +#endif /* Usage count. */ refcount_t use; @@ -40,7 +46,7 @@ struct nf_conntrack_expect { struct nf_conntrack_expect *this); /* Helper to assign to new connection */ - struct nf_conntrack_helper *helper; + struct nf_conntrack_helper __rcu *helper; /* The conntrack of the master connection */ struct nf_conn *master; @@ -62,7 +68,17 @@ struct nf_conntrack_expect { static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp) { - return nf_ct_net(exp->master); + return read_pnet(&exp->net); +} + +static inline bool nf_ct_exp_zone_equal_any(const struct nf_conntrack_expect *a, + const struct nf_conntrack_zone *b) +{ +#ifdef CONFIG_NF_CONNTRACK_ZONES + return a->zone.id == b->id; +#else + return true; +#endif } #define NF_CT_EXP_POLICY_NAME_LEN 16 diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e2d2bfc1f98930..ec8a8ec9c0aa69 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -277,8 +277,6 @@ struct nft_userdata { unsigned char data[]; }; -#define NFT_SET_ELEM_INTERNAL_LAST 0x1 - /* placeholder structure for opaque set element backend representation. */ struct nft_elem_priv { }; @@ -288,7 +286,6 @@ struct nft_elem_priv { }; * @key: element key * @key_end: closing element key * @data: element data - * @flags: flags * @priv: element private data and extensions */ struct nft_set_elem { @@ -304,7 +301,6 @@ struct nft_set_elem { u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)]; struct nft_data val; } data; - u32 flags; struct nft_elem_priv *priv; }; @@ -878,6 +874,8 @@ struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set, u64 timeout, u64 expiration, gfp_t gfp); int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, struct nft_expr *expr_array[]); +void nft_set_elem_expr_destroy(const struct nft_ctx *ctx, + struct nft_set_elem_expr *elem_expr); void nft_set_elem_destroy(const struct nft_set *set, const struct nft_elem_priv *elem_priv, bool destroy_expr); diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 23dd647fe0248c..b73983a17e0882 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -59,7 +59,7 @@ struct netns_xfrm { struct list_head inexact_bins; - struct sock *nlsk; + struct sock __rcu *nlsk; struct sock *nlsk_stash; u32 sysctl_aevent_etime; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d5d55cb21686dd..c3d657359a3d2d 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -716,6 +716,34 @@ void qdisc_destroy(struct Qdisc *qdisc); void qdisc_put(struct Qdisc *qdisc); void qdisc_put_unlocked(struct Qdisc *qdisc); void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, int n, int len); + +static inline void dev_reset_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_unused) +{ + struct Qdisc *qdisc; + bool nolock; + + qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); + if (!qdisc) + return; + + nolock = qdisc->flags & TCQ_F_NOLOCK; + + if (nolock) + spin_lock_bh(&qdisc->seqlock); + spin_lock_bh(qdisc_lock(qdisc)); + + qdisc_reset(qdisc); + + spin_unlock_bh(qdisc_lock(qdisc)); + if (nolock) { + clear_bit(__QDISC_STATE_MISSED, &qdisc->state); + clear_bit(__QDISC_STATE_DRAINING, &qdisc->state); + spin_unlock_bh(&qdisc->seqlock); + } +} + #ifdef CONFIG_NET_SCHED int qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type, void *type_data); @@ -1429,6 +1457,11 @@ void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp, struct tcf_block *block); +static inline bool mini_qdisc_pair_inited(struct mini_Qdisc_pair *miniqp) +{ + return !!miniqp->p_miniq; +} + void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx); int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)); diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index d9c6d04bb3b587..fc1fc43345b5d8 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -52,7 +52,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp) { - return 0; + return -EPFNOSUPPORT; } #endif diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h index 33e99e792f1aa2..63597b00442471 100644 --- a/include/trace/events/dma.h +++ b/include/trace/events/dma.h @@ -32,7 +32,9 @@ TRACE_DEFINE_ENUM(DMA_NONE); { DMA_ATTR_ALLOC_SINGLE_PAGES, "ALLOC_SINGLE_PAGES" }, \ { DMA_ATTR_NO_WARN, "NO_WARN" }, \ { DMA_ATTR_PRIVILEGED, "PRIVILEGED" }, \ - { DMA_ATTR_MMIO, "MMIO" }) + { DMA_ATTR_MMIO, "MMIO" }, \ + { DMA_ATTR_DEBUGGING_IGNORE_CACHELINES, "CACHELINES_OVERLAP" }, \ + { DMA_ATTR_REQUIRE_COHERENT, "REQUIRE_COHERENT" }) DECLARE_EVENT_CLASS(dma_map, TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, diff --git a/include/trace/events/task.h b/include/trace/events/task.h index 4f0759634306c7..b9a129eb54d9eb 100644 --- a/include/trace/events/task.h +++ b/include/trace/events/task.h @@ -38,19 +38,22 @@ TRACE_EVENT(task_rename, TP_ARGS(task, comm), TP_STRUCT__entry( + __field( pid_t, pid) __array( char, oldcomm, TASK_COMM_LEN) __array( char, newcomm, TASK_COMM_LEN) __field( short, oom_score_adj) ), TP_fast_assign( + __entry->pid = task->pid; memcpy(entry->oldcomm, task->comm, TASK_COMM_LEN); strscpy(entry->newcomm, comm, TASK_COMM_LEN); __entry->oom_score_adj = task->signal->oom_score_adj; ), - TP_printk("oldcomm=%s newcomm=%s oom_score_adj=%hd", - __entry->oldcomm, __entry->newcomm, __entry->oom_score_adj) + TP_printk("pid=%d oldcomm=%s newcomm=%s oom_score_adj=%hd", + __entry->pid, __entry->oldcomm, + __entry->newcomm, __entry->oom_score_adj) ); /** diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 26071021e986f6..56b6b60a814f5e 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -159,5 +159,9 @@ enum ip_conntrack_expect_events { #define NF_CT_EXPECT_INACTIVE 0x2 #define NF_CT_EXPECT_USERSPACE 0x4 +#ifdef __KERNEL__ +#define NF_CT_EXPECT_MASK (NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE | \ + NF_CT_EXPECT_USERSPACE) +#endif #endif /* _UAPI_NF_CONNTRACK_COMMON_H */ diff --git a/init/Kconfig b/init/Kconfig index 444ce811ea674a..7484cd703bc1ab 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -146,7 +146,7 @@ config CC_HAS_COUNTED_BY config CC_HAS_COUNTED_BY_PTR bool # supported since clang 22 - default y if CC_IS_CLANG && CLANG_VERSION >= 220000 + default y if CC_IS_CLANG && CLANG_VERSION >= 220100 # supported since gcc 16.0.0 default y if CC_IS_GCC && GCC_VERSION >= 160000 diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 26813b0f1dfd14..5257b3aad39510 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -34,6 +34,10 @@ struct io_provide_buf { static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len) { + /* No data consumed, return false early to avoid consuming the buffer */ + if (!len) + return false; + while (len) { struct io_uring_buf *buf; u32 buf_len, this_len; @@ -212,7 +216,8 @@ static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len, sel.addr = u64_to_user_ptr(READ_ONCE(buf->addr)); if (io_should_commit(req, issue_flags)) { - io_kbuf_commit(req, sel.buf_list, *len, 1); + if (!io_kbuf_commit(req, sel.buf_list, *len, 1)) + req->flags |= REQ_F_BUF_MORE; sel.buf_list = NULL; } return sel; @@ -345,7 +350,8 @@ int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg, */ if (ret > 0) { req->flags |= REQ_F_BUFFERS_COMMIT | REQ_F_BL_NO_RECYCLE; - io_kbuf_commit(req, sel->buf_list, arg->out_len, ret); + if (!io_kbuf_commit(req, sel->buf_list, arg->out_len, ret)) + req->flags |= REQ_F_BUF_MORE; } } else { ret = io_provided_buffers_select(req, &arg->out_len, sel->buf_list, arg->iovs); @@ -391,8 +397,10 @@ static inline bool __io_put_kbuf_ring(struct io_kiocb *req, if (bl) ret = io_kbuf_commit(req, bl, len, nr); + if (ret && (req->flags & REQ_F_BUF_MORE)) + ret = false; - req->flags &= ~REQ_F_BUFFER_RING; + req->flags &= ~(REQ_F_BUFFER_RING | REQ_F_BUF_MORE); return ret; } diff --git a/io_uring/poll.c b/io_uring/poll.c index b671b84657d9d3..2e9ee47d74bfbd 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -272,6 +272,7 @@ static int io_poll_check_events(struct io_kiocb *req, io_tw_token_t tw) atomic_andnot(IO_POLL_RETRY_FLAG, &req->poll_refs); v &= ~IO_POLL_RETRY_FLAG; } + v &= IO_POLL_REF_MASK; } /* the mask was stashed in __io_poll_execute */ @@ -304,8 +305,13 @@ static int io_poll_check_events(struct io_kiocb *req, io_tw_token_t tw) return IOU_POLL_REMOVE_POLL_USE_RES; } } else { - int ret = io_poll_issue(req, tw); + int ret; + /* multiple refs and HUP, ensure we loop once more */ + if ((req->cqe.res & (POLLHUP | POLLRDHUP)) && v != 1) + v--; + + ret = io_poll_issue(req, tw); if (ret == IOU_COMPLETE) return IOU_POLL_REMOVE_POLL_USE_RES; else if (ret == IOU_REQUEUE) @@ -321,7 +327,6 @@ static int io_poll_check_events(struct io_kiocb *req, io_tw_token_t tw) * Release all references, retry if someone tried to restart * task_work while we were executing it. */ - v &= IO_POLL_REF_MASK; } while (atomic_sub_return(v, &req->poll_refs) & IO_POLL_REF_MASK); io_napi_add(req); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 4872d2a6c42d3a..71f9143fe90f3b 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -1787,7 +1787,16 @@ static void btf_free_id(struct btf *btf) * of the _bh() version. */ spin_lock_irqsave(&btf_idr_lock, flags); - idr_remove(&btf_idr, btf->id); + if (btf->id) { + idr_remove(&btf_idr, btf->id); + /* + * Clear the id here to make this function idempotent, since it will get + * called a couple of times for module BTFs: on module unload, and then + * the final btf_put(). btf_alloc_id() starts IDs with 1, so we can use + * 0 as sentinel value. + */ + WRITE_ONCE(btf->id, 0); + } spin_unlock_irqrestore(&btf_idr_lock, flags); } @@ -8115,7 +8124,7 @@ static void bpf_btf_show_fdinfo(struct seq_file *m, struct file *filp) { const struct btf *btf = filp->private_data; - seq_printf(m, "btf_id:\t%u\n", btf->id); + seq_printf(m, "btf_id:\t%u\n", READ_ONCE(btf->id)); } #endif @@ -8197,7 +8206,7 @@ int btf_get_info_by_fd(const struct btf *btf, if (copy_from_user(&info, uinfo, info_copy)) return -EFAULT; - info.id = btf->id; + info.id = READ_ONCE(btf->id); ubtf = u64_to_user_ptr(info.btf); btf_copy = min_t(u32, btf->data_size, info.btf_size); if (copy_to_user(ubtf, btf->data, btf_copy)) @@ -8260,7 +8269,7 @@ int btf_get_fd_by_id(u32 id) u32 btf_obj_id(const struct btf *btf) { - return btf->id; + return READ_ONCE(btf->id); } bool btf_is_kernel(const struct btf *btf) @@ -8382,6 +8391,13 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, if (btf_mod->module != module) continue; + /* + * For modules, we do the freeing of BTF IDR as soon as + * module goes away to disable BTF discovery, since the + * btf_try_get_module() on such BTFs will fail. This may + * be called again on btf_put(), but it's ok to do so. + */ + btf_free_id(btf_mod->btf); list_del(&btf_mod->list); if (btf_mod->sysfs_attr) sysfs_remove_bin_file(btf_kobj, btf_mod->sysfs_attr); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 3ece2da55625cb..7b675a451ec8ef 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1422,6 +1422,27 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); *to++ = BPF_STX_MEM(from->code, from->dst_reg, BPF_REG_AX, from->off); break; + + case BPF_ST | BPF_PROBE_MEM32 | BPF_DW: + case BPF_ST | BPF_PROBE_MEM32 | BPF_W: + case BPF_ST | BPF_PROBE_MEM32 | BPF_H: + case BPF_ST | BPF_PROBE_MEM32 | BPF_B: + *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ + from->imm); + *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + /* + * Cannot use BPF_STX_MEM() macro here as it + * hardcodes BPF_MEM mode, losing PROBE_MEM32 + * and breaking arena addressing in the JIT. + */ + *to++ = (struct bpf_insn) { + .code = BPF_STX | BPF_PROBE_MEM32 | + BPF_SIZE(from->code), + .dst_reg = from->dst_reg, + .src_reg = BPF_REG_AX, + .off = from->off, + }; + break; } out: return to - to_buff; @@ -1736,6 +1757,12 @@ bool bpf_opcode_in_insntable(u8 code) } #ifndef CONFIG_BPF_JIT_ALWAYS_ON +/* Absolute value of s32 without undefined behavior for S32_MIN */ +static u32 abs_s32(s32 x) +{ + return x >= 0 ? (u32)x : -(u32)x; +} + /** * ___bpf_prog_run - run eBPF program on a given context * @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers @@ -1900,8 +1927,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) DST = do_div(AX, (u32) SRC); break; case 1: - AX = abs((s32)DST); - AX = do_div(AX, abs((s32)SRC)); + AX = abs_s32((s32)DST); + AX = do_div(AX, abs_s32((s32)SRC)); if ((s32)DST < 0) DST = (u32)-AX; else @@ -1928,8 +1955,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) DST = do_div(AX, (u32) IMM); break; case 1: - AX = abs((s32)DST); - AX = do_div(AX, abs((s32)IMM)); + AX = abs_s32((s32)DST); + AX = do_div(AX, abs_s32((s32)IMM)); if ((s32)DST < 0) DST = (u32)-AX; else @@ -1955,8 +1982,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) DST = (u32) AX; break; case 1: - AX = abs((s32)DST); - do_div(AX, abs((s32)SRC)); + AX = abs_s32((s32)DST); + do_div(AX, abs_s32((s32)SRC)); if (((s32)DST < 0) == ((s32)SRC < 0)) DST = (u32)AX; else @@ -1982,8 +2009,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) DST = (u32) AX; break; case 1: - AX = abs((s32)DST); - do_div(AX, abs((s32)IMM)); + AX = abs_s32((s32)DST); + do_div(AX, abs_s32((s32)IMM)); if (((s32)DST < 0) == ((s32)IMM < 0)) DST = (u32)AX; else diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 159b25f8269ddf..f108c01ff6d02c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -15910,6 +15910,13 @@ static void scalar_byte_swap(struct bpf_reg_state *dst_reg, struct bpf_insn *ins /* Apply bswap if alu64 or switch between big-endian and little-endian machines */ bool need_bswap = alu64 || (to_le == is_big_endian); + /* + * If the register is mutated, manually reset its scalar ID to break + * any existing ties and avoid incorrect bounds propagation. + */ + if (need_bswap || insn->imm == 16 || insn->imm == 32) + dst_reg->id = 0; + if (need_bswap) { if (insn->imm == 16) dst_reg->var_off = tnum_bswap16(dst_reg->var_off); @@ -15992,7 +15999,7 @@ static int maybe_fork_scalars(struct bpf_verifier_env *env, struct bpf_insn *ins else return 0; - branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false); + branch = push_stack(env, env->insn_idx, env->insn_idx, false); if (IS_ERR(branch)) return PTR_ERR(branch); @@ -17408,6 +17415,12 @@ static void sync_linked_regs(struct bpf_verifier_env *env, struct bpf_verifier_s continue; if ((reg->id & ~BPF_ADD_CONST) != (known_reg->id & ~BPF_ADD_CONST)) continue; + /* + * Skip mixed 32/64-bit links: the delta relationship doesn't + * hold across different ALU widths. + */ + if (((reg->id ^ known_reg->id) & BPF_ADD_CONST) == BPF_ADD_CONST) + continue; if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) || reg->off == known_reg->off) { s32 saved_subreg_def = reg->subreg_def; @@ -17435,7 +17448,7 @@ static void sync_linked_regs(struct bpf_verifier_env *env, struct bpf_verifier_s scalar32_min_max_add(reg, &fake_reg); scalar_min_max_add(reg, &fake_reg); reg->var_off = tnum_add(reg->var_off, fake_reg.var_off); - if (known_reg->id & BPF_ADD_CONST32) + if ((reg->id | known_reg->id) & BPF_ADD_CONST32) zext_32_to_64(reg); reg_bounds_sync(reg); } @@ -19863,11 +19876,14 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, * Also verify that new value satisfies old value range knowledge. */ - /* ADD_CONST mismatch: different linking semantics */ - if ((rold->id & BPF_ADD_CONST) && !(rcur->id & BPF_ADD_CONST)) - return false; - - if (rold->id && !(rold->id & BPF_ADD_CONST) && (rcur->id & BPF_ADD_CONST)) + /* + * ADD_CONST flags must match exactly: BPF_ADD_CONST32 and + * BPF_ADD_CONST64 have different linking semantics in + * sync_linked_regs() (alu32 zero-extends, alu64 does not), + * so pruning across different flag types is unsafe. + */ + if (rold->id && + (rold->id & BPF_ADD_CONST) != (rcur->id & BPF_ADD_CONST)) return false; /* Both have offset linkage: offsets must match */ @@ -20904,7 +20920,8 @@ static int process_bpf_exit_full(struct bpf_verifier_env *env, * state when it exits. */ int err = check_resource_leak(env, exception_exit, - !env->cur_state->curframe, + exception_exit || !env->cur_state->curframe, + exception_exit ? "bpf_throw" : "BPF_EXIT instruction in main prog"); if (err) return err; diff --git a/kernel/crash_dump_dm_crypt.c b/kernel/crash_dump_dm_crypt.c index 1f4067fbdb9438..a20d4097744aee 100644 --- a/kernel/crash_dump_dm_crypt.c +++ b/kernel/crash_dump_dm_crypt.c @@ -168,8 +168,8 @@ static int read_key_from_user_keying(struct dm_crypt_key *dm_key) memcpy(dm_key->data, ukp->data, ukp->datalen); dm_key->key_size = ukp->datalen; - kexec_dprintk("Get dm crypt key (size=%u) %s: %8ph\n", dm_key->key_size, - dm_key->key_desc, dm_key->data); + kexec_dprintk("Get dm crypt key (size=%u) %s\n", dm_key->key_size, + dm_key->key_desc); out: up_read(&key->sem); diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 86f87e43438c33..0677918f06a80c 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -453,7 +453,7 @@ static int active_cacheline_set_overlap(phys_addr_t cln, int overlap) return overlap; } -static void active_cacheline_inc_overlap(phys_addr_t cln) +static void active_cacheline_inc_overlap(phys_addr_t cln, bool is_cache_clean) { int overlap = active_cacheline_read_overlap(cln); @@ -462,7 +462,7 @@ static void active_cacheline_inc_overlap(phys_addr_t cln) /* If we overflowed the overlap counter then we're potentially * leaking dma-mappings. */ - WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP, + WARN_ONCE(!is_cache_clean && overlap > ACTIVE_CACHELINE_MAX_OVERLAP, pr_fmt("exceeded %d overlapping mappings of cacheline %pa\n"), ACTIVE_CACHELINE_MAX_OVERLAP, &cln); } @@ -495,7 +495,7 @@ static int active_cacheline_insert(struct dma_debug_entry *entry, if (rc == -EEXIST) { struct dma_debug_entry *existing; - active_cacheline_inc_overlap(cln); + active_cacheline_inc_overlap(cln, entry->is_cache_clean); existing = radix_tree_lookup(&dma_active_cacheline, cln); /* A lookup failure here after we got -EEXIST is unexpected. */ WARN_ON(!existing); @@ -601,7 +601,8 @@ static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs) unsigned long flags; int rc; - entry->is_cache_clean = !!(attrs & DMA_ATTR_CPU_CACHE_CLEAN); + entry->is_cache_clean = attrs & (DMA_ATTR_DEBUGGING_IGNORE_CACHELINES | + DMA_ATTR_REQUIRE_COHERENT); bucket = get_hash_bucket(entry, &flags); hash_bucket_add(bucket, entry); diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h index e89f175e9c2d01..6184ff303f0809 100644 --- a/kernel/dma/direct.h +++ b/kernel/dma/direct.h @@ -84,7 +84,7 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev, dma_addr_t dma_addr; if (is_swiotlb_force_bounce(dev)) { - if (attrs & DMA_ATTR_MMIO) + if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) return DMA_MAPPING_ERROR; return swiotlb_map(dev, phys, size, dir, attrs); @@ -98,7 +98,8 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev, dma_addr = phys_to_dma(dev, phys); if (unlikely(!dma_capable(dev, dma_addr, size, true)) || dma_kmalloc_needs_bounce(dev, size, dir)) { - if (is_swiotlb_active(dev)) + if (is_swiotlb_active(dev) && + !(attrs & DMA_ATTR_REQUIRE_COHERENT)) return swiotlb_map(dev, phys, size, dir, attrs); goto err_overflow; @@ -123,7 +124,7 @@ static inline void dma_direct_unmap_phys(struct device *dev, dma_addr_t addr, { phys_addr_t phys; - if (attrs & DMA_ATTR_MMIO) + if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) /* nothing to do: uncached and no swiotlb */ return; diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 3928a509c44c28..6d3dd0bd3a8862 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -164,6 +164,9 @@ dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size, if (WARN_ON_ONCE(!dev->dma_mask)) return DMA_MAPPING_ERROR; + if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT)) + return DMA_MAPPING_ERROR; + if (dma_map_direct(dev, ops) || (!is_mmio && arch_dma_map_phys_direct(dev, phys + size))) addr = dma_direct_map_phys(dev, phys, size, dir, attrs); @@ -235,6 +238,9 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, BUG_ON(!valid_dma_direction(dir)); + if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT)) + return -EOPNOTSUPP; + if (WARN_ON_ONCE(!dev->dma_mask)) return 0; diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index d8e6f1d889d55d..9fd73700ddcff2 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -901,10 +902,19 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size local_irq_save(flags); page = pfn_to_page(pfn); - if (dir == DMA_TO_DEVICE) + if (dir == DMA_TO_DEVICE) { + /* + * Ideally, kmsan_check_highmem_page() + * could be used here to detect infoleaks, + * but callers may map uninitialized buffers + * that will be written by the device, + * causing false positives. + */ memcpy_from_page(vaddr, page, offset, sz); - else + } else { + kmsan_unpoison_memory(vaddr, sz); memcpy_to_page(page, offset, vaddr, sz); + } local_irq_restore(flags); size -= sz; @@ -913,8 +923,15 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size offset = 0; } } else if (dir == DMA_TO_DEVICE) { + /* + * Ideally, kmsan_check_memory() could be used here to detect + * infoleaks (uninitialized data being sent to device), but + * callers may map uninitialized buffers that will be written + * by the device, causing false positives. + */ memcpy(vaddr, phys_to_virt(orig_addr), size); } else { + kmsan_unpoison_memory(vaddr, size); memcpy(phys_to_virt(orig_addr), vaddr, size); } } diff --git a/kernel/events/core.c b/kernel/events/core.c index 1f5699b339ec8a..89b40e43971773 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4813,7 +4813,7 @@ static void __perf_event_read(void *info) struct perf_event *sub, *event = data->event; struct perf_event_context *ctx = event->ctx; struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context); - struct pmu *pmu = event->pmu; + struct pmu *pmu; /* * If this is a task context, we need to check whether it is @@ -4825,7 +4825,7 @@ static void __perf_event_read(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - raw_spin_lock(&ctx->lock); + guard(raw_spinlock)(&ctx->lock); ctx_time_update_event(ctx, event); perf_event_update_time(event); @@ -4833,25 +4833,22 @@ static void __perf_event_read(void *info) perf_event_update_sibling_time(event); if (event->state != PERF_EVENT_STATE_ACTIVE) - goto unlock; + return; if (!data->group) { - pmu->read(event); + perf_pmu_read(event); data->ret = 0; - goto unlock; + return; } + pmu = event->pmu_ctx->pmu; pmu->start_txn(pmu, PERF_PMU_TXN_READ); - pmu->read(event); - + perf_pmu_read(event); for_each_sibling_event(sub, event) perf_pmu_read(sub); data->ret = pmu->commit_txn(pmu); - -unlock: - raw_spin_unlock(&ctx->lock); } static inline u64 perf_event_count(struct perf_event *event, bool self) @@ -14744,7 +14741,7 @@ inherit_event(struct perf_event *parent_event, get_ctx(child_ctx); child_event->ctx = child_ctx; - pmu_ctx = find_get_pmu_context(child_event->pmu, child_ctx, child_event); + pmu_ctx = find_get_pmu_context(parent_event->pmu_ctx->pmu, child_ctx, child_event); if (IS_ERR(pmu_ctx)) { free_event(child_event); return ERR_CAST(pmu_ctx); diff --git a/kernel/power/main.c b/kernel/power/main.c index 5f8c9e12eaec40..5429e9f19b655a 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -40,7 +40,7 @@ void pm_restore_gfp_mask(void) { WARN_ON(!mutex_is_locked(&system_transition_mutex)); - if (WARN_ON(!saved_gfp_count) || --saved_gfp_count) + if (!saved_gfp_count || --saved_gfp_count) return; gfp_allowed_mask = saved_gfp_mask; diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 6e1321837c6687..a564650734dcdc 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -2855,6 +2855,17 @@ int snapshot_write_finalize(struct snapshot_handle *handle) { int error; + /* + * Call snapshot_write_next() to drain any trailing zero pages, + * but make sure we're in the data page region first. + * This function can return PAGE_SIZE if the kernel was expecting + * another copy page. Return -ENODATA in that situation. + */ + if (handle->cur > nr_meta_pages + 1) { + error = snapshot_write_next(handle); + if (error) + return error > 0 ? -ENODATA : error; + } copy_last_highmem_page(); error = hibernate_restore_protect_page(handle->buffer); /* Do that only if we have loaded the image entirely */ diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index dc5d614b372c1e..9b10b57b79ada7 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -502,6 +502,15 @@ do { \ ___locked; \ }) +#define raw_spin_trylock_irqsave_rcu_node(p, flags) \ +({ \ + bool ___locked = raw_spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ + \ + if (___locked) \ + smp_mb__after_unlock_lock(); \ + ___locked; \ +}) + #define raw_lockdep_assert_held_rcu_node(p) \ lockdep_assert_held(&ACCESS_PRIVATE(p, lock)) diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 3450c3751ef7ad..a2e2d516e51b9d 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include @@ -41,6 +42,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp) ssp->srcu_idx_max = 0; INIT_WORK(&ssp->srcu_work, srcu_drive_gp); INIT_LIST_HEAD(&ssp->srcu_work.entry); + init_irq_work(&ssp->srcu_irq_work, srcu_tiny_irq_work); return 0; } @@ -84,6 +86,7 @@ EXPORT_SYMBOL_GPL(init_srcu_struct); void cleanup_srcu_struct(struct srcu_struct *ssp) { WARN_ON(ssp->srcu_lock_nesting[0] || ssp->srcu_lock_nesting[1]); + irq_work_sync(&ssp->srcu_irq_work); flush_work(&ssp->srcu_work); WARN_ON(ssp->srcu_gp_running); WARN_ON(ssp->srcu_gp_waiting); @@ -177,6 +180,20 @@ void srcu_drive_gp(struct work_struct *wp) } EXPORT_SYMBOL_GPL(srcu_drive_gp); +/* + * Use an irq_work to defer schedule_work() to avoid acquiring the workqueue + * pool->lock while the caller might hold scheduler locks, causing lockdep + * splats due to workqueue_init() doing a wakeup. + */ +void srcu_tiny_irq_work(struct irq_work *irq_work) +{ + struct srcu_struct *ssp; + + ssp = container_of(irq_work, struct srcu_struct, srcu_irq_work); + schedule_work(&ssp->srcu_work); +} +EXPORT_SYMBOL_GPL(srcu_tiny_irq_work); + static void srcu_gp_start_if_needed(struct srcu_struct *ssp) { unsigned long cookie; @@ -189,7 +206,7 @@ static void srcu_gp_start_if_needed(struct srcu_struct *ssp) WRITE_ONCE(ssp->srcu_idx_max, cookie); if (!READ_ONCE(ssp->srcu_gp_running)) { if (likely(srcu_init_done)) - schedule_work(&ssp->srcu_work); + irq_work_queue(&ssp->srcu_irq_work); else if (list_empty(&ssp->srcu_work.entry)) list_add(&ssp->srcu_work.entry, &srcu_boot_list); } diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index aef8e91ad33e4c..0d01cd8c4b4a7b 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -75,44 +76,9 @@ static bool __read_mostly srcu_init_done; static void srcu_invoke_callbacks(struct work_struct *work); static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay); static void process_srcu(struct work_struct *work); +static void srcu_irq_work(struct irq_work *work); static void srcu_delay_timer(struct timer_list *t); -/* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */ -#define spin_lock_rcu_node(p) \ -do { \ - spin_lock(&ACCESS_PRIVATE(p, lock)); \ - smp_mb__after_unlock_lock(); \ -} while (0) - -#define spin_unlock_rcu_node(p) spin_unlock(&ACCESS_PRIVATE(p, lock)) - -#define spin_lock_irq_rcu_node(p) \ -do { \ - spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \ - smp_mb__after_unlock_lock(); \ -} while (0) - -#define spin_unlock_irq_rcu_node(p) \ - spin_unlock_irq(&ACCESS_PRIVATE(p, lock)) - -#define spin_lock_irqsave_rcu_node(p, flags) \ -do { \ - spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ - smp_mb__after_unlock_lock(); \ -} while (0) - -#define spin_trylock_irqsave_rcu_node(p, flags) \ -({ \ - bool ___locked = spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ - \ - if (___locked) \ - smp_mb__after_unlock_lock(); \ - ___locked; \ -}) - -#define spin_unlock_irqrestore_rcu_node(p, flags) \ - spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \ - /* * Initialize SRCU per-CPU data. Note that statically allocated * srcu_struct structures might already have srcu_read_lock() and @@ -131,7 +97,7 @@ static void init_srcu_struct_data(struct srcu_struct *ssp) */ for_each_possible_cpu(cpu) { sdp = per_cpu_ptr(ssp->sda, cpu); - spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); + raw_spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); rcu_segcblist_init(&sdp->srcu_cblist); sdp->srcu_cblist_invoking = false; sdp->srcu_gp_seq_needed = ssp->srcu_sup->srcu_gp_seq; @@ -186,7 +152,7 @@ static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags) /* Each pass through this loop initializes one srcu_node structure. */ srcu_for_each_node_breadth_first(ssp, snp) { - spin_lock_init(&ACCESS_PRIVATE(snp, lock)); + raw_spin_lock_init(&ACCESS_PRIVATE(snp, lock)); BUILD_BUG_ON(ARRAY_SIZE(snp->srcu_have_cbs) != ARRAY_SIZE(snp->srcu_data_have_cbs)); for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) { @@ -242,7 +208,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) if (!ssp->srcu_sup) return -ENOMEM; if (!is_static) - spin_lock_init(&ACCESS_PRIVATE(ssp->srcu_sup, lock)); + raw_spin_lock_init(&ACCESS_PRIVATE(ssp->srcu_sup, lock)); ssp->srcu_sup->srcu_size_state = SRCU_SIZE_SMALL; ssp->srcu_sup->node = NULL; mutex_init(&ssp->srcu_sup->srcu_cb_mutex); @@ -252,6 +218,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) mutex_init(&ssp->srcu_sup->srcu_barrier_mutex); atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0); INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu); + init_irq_work(&ssp->srcu_sup->irq_work, srcu_irq_work); ssp->srcu_sup->sda_is_static = is_static; if (!is_static) { ssp->sda = alloc_percpu(struct srcu_data); @@ -263,9 +230,12 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) ssp->srcu_sup->srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL; ssp->srcu_sup->srcu_last_gp_end = ktime_get_mono_fast_ns(); if (READ_ONCE(ssp->srcu_sup->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) { - if (!init_srcu_struct_nodes(ssp, is_static ? GFP_ATOMIC : GFP_KERNEL)) + if (!preemptible()) + WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_ALLOC); + else if (init_srcu_struct_nodes(ssp, GFP_KERNEL)) + WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG); + else goto err_free_sda; - WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG); } ssp->srcu_sup->srcu_ssp = ssp; smp_store_release(&ssp->srcu_sup->srcu_gp_seq_needed, @@ -394,20 +364,20 @@ static void srcu_transition_to_big(struct srcu_struct *ssp) /* Double-checked locking on ->srcu_size-state. */ if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL) return; - spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); + raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL) { - spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); + raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); return; } __srcu_transition_to_big(ssp); - spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); + raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); } /* * Check to see if the just-encountered contention event justifies * a transition to SRCU_SIZE_BIG. */ -static void spin_lock_irqsave_check_contention(struct srcu_struct *ssp) +static void raw_spin_lock_irqsave_check_contention(struct srcu_struct *ssp) { unsigned long j; @@ -429,16 +399,16 @@ static void spin_lock_irqsave_check_contention(struct srcu_struct *ssp) * to SRCU_SIZE_BIG. But only if the srcutree.convert_to_big module * parameter permits this. */ -static void spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned long *flags) +static void raw_spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned long *flags) { struct srcu_struct *ssp = sdp->ssp; - if (spin_trylock_irqsave_rcu_node(sdp, *flags)) + if (raw_spin_trylock_irqsave_rcu_node(sdp, *flags)) return; - spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags); - spin_lock_irqsave_check_contention(ssp); - spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, *flags); - spin_lock_irqsave_rcu_node(sdp, *flags); + raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags); + raw_spin_lock_irqsave_check_contention(ssp); + raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, *flags); + raw_spin_lock_irqsave_rcu_node(sdp, *flags); } /* @@ -447,12 +417,12 @@ static void spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned lon * to SRCU_SIZE_BIG. But only if the srcutree.convert_to_big module * parameter permits this. */ -static void spin_lock_irqsave_ssp_contention(struct srcu_struct *ssp, unsigned long *flags) +static void raw_spin_lock_irqsave_ssp_contention(struct srcu_struct *ssp, unsigned long *flags) { - if (spin_trylock_irqsave_rcu_node(ssp->srcu_sup, *flags)) + if (raw_spin_trylock_irqsave_rcu_node(ssp->srcu_sup, *flags)) return; - spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags); - spin_lock_irqsave_check_contention(ssp); + raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags); + raw_spin_lock_irqsave_check_contention(ssp); } /* @@ -470,13 +440,13 @@ static void check_init_srcu_struct(struct srcu_struct *ssp) /* The smp_load_acquire() pairs with the smp_store_release(). */ if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed))) /*^^^*/ return; /* Already initialized. */ - spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); + raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq_needed)) { - spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); + raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); return; } init_srcu_struct_fields(ssp, true); - spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); + raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); } /* @@ -742,13 +712,15 @@ void cleanup_srcu_struct(struct srcu_struct *ssp) unsigned long delay; struct srcu_usage *sup = ssp->srcu_sup; - spin_lock_irq_rcu_node(ssp->srcu_sup); + raw_spin_lock_irq_rcu_node(ssp->srcu_sup); delay = srcu_get_delay(ssp); - spin_unlock_irq_rcu_node(ssp->srcu_sup); + raw_spin_unlock_irq_rcu_node(ssp->srcu_sup); if (WARN_ON(!delay)) return; /* Just leak it! */ if (WARN_ON(srcu_readers_active(ssp))) return; /* Just leak it! */ + /* Wait for irq_work to finish first as it may queue a new work. */ + irq_work_sync(&sup->irq_work); flush_delayed_work(&sup->work); for_each_possible_cpu(cpu) { struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu); @@ -960,7 +932,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) mutex_lock(&sup->srcu_cb_mutex); /* End the current grace period. */ - spin_lock_irq_rcu_node(sup); + raw_spin_lock_irq_rcu_node(sup); idx = rcu_seq_state(sup->srcu_gp_seq); WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); if (srcu_gp_is_expedited(ssp)) @@ -971,7 +943,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) gpseq = rcu_seq_current(&sup->srcu_gp_seq); if (ULONG_CMP_LT(sup->srcu_gp_seq_needed_exp, gpseq)) WRITE_ONCE(sup->srcu_gp_seq_needed_exp, gpseq); - spin_unlock_irq_rcu_node(sup); + raw_spin_unlock_irq_rcu_node(sup); mutex_unlock(&sup->srcu_gp_mutex); /* A new grace period can start at this point. But only one. */ @@ -983,7 +955,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) } else { idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs); srcu_for_each_node_breadth_first(ssp, snp) { - spin_lock_irq_rcu_node(snp); + raw_spin_lock_irq_rcu_node(snp); cbs = false; last_lvl = snp >= sup->level[rcu_num_lvls - 1]; if (last_lvl) @@ -998,7 +970,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) else mask = snp->srcu_data_have_cbs[idx]; snp->srcu_data_have_cbs[idx] = 0; - spin_unlock_irq_rcu_node(snp); + raw_spin_unlock_irq_rcu_node(snp); if (cbs) srcu_schedule_cbs_snp(ssp, snp, mask, cbdelay); } @@ -1008,27 +980,27 @@ static void srcu_gp_end(struct srcu_struct *ssp) if (!(gpseq & counter_wrap_check)) for_each_possible_cpu(cpu) { sdp = per_cpu_ptr(ssp->sda, cpu); - spin_lock_irq_rcu_node(sdp); + raw_spin_lock_irq_rcu_node(sdp); if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed + 100)) sdp->srcu_gp_seq_needed = gpseq; if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed_exp + 100)) sdp->srcu_gp_seq_needed_exp = gpseq; - spin_unlock_irq_rcu_node(sdp); + raw_spin_unlock_irq_rcu_node(sdp); } /* Callback initiation done, allow grace periods after next. */ mutex_unlock(&sup->srcu_cb_mutex); /* Start a new grace period if needed. */ - spin_lock_irq_rcu_node(sup); + raw_spin_lock_irq_rcu_node(sup); gpseq = rcu_seq_current(&sup->srcu_gp_seq); if (!rcu_seq_state(gpseq) && ULONG_CMP_LT(gpseq, sup->srcu_gp_seq_needed)) { srcu_gp_start(ssp); - spin_unlock_irq_rcu_node(sup); + raw_spin_unlock_irq_rcu_node(sup); srcu_reschedule(ssp, 0); } else { - spin_unlock_irq_rcu_node(sup); + raw_spin_unlock_irq_rcu_node(sup); } /* Transition to big if needed. */ @@ -1059,19 +1031,19 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, s)) || (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s))) return; - spin_lock_irqsave_rcu_node(snp, flags); + raw_spin_lock_irqsave_rcu_node(snp, flags); sgsne = snp->srcu_gp_seq_needed_exp; if (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)) { - spin_unlock_irqrestore_rcu_node(snp, flags); + raw_spin_unlock_irqrestore_rcu_node(snp, flags); return; } WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); - spin_unlock_irqrestore_rcu_node(snp, flags); + raw_spin_unlock_irqrestore_rcu_node(snp, flags); } - spin_lock_irqsave_ssp_contention(ssp, &flags); + raw_spin_lock_irqsave_ssp_contention(ssp, &flags); if (ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed_exp, s)) WRITE_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp, s); - spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); + raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); } /* @@ -1109,12 +1081,12 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, for (snp = snp_leaf; snp != NULL; snp = snp->srcu_parent) { if (WARN_ON_ONCE(rcu_seq_done(&sup->srcu_gp_seq, s)) && snp != snp_leaf) return; /* GP already done and CBs recorded. */ - spin_lock_irqsave_rcu_node(snp, flags); + raw_spin_lock_irqsave_rcu_node(snp, flags); snp_seq = snp->srcu_have_cbs[idx]; if (!srcu_invl_snp_seq(snp_seq) && ULONG_CMP_GE(snp_seq, s)) { if (snp == snp_leaf && snp_seq == s) snp->srcu_data_have_cbs[idx] |= sdp->grpmask; - spin_unlock_irqrestore_rcu_node(snp, flags); + raw_spin_unlock_irqrestore_rcu_node(snp, flags); if (snp == snp_leaf && snp_seq != s) { srcu_schedule_cbs_sdp(sdp, do_norm ? SRCU_INTERVAL : 0); return; @@ -1129,11 +1101,11 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, sgsne = snp->srcu_gp_seq_needed_exp; if (!do_norm && (srcu_invl_snp_seq(sgsne) || ULONG_CMP_LT(sgsne, s))) WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); - spin_unlock_irqrestore_rcu_node(snp, flags); + raw_spin_unlock_irqrestore_rcu_node(snp, flags); } /* Top of tree, must ensure the grace period will be started. */ - spin_lock_irqsave_ssp_contention(ssp, &flags); + raw_spin_lock_irqsave_ssp_contention(ssp, &flags); if (ULONG_CMP_LT(sup->srcu_gp_seq_needed, s)) { /* * Record need for grace period s. Pair with load @@ -1154,13 +1126,17 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, // it isn't. And it does not have to be. After all, it // can only be executed during early boot when there is only // the one boot CPU running with interrupts still disabled. + // + // Use an irq_work here to avoid acquiring runqueue lock with + // srcu rcu_node::lock held. BPF instrument could introduce the + // opposite dependency, hence we need to break the possible + // locking dependency here. if (likely(srcu_init_done)) - queue_delayed_work(rcu_gp_wq, &sup->work, - !!srcu_get_delay(ssp)); + irq_work_queue(&sup->irq_work); else if (list_empty(&sup->work.work.entry)) list_add(&sup->work.work.entry, &srcu_boot_list); } - spin_unlock_irqrestore_rcu_node(sup, flags); + raw_spin_unlock_irqrestore_rcu_node(sup, flags); } /* @@ -1172,9 +1148,9 @@ static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount) { unsigned long curdelay; - spin_lock_irq_rcu_node(ssp->srcu_sup); + raw_spin_lock_irq_rcu_node(ssp->srcu_sup); curdelay = !srcu_get_delay(ssp); - spin_unlock_irq_rcu_node(ssp->srcu_sup); + raw_spin_unlock_irq_rcu_node(ssp->srcu_sup); for (;;) { if (srcu_readers_active_idx_check(ssp, idx)) @@ -1285,12 +1261,12 @@ static bool srcu_should_expedite(struct srcu_struct *ssp) return false; /* If the local srcu_data structure has callbacks, not idle. */ sdp = raw_cpu_ptr(ssp->sda); - spin_lock_irqsave_rcu_node(sdp, flags); + raw_spin_lock_irqsave_rcu_node(sdp, flags); if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) { - spin_unlock_irqrestore_rcu_node(sdp, flags); + raw_spin_unlock_irqrestore_rcu_node(sdp, flags); return false; /* Callbacks already present, so not idle. */ } - spin_unlock_irqrestore_rcu_node(sdp, flags); + raw_spin_unlock_irqrestore_rcu_node(sdp, flags); /* * No local callbacks, so probabilistically probe global state. @@ -1350,7 +1326,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id()); else sdp = raw_cpu_ptr(ssp->sda); - spin_lock_irqsave_sdp_contention(sdp, &flags); + raw_spin_lock_irqsave_sdp_contention(sdp, &flags); if (rhp) rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp); /* @@ -1410,7 +1386,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, sdp->srcu_gp_seq_needed_exp = s; needexp = true; } - spin_unlock_irqrestore_rcu_node(sdp, flags); + raw_spin_unlock_irqrestore_rcu_node(sdp, flags); /* Ensure that snp node tree is fully initialized before traversing it */ if (ss_state < SRCU_SIZE_WAIT_BARRIER) @@ -1522,7 +1498,7 @@ static void __synchronize_srcu(struct srcu_struct *ssp, bool do_norm) /* * Make sure that later code is ordered after the SRCU grace - * period. This pairs with the spin_lock_irq_rcu_node() + * period. This pairs with the raw_spin_lock_irq_rcu_node() * in srcu_invoke_callbacks(). Unlike Tree RCU, this is needed * because the current CPU might have been totally uninvolved with * (and thus unordered against) that grace period. @@ -1701,7 +1677,7 @@ static void srcu_barrier_cb(struct rcu_head *rhp) */ static void srcu_barrier_one_cpu(struct srcu_struct *ssp, struct srcu_data *sdp) { - spin_lock_irq_rcu_node(sdp); + raw_spin_lock_irq_rcu_node(sdp); atomic_inc(&ssp->srcu_sup->srcu_barrier_cpu_cnt); sdp->srcu_barrier_head.func = srcu_barrier_cb; debug_rcu_head_queue(&sdp->srcu_barrier_head); @@ -1710,7 +1686,7 @@ static void srcu_barrier_one_cpu(struct srcu_struct *ssp, struct srcu_data *sdp) debug_rcu_head_unqueue(&sdp->srcu_barrier_head); atomic_dec(&ssp->srcu_sup->srcu_barrier_cpu_cnt); } - spin_unlock_irq_rcu_node(sdp); + raw_spin_unlock_irq_rcu_node(sdp); } /** @@ -1761,7 +1737,7 @@ static void srcu_expedite_current_cb(struct rcu_head *rhp) bool needcb = false; struct srcu_data *sdp = container_of(rhp, struct srcu_data, srcu_ec_head); - spin_lock_irqsave_sdp_contention(sdp, &flags); + raw_spin_lock_irqsave_sdp_contention(sdp, &flags); if (sdp->srcu_ec_state == SRCU_EC_IDLE) { WARN_ON_ONCE(1); } else if (sdp->srcu_ec_state == SRCU_EC_PENDING) { @@ -1771,7 +1747,7 @@ static void srcu_expedite_current_cb(struct rcu_head *rhp) sdp->srcu_ec_state = SRCU_EC_PENDING; needcb = true; } - spin_unlock_irqrestore_rcu_node(sdp, flags); + raw_spin_unlock_irqrestore_rcu_node(sdp, flags); // If needed, requeue ourselves as an expedited SRCU callback. if (needcb) __call_srcu(sdp->ssp, &sdp->srcu_ec_head, srcu_expedite_current_cb, false); @@ -1795,7 +1771,7 @@ void srcu_expedite_current(struct srcu_struct *ssp) migrate_disable(); sdp = this_cpu_ptr(ssp->sda); - spin_lock_irqsave_sdp_contention(sdp, &flags); + raw_spin_lock_irqsave_sdp_contention(sdp, &flags); if (sdp->srcu_ec_state == SRCU_EC_IDLE) { sdp->srcu_ec_state = SRCU_EC_PENDING; needcb = true; @@ -1804,7 +1780,7 @@ void srcu_expedite_current(struct srcu_struct *ssp) } else { WARN_ON_ONCE(sdp->srcu_ec_state != SRCU_EC_REPOST); } - spin_unlock_irqrestore_rcu_node(sdp, flags); + raw_spin_unlock_irqrestore_rcu_node(sdp, flags); // If needed, queue an expedited SRCU callback. if (needcb) __call_srcu(ssp, &sdp->srcu_ec_head, srcu_expedite_current_cb, false); @@ -1848,17 +1824,17 @@ static void srcu_advance_state(struct srcu_struct *ssp) */ idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq)); /* ^^^ */ if (idx == SRCU_STATE_IDLE) { - spin_lock_irq_rcu_node(ssp->srcu_sup); + raw_spin_lock_irq_rcu_node(ssp->srcu_sup); if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) { WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq)); - spin_unlock_irq_rcu_node(ssp->srcu_sup); + raw_spin_unlock_irq_rcu_node(ssp->srcu_sup); mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex); return; } idx = rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)); if (idx == SRCU_STATE_IDLE) srcu_gp_start(ssp); - spin_unlock_irq_rcu_node(ssp->srcu_sup); + raw_spin_unlock_irq_rcu_node(ssp->srcu_sup); if (idx != SRCU_STATE_IDLE) { mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex); return; /* Someone else started the grace period. */ @@ -1872,10 +1848,10 @@ static void srcu_advance_state(struct srcu_struct *ssp) return; /* readers present, retry later. */ } srcu_flip(ssp); - spin_lock_irq_rcu_node(ssp->srcu_sup); + raw_spin_lock_irq_rcu_node(ssp->srcu_sup); rcu_seq_set_state(&ssp->srcu_sup->srcu_gp_seq, SRCU_STATE_SCAN2); ssp->srcu_sup->srcu_n_exp_nodelay = 0; - spin_unlock_irq_rcu_node(ssp->srcu_sup); + raw_spin_unlock_irq_rcu_node(ssp->srcu_sup); } if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN2) { @@ -1913,7 +1889,7 @@ static void srcu_invoke_callbacks(struct work_struct *work) ssp = sdp->ssp; rcu_cblist_init(&ready_cbs); - spin_lock_irq_rcu_node(sdp); + raw_spin_lock_irq_rcu_node(sdp); WARN_ON_ONCE(!rcu_segcblist_segempty(&sdp->srcu_cblist, RCU_NEXT_TAIL)); rcu_segcblist_advance(&sdp->srcu_cblist, rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq)); @@ -1924,7 +1900,7 @@ static void srcu_invoke_callbacks(struct work_struct *work) */ if (sdp->srcu_cblist_invoking || !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) { - spin_unlock_irq_rcu_node(sdp); + raw_spin_unlock_irq_rcu_node(sdp); return; /* Someone else on the job or nothing to do. */ } @@ -1932,7 +1908,7 @@ static void srcu_invoke_callbacks(struct work_struct *work) sdp->srcu_cblist_invoking = true; rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs); len = ready_cbs.len; - spin_unlock_irq_rcu_node(sdp); + raw_spin_unlock_irq_rcu_node(sdp); rhp = rcu_cblist_dequeue(&ready_cbs); for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) { debug_rcu_head_unqueue(rhp); @@ -1947,11 +1923,11 @@ static void srcu_invoke_callbacks(struct work_struct *work) * Update counts, accelerate new callbacks, and if needed, * schedule another round of callback invocation. */ - spin_lock_irq_rcu_node(sdp); + raw_spin_lock_irq_rcu_node(sdp); rcu_segcblist_add_len(&sdp->srcu_cblist, -len); sdp->srcu_cblist_invoking = false; more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist); - spin_unlock_irq_rcu_node(sdp); + raw_spin_unlock_irq_rcu_node(sdp); /* An SRCU barrier or callbacks from previous nesting work pending */ if (more) srcu_schedule_cbs_sdp(sdp, 0); @@ -1965,7 +1941,7 @@ static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay) { bool pushgp = true; - spin_lock_irq_rcu_node(ssp->srcu_sup); + raw_spin_lock_irq_rcu_node(ssp->srcu_sup); if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) { if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq))) { /* All requests fulfilled, time to go idle. */ @@ -1975,7 +1951,7 @@ static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay) /* Outstanding request and no GP. Start one. */ srcu_gp_start(ssp); } - spin_unlock_irq_rcu_node(ssp->srcu_sup); + raw_spin_unlock_irq_rcu_node(ssp->srcu_sup); if (pushgp) queue_delayed_work(rcu_gp_wq, &ssp->srcu_sup->work, delay); @@ -1995,9 +1971,9 @@ static void process_srcu(struct work_struct *work) ssp = sup->srcu_ssp; srcu_advance_state(ssp); - spin_lock_irq_rcu_node(ssp->srcu_sup); + raw_spin_lock_irq_rcu_node(ssp->srcu_sup); curdelay = srcu_get_delay(ssp); - spin_unlock_irq_rcu_node(ssp->srcu_sup); + raw_spin_unlock_irq_rcu_node(ssp->srcu_sup); if (curdelay) { WRITE_ONCE(sup->reschedule_count, 0); } else { @@ -2015,6 +1991,23 @@ static void process_srcu(struct work_struct *work) srcu_reschedule(ssp, curdelay); } +static void srcu_irq_work(struct irq_work *work) +{ + struct srcu_struct *ssp; + struct srcu_usage *sup; + unsigned long delay; + unsigned long flags; + + sup = container_of(work, struct srcu_usage, irq_work); + ssp = sup->srcu_ssp; + + raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); + delay = srcu_get_delay(ssp); + raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); + + queue_delayed_work(rcu_gp_wq, &sup->work, !!delay); +} + void srcutorture_get_gp_data(struct srcu_struct *ssp, int *flags, unsigned long *gp_seq) { diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index b9544916512273..a83be0c834ddb5 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -161,6 +161,14 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, return cpuidle_enter(drv, dev, next_state); } +static void idle_call_stop_or_retain_tick(bool stop_tick) +{ + if (stop_tick || tick_nohz_tick_stopped()) + tick_nohz_idle_stop_tick(); + else + tick_nohz_idle_retain_tick(); +} + /** * cpuidle_idle_call - the main idle function * @@ -170,7 +178,7 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, * set, and it returns with polling set. If it ever stops polling, it * must clear the polling bit. */ -static void cpuidle_idle_call(void) +static void cpuidle_idle_call(bool stop_tick) { struct cpuidle_device *dev = cpuidle_get_device(); struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); @@ -186,7 +194,7 @@ static void cpuidle_idle_call(void) } if (cpuidle_not_available(drv, dev)) { - tick_nohz_idle_stop_tick(); + idle_call_stop_or_retain_tick(stop_tick); default_idle_call(); goto exit_idle; @@ -222,17 +230,19 @@ static void cpuidle_idle_call(void) next_state = cpuidle_find_deepest_state(drv, dev, max_latency_ns); call_cpuidle(drv, dev, next_state); } else if (drv->state_count > 1) { - bool stop_tick = true; + /* + * stop_tick is expected to be true by default by cpuidle + * governors, which allows them to select idle states with + * target residency above the tick period length. + */ + stop_tick = true; /* * Ask the cpuidle framework to choose a convenient idle state. */ next_state = cpuidle_select(drv, dev, &stop_tick); - if (stop_tick || tick_nohz_tick_stopped()) - tick_nohz_idle_stop_tick(); - else - tick_nohz_idle_retain_tick(); + idle_call_stop_or_retain_tick(stop_tick); entered_state = call_cpuidle(drv, dev, next_state); /* @@ -240,7 +250,7 @@ static void cpuidle_idle_call(void) */ cpuidle_reflect(dev, entered_state); } else { - tick_nohz_idle_retain_tick(); + idle_call_stop_or_retain_tick(stop_tick); /* * If there is only a single idle state (or none), there is @@ -268,6 +278,7 @@ static void cpuidle_idle_call(void) static void do_idle(void) { int cpu = smp_processor_id(); + bool got_tick = false; /* * Check if we need to update blocked load @@ -338,8 +349,9 @@ static void do_idle(void) tick_nohz_idle_restart_tick(); cpu_idle_poll(); } else { - cpuidle_idle_call(); + cpuidle_idle_call(got_tick); } + got_tick = tick_nohz_idle_got_tick(); arch_cpu_idle_exit(); } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8df69e7027066c..41331091260984 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6606,9 +6606,9 @@ int update_ftrace_direct_mod(struct ftrace_ops *ops, struct ftrace_hash *hash, b if (!orig_hash) goto unlock; - /* Enable the tmp_ops to have the same functions as the direct ops */ + /* Enable the tmp_ops to have the same functions as the hash object. */ ftrace_ops_init(&tmp_ops); - tmp_ops.func_hash = ops->func_hash; + tmp_ops.func_hash->filter_hash = hash; err = register_ftrace_function_nolock(&tmp_ops); if (err) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 17d0ea0cc3e6f6..170170bd83bd9c 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2053,7 +2053,7 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer) entries += ret; entry_bytes += local_read(&head_page->page->commit); - local_set(&cpu_buffer->head_page->entries, ret); + local_set(&head_page->entries, ret); if (head_page == cpu_buffer->commit_page) break; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ebd996f8710e6c..a626211ceb9a8b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -555,7 +555,7 @@ static bool update_marker_trace(struct trace_array *tr, int enabled) lockdep_assert_held(&event_mutex); if (enabled) { - if (!list_empty(&tr->marker_list)) + if (tr->trace_flags & TRACE_ITER(COPY_MARKER)) return false; list_add_rcu(&tr->marker_list, &marker_copies); @@ -563,10 +563,10 @@ static bool update_marker_trace(struct trace_array *tr, int enabled) return true; } - if (list_empty(&tr->marker_list)) + if (!(tr->trace_flags & TRACE_ITER(COPY_MARKER))) return false; - list_del_init(&tr->marker_list); + list_del_rcu(&tr->marker_list); tr->trace_flags &= ~TRACE_ITER(COPY_MARKER); return true; } @@ -6783,6 +6783,23 @@ char *trace_user_fault_read(struct trace_user_buf_info *tinfo, */ do { + /* + * It is possible that something is trying to migrate this + * task. What happens then, is when preemption is enabled, + * the migration thread will preempt this task, try to + * migrate it, fail, then let it run again. That will + * cause this to loop again and never succeed. + * On failures, enabled and disable preemption with + * migration enabled, to allow the migration thread to + * migrate this task. + */ + if (trys) { + preempt_enable_notrace(); + preempt_disable_notrace(); + cpu = smp_processor_id(); + buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf; + } + /* * If for some reason, copy_from_user() always causes a context * switch, this would then cause an infinite loop. @@ -9744,18 +9761,19 @@ static int __remove_instance(struct trace_array *tr) list_del(&tr->list); - /* Disable all the flags that were enabled coming in */ - for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) { - if ((1ULL << i) & ZEROED_TRACE_FLAGS) - set_tracer_flag(tr, 1ULL << i, 0); - } - if (printk_trace == tr) update_printk_trace(&global_trace); + /* Must be done before disabling all the flags */ if (update_marker_trace(tr, 0)) synchronize_rcu(); + /* Disable all the flags that were enabled coming in */ + for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) { + if ((1ULL << i) & ZEROED_TRACE_FLAGS) + set_tracer_flag(tr, 1ULL << i, 0); + } + tracing_set_nop(tr); clear_ftrace_function_probes(tr); event_trace_del_tracer(tr); diff --git a/lib/bootconfig.c b/lib/bootconfig.c index 2da049216fe0e2..e88d0221a8269c 100644 --- a/lib/bootconfig.c +++ b/lib/bootconfig.c @@ -723,7 +723,8 @@ static int __init xbc_parse_kv(char **k, char *v, int op) if (op == ':') { unsigned short nidx = child->next; - xbc_init_node(child, v, XBC_VALUE); + if (xbc_init_node(child, v, XBC_VALUE) < 0) + return xbc_parse_error("Failed to override value", v); child->next = nidx; /* keep subkeys */ goto array; } diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 725eef05b7588b..dc7a56f7287d6d 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -55,6 +55,9 @@ libaes-$(CONFIG_SPARC) += sparc/aes_asm.o libaes-$(CONFIG_X86) += x86/aes-aesni.o endif # CONFIG_CRYPTO_LIB_AES_ARCH +# clean-files must be defined unconditionally +clean-files += powerpc/aesp8-ppc.S + ################################################################################ obj-$(CONFIG_CRYPTO_LIB_AESCFB) += libaescfb.o diff --git a/mm/damon/core.c b/mm/damon/core.c index c1d1091d307e4b..3e1890d64d067a 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1252,6 +1252,7 @@ int damon_commit_ctx(struct damon_ctx *dst, struct damon_ctx *src) { int err; + dst->maybe_corrupted = true; if (!is_power_of_2(src->min_region_sz)) return -EINVAL; @@ -1277,6 +1278,7 @@ int damon_commit_ctx(struct damon_ctx *dst, struct damon_ctx *src) dst->addr_unit = src->addr_unit; dst->min_region_sz = src->min_region_sz; + dst->maybe_corrupted = false; return 0; } @@ -2678,6 +2680,8 @@ static void kdamond_call(struct damon_ctx *ctx, bool cancel) complete(&control->completion); else if (control->canceled && control->dealloc_on_cancel) kfree(control); + if (!cancel && ctx->maybe_corrupted) + break; } mutex_lock(&ctx->call_controls_lock); @@ -2707,6 +2711,8 @@ static int kdamond_wait_activation(struct damon_ctx *ctx) kdamond_usleep(min_wait_time); kdamond_call(ctx, false); + if (ctx->maybe_corrupted) + return -EINVAL; damos_walk_cancel(ctx); } return -EBUSY; @@ -2790,6 +2796,8 @@ static int kdamond_fn(void *data) * kdamond_merge_regions() if possible, to reduce overhead */ kdamond_call(ctx, false); + if (ctx->maybe_corrupted) + break; if (!list_empty(&ctx->schemes)) kdamond_apply_schemes(ctx); else diff --git a/mm/damon/stat.c b/mm/damon/stat.c index 25fb44ccf99d0c..cf2c5a541eeeaa 100644 --- a/mm/damon/stat.c +++ b/mm/damon/stat.c @@ -145,12 +145,59 @@ static int damon_stat_damon_call_fn(void *data) return 0; } +struct damon_stat_system_ram_range_walk_arg { + bool walked; + struct resource res; +}; + +static int damon_stat_system_ram_walk_fn(struct resource *res, void *arg) +{ + struct damon_stat_system_ram_range_walk_arg *a = arg; + + if (!a->walked) { + a->walked = true; + a->res.start = res->start; + } + a->res.end = res->end; + return 0; +} + +static unsigned long damon_stat_res_to_core_addr(resource_size_t ra, + unsigned long addr_unit) +{ + /* + * Use div_u64() for avoiding linking errors related with __udivdi3, + * __aeabi_uldivmod, or similar problems. This should also improve the + * performance optimization (read div_u64() comment for the detail). + */ + if (sizeof(ra) == 8 && sizeof(addr_unit) == 4) + return div_u64(ra, addr_unit); + return ra / addr_unit; +} + +static int damon_stat_set_monitoring_region(struct damon_target *t, + unsigned long addr_unit, unsigned long min_region_sz) +{ + struct damon_addr_range addr_range; + struct damon_stat_system_ram_range_walk_arg arg = {}; + + walk_system_ram_res(0, -1, &arg, damon_stat_system_ram_walk_fn); + if (!arg.walked) + return -EINVAL; + addr_range.start = damon_stat_res_to_core_addr( + arg.res.start, addr_unit); + addr_range.end = damon_stat_res_to_core_addr( + arg.res.end + 1, addr_unit); + if (addr_range.end <= addr_range.start) + return -EINVAL; + return damon_set_regions(t, &addr_range, 1, min_region_sz); +} + static struct damon_ctx *damon_stat_build_ctx(void) { struct damon_ctx *ctx; struct damon_attrs attrs; struct damon_target *target; - unsigned long start = 0, end = 0; ctx = damon_new_ctx(); if (!ctx) @@ -180,8 +227,8 @@ static struct damon_ctx *damon_stat_build_ctx(void) if (!target) goto free_out; damon_add_target(ctx, target); - if (damon_set_region_biggest_system_ram_default(target, &start, &end, - ctx->min_region_sz)) + if (damon_stat_set_monitoring_region(target, ctx->addr_unit, + ctx->min_region_sz)) goto free_out; return ctx; free_out: diff --git a/mm/hmm.c b/mm/hmm.c index f6c4ddff4bd611..5955f2f0c83db1 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -778,7 +778,7 @@ dma_addr_t hmm_dma_map_pfn(struct device *dev, struct hmm_dma_map *map, struct page *page = hmm_pfn_to_page(pfns[idx]); phys_addr_t paddr = hmm_pfn_to_phys(pfns[idx]); size_t offset = idx * map->dma_entry_size; - unsigned long attrs = 0; + unsigned long attrs = DMA_ATTR_REQUIRE_COHERENT; dma_addr_t dma_addr; int ret; @@ -871,7 +871,7 @@ bool hmm_dma_unmap_pfn(struct device *dev, struct hmm_dma_map *map, size_t idx) struct dma_iova_state *state = &map->state; dma_addr_t *dma_addrs = map->dma_list; unsigned long *pfns = map->pfn_list; - unsigned long attrs = 0; + unsigned long attrs = DMA_ATTR_REQUIRE_COHERENT; if ((pfns[idx] & valid_dma) != valid_dma) return false; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 912c248a3f7e11..b298cba853ab95 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2797,7 +2797,8 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm _dst_pmd = pmd_mkwrite(pmd_mkdirty(_dst_pmd), dst_vma); } else { src_pmdval = pmdp_huge_clear_flush(src_vma, src_addr, src_pmd); - _dst_pmd = folio_mk_pmd(src_folio, dst_vma->vm_page_prot); + _dst_pmd = move_soft_dirty_pmd(src_pmdval); + _dst_pmd = clear_uffd_wp_pmd(_dst_pmd); } set_pmd_at(mm, dst_addr, dst_pmd, _dst_pmd); diff --git a/mm/rmap.c b/mm/rmap.c index 0f00570d1b9e9b..8f08090d7eb9f2 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -457,6 +457,13 @@ static void cleanup_partial_anon_vmas(struct vm_area_struct *vma) list_del(&avc->same_vma); anon_vma_chain_free(avc); } + + /* + * The anon_vma assigned to this VMA is no longer valid, as we were not + * able to correctly clone AVC state. Avoid inconsistent anon_vma tree + * state by resetting. + */ + vma->anon_vma = NULL; } /** @@ -1955,7 +1962,14 @@ static inline unsigned int folio_unmap_pte_batch(struct folio *folio, if (userfaultfd_wp(vma)) return 1; - return folio_pte_batch(folio, pvmw->pte, pte, max_nr); + /* + * If unmap fails, we need to restore the ptes. To avoid accidentally + * upgrading write permissions for ptes that were not originally + * writable, and to avoid losing the soft-dirty bit, use the + * appropriate FPB flags. + */ + return folio_pte_batch_flags(folio, vma, pvmw->pte, &pte, max_nr, + FPB_RESPECT_WRITE | FPB_RESPECT_SOFT_DIRTY); } /* @@ -2443,11 +2457,17 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, __maybe_unused pmd_t pmdval; if (flags & TTU_SPLIT_HUGE_PMD) { + /* + * split_huge_pmd_locked() might leave the + * folio mapped through PTEs. Retry the walk + * so we can detect this scenario and properly + * abort the walk. + */ split_huge_pmd_locked(vma, pvmw.address, pvmw.pmd, true); - ret = false; - page_vma_mapped_walk_done(&pvmw); - break; + flags &= ~TTU_SPLIT_HUGE_PMD; + page_vma_mapped_walk_restart(&pvmw); + continue; } #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION pmdval = pmdp_get(pvmw.pmd); diff --git a/mm/zswap.c b/mm/zswap.c index e6ec3295bdb01b..16b2ef7223e12a 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -942,9 +942,15 @@ static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio) /* zswap entries of length PAGE_SIZE are not compressed. */ if (entry->length == PAGE_SIZE) { + void *dst; + WARN_ON_ONCE(input->length != PAGE_SIZE); - memcpy_from_sglist(kmap_local_folio(folio, 0), input, 0, PAGE_SIZE); + + dst = kmap_local_folio(folio, 0); + memcpy_from_sglist(dst, input, 0, PAGE_SIZE); dlen = PAGE_SIZE; + kunmap_local(dst); + flush_dcache_folio(folio); } else { sg_init_table(&output, 1); sg_set_folio(&output, folio, PAGE_SIZE, 0); diff --git a/net/atm/lec.c b/net/atm/lec.c index fb93c6e1c32942..10e260acf6027c 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -154,10 +154,19 @@ static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev) /* 0x01 is topology change */ priv = netdev_priv(dev); - atm_force_charge(priv->lecd, skb2->truesize); - sk = sk_atm(priv->lecd); - skb_queue_tail(&sk->sk_receive_queue, skb2); - sk->sk_data_ready(sk); + struct atm_vcc *vcc; + + rcu_read_lock(); + vcc = rcu_dereference(priv->lecd); + if (vcc) { + atm_force_charge(vcc, skb2->truesize); + sk = sk_atm(vcc); + skb_queue_tail(&sk->sk_receive_queue, skb2); + sk->sk_data_ready(sk); + } else { + dev_kfree_skb(skb2); + } + rcu_read_unlock(); } } #endif /* IS_ENABLED(CONFIG_BRIDGE) */ @@ -216,7 +225,7 @@ static netdev_tx_t lec_start_xmit(struct sk_buff *skb, int is_rdesc; pr_debug("called\n"); - if (!priv->lecd) { + if (!rcu_access_pointer(priv->lecd)) { pr_info("%s:No lecd attached\n", dev->name); dev->stats.tx_errors++; netif_stop_queue(dev); @@ -449,10 +458,19 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) break; skb2->len = sizeof(struct atmlec_msg); skb_copy_to_linear_data(skb2, mesg, sizeof(*mesg)); - atm_force_charge(priv->lecd, skb2->truesize); - sk = sk_atm(priv->lecd); - skb_queue_tail(&sk->sk_receive_queue, skb2); - sk->sk_data_ready(sk); + struct atm_vcc *vcc; + + rcu_read_lock(); + vcc = rcu_dereference(priv->lecd); + if (vcc) { + atm_force_charge(vcc, skb2->truesize); + sk = sk_atm(vcc); + skb_queue_tail(&sk->sk_receive_queue, skb2); + sk->sk_data_ready(sk); + } else { + dev_kfree_skb(skb2); + } + rcu_read_unlock(); } } #endif /* IS_ENABLED(CONFIG_BRIDGE) */ @@ -468,23 +486,16 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) static void lec_atm_close(struct atm_vcc *vcc) { - struct sk_buff *skb; struct net_device *dev = (struct net_device *)vcc->proto_data; struct lec_priv *priv = netdev_priv(dev); - priv->lecd = NULL; + rcu_assign_pointer(priv->lecd, NULL); + synchronize_rcu(); /* Do something needful? */ netif_stop_queue(dev); lec_arp_destroy(priv); - if (skb_peek(&sk_atm(vcc)->sk_receive_queue)) - pr_info("%s closing with messages pending\n", dev->name); - while ((skb = skb_dequeue(&sk_atm(vcc)->sk_receive_queue))) { - atm_return(vcc, skb->truesize); - dev_kfree_skb(skb); - } - pr_info("%s: Shut down!\n", dev->name); module_put(THIS_MODULE); } @@ -510,12 +521,14 @@ send_to_lecd(struct lec_priv *priv, atmlec_msg_type type, const unsigned char *mac_addr, const unsigned char *atm_addr, struct sk_buff *data) { + struct atm_vcc *vcc; struct sock *sk; struct sk_buff *skb; struct atmlec_msg *mesg; - if (!priv || !priv->lecd) + if (!priv || !rcu_access_pointer(priv->lecd)) return -1; + skb = alloc_skb(sizeof(struct atmlec_msg), GFP_ATOMIC); if (!skb) return -1; @@ -532,18 +545,27 @@ send_to_lecd(struct lec_priv *priv, atmlec_msg_type type, if (atm_addr) memcpy(&mesg->content.normal.atm_addr, atm_addr, ATM_ESA_LEN); - atm_force_charge(priv->lecd, skb->truesize); - sk = sk_atm(priv->lecd); + rcu_read_lock(); + vcc = rcu_dereference(priv->lecd); + if (!vcc) { + rcu_read_unlock(); + kfree_skb(skb); + return -1; + } + + atm_force_charge(vcc, skb->truesize); + sk = sk_atm(vcc); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk); if (data != NULL) { pr_debug("about to send %d bytes of data\n", data->len); - atm_force_charge(priv->lecd, data->truesize); + atm_force_charge(vcc, data->truesize); skb_queue_tail(&sk->sk_receive_queue, data); sk->sk_data_ready(sk); } + rcu_read_unlock(); return 0; } @@ -618,7 +640,7 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb) atm_return(vcc, skb->truesize); if (*(__be16 *) skb->data == htons(priv->lecid) || - !priv->lecd || !(dev->flags & IFF_UP)) { + !rcu_access_pointer(priv->lecd) || !(dev->flags & IFF_UP)) { /* * Probably looping back, or if lecd is missing, * lecd has gone down @@ -753,12 +775,12 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) priv = netdev_priv(dev_lec[i]); } else { priv = netdev_priv(dev_lec[i]); - if (priv->lecd) + if (rcu_access_pointer(priv->lecd)) return -EADDRINUSE; } lec_arp_init(priv); priv->itfnum = i; /* LANE2 addition */ - priv->lecd = vcc; + rcu_assign_pointer(priv->lecd, vcc); vcc->dev = &lecatm_dev; vcc_insert_socket(sk_atm(vcc)); diff --git a/net/atm/lec.h b/net/atm/lec.h index be0e2667bd8c3f..ec85709bf81859 100644 --- a/net/atm/lec.h +++ b/net/atm/lec.h @@ -91,7 +91,7 @@ struct lec_priv { */ spinlock_t lec_arp_lock; struct atm_vcc *mcast_vcc; /* Default Multicast Send VCC */ - struct atm_vcc *lecd; + struct atm_vcc __rcu *lecd; struct delayed_work lec_arp_work; /* C10 */ unsigned int maximum_unknown_frame_count; /* diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index b75c2228e69a65..f28e9cbf8ad5f2 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -473,6 +473,9 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, if (aggregated_bytes > max_bytes) return false; + if (skb_tailroom(forw_packet->skb) < packet_len) + return false; + if (packet_num >= BATADV_MAX_AGGREGATION_PACKETS) return false; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 4719dac0719005..e6393f17576b0b 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1944,6 +1944,8 @@ static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos) return false; done: + conn->iso_qos = *qos; + if (hci_cmd_sync_queue(hdev, set_cig_params_sync, UINT_PTR(qos->ucast.cig), NULL) < 0) return false; @@ -2013,8 +2015,6 @@ struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst, } hci_conn_hold(cis); - - cis->iso_qos = *qos; cis->state = BT_BOUND; return cis; @@ -3095,7 +3095,7 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) * hci_connect_le serializes the connection attempts so only one * connection can be in BT_CONNECT at time. */ - if (conn->state == BT_CONNECT && hdev->req_status == HCI_REQ_PEND) { + if (conn->state == BT_CONNECT && READ_ONCE(hdev->req_status) == HCI_REQ_PEND) { switch (hci_skb_event(hdev->sent_cmd)) { case HCI_EV_CONN_COMPLETE: case HCI_EV_LE_CONN_COMPLETE: diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 31308c1de4ec3f..01f8ceeb1c0c84 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -4126,7 +4126,7 @@ static int hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) kfree_skb(skb); } - if (hdev->req_status == HCI_REQ_PEND && + if (READ_ONCE(hdev->req_status) == HCI_REQ_PEND && !hci_dev_test_and_set_flag(hdev, HCI_CMD_PENDING)) { kfree_skb(hdev->req_skb); hdev->req_skb = skb_clone(hdev->sent_cmd, GFP_KERNEL); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 121dbc8208ec23..45d16639874a0b 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -25,11 +25,11 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, { bt_dev_dbg(hdev, "result 0x%2.2x", result); - if (hdev->req_status != HCI_REQ_PEND) + if (READ_ONCE(hdev->req_status) != HCI_REQ_PEND) return; hdev->req_result = result; - hdev->req_status = HCI_REQ_DONE; + WRITE_ONCE(hdev->req_status, HCI_REQ_DONE); /* Free the request command so it is not used as response */ kfree_skb(hdev->req_skb); @@ -167,20 +167,20 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, hci_cmd_sync_add(&req, opcode, plen, param, event, sk); - hdev->req_status = HCI_REQ_PEND; + WRITE_ONCE(hdev->req_status, HCI_REQ_PEND); err = hci_req_sync_run(&req); if (err < 0) return ERR_PTR(err); err = wait_event_interruptible_timeout(hdev->req_wait_q, - hdev->req_status != HCI_REQ_PEND, + READ_ONCE(hdev->req_status) != HCI_REQ_PEND, timeout); if (err == -ERESTARTSYS) return ERR_PTR(-EINTR); - switch (hdev->req_status) { + switch (READ_ONCE(hdev->req_status)) { case HCI_REQ_DONE: err = -bt_to_errno(hdev->req_result); break; @@ -194,7 +194,7 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, break; } - hdev->req_status = 0; + WRITE_ONCE(hdev->req_status, 0); hdev->req_result = 0; skb = hdev->req_rsp; hdev->req_rsp = NULL; @@ -665,9 +665,9 @@ void hci_cmd_sync_cancel(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); - if (hdev->req_status == HCI_REQ_PEND) { + if (READ_ONCE(hdev->req_status) == HCI_REQ_PEND) { hdev->req_result = err; - hdev->req_status = HCI_REQ_CANCELED; + WRITE_ONCE(hdev->req_status, HCI_REQ_CANCELED); queue_work(hdev->workqueue, &hdev->cmd_sync_cancel_work); } @@ -683,12 +683,12 @@ void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); - if (hdev->req_status == HCI_REQ_PEND) { + if (READ_ONCE(hdev->req_status) == HCI_REQ_PEND) { /* req_result is __u32 so error must be positive to be properly * propagated. */ hdev->req_result = err < 0 ? -err : err; - hdev->req_status = HCI_REQ_CANCELED; + WRITE_ONCE(hdev->req_status, HCI_REQ_CANCELED); wake_up_interruptible(&hdev->req_wait_q); } @@ -6627,8 +6627,8 @@ static int hci_le_create_conn_sync(struct hci_dev *hdev, void *data) * state. */ if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { - hci_scan_disable_sync(hdev); hci_dev_set_flag(hdev, HCI_LE_SCAN_INTERRUPTED); + hci_scan_disable_sync(hdev); } /* Update random address, but set require_privacy to false so diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 6fe815241b0149..7bcf8c5ceaeedc 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -986,7 +986,8 @@ static void session_free(struct kref *ref) skb_queue_purge(&session->intr_transmit); fput(session->intr_sock->file); fput(session->ctrl_sock->file); - l2cap_conn_put(session->conn); + if (session->conn) + l2cap_conn_put(session->conn); kfree(session); } @@ -1164,6 +1165,15 @@ static void hidp_session_remove(struct l2cap_conn *conn, down_write(&hidp_session_sem); + /* Drop L2CAP reference immediately to indicate that + * l2cap_unregister_user() shall not be called as it is already + * considered removed. + */ + if (session->conn) { + l2cap_conn_put(session->conn); + session->conn = NULL; + } + hidp_session_terminate(session); cancel_work_sync(&session->dev_init); @@ -1301,7 +1311,9 @@ static int hidp_session_thread(void *arg) * Instead, this call has the same semantics as if user-space tried to * delete the session. */ - l2cap_unregister_user(session->conn, &session->user); + if (session->conn) + l2cap_unregister_user(session->conn, &session->user); + hidp_session_put(session); module_put_and_kthread_exit(0); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index ad98db9632fd2c..95c65fece39bdf 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -926,16 +926,39 @@ int l2cap_chan_check_security(struct l2cap_chan *chan, bool initiator) static int l2cap_get_ident(struct l2cap_conn *conn) { + u8 max; + int ident; + /* LE link does not support tools like l2ping so use the full range */ if (conn->hcon->type == LE_LINK) - return ida_alloc_range(&conn->tx_ida, 1, 255, GFP_ATOMIC); - + max = 255; /* Get next available identificator. * 1 - 128 are used by kernel. * 129 - 199 are reserved. * 200 - 254 are used by utilities like l2ping, etc. */ - return ida_alloc_range(&conn->tx_ida, 1, 128, GFP_ATOMIC); + else + max = 128; + + /* Allocate ident using min as last used + 1 (cyclic) */ + ident = ida_alloc_range(&conn->tx_ida, READ_ONCE(conn->tx_ident) + 1, + max, GFP_ATOMIC); + /* Force min 1 to start over */ + if (ident <= 0) { + ident = ida_alloc_range(&conn->tx_ida, 1, max, GFP_ATOMIC); + if (ident <= 0) { + /* If all idents are in use, log an error, this is + * extremely unlikely to happen and would indicate a bug + * in the code that idents are not being freed properly. + */ + BT_ERR("Unable to allocate ident: %d", ident); + return 0; + } + } + + WRITE_ONCE(conn->tx_ident, ident); + + return ident; } static void l2cap_send_acl(struct l2cap_conn *conn, struct sk_buff *skb, @@ -1678,17 +1701,15 @@ static void l2cap_info_timeout(struct work_struct *work) int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user) { - struct hci_dev *hdev = conn->hcon->hdev; int ret; /* We need to check whether l2cap_conn is registered. If it is not, we - * must not register the l2cap_user. l2cap_conn_del() is unregisters - * l2cap_conn objects, but doesn't provide its own locking. Instead, it - * relies on the parent hci_conn object to be locked. This itself relies - * on the hci_dev object to be locked. So we must lock the hci device - * here, too. */ + * must not register the l2cap_user. l2cap_conn_del() unregisters + * l2cap_conn objects under conn->lock, and we use the same lock here + * to protect access to conn->users and conn->hchan. + */ - hci_dev_lock(hdev); + mutex_lock(&conn->lock); if (!list_empty(&user->list)) { ret = -EINVAL; @@ -1709,16 +1730,14 @@ int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user) ret = 0; out_unlock: - hci_dev_unlock(hdev); + mutex_unlock(&conn->lock); return ret; } EXPORT_SYMBOL(l2cap_register_user); void l2cap_unregister_user(struct l2cap_conn *conn, struct l2cap_user *user) { - struct hci_dev *hdev = conn->hcon->hdev; - - hci_dev_lock(hdev); + mutex_lock(&conn->lock); if (list_empty(&user->list)) goto out_unlock; @@ -1727,7 +1746,7 @@ void l2cap_unregister_user(struct l2cap_conn *conn, struct l2cap_user *user) user->remove(conn, user); out_unlock: - hci_dev_unlock(hdev); + mutex_unlock(&conn->lock); } EXPORT_SYMBOL(l2cap_unregister_user); @@ -1752,6 +1771,9 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) BT_DBG("hcon %p conn %p, err %d", hcon, conn, err); + disable_delayed_work_sync(&conn->info_timer); + disable_delayed_work_sync(&conn->id_addr_timer); + mutex_lock(&conn->lock); kfree_skb(conn->rx_skb); @@ -1767,8 +1789,6 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) ida_destroy(&conn->tx_ida); - cancel_delayed_work_sync(&conn->id_addr_timer); - l2cap_unregister_all_users(conn); /* Force the connection to be immediately dropped */ @@ -1787,9 +1807,6 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) l2cap_chan_put(chan); } - if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) - cancel_delayed_work_sync(&conn->info_timer); - hci_chan_del(conn->hchan); conn->hchan = NULL; @@ -2381,6 +2398,9 @@ static int l2cap_segment_sdu(struct l2cap_chan *chan, /* Remote device may have requested smaller PDUs */ pdu_len = min_t(size_t, pdu_len, chan->remote_mps); + if (!pdu_len) + return -EINVAL; + if (len <= pdu_len) { sar = L2CAP_SAR_UNSEGMENTED; sdu_len = 0; @@ -4316,14 +4336,16 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, if (test_bit(CONF_INPUT_DONE, &chan->conf_state)) { set_default_fcs(chan); - if (chan->mode == L2CAP_MODE_ERTM || - chan->mode == L2CAP_MODE_STREAMING) - err = l2cap_ertm_init(chan); + if (chan->state != BT_CONNECTED) { + if (chan->mode == L2CAP_MODE_ERTM || + chan->mode == L2CAP_MODE_STREAMING) + err = l2cap_ertm_init(chan); - if (err < 0) - l2cap_send_disconn_req(chan, -err); - else - l2cap_chan_ready(chan); + if (err < 0) + l2cap_send_disconn_req(chan, -err); + else + l2cap_chan_ready(chan); + } goto unlock; } @@ -4616,7 +4638,8 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, switch (type) { case L2CAP_IT_FEAT_MASK: - conn->feat_mask = get_unaligned_le32(rsp->data); + if (cmd_len >= sizeof(*rsp) + sizeof(u32)) + conn->feat_mask = get_unaligned_le32(rsp->data); if (conn->feat_mask & L2CAP_FEAT_FIXED_CHAN) { struct l2cap_info_req req; @@ -4635,7 +4658,8 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, break; case L2CAP_IT_FIXED_CHAN: - conn->remote_fixed_chan = rsp->data[0]; + if (cmd_len >= sizeof(*rsp) + sizeof(rsp->data[0])) + conn->remote_fixed_chan = rsp->data[0]; conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; conn->info_ident = 0; @@ -5059,7 +5083,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, u16 mtu, mps; __le16 psm; u8 result, rsp_len = 0; - int i, num_scid; + int i, num_scid = 0; bool defer = false; if (!enable_ecred) @@ -5072,17 +5096,25 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, goto response; } + /* Check if there are no pending channels with the same ident */ + __l2cap_chan_list_id(conn, cmd->ident, l2cap_ecred_list_defer, + &num_scid); + if (num_scid) { + result = L2CAP_CR_LE_INVALID_PARAMS; + goto response; + } + cmd_len -= sizeof(*req); num_scid = cmd_len / sizeof(u16); - /* Always respond with the same number of scids as in the request */ - rsp_len = cmd_len; - if (num_scid > L2CAP_ECRED_MAX_CID) { result = L2CAP_CR_LE_INVALID_PARAMS; goto response; } + /* Always respond with the same number of scids as in the request */ + rsp_len = cmd_len; + mtu = __le16_to_cpu(req->mtu); mps = __le16_to_cpu(req->mps); @@ -5424,7 +5456,7 @@ static inline int l2cap_ecred_reconf_rsp(struct l2cap_conn *conn, u8 *data) { struct l2cap_chan *chan, *tmp; - struct l2cap_ecred_conn_rsp *rsp = (void *) data; + struct l2cap_ecred_reconf_rsp *rsp = (void *)data; u16 result; if (cmd_len < sizeof(*rsp)) @@ -5432,7 +5464,7 @@ static inline int l2cap_ecred_reconf_rsp(struct l2cap_conn *conn, result = __le16_to_cpu(rsp->result); - BT_DBG("result 0x%4.4x", rsp->result); + BT_DBG("result 0x%4.4x", result); if (!result) return 0; @@ -6601,6 +6633,10 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan) struct l2cap_le_credits pkt; u16 return_credits = l2cap_le_rx_credits(chan); + if (chan->mode != L2CAP_MODE_LE_FLOWCTL && + chan->mode != L2CAP_MODE_EXT_FLOWCTL) + return; + if (chan->rx_credits >= return_credits) return; @@ -6662,8 +6698,10 @@ static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) return -ENOBUFS; } - if (chan->imtu < skb->len) { - BT_ERR("Too big LE L2CAP PDU"); + if (skb->len > chan->imtu) { + BT_ERR("Too big LE L2CAP PDU: len %u > %u", skb->len, + chan->imtu); + l2cap_send_disconn_req(chan, ECONNRESET); return -ENOBUFS; } @@ -6682,6 +6720,11 @@ static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) if (!chan->sdu) { u16 sdu_len; + if (!pskb_may_pull(skb, L2CAP_SDULEN_SIZE)) { + err = -EINVAL; + goto failed; + } + sdu_len = get_unaligned_le16(skb->data); skb_pull(skb, L2CAP_SDULEN_SIZE); @@ -6689,7 +6732,9 @@ static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) sdu_len, skb->len, chan->imtu); if (sdu_len > chan->imtu) { - BT_ERR("Too big LE L2CAP SDU length received"); + BT_ERR("Too big LE L2CAP SDU length: len %u > %u", + skb->len, sdu_len); + l2cap_send_disconn_req(chan, ECONNRESET); err = -EMSGSIZE; goto failed; } @@ -6725,6 +6770,7 @@ static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) if (chan->sdu->len + skb->len > chan->sdu_len) { BT_ERR("Too much LE L2CAP data received"); + l2cap_send_disconn_req(chan, ECONNRESET); err = -EINVAL; goto failed; } diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 597686790371fe..71e8c1b45bcee1 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1698,6 +1698,9 @@ static void l2cap_sock_ready_cb(struct l2cap_chan *chan) struct sock *sk = chan->data; struct sock *parent; + if (!sk) + return; + lock_sock(sk); parent = bt_sk(sk)->parent; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index a7238fd3b03bb5..e5f9287fb826ac 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2195,10 +2195,7 @@ static void set_mesh_complete(struct hci_dev *hdev, void *data, int err) sk = cmd->sk; if (status) { - mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, - status); - mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev, true, - cmd_status_rsp, &status); + mgmt_cmd_status(cmd->sk, hdev->id, cmd->opcode, status); goto done; } @@ -5358,7 +5355,7 @@ static void mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, * hci_adv_monitors_clear is about to be called which will take care of * freeing the adv_monitor instances. */ - if (status == -ECANCELED && !mgmt_pending_valid(hdev, cmd)) + if (status == -ECANCELED || !mgmt_pending_valid(hdev, cmd)) return; monitor = cmd->user_data; @@ -5377,7 +5374,7 @@ static void mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(status), &rp, sizeof(rp)); - mgmt_pending_remove(cmd); + mgmt_pending_free(cmd); hci_dev_unlock(hdev); bt_dev_dbg(hdev, "add monitor %d complete, status %d", diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index e7db50165879c5..584e059de20a66 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -401,7 +401,7 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb) struct sock *sk; sco_conn_lock(conn); - sk = conn->sk; + sk = sco_sock_hold(conn); sco_conn_unlock(conn); if (!sk) @@ -410,11 +410,15 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb) BT_DBG("sk %p len %u", sk, skb->len); if (sk->sk_state != BT_CONNECTED) - goto drop; + goto drop_put; - if (!sock_queue_rcv_skb(sk, skb)) + if (!sock_queue_rcv_skb(sk, skb)) { + sock_put(sk); return; + } +drop_put: + sock_put(sk); drop: kfree_skb(skb); } diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index e67bf7b34ea75d..485e3468bd26a0 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2743,7 +2743,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) if (!test_bit(SMP_FLAG_DEBUG_KEY, &smp->flags) && !crypto_memneq(key, smp->local_pk, 64)) { bt_dev_err(hdev, "Remote and local public keys are identical"); - return SMP_UNSPECIFIED; + return SMP_DHKEY_CHECK_FAILED; } memcpy(smp->remote_pk, key, 64); diff --git a/net/bridge/br_cfm.c b/net/bridge/br_cfm.c index 2c70fe47de38af..118c7ea48c351d 100644 --- a/net/bridge/br_cfm.c +++ b/net/bridge/br_cfm.c @@ -576,7 +576,7 @@ static void mep_delete_implementation(struct net_bridge *br, /* Empty and free peer MEP list */ hlist_for_each_entry_safe(peer_mep, n_store, &mep->peer_mep_list, head) { - cancel_delayed_work_sync(&peer_mep->ccm_rx_dwork); + disable_delayed_work_sync(&peer_mep->ccm_rx_dwork); hlist_del_rcu(&peer_mep->head); kfree_rcu(peer_mep, rcu); } @@ -732,7 +732,7 @@ int br_cfm_cc_peer_mep_remove(struct net_bridge *br, const u32 instance, return -ENOENT; } - cc_peer_disable(peer_mep); + disable_delayed_work_sync(&peer_mep->ccm_rx_dwork); hlist_del_rcu(&peer_mep->head); kfree_rcu(peer_mep, rcu); diff --git a/net/can/af_can.c b/net/can/af_can.c index f70e2ba0aadc0d..7bc86b176b4de3 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -469,7 +469,7 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id, rcv->can_id = can_id; rcv->mask = mask; - rcv->matches = 0; + atomic_long_set(&rcv->matches, 0); rcv->func = func; rcv->data = data; rcv->ident = ident; @@ -573,7 +573,7 @@ EXPORT_SYMBOL(can_rx_unregister); static inline void deliver(struct sk_buff *skb, struct receiver *rcv) { rcv->func(skb, rcv->data); - rcv->matches++; + atomic_long_inc(&rcv->matches); } static int can_rcv_filter(struct can_dev_rcv_lists *dev_rcv_lists, struct sk_buff *skb) diff --git a/net/can/af_can.h b/net/can/af_can.h index 22f3352c77fece..87887014f5628e 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h @@ -52,7 +52,7 @@ struct receiver { struct hlist_node list; canid_t can_id; canid_t mask; - unsigned long matches; + atomic_long_t matches; void (*func)(struct sk_buff *skb, void *data); void *data; char *ident; diff --git a/net/can/gw.c b/net/can/gw.c index 8ee4d67a07d390..0ec99f68aa452d 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -375,10 +375,10 @@ static void cgw_csum_crc8_rel(struct canfd_frame *cf, return; if (from <= to) { - for (i = crc8->from_idx; i <= crc8->to_idx; i++) + for (i = from; i <= to; i++) crc = crc8->crctab[crc ^ cf->data[i]]; } else { - for (i = crc8->from_idx; i >= crc8->to_idx; i--) + for (i = from; i >= to; i--) crc = crc8->crctab[crc ^ cf->data[i]]; } @@ -397,7 +397,7 @@ static void cgw_csum_crc8_rel(struct canfd_frame *cf, break; } - cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val; + cf->data[res] = crc ^ crc8->final_xor_val; } static void cgw_csum_crc8_pos(struct canfd_frame *cf, diff --git a/net/can/isotp.c b/net/can/isotp.c index da3b72e7afccb7..2770f43f495188 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1248,12 +1248,6 @@ static int isotp_release(struct socket *sock) so->ifindex = 0; so->bound = 0; - if (so->rx.buf != so->rx.sbuf) - kfree(so->rx.buf); - - if (so->tx.buf != so->tx.sbuf) - kfree(so->tx.buf); - sock_orphan(sk); sock->sk = NULL; @@ -1622,6 +1616,21 @@ static int isotp_notifier(struct notifier_block *nb, unsigned long msg, return NOTIFY_DONE; } +static void isotp_sock_destruct(struct sock *sk) +{ + struct isotp_sock *so = isotp_sk(sk); + + /* do the standard CAN sock destruct work */ + can_sock_destruct(sk); + + /* free potential extended PDU buffers */ + if (so->rx.buf != so->rx.sbuf) + kfree(so->rx.buf); + + if (so->tx.buf != so->tx.sbuf) + kfree(so->tx.buf); +} + static int isotp_init(struct sock *sk) { struct isotp_sock *so = isotp_sk(sk); @@ -1666,6 +1675,9 @@ static int isotp_init(struct sock *sk) list_add_tail(&so->notifier, &isotp_notifier_list); spin_unlock(&isotp_notifier_lock); + /* re-assign default can_sock_destruct() reference */ + sk->sk_destruct = isotp_sock_destruct; + return 0; } diff --git a/net/can/proc.c b/net/can/proc.c index 0938bf7dd646ac..de4d05ae345977 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -196,7 +196,8 @@ static void can_print_rcvlist(struct seq_file *m, struct hlist_head *rx_list, " %-5s %03x %08x %pK %pK %8ld %s\n"; seq_printf(m, fmt, DNAME(dev), r->can_id, r->mask, - r->func, r->data, r->matches, r->ident); + r->func, r->data, atomic_long_read(&r->matches), + r->ident); } } diff --git a/net/core/dev.c b/net/core/dev.c index 14a83f2035b933..fc555706241436 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3769,6 +3769,22 @@ static netdev_features_t dflt_features_check(struct sk_buff *skb, return vlan_features_check(skb, features); } +static bool skb_gso_has_extension_hdr(const struct sk_buff *skb) +{ + if (!skb->encapsulation) + return ((skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 || + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && + vlan_get_protocol(skb) == htons(ETH_P_IPV6))) && + skb_transport_header_was_set(skb) && + skb_network_header_len(skb) != sizeof(struct ipv6hdr)); + else + return (!skb_inner_network_header_was_set(skb) || + ((skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 || + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && + inner_ip_hdr(skb)->version == 6)) && + skb_inner_network_header_len(skb) != sizeof(struct ipv6hdr))); +} + static netdev_features_t gso_features_check(const struct sk_buff *skb, struct net_device *dev, netdev_features_t features) @@ -3816,11 +3832,7 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, * so neither does TSO that depends on it. */ if (features & NETIF_F_IPV6_CSUM && - (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 || - (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && - vlan_get_protocol(skb) == htons(ETH_P_IPV6))) && - skb_transport_header_was_set(skb) && - skb_network_header_len(skb) != sizeof(struct ipv6hdr)) + skb_gso_has_extension_hdr(skb)) features &= ~(NETIF_F_IPV6_CSUM | NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4); return features; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index dad4b1054955bd..fae8034efbff0a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -629,6 +629,9 @@ int rtnl_link_register(struct rtnl_link_ops *ops) unlock: mutex_unlock(&link_ops_mutex); + if (err) + cleanup_srcu_struct(&ops->srcu); + return err; } EXPORT_SYMBOL_GPL(rtnl_link_register); @@ -707,11 +710,14 @@ static size_t rtnl_link_get_slave_info_data_size(const struct net_device *dev) goto out; ops = master_dev->rtnl_link_ops; - if (!ops || !ops->get_slave_size) + if (!ops) + goto out; + size += nla_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_SLAVE_KIND */ + if (!ops->get_slave_size) goto out; /* IFLA_INFO_SLAVE_DATA + nested data */ - size = nla_total_size(sizeof(struct nlattr)) + - ops->get_slave_size(master_dev, dev); + size += nla_total_size(sizeof(struct nlattr)) + + ops->get_slave_size(master_dev, dev); out: rcu_read_unlock(); @@ -1267,6 +1273,21 @@ static size_t rtnl_dpll_pin_size(const struct net_device *dev) return size; } +static size_t rtnl_dev_parent_size(const struct net_device *dev) +{ + size_t size = 0; + + /* IFLA_PARENT_DEV_NAME */ + if (dev->dev.parent) + size += nla_total_size(strlen(dev_name(dev->dev.parent)) + 1); + + /* IFLA_PARENT_DEV_BUS_NAME */ + if (dev->dev.parent && dev->dev.parent->bus) + size += nla_total_size(strlen(dev->dev.parent->bus->name) + 1); + + return size; +} + static noinline size_t if_nlmsg_size(const struct net_device *dev, u32 ext_filter_mask) { @@ -1328,6 +1349,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(8) /* IFLA_MAX_PACING_OFFLOAD_HORIZON */ + nla_total_size(2) /* IFLA_HEADROOM */ + nla_total_size(2) /* IFLA_TAILROOM */ + + rtnl_dev_parent_size(dev) + 0; if (!(ext_filter_mask & RTEXT_FILTER_SKIP_STATS)) diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 13a63b48b7eeb8..d9faadbe9b6c86 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -193,14 +193,11 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) } EXPORT_SYMBOL(eth_type_trans); -/** - * eth_header_parse - extract hardware address from packet - * @skb: packet to extract header from - * @haddr: destination buffer - */ -int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr) +int eth_header_parse(const struct sk_buff *skb, const struct net_device *dev, + unsigned char *haddr) { const struct ethhdr *eth = eth_hdr(skb); + memcpy(haddr, eth->h_source, ETH_ALEN); return ETH_ALEN; } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 2c922afadb8f6b..6dfc0bcdef6542 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -235,10 +235,13 @@ static void esp_output_done(void *data, int err) xfrm_dev_resume(skb); } else { if (!err && - x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) - esp_output_tail_tcp(x, skb); - else + x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) { + err = esp_output_tail_tcp(x, skb); + if (err != -EINPROGRESS) + kfree_skb(skb); + } else { xfrm_output_resume(skb_to_full_sk(skb), skb, err); + } } } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index a62b4c4033ccba..568bd1e95d447f 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1079,10 +1079,12 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info) static bool icmp_tag_validation(int proto) { + const struct net_protocol *ipprot; bool ok; rcu_read_lock(); - ok = rcu_dereference(inet_protos[proto])->icmp_strict_tag_validation; + ipprot = rcu_dereference(inet_protos[proto]); + ok = ipprot ? ipprot->icmp_strict_tag_validation : false; rcu_read_unlock(); return ok; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 5dfac6ce1110b3..e961936b6be76b 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -154,20 +154,6 @@ bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high) } EXPORT_SYMBOL(inet_sk_get_local_port_range); -static bool inet_use_bhash2_on_bind(const struct sock *sk) -{ -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) { - if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) - return false; - - if (!ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) - return true; - } -#endif - return sk->sk_rcv_saddr != htonl(INADDR_ANY); -} - static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2, kuid_t uid, bool relax, bool reuseport_cb_ok, bool reuseport_ok) @@ -259,7 +245,7 @@ static int inet_csk_bind_conflict(const struct sock *sk, * checks separately because their spinlocks have to be acquired/released * independently of each other, to prevent possible deadlocks */ - if (inet_use_bhash2_on_bind(sk)) + if (inet_use_hash2_on_bind(sk)) return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, reuseport_ok); @@ -376,7 +362,7 @@ inet_csk_find_open_port(const struct sock *sk, struct inet_bind_bucket **tb_ret, head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; spin_lock_bh(&head->lock); - if (inet_use_bhash2_on_bind(sk)) { + if (inet_use_hash2_on_bind(sk)) { if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, relax, false)) goto next_port; } @@ -562,7 +548,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) check_bind_conflict = false; } - if (check_bind_conflict && inet_use_bhash2_on_bind(sk)) { + if (check_bind_conflict && inet_use_hash2_on_bind(sk)) { if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, true, true)) goto fail_unlock; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index e13244729ad8d5..35f0baa99d4092 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -919,7 +919,8 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, return -(t->hlen + sizeof(*iph)); } -static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) +static int ipgre_header_parse(const struct sk_buff *skb, const struct net_device *dev, + unsigned char *haddr) { const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb); memcpy(haddr, &iph->saddr, 4); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b60fad393e1820..cb99a3c27053ef 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -287,7 +287,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, } else { hslot = udp_hashslot(udptable, net, snum); spin_lock_bh(&hslot->lock); - if (hslot->count > 10) { + if (inet_use_hash2_on_bind(sk) && hslot->count > 10) { int exist; unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0e55f139e05d5b..f4e23b543585f8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2862,7 +2862,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) fib6_add_gc_list(rt); } else { fib6_clean_expires(rt); - fib6_remove_gc_list(rt); + fib6_may_remove_gc_list(net, rt); } spin_unlock_bh(&table->tb6_lock); @@ -4840,7 +4840,7 @@ static int modify_prefix_route(struct net *net, struct inet6_ifaddr *ifp, if (!(flags & RTF_EXPIRES)) { fib6_clean_expires(f6i); - fib6_remove_gc_list(f6i); + fib6_may_remove_gc_list(net, f6i); } else { fib6_set_expires(f6i, expires); fib6_add_gc_list(f6i); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index e75da98f528387..9f75313734f8cd 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -271,10 +271,13 @@ static void esp_output_done(void *data, int err) xfrm_dev_resume(skb); } else { if (!err && - x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) - esp_output_tail_tcp(x, skb); - else + x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) { + err = esp_output_tail_tcp(x, skb); + if (err != -EINPROGRESS) + kfree_skb(skb); + } else { xfrm_output_resume(skb_to_full_sk(skb), skb, err); + } } } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 5e3610a926cfb6..95558fd6f447e3 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -379,6 +379,10 @@ static int ipv6_srh_rcv(struct sk_buff *skb) hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); idev = __in6_dev_get(skb->dev); + if (!idev) { + kfree_skb(skb); + return -1; + } accept_seg6 = min(READ_ONCE(net->ipv6.devconf_all->seg6_enabled), READ_ONCE(idev->cnf.seg6_enabled)); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 9058e71241dc37..dd26657b6a4acd 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1133,7 +1133,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, return -EEXIST; if (!(rt->fib6_flags & RTF_EXPIRES)) { fib6_clean_expires(iter); - fib6_remove_gc_list(iter); + fib6_may_remove_gc_list(info->nl_net, iter); } else { fib6_set_expires(iter, rt->expires); fib6_add_gc_list(iter); @@ -2348,6 +2348,17 @@ static void fib6_flush_trees(struct net *net) /* * Garbage collection */ +void fib6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args, + unsigned long now) +{ + bool may_expire = rt->fib6_flags & RTF_EXPIRES && rt->expires; + int old_more = gc_args->more; + + rt6_age_exceptions(rt, gc_args, now); + + if (!may_expire && old_more == gc_args->more) + fib6_remove_gc_list(rt); +} static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) { @@ -2370,7 +2381,7 @@ static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) * Note, that clones are aged out * only if they are not in use now. */ - rt6_age_exceptions(rt, gc_args, now); + fib6_age_exceptions(rt, gc_args, now); return 0; } diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 4ad8b2032f1f92..5561bd9cea8185 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -157,6 +157,10 @@ static int rt_mt6_check(const struct xt_mtchk_param *par) pr_debug("unknown flags %X\n", rtinfo->invflags); return -EINVAL; } + if (rtinfo->addrnr > IP6T_RT_HOPS) { + pr_debug("too many addresses specified\n"); + return -EINVAL; + } if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) && (!(rtinfo->flags & IP6T_RT_TYP) || (rtinfo->rt_type != 0) || diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 08cd86f49bf963..cb521700cee7ed 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1033,7 +1033,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, if (!addrconf_finite_timeout(lifetime)) { fib6_clean_expires(rt); - fib6_remove_gc_list(rt); + fib6_may_remove_gc_list(net, rt); } else { fib6_set_expires(rt, jiffies + HZ * lifetime); fib6_add_gc_list(rt); diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index ee6bac0160acea..e6964c6b0d3810 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -184,6 +184,8 @@ bool seg6_hmac_validate_skb(struct sk_buff *skb) int require_hmac; idev = __in6_dev_get(skb->dev); + if (!idev) + return false; srh = (struct ipv6_sr_hdr *)skb_transport_header(skb); diff --git a/net/key/af_key.c b/net/key/af_key.c index 0756bac62f7c04..72ac2ace419de4 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3518,7 +3518,7 @@ static int set_sadb_kmaddress(struct sk_buff *skb, const struct xfrm_kmaddress * static int set_ipsecrequest(struct sk_buff *skb, uint8_t proto, uint8_t mode, int level, - uint32_t reqid, uint8_t family, + uint32_t reqid, sa_family_t family, const xfrm_address_t *src, const xfrm_address_t *dst) { struct sadb_x_ipsecrequest *rq; @@ -3583,12 +3583,17 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, /* ipsecrequests */ for (i = 0, mp = m; i < num_bundles; i++, mp++) { - /* old locator pair */ - size_pol += sizeof(struct sadb_x_ipsecrequest) + - pfkey_sockaddr_pair_size(mp->old_family); - /* new locator pair */ - size_pol += sizeof(struct sadb_x_ipsecrequest) + - pfkey_sockaddr_pair_size(mp->new_family); + int pair_size; + + pair_size = pfkey_sockaddr_pair_size(mp->old_family); + if (!pair_size) + return -EINVAL; + size_pol += sizeof(struct sadb_x_ipsecrequest) + pair_size; + + pair_size = pfkey_sockaddr_pair_size(mp->new_family); + if (!pair_size) + return -EINVAL; + size_pol += sizeof(struct sadb_x_ipsecrequest) + pair_size; } size += sizeof(struct sadb_msg) + size_pol; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b92b4a5c2636d8..b85375ceb575d7 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1904,12 +1904,6 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, __sta_info_flush(sdata, true, link_id, NULL); - ieee80211_remove_link_keys(link, &keys); - if (!list_empty(&keys)) { - synchronize_net(); - ieee80211_free_key_list(local, &keys); - } - ieee80211_stop_mbssid(sdata); RCU_INIT_POINTER(link_conf->tx_bss_conf, NULL); @@ -1921,6 +1915,12 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, ieee80211_link_info_change_notify(sdata, link, BSS_CHANGED_BEACON_ENABLED); + ieee80211_remove_link_keys(link, &keys); + if (!list_empty(&keys)) { + synchronize_net(); + ieee80211_free_key_list(local, &keys); + } + if (sdata->wdev.links[link_id].cac_started) { chandef = link_conf->chanreq.oper; wiphy_hrtimer_work_cancel(wiphy, &link->dfs_cac_timer_work); diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 4447cf03c41b93..05f45e66999b2d 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -561,14 +561,16 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local, rcu_read_lock(); list_for_each_entry_rcu(sta, &local->sta_list, list) { - struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_sub_if_data *sdata; enum ieee80211_sta_rx_bandwidth new_sta_bw; unsigned int link_id; if (!ieee80211_sdata_running(sta->sdata)) continue; - for (link_id = 0; link_id < ARRAY_SIZE(sta->sdata->link); link_id++) { + sdata = get_bss_sdata(sta->sdata); + + for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) { struct ieee80211_link_data *link = rcu_dereference(sdata->link[link_id]); struct ieee80211_bss_conf *link_conf; diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index d02f07368c5119..687a66cd49433f 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -320,7 +320,6 @@ static ssize_t aql_enable_read(struct file *file, char __user *user_buf, static ssize_t aql_enable_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { - bool aql_disabled = static_key_false(&aql_disable.key); char buf[3]; size_t len; @@ -335,15 +334,12 @@ static ssize_t aql_enable_write(struct file *file, const char __user *user_buf, if (len > 0 && buf[len - 1] == '\n') buf[len - 1] = 0; - if (buf[0] == '0' && buf[1] == '\0') { - if (!aql_disabled) - static_branch_inc(&aql_disable); - } else if (buf[0] == '1' && buf[1] == '\0') { - if (aql_disabled) - static_branch_dec(&aql_disable); - } else { + if (buf[0] == '0' && buf[1] == '\0') + static_branch_enable(&aql_disable); + else if (buf[0] == '1' && buf[1] == '\0') + static_branch_disable(&aql_disable); + else return -EINVAL; - } return count; } diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 28624e57aa4991..8fdbdf9ba2a9ee 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -79,6 +79,9 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, * - MDA enabled * - Power management control on fc */ + if (!ie->mesh_config) + return false; + if (!(ifmsh->mesh_id_len == ie->mesh_id_len && memcmp(ifmsh->mesh_id, ie->mesh_id, ie->mesh_id_len) == 0 && (ifmsh->mesh_pp_id == ie->mesh_config->meshconf_psel) && diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 6dc22f1593be41..dd51a578fbc5c0 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2782,7 +2782,9 @@ static void sta_set_link_sinfo(struct sta_info *sta, } link_sinfo->inactive_time = - jiffies_to_msecs(jiffies - ieee80211_sta_last_active(sta, link_id)); + jiffies_delta_to_msecs(jiffies - + ieee80211_sta_last_active(sta, + link_id)); if (!(link_sinfo->filled & (BIT_ULL(NL80211_STA_INFO_TX_BYTES64) | BIT_ULL(NL80211_STA_INFO_TX_BYTES)))) { @@ -3015,7 +3017,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, sinfo->connected_time = ktime_get_seconds() - sta->last_connected; sinfo->assoc_at = sta->assoc_at; sinfo->inactive_time = - jiffies_to_msecs(jiffies - ieee80211_sta_last_active(sta, -1)); + jiffies_delta_to_msecs(jiffies - + ieee80211_sta_last_active(sta, -1)); if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_TX_BYTES64) | BIT_ULL(NL80211_STA_INFO_TX_BYTES)))) { diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index dbbfe2d6842fbe..1dca2fae05a521 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -1449,7 +1449,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, } sta = sta_info_get(sdata, peer); - if (!sta) + if (!sta || !sta->sta.tdls) return -ENOLINK; iee80211_tdls_recalc_chanctx(sdata, sta); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8cdbd417d7befb..b7aedaab848386 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1899,8 +1899,10 @@ bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw, struct ieee80211_tx_data tx; struct sk_buff *skb2; - if (ieee80211_tx_prepare(sdata, &tx, NULL, skb) == TX_DROP) + if (ieee80211_tx_prepare(sdata, &tx, NULL, skb) == TX_DROP) { + kfree_skb(skb); return false; + } info->band = band; info->control.vif = vif; diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 9e4631fade90c9..000be60d958034 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -469,7 +469,9 @@ static int mac802154_header_create(struct sk_buff *skb, } static int -mac802154_header_parse(const struct sk_buff *skb, unsigned char *haddr) +mac802154_header_parse(const struct sk_buff *skb, + const struct net_device *dev, + unsigned char *haddr) { struct ieee802154_hdr hdr; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index ef9e749d5e08f4..d5417688f69e63 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -2854,6 +2854,7 @@ static int __init mpls_init(void) rtnl_af_unregister(&mpls_af_ops); out_unregister_dev_type: dev_remove_pack(&mpls_packet_type); + unregister_netdevice_notifier(&mpls_dev_notifier); out_unregister_pernet: unregister_pernet_subsys(&mpls_net_ops); goto out; diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c index b2b9df43960e91..82e59f9c6dd9ce 100644 --- a/net/mptcp/pm_kernel.c +++ b/net/mptcp/pm_kernel.c @@ -838,7 +838,7 @@ static struct lock_class_key mptcp_keys[2]; static int mptcp_pm_nl_create_listen_socket(struct sock *sk, struct mptcp_pm_addr_entry *entry) { - bool is_ipv6 = sk->sk_family == AF_INET6; + bool is_ipv6 = entry->addr.family == AF_INET6; int addrlen = sizeof(struct sockaddr_in); struct sockaddr_storage addr; struct sock *newsk, *ssk; diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c index 6f3a6411f4af74..c20031891b8684 100644 --- a/net/netfilter/nf_bpf_link.c +++ b/net/netfilter/nf_bpf_link.c @@ -170,7 +170,7 @@ static int bpf_nf_link_update(struct bpf_link *link, struct bpf_prog *new_prog, static const struct bpf_link_ops bpf_nf_link_lops = { .release = bpf_nf_link_release, - .dealloc = bpf_nf_link_dealloc, + .dealloc_deferred = bpf_nf_link_dealloc, .detach = bpf_nf_link_detach, .show_fdinfo = bpf_nf_link_show_info, .fill_link_info = bpf_nf_link_fill_link_info, diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c index a7552a46d6acf5..4f39bf7c843f2d 100644 --- a/net/netfilter/nf_conntrack_broadcast.c +++ b/net/netfilter/nf_conntrack_broadcast.c @@ -21,6 +21,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb, unsigned int timeout) { const struct nf_conntrack_helper *helper; + struct net *net = read_pnet(&ct->ct_net); struct nf_conntrack_expect *exp; struct iphdr *iph = ip_hdr(skb); struct rtable *rt = skb_rtable(skb); @@ -70,8 +71,11 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb, exp->expectfn = NULL; exp->flags = NF_CT_EXPECT_PERMANENT; exp->class = NF_CT_EXPECT_CLASS_DEFAULT; - exp->helper = NULL; - + rcu_assign_pointer(exp->helper, helper); + write_pnet(&exp->net, net); +#ifdef CONFIG_NF_CONNTRACK_ZONES + exp->zone = ct->zone; +#endif nf_ct_expect_related(exp, 0); nf_ct_expect_put(exp); diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 81baf20826046e..9df159448b8979 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -247,6 +247,8 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, struct nf_ct_event_notifier *notify; struct nf_conntrack_ecache *e; + lockdep_nfct_expect_lock_held(); + rcu_read_lock(); notify = rcu_dereference(net->ct.nf_conntrack_event_cb); if (!notify) diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index cfc2daa3fc7f34..24d0576d84b7f6 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -51,6 +51,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, struct net *net = nf_ct_exp_net(exp); struct nf_conntrack_net *cnet; + lockdep_nfct_expect_lock_held(); WARN_ON(!master_help); WARN_ON(timer_pending(&exp->timeout)); @@ -112,12 +113,14 @@ nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple, const struct net *net) { return nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && - net_eq(net, nf_ct_net(i->master)) && - nf_ct_zone_equal_any(i->master, zone); + net_eq(net, read_pnet(&i->net)) && + nf_ct_exp_zone_equal_any(i, zone); } bool nf_ct_remove_expect(struct nf_conntrack_expect *exp) { + lockdep_nfct_expect_lock_held(); + if (timer_delete(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); @@ -177,6 +180,8 @@ nf_ct_find_expectation(struct net *net, struct nf_conntrack_expect *i, *exp = NULL; unsigned int h; + lockdep_nfct_expect_lock_held(); + if (!cnet->expect_count) return NULL; @@ -309,12 +314,20 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me) } EXPORT_SYMBOL_GPL(nf_ct_expect_alloc); +/* This function can only be used from packet path, where accessing + * master's helper is safe, because the packet holds a reference on + * the conntrack object. Never use it from control plane. + */ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class, u_int8_t family, const union nf_inet_addr *saddr, const union nf_inet_addr *daddr, u_int8_t proto, const __be16 *src, const __be16 *dst) { + struct nf_conntrack_helper *helper = NULL; + struct nf_conn *ct = exp->master; + struct net *net = read_pnet(&ct->ct_net); + struct nf_conn_help *help; int len; if (family == AF_INET) @@ -325,7 +338,16 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class, exp->flags = 0; exp->class = class; exp->expectfn = NULL; - exp->helper = NULL; + + help = nfct_help(ct); + if (help) + helper = rcu_dereference(help->helper); + + rcu_assign_pointer(exp->helper, helper); + write_pnet(&exp->net, net); +#ifdef CONFIG_NF_CONNTRACK_ZONES + exp->zone = ct->zone; +#endif exp->tuple.src.l3num = family; exp->tuple.dst.protonum = proto; @@ -442,6 +464,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect, unsigned int h; int ret = 0; + lockdep_nfct_expect_lock_held(); + if (!master_help) { ret = -ESHUTDOWN; goto out; @@ -498,8 +522,9 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, nf_ct_expect_insert(expect); - spin_unlock_bh(&nf_conntrack_expect_lock); nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report); + spin_unlock_bh(&nf_conntrack_expect_lock); + return 0; out: spin_unlock_bh(&nf_conntrack_expect_lock); @@ -627,11 +652,15 @@ static int exp_seq_show(struct seq_file *s, void *v) { struct nf_conntrack_expect *expect; struct nf_conntrack_helper *helper; + struct net *net = seq_file_net(s); struct hlist_node *n = v; char *delim = ""; expect = hlist_entry(n, struct nf_conntrack_expect, hnode); + if (!net_eq(nf_ct_exp_net(expect), net)) + return 0; + if (expect->timeout.function) seq_printf(s, "%ld ", timer_pending(&expect->timeout) ? (long)(expect->timeout.expires - jiffies)/HZ : 0); @@ -654,7 +683,7 @@ static int exp_seq_show(struct seq_file *s, void *v) if (expect->flags & NF_CT_EXPECT_USERSPACE) seq_printf(s, "%sUSERSPACE", delim); - helper = rcu_dereference(nfct_help(expect->master)->helper); + helper = rcu_dereference(expect->helper); if (helper) { seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name); if (helper->expect_policy[expect->class].name[0]) diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index 62aa22a0787695..7b1497ed97d269 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -331,6 +331,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f, if (nf_h323_error_boundary(bs, 0, 2)) return H323_ERROR_BOUND; len = get_bits(bs, 2) + 1; + if (nf_h323_error_boundary(bs, len, 0)) + return H323_ERROR_BOUND; BYTE_ALIGN(bs); if (base && (f->attr & DECODE)) { /* timeToLive */ unsigned int v = get_uint(bs, len) + f->lb; @@ -922,6 +924,8 @@ int DecodeQ931(unsigned char *buf, size_t sz, Q931 *q931) break; p++; len--; + if (len <= 0) + break; return DecodeH323_UserInformation(buf, p, len, &q931->UUIE); } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index a2a0e22ccee198..3f5c50455b716a 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -643,7 +643,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &ct->tuplehash[!dir].tuple.dst.u3, IPPROTO_TCP, NULL, &port); - exp->helper = &nf_conntrack_helper_h245; + rcu_assign_pointer(exp->helper, &nf_conntrack_helper_h245); nathook = rcu_dereference(nfct_h323_nat_hook); if (memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -767,7 +767,7 @@ static int expect_callforwarding(struct sk_buff *skb, nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); - exp->helper = nf_conntrack_helper_q931; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); nathook = rcu_dereference(nfct_h323_nat_hook); if (memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -1234,7 +1234,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3 : NULL, &ct->tuplehash[!dir].tuple.dst.u3, IPPROTO_TCP, NULL, &port); - exp->helper = nf_conntrack_helper_q931; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */ nathook = rcu_dereference(nfct_h323_nat_hook); @@ -1306,7 +1306,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct, nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_UDP, NULL, &port); - exp->helper = nf_conntrack_helper_ras; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_ras); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect RAS "); @@ -1523,7 +1523,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; - exp->helper = nf_conntrack_helper_q931; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); @@ -1577,7 +1577,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; - exp->helper = nf_conntrack_helper_q931; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index ceb48c3ca0a439..1b330ba6613bb2 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -395,14 +395,10 @@ EXPORT_SYMBOL_GPL(nf_conntrack_helper_register); static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data) { - struct nf_conn_help *help = nfct_help(exp->master); const struct nf_conntrack_helper *me = data; const struct nf_conntrack_helper *this; - if (exp->helper == me) - return true; - - this = rcu_dereference_protected(help->helper, + this = rcu_dereference_protected(exp->helper, lockdep_is_held(&nf_conntrack_expect_lock)); return this == me; } @@ -421,6 +417,11 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) nf_ct_expect_iterate_destroy(expect_iter_me, NULL); nf_ct_iterate_destroy(unhelp, me); + + /* nf_ct_iterate_destroy() does an unconditional synchronize_rcu() as + * last step, this ensures rcu readers of exp->helper are done. + * No need for another synchronize_rcu() here. + */ } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c9d725fc2d71c0..3f408f3713bb33 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -910,8 +910,8 @@ struct ctnetlink_filter { }; static const struct nla_policy cta_filter_nla_policy[CTA_FILTER_MAX + 1] = { - [CTA_FILTER_ORIG_FLAGS] = { .type = NLA_U32 }, - [CTA_FILTER_REPLY_FLAGS] = { .type = NLA_U32 }, + [CTA_FILTER_ORIG_FLAGS] = NLA_POLICY_MASK(NLA_U32, CTA_FILTER_F_ALL), + [CTA_FILTER_REPLY_FLAGS] = NLA_POLICY_MASK(NLA_U32, CTA_FILTER_F_ALL), }; static int ctnetlink_parse_filter(const struct nlattr *attr, @@ -925,17 +925,11 @@ static int ctnetlink_parse_filter(const struct nlattr *attr, if (ret) return ret; - if (tb[CTA_FILTER_ORIG_FLAGS]) { + if (tb[CTA_FILTER_ORIG_FLAGS]) filter->orig_flags = nla_get_u32(tb[CTA_FILTER_ORIG_FLAGS]); - if (filter->orig_flags & ~CTA_FILTER_F_ALL) - return -EOPNOTSUPP; - } - if (tb[CTA_FILTER_REPLY_FLAGS]) { + if (tb[CTA_FILTER_REPLY_FLAGS]) filter->reply_flags = nla_get_u32(tb[CTA_FILTER_REPLY_FLAGS]); - if (filter->reply_flags & ~CTA_FILTER_F_ALL) - return -EOPNOTSUPP; - } return 0; } @@ -2634,7 +2628,7 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING, .len = NF_CT_HELPER_NAME_LEN - 1 }, [CTA_EXPECT_ZONE] = { .type = NLA_U16 }, - [CTA_EXPECT_FLAGS] = { .type = NLA_U32 }, + [CTA_EXPECT_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NF_CT_EXPECT_MASK), [CTA_EXPECT_CLASS] = { .type = NLA_U32 }, [CTA_EXPECT_NAT] = { .type = NLA_NESTED }, [CTA_EXPECT_FN] = { .type = NLA_NUL_STRING }, @@ -3012,7 +3006,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, { struct nf_conn *master = exp->master; long timeout = ((long)exp->timeout.expires - (long)jiffies) / HZ; - struct nf_conn_help *help; + struct nf_conntrack_helper *helper; #if IS_ENABLED(CONFIG_NF_NAT) struct nlattr *nest_parms; struct nf_conntrack_tuple nat_tuple = {}; @@ -3057,15 +3051,12 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) || nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class))) goto nla_put_failure; - help = nfct_help(master); - if (help) { - struct nf_conntrack_helper *helper; - helper = rcu_dereference(help->helper); - if (helper && - nla_put_string(skb, CTA_EXPECT_HELP_NAME, helper->name)) - goto nla_put_failure; - } + helper = rcu_dereference(exp->helper); + if (helper && + nla_put_string(skb, CTA_EXPECT_HELP_NAME, helper->name)) + goto nla_put_failure; + expfn = nf_ct_helper_expectfn_find_by_symbol(exp->expectfn); if (expfn != NULL && nla_put_string(skb, CTA_EXPECT_FN, expfn->name)) @@ -3212,7 +3203,7 @@ ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); struct nf_conn *ct = cb->data; - struct nf_conn_help *help = nfct_help(ct); + struct nf_conn_help *help; u_int8_t l3proto = nfmsg->nfgen_family; unsigned long last_id = cb->args[1]; struct nf_conntrack_expect *exp; @@ -3220,6 +3211,10 @@ ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) if (cb->args[0]) return 0; + help = nfct_help(ct); + if (!help) + return 0; + rcu_read_lock(); restart: @@ -3249,6 +3244,24 @@ ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +static int ctnetlink_dump_exp_ct_start(struct netlink_callback *cb) +{ + struct nf_conn *ct = cb->data; + + if (!refcount_inc_not_zero(&ct->ct_general.use)) + return -ENOENT; + return 0; +} + +static int ctnetlink_dump_exp_ct_done(struct netlink_callback *cb) +{ + struct nf_conn *ct = cb->data; + + if (ct) + nf_ct_put(ct); + return 0; +} + static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, @@ -3264,6 +3277,8 @@ static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl, struct nf_conntrack_zone zone; struct netlink_dump_control c = { .dump = ctnetlink_exp_ct_dump_table, + .start = ctnetlink_dump_exp_ct_start, + .done = ctnetlink_dump_exp_ct_done, }; err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, @@ -3334,31 +3349,37 @@ static int ctnetlink_get_expect(struct sk_buff *skb, if (err < 0) return err; + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + spin_lock_bh(&nf_conntrack_expect_lock); exp = nf_ct_expect_find_get(info->net, &zone, &tuple); - if (!exp) + if (!exp) { + spin_unlock_bh(&nf_conntrack_expect_lock); + kfree_skb(skb2); return -ENOENT; + } if (cda[CTA_EXPECT_ID]) { __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]); if (id != nf_expect_get_id(exp)) { nf_ct_expect_put(exp); + spin_unlock_bh(&nf_conntrack_expect_lock); + kfree_skb(skb2); return -ENOENT; } } - skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!skb2) { - nf_ct_expect_put(exp); - return -ENOMEM; - } - rcu_read_lock(); err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp); rcu_read_unlock(); nf_ct_expect_put(exp); + spin_unlock_bh(&nf_conntrack_expect_lock); + if (err <= 0) { kfree_skb(skb2); return -ENOMEM; @@ -3370,12 +3391,9 @@ static int ctnetlink_get_expect(struct sk_buff *skb, static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data) { struct nf_conntrack_helper *helper; - const struct nf_conn_help *m_help; const char *name = data; - m_help = nfct_help(exp->master); - - helper = rcu_dereference(m_help->helper); + helper = rcu_dereference(exp->helper); if (!helper) return false; @@ -3408,22 +3426,26 @@ static int ctnetlink_del_expect(struct sk_buff *skb, if (err < 0) return err; + spin_lock_bh(&nf_conntrack_expect_lock); + /* bump usage count to 2 */ exp = nf_ct_expect_find_get(info->net, &zone, &tuple); - if (!exp) + if (!exp) { + spin_unlock_bh(&nf_conntrack_expect_lock); return -ENOENT; + } if (cda[CTA_EXPECT_ID]) { __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]); if (id != nf_expect_get_id(exp)) { nf_ct_expect_put(exp); + spin_unlock_bh(&nf_conntrack_expect_lock); return -ENOENT; } } /* after list removal, usage count == 1 */ - spin_lock_bh(&nf_conntrack_expect_lock); if (timer_delete(&exp->timeout)) { nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid, nlmsg_report(info->nlh)); @@ -3465,7 +3487,7 @@ ctnetlink_change_expect(struct nf_conntrack_expect *x, #if IS_ENABLED(CONFIG_NF_NAT) static const struct nla_policy exp_nat_nla_policy[CTA_EXPECT_NAT_MAX+1] = { - [CTA_EXPECT_NAT_DIR] = { .type = NLA_U32 }, + [CTA_EXPECT_NAT_DIR] = NLA_POLICY_MAX(NLA_BE32, IP_CT_DIR_REPLY), [CTA_EXPECT_NAT_TUPLE] = { .type = NLA_NESTED }, }; #endif @@ -3510,9 +3532,10 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *mask) { - u_int32_t class = 0; + struct net *net = read_pnet(&ct->ct_net); struct nf_conntrack_expect *exp; struct nf_conn_help *help; + u32 class = 0; int err; help = nfct_help(ct); @@ -3549,7 +3572,13 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, exp->class = class; exp->master = ct; - exp->helper = helper; + write_pnet(&exp->net, net); +#ifdef CONFIG_NF_CONNTRACK_ZONES + exp->zone = ct->zone; +#endif + if (!helper) + helper = rcu_dereference(help->helper); + rcu_assign_pointer(exp->helper, helper); exp->tuple = *tuple; exp->mask.src.u3 = mask->src.u3; exp->mask.src.u.all = mask->src.u.all; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 7c6f7c9f73320d..645d2c43ebf7af 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -582,7 +582,8 @@ static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, } static const struct nla_policy sctp_nla_policy[CTA_PROTOINFO_SCTP_MAX+1] = { - [CTA_PROTOINFO_SCTP_STATE] = { .type = NLA_U8 }, + [CTA_PROTOINFO_SCTP_STATE] = NLA_POLICY_MAX(NLA_U8, + SCTP_CONNTRACK_HEARTBEAT_SENT), [CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] = { .type = NLA_U32 }, [CTA_PROTOINFO_SCTP_VTAG_REPLY] = { .type = NLA_U32 }, }; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 0c1d086e96cb3f..b67426c2189b2d 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1385,9 +1385,9 @@ static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, } static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = { - [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 }, - [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 }, - [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 }, + [CTA_PROTOINFO_TCP_STATE] = NLA_POLICY_MAX(NLA_U8, TCP_CONNTRACK_SYN_SENT2), + [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = NLA_POLICY_MAX(NLA_U8, TCP_MAX_WSCALE), + [CTA_PROTOINFO_TCP_WSCALE_REPLY] = NLA_POLICY_MAX(NLA_U8, TCP_MAX_WSCALE), [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) }, [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) }, }; @@ -1414,10 +1414,6 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) if (err < 0) return err; - if (tb[CTA_PROTOINFO_TCP_STATE] && - nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX) - return -EINVAL; - spin_lock_bh(&ct->lock); if (tb[CTA_PROTOINFO_TCP_STATE]) ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]); diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index ca748f8dbff130..939502ff7c8713 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -924,7 +924,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple); if (!exp || exp->master == ct || - nfct_help(exp->master)->helper != nfct_help(ct)->helper || + exp->helper != nfct_help(ct)->helper || exp->class != class) break; #if IS_ENABLED(CONFIG_NF_NAT) @@ -1040,6 +1040,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, unsigned int port; const struct sdp_media_type *t; int ret = NF_ACCEPT; + bool have_rtp_addr = false; hooks = rcu_dereference(nf_nat_sip_hooks); @@ -1056,8 +1057,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, caddr_len = 0; if (ct_sip_parse_sdp_addr(ct, *dptr, sdpoff, *datalen, SDP_HDR_CONNECTION, SDP_HDR_MEDIA, - &matchoff, &matchlen, &caddr) > 0) + &matchoff, &matchlen, &caddr) > 0) { caddr_len = matchlen; + memcpy(&rtp_addr, &caddr, sizeof(rtp_addr)); + have_rtp_addr = true; + } mediaoff = sdpoff; for (i = 0; i < ARRAY_SIZE(sdp_media_types); ) { @@ -1091,9 +1095,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, &matchoff, &matchlen, &maddr) > 0) { maddr_len = matchlen; memcpy(&rtp_addr, &maddr, sizeof(rtp_addr)); - } else if (caddr_len) + have_rtp_addr = true; + } else if (caddr_len) { memcpy(&rtp_addr, &caddr, sizeof(rtp_addr)); - else { + have_rtp_addr = true; + } else { nf_ct_helper_log(skb, ct, "cannot parse SDP message"); return NF_DROP; } @@ -1125,7 +1131,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, /* Update session connection and owner addresses */ hooks = rcu_dereference(nf_nat_sip_hooks); - if (hooks && ct->status & IPS_NAT_MASK) + if (hooks && ct->status & IPS_NAT_MASK && have_rtp_addr) ret = hooks->sdp_session(skb, protoff, dataoff, dptr, datalen, sdpoff, &rtp_addr); @@ -1297,7 +1303,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct), saddr, &daddr, proto, NULL, &port); exp->timeout.expires = sip_timeout * HZ; - exp->helper = helper; + rcu_assign_pointer(exp->helper, helper); exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE; hooks = rcu_dereference(nf_nat_sip_hooks); @@ -1534,11 +1540,12 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, { struct tcphdr *th, _tcph; unsigned int dataoff, datalen; - unsigned int matchoff, matchlen, clen; + unsigned int matchoff, matchlen; unsigned int msglen, origlen; const char *dptr, *end; s16 diff, tdiff = 0; int ret = NF_ACCEPT; + unsigned long clen; bool term; if (ctinfo != IP_CT_ESTABLISHED && @@ -1573,6 +1580,9 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, if (dptr + matchoff == end) break; + if (clen > datalen) + break; + term = false; for (; end + strlen("\r\n\r\n") <= dptr + datalen; end++) { if (end[0] == '\r' && end[1] == '\n' && diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 3fdb10d9bf7f2c..fd56d663cb5b02 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -738,6 +738,7 @@ static int nf_flow_encap_push(struct sk_buff *skb, switch (tuple->encap[i].proto) { case htons(ETH_P_8021Q): case htons(ETH_P_8021AD): + skb_reset_mac_header(skb); if (skb_vlan_push(skb, tuple->encap[i].proto, tuple->encap[i].id) < 0) return -1; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index dacec5f8a11c46..3922cff1bb3d96 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -6744,8 +6744,8 @@ static void __nft_set_elem_expr_destroy(const struct nft_ctx *ctx, } } -static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx, - struct nft_set_elem_expr *elem_expr) +void nft_set_elem_expr_destroy(const struct nft_ctx *ctx, + struct nft_set_elem_expr *elem_expr) { struct nft_expr *expr; u32 size; @@ -7156,8 +7156,7 @@ static u32 nft_set_maxsize(const struct nft_set *set) } static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, - const struct nlattr *attr, u32 nlmsg_flags, - bool last) + const struct nlattr *attr, u32 nlmsg_flags) { struct nft_expr *expr_array[NFT_SET_EXPR_MAX] = {}; struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; @@ -7444,11 +7443,6 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (flags) *nft_set_ext_flags(ext) = flags; - if (last) - elem.flags = NFT_SET_ELEM_INTERNAL_LAST; - else - elem.flags = 0; - if (obj) *nft_set_ext_obj(ext) = obj; @@ -7613,8 +7607,7 @@ static int nf_tables_newsetelem(struct sk_buff *skb, nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { - err = nft_add_set_elem(&ctx, set, attr, info->nlh->nlmsg_flags, - nla_is_last(attr, rem)); + err = nft_add_set_elem(&ctx, set, attr, info->nlh->nlmsg_flags); if (err < 0) { NL_SET_BAD_ATTR(extack, attr); return err; @@ -7738,7 +7731,7 @@ static void nft_trans_elems_destroy_abort(const struct nft_ctx *ctx, } static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, - const struct nlattr *attr, bool last) + const struct nlattr *attr) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; struct nft_set_ext_tmpl tmpl; @@ -7806,11 +7799,6 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (flags) *nft_set_ext_flags(ext) = flags; - if (last) - elem.flags = NFT_SET_ELEM_INTERNAL_LAST; - else - elem.flags = 0; - trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set); if (trans == NULL) goto fail_trans; @@ -7961,8 +7949,7 @@ static int nf_tables_delsetelem(struct sk_buff *skb, return nft_set_flush(&ctx, set, genmask); nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { - err = nft_del_setelem(&ctx, set, attr, - nla_is_last(attr, rem)); + err = nft_del_setelem(&ctx, set, attr); if (err == -ENOENT && NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYSETELEM) continue; @@ -9216,6 +9203,7 @@ static int nf_tables_newflowtable(struct sk_buff *skb, return 0; err_flowtable_hooks: + synchronize_rcu(); nft_trans_destroy(trans); err_flowtable_trans: nft_hooks_destroy(&flowtable->hook_list); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b35a90955e2efe..fcbe54940b2ec3 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -647,15 +647,11 @@ __build_packet_message(struct nfnl_log_net *log, if (data_len) { struct nlattr *nla; - int size = nla_attr_size(data_len); - if (skb_tailroom(inst->skb) < nla_total_size(data_len)) + nla = nla_reserve(inst->skb, NFULA_PAYLOAD, data_len); + if (!nla) goto nla_put_failure; - nla = skb_put(inst->skb, nla_total_size(data_len)); - nla->nla_type = NFULA_PAYLOAD; - nla->nla_len = size; - if (skb_copy_bits(skb, 0, nla_data(nla), data_len)) BUG(); } diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 94e3eac5743ae3..45d9ad231a9204 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -302,7 +302,9 @@ static int nfnl_osf_add_callback(struct sk_buff *skb, { struct nf_osf_user_finger *f; struct nf_osf_finger *kf = NULL, *sf; + unsigned int tot_opt_len = 0; int err = 0; + int i; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -318,6 +320,17 @@ static int nfnl_osf_add_callback(struct sk_buff *skb, if (f->opt_num > ARRAY_SIZE(f->opt)) return -EINVAL; + for (i = 0; i < f->opt_num; i++) { + if (!f->opt[i].length || f->opt[i].length > MAX_IPOPTLEN) + return -EINVAL; + if (f->opt[i].kind == OSFOPT_MSS && f->opt[i].length < 4) + return -EINVAL; + + tot_opt_len += f->opt[i].length; + if (tot_opt_len > MAX_IPOPTLEN) + return -EINVAL; + } + if (!memchr(f->genre, 0, MAXGENRELEN) || !memchr(f->subtype, 0, MAXGENRELEN) || !memchr(f->version, 0, MAXGENRELEN)) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 47d3ef109a9963..128ff8155b5de1 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -23,6 +23,7 @@ #include #include #include +#include "nf_internals.h" struct nft_ct_helper_obj { struct nf_conntrack_helper *helper4; @@ -543,6 +544,7 @@ static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv) #endif #ifdef CONFIG_NF_CONNTRACK_ZONES case NFT_CT_ZONE: + nf_queue_nf_hook_drop(ctx->net); mutex_lock(&nft_ct_pcpu_mutex); if (--nft_ct_pcpu_template_refcnt == 0) nft_ct_tmpl_put_pcpu(); @@ -1015,6 +1017,7 @@ static void nft_ct_timeout_obj_destroy(const struct nft_ctx *ctx, struct nft_ct_timeout_obj *priv = nft_obj_data(obj); struct nf_ct_timeout *timeout = priv->timeout; + nf_queue_nf_hook_drop(ctx->net); nf_ct_untimeout(ctx->net, timeout); nf_ct_netns_put(ctx->net, ctx->family); kfree(priv->timeout); @@ -1147,6 +1150,7 @@ static void nft_ct_helper_obj_destroy(const struct nft_ctx *ctx, { struct nft_ct_helper_obj *priv = nft_obj_data(obj); + nf_queue_nf_hook_drop(ctx->net); if (priv->helper4) nf_conntrack_helper_put(priv->helper4); if (priv->helper6) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 7807d812966464..9123277be03ced 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -30,18 +30,26 @@ static int nft_dynset_expr_setup(const struct nft_dynset *priv, const struct nft_set_ext *ext) { struct nft_set_elem_expr *elem_expr = nft_set_ext_expr(ext); + struct nft_ctx ctx = { + .net = read_pnet(&priv->set->net), + .family = priv->set->table->family, + }; struct nft_expr *expr; int i; for (i = 0; i < priv->num_exprs; i++) { expr = nft_setelem_expr_at(elem_expr, elem_expr->size); if (nft_expr_clone(expr, priv->expr_array[i], GFP_ATOMIC) < 0) - return -1; + goto err_out; elem_expr->size += priv->expr_array[i]->ops->size; } return 0; +err_out: + nft_set_elem_expr_destroy(&ctx, elem_expr); + + return -1; } struct nft_elem_priv *nft_dynset_new(struct nft_set *set, diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index 7ff90325c97fa8..6395982e4d95c5 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -242,7 +242,7 @@ static int nft_pipapo_avx2_lookup_4b_2(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -319,7 +319,7 @@ static int nft_pipapo_avx2_lookup_4b_4(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -414,7 +414,7 @@ static int nft_pipapo_avx2_lookup_4b_8(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -505,7 +505,7 @@ static int nft_pipapo_avx2_lookup_4b_12(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -641,7 +641,7 @@ static int nft_pipapo_avx2_lookup_4b_32(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -699,7 +699,7 @@ static int nft_pipapo_avx2_lookup_8b_1(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -764,7 +764,7 @@ static int nft_pipapo_avx2_lookup_8b_2(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -839,7 +839,7 @@ static int nft_pipapo_avx2_lookup_8b_4(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -925,7 +925,7 @@ static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -1019,7 +1019,7 @@ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index ee3d4f5b9ff76e..737c339decd0c5 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -304,19 +304,10 @@ static void nft_rbtree_set_start_cookie(struct nft_rbtree *priv, priv->start_rbe_cookie = (unsigned long)rbe; } -static void nft_rbtree_set_start_cookie_open(struct nft_rbtree *priv, - const struct nft_rbtree_elem *rbe, - unsigned long open_interval) -{ - priv->start_rbe_cookie = (unsigned long)rbe | open_interval; -} - -#define NFT_RBTREE_OPEN_INTERVAL 1UL - static bool nft_rbtree_cmp_start_cookie(struct nft_rbtree *priv, const struct nft_rbtree_elem *rbe) { - return (priv->start_rbe_cookie & ~NFT_RBTREE_OPEN_INTERVAL) == (unsigned long)rbe; + return priv->start_rbe_cookie == (unsigned long)rbe; } static bool nft_rbtree_insert_same_interval(const struct net *net, @@ -346,14 +337,13 @@ static bool nft_rbtree_insert_same_interval(const struct net *net, static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, struct nft_rbtree_elem *new, - struct nft_elem_priv **elem_priv, u64 tstamp, bool last) + struct nft_elem_priv **elem_priv, u64 tstamp) { struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL, *rbe_prev; struct rb_node *node, *next, *parent, **p, *first = NULL; struct nft_rbtree *priv = nft_set_priv(set); u8 cur_genmask = nft_genmask_cur(net); u8 genmask = nft_genmask_next(net); - unsigned long open_interval = 0; int d; /* Descend the tree to search for an existing element greater than the @@ -459,18 +449,10 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, } } - if (nft_rbtree_interval_null(set, new)) { + if (nft_rbtree_interval_null(set, new)) + priv->start_rbe_cookie = 0; + else if (nft_rbtree_interval_start(new) && priv->start_rbe_cookie) priv->start_rbe_cookie = 0; - } else if (nft_rbtree_interval_start(new) && priv->start_rbe_cookie) { - if (nft_set_is_anonymous(set)) { - priv->start_rbe_cookie = 0; - } else if (priv->start_rbe_cookie & NFT_RBTREE_OPEN_INTERVAL) { - /* Previous element is an open interval that partially - * overlaps with an existing non-open interval. - */ - return -ENOTEMPTY; - } - } /* - new start element matching existing start element: full overlap * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given. @@ -478,27 +460,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) && nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) { *elem_priv = &rbe_ge->priv; - - /* - Corner case: new start element of open interval (which - * comes as last element in the batch) overlaps the start of - * an existing interval with an end element: partial overlap. - */ - node = rb_first(&priv->root); - rbe = __nft_rbtree_next_active(node, genmask); - if (rbe && nft_rbtree_interval_end(rbe)) { - rbe = nft_rbtree_next_active(rbe, genmask); - if (rbe && - nft_rbtree_interval_start(rbe) && - !nft_rbtree_cmp(set, new, rbe)) { - if (last) - return -ENOTEMPTY; - - /* Maybe open interval? */ - open_interval = NFT_RBTREE_OPEN_INTERVAL; - } - } - nft_rbtree_set_start_cookie_open(priv, rbe_ge, open_interval); - + nft_rbtree_set_start_cookie(priv, rbe_ge); return -EEXIST; } @@ -553,12 +515,6 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new)) return -ENOTEMPTY; - /* - start element overlaps an open interval but end element is new: - * partial overlap, reported as -ENOEMPTY. - */ - if (!rbe_ge && priv->start_rbe_cookie && nft_rbtree_interval_end(new)) - return -ENOTEMPTY; - /* Accepted element: pick insertion point depending on key value */ parent = NULL; p = &priv->root.rb_node; @@ -616,14 +572,12 @@ static struct nft_array *nft_array_alloc(u32 max_intervals) return array; } -#define NFT_ARRAY_EXTRA_SIZE 10240 - /* Similar to nft_rbtree_{u,k}size to hide details to userspace, but consider * packed representation coming from userspace for anonymous sets too. */ static u32 nft_array_elems(const struct nft_set *set) { - u32 nelems = atomic_read(&set->nelems); + u32 nelems = atomic_read(&set->nelems) - set->ndeact; /* Adjacent intervals are represented with a single start element in * anonymous sets, use the current element counter as is. @@ -639,27 +593,87 @@ static u32 nft_array_elems(const struct nft_set *set) return (nelems / 2) + 2; } -static int nft_array_may_resize(const struct nft_set *set) +#define NFT_ARRAY_INITIAL_SIZE 1024 +#define NFT_ARRAY_INITIAL_ANON_SIZE 16 +#define NFT_ARRAY_INITIAL_ANON_THRESH (8192U / sizeof(struct nft_array_interval)) + +static int nft_array_may_resize(const struct nft_set *set, bool flush) { - u32 nelems = nft_array_elems(set), new_max_intervals; + u32 initial_intervals, max_intervals, new_max_intervals, delta; + u32 shrinked_max_intervals, nelems = nft_array_elems(set); struct nft_rbtree *priv = nft_set_priv(set); struct nft_array *array; - if (!priv->array_next) { - array = nft_array_alloc(nelems + NFT_ARRAY_EXTRA_SIZE); - if (!array) - return -ENOMEM; + if (nft_set_is_anonymous(set)) + initial_intervals = NFT_ARRAY_INITIAL_ANON_SIZE; + else + initial_intervals = NFT_ARRAY_INITIAL_SIZE; + + if (priv->array_next) { + max_intervals = priv->array_next->max_intervals; + new_max_intervals = priv->array_next->max_intervals; + } else { + if (priv->array) { + max_intervals = priv->array->max_intervals; + new_max_intervals = priv->array->max_intervals; + } else { + max_intervals = 0; + new_max_intervals = initial_intervals; + } + } - priv->array_next = array; + if (nft_set_is_anonymous(set)) + goto maybe_grow; + + if (flush) { + /* Set flush just started, nelems still report elements.*/ + nelems = 0; + new_max_intervals = NFT_ARRAY_INITIAL_SIZE; + goto realloc_array; } - if (nelems < priv->array_next->max_intervals) - return 0; + if (check_add_overflow(new_max_intervals, new_max_intervals, + &shrinked_max_intervals)) + return -EOVERFLOW; + + shrinked_max_intervals = DIV_ROUND_UP(shrinked_max_intervals, 3); + + if (shrinked_max_intervals > NFT_ARRAY_INITIAL_SIZE && + nelems < shrinked_max_intervals) { + new_max_intervals = shrinked_max_intervals; + goto realloc_array; + } +maybe_grow: + if (nelems > new_max_intervals) { + if (nft_set_is_anonymous(set) && + new_max_intervals < NFT_ARRAY_INITIAL_ANON_THRESH) { + new_max_intervals <<= 1; + } else { + delta = new_max_intervals >> 1; + if (check_add_overflow(new_max_intervals, delta, + &new_max_intervals)) + return -EOVERFLOW; + } + } - new_max_intervals = priv->array_next->max_intervals + NFT_ARRAY_EXTRA_SIZE; - if (nft_array_intervals_alloc(priv->array_next, new_max_intervals) < 0) +realloc_array: + if (WARN_ON_ONCE(nelems > new_max_intervals)) return -ENOMEM; + if (priv->array_next) { + if (max_intervals == new_max_intervals) + return 0; + + if (nft_array_intervals_alloc(priv->array_next, new_max_intervals) < 0) + return -ENOMEM; + } else { + array = nft_array_alloc(new_max_intervals); + if (!array) + return -ENOMEM; + + priv->array_next = array; + } + return 0; } @@ -668,14 +682,13 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, struct nft_elem_priv **elem_priv) { struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem->priv); - bool last = !!(elem->flags & NFT_SET_ELEM_INTERNAL_LAST); struct nft_rbtree *priv = nft_set_priv(set); u64 tstamp = nft_net_tstamp(net); int err; nft_rbtree_maybe_reset_start_cookie(priv, tstamp); - if (nft_array_may_resize(set) < 0) + if (nft_array_may_resize(set, false) < 0) return -ENOMEM; do { @@ -685,12 +698,8 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, cond_resched(); write_lock_bh(&priv->lock); - err = __nft_rbtree_insert(net, set, rbe, elem_priv, tstamp, last); + err = __nft_rbtree_insert(net, set, rbe, elem_priv, tstamp); write_unlock_bh(&priv->lock); - - if (nft_rbtree_interval_end(rbe)) - priv->start_rbe_cookie = 0; - } while (err == -EAGAIN); return err; @@ -778,7 +787,6 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_rbtree_elem *rbe, *this = nft_elem_priv_cast(elem->priv); - bool last = !!(elem->flags & NFT_SET_ELEM_INTERNAL_LAST); struct nft_rbtree *priv = nft_set_priv(set); const struct rb_node *parent = priv->root.rb_node; u8 genmask = nft_genmask_next(net); @@ -791,7 +799,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set, nft_rbtree_interval_null(set, this)) priv->start_rbe_cookie = 0; - if (nft_array_may_resize(set) < 0) + if (nft_array_may_resize(set, false) < 0) return NULL; while (parent != NULL) { @@ -819,10 +827,9 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set, continue; } - if (nft_rbtree_interval_start(rbe)) { - if (!last) - nft_rbtree_set_start_cookie(priv, rbe); - } else if (!nft_rbtree_deactivate_same_interval(net, priv, rbe)) + if (nft_rbtree_interval_start(rbe)) + nft_rbtree_set_start_cookie(priv, rbe); + else if (!nft_rbtree_deactivate_same_interval(net, priv, rbe)) return NULL; nft_rbtree_flush(net, set, &rbe->priv); @@ -862,7 +869,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, switch (iter->type) { case NFT_ITER_UPDATE_CLONE: - if (nft_array_may_resize(set) < 0) { + if (nft_array_may_resize(set, true) < 0) { iter->err = -ENOMEM; break; } diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 3ba94c34297cf5..498f5871c84a0e 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -16,6 +16,7 @@ #include #include #include +#include "nf_internals.h" static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct) { @@ -283,6 +284,9 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, struct nf_conn_help *help; if (ct) { + if (info->helper[0] || info->timeout[0]) + nf_queue_nf_hook_drop(par->net); + help = nfct_help(ct); xt_ct_put_helper(help); diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index 00319d2a54da23..d9d74011bb6453 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -223,13 +223,13 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par) localtime_2(¤t_time, stamp); - if (!(info->weekdays_match & (1 << current_time.weekday))) + if (!(info->weekdays_match & (1U << current_time.weekday))) return false; /* Do not spend time computing monthday if all days match anyway */ if (info->monthdays_match != XT_TIME_ALL_MONTHDAYS) { localtime_3(¤t_time, stamp); - if (!(info->monthdays_match & (1 << current_time.monthday))) + if (!(info->monthdays_match & (1U << current_time.monthday))) return false; } diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 43d871525dbc1c..5f46c4b5720f6c 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -579,8 +579,7 @@ static int nci_close_device(struct nci_dev *ndev) skb_queue_purge(&ndev->rx_q); skb_queue_purge(&ndev->tx_q); - /* Flush RX and TX wq */ - flush_workqueue(ndev->rx_wq); + /* Flush TX wq, RX wq flush can't be under the lock */ flush_workqueue(ndev->tx_wq); /* Reset device */ @@ -592,13 +591,13 @@ static int nci_close_device(struct nci_dev *ndev) msecs_to_jiffies(NCI_RESET_TIMEOUT)); /* After this point our queues are empty - * and no works are scheduled. + * rx work may be running but will see that NCI_UP was cleared */ ndev->ops->close(ndev); clear_bit(NCI_INIT, &ndev->flags); - /* Flush cmd wq */ + /* Flush cmd and tx wq */ flush_workqueue(ndev->cmd_wq); timer_delete_sync(&ndev->cmd_timer); @@ -613,6 +612,9 @@ static int nci_close_device(struct nci_dev *ndev) mutex_unlock(&ndev->req_lock); + /* rx_work may take req_lock via nci_deactivate_target */ + flush_workqueue(ndev->rx_wq); + return 0; } diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 67fbf6e48a3010..13052408a132f4 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2953,6 +2953,8 @@ static int validate_set(const struct nlattr *a, case OVS_KEY_ATTR_MPLS: if (!eth_p_mpls(eth_type)) return -EINVAL; + if (key_len != sizeof(struct ovs_key_mpls)) + return -EINVAL; break; case OVS_KEY_ATTR_SCTP: diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 6574f9bcdc0268..12055af832dc08 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -151,11 +151,15 @@ static void vport_netdev_free(struct rcu_head *rcu) void ovs_netdev_detach_dev(struct vport *vport) { ASSERT_RTNL(); - vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; netdev_rx_handler_unregister(vport->dev); netdev_upper_dev_unlink(vport->dev, netdev_master_upper_dev_get(vport->dev)); dev_set_promiscuity(vport->dev, -1); + + /* paired with smp_mb() in netdev_destroy() */ + smp_wmb(); + + vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; } static void netdev_destroy(struct vport *vport) @@ -174,6 +178,9 @@ static void netdev_destroy(struct vport *vport) rtnl_unlock(); } + /* paired with smp_wmb() in ovs_netdev_detach_dev() */ + smp_mb(); + call_rcu(&vport->rcu, vport_netdev_free); } @@ -189,8 +196,6 @@ void ovs_netdev_tunnel_destroy(struct vport *vport) */ if (vport->dev->reg_state == NETREG_REGISTERED) rtnl_delete_link(vport->dev, 0, NULL); - netdev_put(vport->dev, &vport->dev_tracker); - vport->dev = NULL; rtnl_unlock(); call_rcu(&vport->rcu, vport_netdev_free); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 72d0935139f0f5..bb2d88205e5a65 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3135,6 +3135,7 @@ static int packet_release(struct socket *sock) spin_lock(&po->bind_lock); unregister_prot_hook(sk, false); + WRITE_ONCE(po->num, 0); packet_cached_dev_reset(po); if (po->prot_hook.dev) { diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 238a9638d2b0f6..d89225d6bfd3bf 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -129,9 +129,12 @@ static int pn_header_create(struct sk_buff *skb, struct net_device *dev, return 1; } -static int pn_header_parse(const struct sk_buff *skb, unsigned char *haddr) +static int pn_header_parse(const struct sk_buff *skb, + const struct net_device *dev, + unsigned char *haddr) { const u8 *media = skb_mac_header(skb); + *haddr = *media; return 1; } diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 841d62481048de..ba56213e0a2aaf 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -811,6 +811,11 @@ static int rose_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int goto out_release; } + if (sk->sk_state == TCP_SYN_SENT) { + err = -EALREADY; + goto out_release; + } + sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 98ffe64de51f43..9e726c3bd86bce 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1288,33 +1288,6 @@ static void dev_deactivate_queue(struct net_device *dev, } } -static void dev_reset_queue(struct net_device *dev, - struct netdev_queue *dev_queue, - void *_unused) -{ - struct Qdisc *qdisc; - bool nolock; - - qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); - if (!qdisc) - return; - - nolock = qdisc->flags & TCQ_F_NOLOCK; - - if (nolock) - spin_lock_bh(&qdisc->seqlock); - spin_lock_bh(qdisc_lock(qdisc)); - - qdisc_reset(qdisc); - - spin_unlock_bh(qdisc_lock(qdisc)); - if (nolock) { - clear_bit(__QDISC_STATE_MISSED, &qdisc->state); - clear_bit(__QDISC_STATE_DRAINING, &qdisc->state); - spin_unlock_bh(&qdisc->seqlock); - } -} - static bool some_qdisc_is_busy(struct net_device *dev) { unsigned int i; diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index cc6051d4f2ef88..c3e18bae8fbfc7 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -113,14 +113,15 @@ static void ingress_destroy(struct Qdisc *sch) { struct ingress_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); - struct bpf_mprog_entry *entry = rtnl_dereference(dev->tcx_ingress); + struct bpf_mprog_entry *entry; if (sch->parent != TC_H_INGRESS) return; tcf_block_put_ext(q->block, sch, &q->block_info); - if (entry) { + if (mini_qdisc_pair_inited(&q->miniqp)) { + entry = rtnl_dereference(dev->tcx_ingress); tcx_miniq_dec(entry); if (!tcx_entry_is_active(entry)) { tcx_entry_update(dev, NULL, true); @@ -290,10 +291,9 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt, static void clsact_destroy(struct Qdisc *sch) { + struct bpf_mprog_entry *ingress_entry, *egress_entry; struct clsact_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); - struct bpf_mprog_entry *ingress_entry = rtnl_dereference(dev->tcx_ingress); - struct bpf_mprog_entry *egress_entry = rtnl_dereference(dev->tcx_egress); if (sch->parent != TC_H_CLSACT) return; @@ -301,7 +301,8 @@ static void clsact_destroy(struct Qdisc *sch) tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info); tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info); - if (ingress_entry) { + if (mini_qdisc_pair_inited(&q->miniqp_ingress)) { + ingress_entry = rtnl_dereference(dev->tcx_ingress); tcx_miniq_dec(ingress_entry); if (!tcx_entry_is_active(ingress_entry)) { tcx_entry_update(dev, NULL, true); @@ -309,7 +310,8 @@ static void clsact_destroy(struct Qdisc *sch) } } - if (egress_entry) { + if (mini_qdisc_pair_inited(&q->miniqp_egress)) { + egress_entry = rtnl_dereference(dev->tcx_egress); tcx_miniq_dec(egress_entry); if (!tcx_entry_is_active(egress_entry)) { tcx_entry_update(dev, NULL, false); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 783300d8b01975..ec4039a201a2c2 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -146,15 +146,12 @@ teql_destroy(struct Qdisc *sch) master->slaves = NEXT_SLAVE(q); if (q == master->slaves) { struct netdev_queue *txq; - spinlock_t *root_lock; txq = netdev_get_tx_queue(master->dev, 0); master->slaves = NULL; - root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc)); - spin_lock_bh(root_lock); - qdisc_reset(rtnl_dereference(txq->qdisc)); - spin_unlock_bh(root_lock); + dev_reset_queue(master->dev, + txq, NULL); } } skb_queue_purge(&dat->q); diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c index 3fd6629cb99923..94bc9c7382ea62 100644 --- a/net/shaper/shaper.c +++ b/net/shaper/shaper.c @@ -36,29 +36,26 @@ static struct net_shaper_binding *net_shaper_binding_from_ctx(void *ctx) return &((struct net_shaper_nl_ctx *)ctx)->binding; } -static void net_shaper_lock(struct net_shaper_binding *binding) +static struct net_shaper_hierarchy * +net_shaper_hierarchy(struct net_shaper_binding *binding) { - switch (binding->type) { - case NET_SHAPER_BINDING_TYPE_NETDEV: - netdev_lock(binding->netdev); - break; - } -} + /* Pairs with WRITE_ONCE() in net_shaper_hierarchy_setup. */ + if (binding->type == NET_SHAPER_BINDING_TYPE_NETDEV) + return READ_ONCE(binding->netdev->net_shaper_hierarchy); -static void net_shaper_unlock(struct net_shaper_binding *binding) -{ - switch (binding->type) { - case NET_SHAPER_BINDING_TYPE_NETDEV: - netdev_unlock(binding->netdev); - break; - } + /* No other type supported yet. */ + return NULL; } static struct net_shaper_hierarchy * -net_shaper_hierarchy(struct net_shaper_binding *binding) +net_shaper_hierarchy_rcu(struct net_shaper_binding *binding) { - /* Pairs with WRITE_ONCE() in net_shaper_hierarchy_setup. */ - if (binding->type == NET_SHAPER_BINDING_TYPE_NETDEV) + /* Readers look up the device and take a ref, then take RCU lock + * later at which point netdev may have been unregistered and flushed. + * READ_ONCE() pairs with WRITE_ONCE() in net_shaper_hierarchy_setup. + */ + if (binding->type == NET_SHAPER_BINDING_TYPE_NETDEV && + READ_ONCE(binding->netdev->reg_state) <= NETREG_REGISTERED) return READ_ONCE(binding->netdev->net_shaper_hierarchy); /* No other type supported yet. */ @@ -204,12 +201,49 @@ static int net_shaper_ctx_setup(const struct genl_info *info, int type, return 0; } +/* Like net_shaper_ctx_setup(), but for "write" handlers (never for dumps!) + * Acquires the lock protecting the hierarchy (instance lock for netdev). + */ +static int net_shaper_ctx_setup_lock(const struct genl_info *info, int type, + struct net_shaper_nl_ctx *ctx) +{ + struct net *ns = genl_info_net(info); + struct net_device *dev; + int ifindex; + + if (GENL_REQ_ATTR_CHECK(info, type)) + return -EINVAL; + + ifindex = nla_get_u32(info->attrs[type]); + dev = netdev_get_by_index_lock(ns, ifindex); + if (!dev) { + NL_SET_BAD_ATTR(info->extack, info->attrs[type]); + return -ENOENT; + } + + if (!dev->netdev_ops->net_shaper_ops) { + NL_SET_BAD_ATTR(info->extack, info->attrs[type]); + netdev_unlock(dev); + return -EOPNOTSUPP; + } + + ctx->binding.type = NET_SHAPER_BINDING_TYPE_NETDEV; + ctx->binding.netdev = dev; + return 0; +} + static void net_shaper_ctx_cleanup(struct net_shaper_nl_ctx *ctx) { if (ctx->binding.type == NET_SHAPER_BINDING_TYPE_NETDEV) netdev_put(ctx->binding.netdev, &ctx->dev_tracker); } +static void net_shaper_ctx_cleanup_unlock(struct net_shaper_nl_ctx *ctx) +{ + if (ctx->binding.type == NET_SHAPER_BINDING_TYPE_NETDEV) + netdev_unlock(ctx->binding.netdev); +} + static u32 net_shaper_handle_to_index(const struct net_shaper_handle *handle) { return FIELD_PREP(NET_SHAPER_SCOPE_MASK, handle->scope) | @@ -251,9 +285,10 @@ static struct net_shaper * net_shaper_lookup(struct net_shaper_binding *binding, const struct net_shaper_handle *handle) { - struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding); u32 index = net_shaper_handle_to_index(handle); + struct net_shaper_hierarchy *hierarchy; + hierarchy = net_shaper_hierarchy_rcu(binding); if (!hierarchy || xa_get_mark(&hierarchy->shapers, index, NET_SHAPER_NOT_VALID)) return NULL; @@ -262,7 +297,7 @@ net_shaper_lookup(struct net_shaper_binding *binding, } /* Allocate on demand the per device shaper's hierarchy container. - * Called under the net shaper lock + * Called under the lock protecting the hierarchy (instance lock for netdev) */ static struct net_shaper_hierarchy * net_shaper_hierarchy_setup(struct net_shaper_binding *binding) @@ -681,6 +716,22 @@ void net_shaper_nl_post_doit(const struct genl_split_ops *ops, net_shaper_generic_post(info); } +int net_shaper_nl_pre_doit_write(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)info->ctx; + + BUILD_BUG_ON(sizeof(*ctx) > sizeof(info->ctx)); + + return net_shaper_ctx_setup_lock(info, NET_SHAPER_A_IFINDEX, ctx); +} + +void net_shaper_nl_post_doit_write(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + net_shaper_ctx_cleanup_unlock((struct net_shaper_nl_ctx *)info->ctx); +} + int net_shaper_nl_pre_dumpit(struct netlink_callback *cb) { struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)cb->ctx; @@ -778,17 +829,19 @@ int net_shaper_nl_get_dumpit(struct sk_buff *skb, /* Don't error out dumps performed before any set operation. */ binding = net_shaper_binding_from_ctx(ctx); - hierarchy = net_shaper_hierarchy(binding); - if (!hierarchy) - return 0; rcu_read_lock(); + hierarchy = net_shaper_hierarchy_rcu(binding); + if (!hierarchy) + goto out_unlock; + for (; (shaper = xa_find(&hierarchy->shapers, &ctx->start_index, U32_MAX, XA_PRESENT)); ctx->start_index++) { ret = net_shaper_fill_one(skb, binding, shaper, info); if (ret) break; } +out_unlock: rcu_read_unlock(); return ret; @@ -806,45 +859,38 @@ int net_shaper_nl_set_doit(struct sk_buff *skb, struct genl_info *info) binding = net_shaper_binding_from_ctx(info->ctx); - net_shaper_lock(binding); ret = net_shaper_parse_info(binding, info->attrs, info, &shaper, &exists); if (ret) - goto unlock; + return ret; if (!exists) net_shaper_default_parent(&shaper.handle, &shaper.parent); hierarchy = net_shaper_hierarchy_setup(binding); - if (!hierarchy) { - ret = -ENOMEM; - goto unlock; - } + if (!hierarchy) + return -ENOMEM; /* The 'set' operation can't create node-scope shapers. */ handle = shaper.handle; if (handle.scope == NET_SHAPER_SCOPE_NODE && - !net_shaper_lookup(binding, &handle)) { - ret = -ENOENT; - goto unlock; - } + !net_shaper_lookup(binding, &handle)) + return -ENOENT; ret = net_shaper_pre_insert(binding, &handle, info->extack); if (ret) - goto unlock; + return ret; ops = net_shaper_ops(binding); ret = ops->set(binding, &shaper, info->extack); if (ret) { net_shaper_rollback(binding); - goto unlock; + return ret; } net_shaper_commit(binding, 1, &shaper); -unlock: - net_shaper_unlock(binding); - return ret; + return 0; } static int __net_shaper_delete(struct net_shaper_binding *binding, @@ -1072,35 +1118,26 @@ int net_shaper_nl_delete_doit(struct sk_buff *skb, struct genl_info *info) binding = net_shaper_binding_from_ctx(info->ctx); - net_shaper_lock(binding); ret = net_shaper_parse_handle(info->attrs[NET_SHAPER_A_HANDLE], info, &handle); if (ret) - goto unlock; + return ret; hierarchy = net_shaper_hierarchy(binding); - if (!hierarchy) { - ret = -ENOENT; - goto unlock; - } + if (!hierarchy) + return -ENOENT; shaper = net_shaper_lookup(binding, &handle); - if (!shaper) { - ret = -ENOENT; - goto unlock; - } + if (!shaper) + return -ENOENT; if (handle.scope == NET_SHAPER_SCOPE_NODE) { ret = net_shaper_pre_del_node(binding, shaper, info->extack); if (ret) - goto unlock; + return ret; } - ret = __net_shaper_delete(binding, shaper, info->extack); - -unlock: - net_shaper_unlock(binding); - return ret; + return __net_shaper_delete(binding, shaper, info->extack); } static int net_shaper_group_send_reply(struct net_shaper_binding *binding, @@ -1149,21 +1186,17 @@ int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info) if (!net_shaper_ops(binding)->group) return -EOPNOTSUPP; - net_shaper_lock(binding); leaves_count = net_shaper_list_len(info, NET_SHAPER_A_LEAVES); if (!leaves_count) { NL_SET_BAD_ATTR(info->extack, info->attrs[NET_SHAPER_A_LEAVES]); - ret = -EINVAL; - goto unlock; + return -EINVAL; } leaves = kcalloc(leaves_count, sizeof(struct net_shaper) + sizeof(struct net_shaper *), GFP_KERNEL); - if (!leaves) { - ret = -ENOMEM; - goto unlock; - } + if (!leaves) + return -ENOMEM; old_nodes = (void *)&leaves[leaves_count]; ret = net_shaper_parse_node(binding, info->attrs, info, &node); @@ -1240,9 +1273,6 @@ int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info) free_leaves: kfree(leaves); - -unlock: - net_shaper_unlock(binding); return ret; free_msg: @@ -1352,14 +1382,12 @@ static void net_shaper_flush(struct net_shaper_binding *binding) if (!hierarchy) return; - net_shaper_lock(binding); xa_lock(&hierarchy->shapers); xa_for_each(&hierarchy->shapers, index, cur) { __xa_erase(&hierarchy->shapers, index); kfree(cur); } xa_unlock(&hierarchy->shapers); - net_shaper_unlock(binding); kfree(hierarchy); } diff --git a/net/shaper/shaper_nl_gen.c b/net/shaper/shaper_nl_gen.c index e8cccc4c118035..9b29be3ef19a85 100644 --- a/net/shaper/shaper_nl_gen.c +++ b/net/shaper/shaper_nl_gen.c @@ -99,27 +99,27 @@ static const struct genl_split_ops net_shaper_nl_ops[] = { }, { .cmd = NET_SHAPER_CMD_SET, - .pre_doit = net_shaper_nl_pre_doit, + .pre_doit = net_shaper_nl_pre_doit_write, .doit = net_shaper_nl_set_doit, - .post_doit = net_shaper_nl_post_doit, + .post_doit = net_shaper_nl_post_doit_write, .policy = net_shaper_set_nl_policy, .maxattr = NET_SHAPER_A_IFINDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { .cmd = NET_SHAPER_CMD_DELETE, - .pre_doit = net_shaper_nl_pre_doit, + .pre_doit = net_shaper_nl_pre_doit_write, .doit = net_shaper_nl_delete_doit, - .post_doit = net_shaper_nl_post_doit, + .post_doit = net_shaper_nl_post_doit_write, .policy = net_shaper_delete_nl_policy, .maxattr = NET_SHAPER_A_IFINDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { .cmd = NET_SHAPER_CMD_GROUP, - .pre_doit = net_shaper_nl_pre_doit, + .pre_doit = net_shaper_nl_pre_doit_write, .doit = net_shaper_nl_group_doit, - .post_doit = net_shaper_nl_post_doit, + .post_doit = net_shaper_nl_post_doit_write, .policy = net_shaper_group_nl_policy, .maxattr = NET_SHAPER_A_LEAVES, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, diff --git a/net/shaper/shaper_nl_gen.h b/net/shaper/shaper_nl_gen.h index ec41c90431a4c9..42c46c52c77513 100644 --- a/net/shaper/shaper_nl_gen.h +++ b/net/shaper/shaper_nl_gen.h @@ -18,12 +18,17 @@ extern const struct nla_policy net_shaper_leaf_info_nl_policy[NET_SHAPER_A_WEIGH int net_shaper_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); +int net_shaper_nl_pre_doit_write(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info); int net_shaper_nl_cap_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); void net_shaper_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); void +net_shaper_nl_post_doit_write(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info); +void net_shaper_nl_cap_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); int net_shaper_nl_pre_dumpit(struct netlink_callback *cb); diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index d0119afcc6a1f4..1a565095376aab 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -131,7 +131,14 @@ static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk, struct smc_sock *smc; struct sock *child; - smc = smc_clcsock_user_data(sk); + rcu_read_lock(); + smc = smc_clcsock_user_data_rcu(sk); + if (!smc || !refcount_inc_not_zero(&smc->sk.sk_refcnt)) { + rcu_read_unlock(); + smc = NULL; + goto drop; + } + rcu_read_unlock(); if (READ_ONCE(sk->sk_ack_backlog) + atomic_read(&smc->queued_smc_hs) > sk->sk_max_ack_backlog) @@ -153,11 +160,14 @@ static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk, if (inet_csk(child)->icsk_af_ops == inet_csk(sk)->icsk_af_ops) inet_csk(child)->icsk_af_ops = smc->ori_af_ops; } + sock_put(&smc->sk); return child; drop: dst_release(dst); tcp_listendrop(sk); + if (smc) + sock_put(&smc->sk); return NULL; } @@ -254,7 +264,7 @@ static void smc_fback_restore_callbacks(struct smc_sock *smc) struct sock *clcsk = smc->clcsock->sk; write_lock_bh(&clcsk->sk_callback_lock); - clcsk->sk_user_data = NULL; + rcu_assign_sk_user_data(clcsk, NULL); smc_clcsock_restore_cb(&clcsk->sk_state_change, &smc->clcsk_state_change); smc_clcsock_restore_cb(&clcsk->sk_data_ready, &smc->clcsk_data_ready); @@ -902,7 +912,7 @@ static void smc_fback_replace_callbacks(struct smc_sock *smc) struct sock *clcsk = smc->clcsock->sk; write_lock_bh(&clcsk->sk_callback_lock); - clcsk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); + __rcu_assign_sk_user_data_with_flags(clcsk, smc, SK_USER_DATA_NOCOPY); smc_clcsock_replace_cb(&clcsk->sk_state_change, smc_fback_state_change, &smc->clcsk_state_change); @@ -2665,8 +2675,8 @@ int smc_listen(struct socket *sock, int backlog) * smc-specific sk_data_ready function */ write_lock_bh(&smc->clcsock->sk->sk_callback_lock); - smc->clcsock->sk->sk_user_data = - (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); + __rcu_assign_sk_user_data_with_flags(smc->clcsock->sk, smc, + SK_USER_DATA_NOCOPY); smc_clcsock_replace_cb(&smc->clcsock->sk->sk_data_ready, smc_clcsock_data_ready, &smc->clcsk_data_ready); write_unlock_bh(&smc->clcsock->sk->sk_callback_lock); @@ -2687,10 +2697,11 @@ int smc_listen(struct socket *sock, int backlog) write_lock_bh(&smc->clcsock->sk->sk_callback_lock); smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready, &smc->clcsk_data_ready); - smc->clcsock->sk->sk_user_data = NULL; + rcu_assign_sk_user_data(smc->clcsock->sk, NULL); write_unlock_bh(&smc->clcsock->sk->sk_callback_lock); goto out; } + sock_set_flag(sk, SOCK_RCU_FREE); sk->sk_max_ack_backlog = backlog; sk->sk_ack_backlog = 0; sk->sk_state = SMC_LISTEN; diff --git a/net/smc/smc.h b/net/smc/smc.h index 9e6af72784baa8..52145df83f6e7b 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -346,6 +346,11 @@ static inline struct smc_sock *smc_clcsock_user_data(const struct sock *clcsk) ((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY); } +static inline struct smc_sock *smc_clcsock_user_data_rcu(const struct sock *clcsk) +{ + return (struct smc_sock *)rcu_dereference_sk_user_data(clcsk); +} + /* save target_cb in saved_cb, and replace target_cb with new_cb */ static inline void smc_clcsock_replace_cb(void (**target_cb)(struct sock *), void (*new_cb)(struct sock *), diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 10219f55aad14d..bb0313ef5f7c1b 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -218,7 +218,7 @@ int smc_close_active(struct smc_sock *smc) write_lock_bh(&smc->clcsock->sk->sk_callback_lock); smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready, &smc->clcsk_data_ready); - smc->clcsock->sk->sk_user_data = NULL; + rcu_assign_sk_user_data(smc->clcsock->sk, NULL); write_unlock_bh(&smc->clcsock->sk->sk_callback_lock); rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); } diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index d833e36f7fd40d..c1d9b923938dad 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -135,9 +135,16 @@ static void smc_rx_pipe_buf_release(struct pipe_inode_info *pipe, sock_put(sk); } +static bool smc_rx_pipe_buf_get(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + /* smc_spd_priv in buf->private is not shareable; disallow cloning. */ + return false; +} + static const struct pipe_buf_operations smc_pipe_ops = { .release = smc_rx_pipe_buf_release, - .get = generic_pipe_buf_get + .get = smc_rx_pipe_buf_get, }; static void smc_rx_spd_release(struct splice_pipe_desc *spd, diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 237f67a5d004cf..ef8b7e8b1e9c99 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1062,14 +1062,25 @@ static int cache_release(struct inode *inode, struct file *filp, struct cache_reader *rp = filp->private_data; if (rp) { + struct cache_request *rq = NULL; + spin_lock(&queue_lock); if (rp->offset) { struct cache_queue *cq; - for (cq= &rp->q; &cq->list != &cd->queue; - cq = list_entry(cq->list.next, struct cache_queue, list)) + for (cq = &rp->q; &cq->list != &cd->queue; + cq = list_entry(cq->list.next, + struct cache_queue, list)) if (!cq->reader) { - container_of(cq, struct cache_request, q) - ->readers--; + struct cache_request *cr = + container_of(cq, + struct cache_request, q); + cr->readers--; + if (cr->readers == 0 && + !test_bit(CACHE_PENDING, + &cr->item->flags)) { + list_del(&cr->q.list); + rq = cr; + } break; } rp->offset = 0; @@ -1077,9 +1088,14 @@ static int cache_release(struct inode *inode, struct file *filp, list_del(&rp->q.list); spin_unlock(&queue_lock); + if (rq) { + cache_put(rq->item, cd); + kfree(rq->buf); + kfree(rq); + } + filp->private_data = NULL; kfree(rp); - } if (filp->f_mode & FMODE_WRITE) { atomic_dec(&cd->writers); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 5fe07f110fe8c7..dd9dda759bbb8c 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -246,6 +246,7 @@ static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); atomic_inc(&ctx->decrypt_pending); + __skb_queue_purge(&ctx->async_hold); return ctx->async_wait.err; } @@ -2225,7 +2226,6 @@ int tls_sw_recvmsg(struct sock *sk, /* Wait for all previously submitted records to be decrypted */ ret = tls_decrypt_async_wait(ctx); - __skb_queue_purge(&ctx->async_hold); if (ret) { if (err >= 0 || err == -EINPROGRESS) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7eaa5b187fef4d..b23c33df8b4654 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1958,6 +1958,8 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) { scm->fp = scm_fp_dup(UNIXCB(skb).fp); + + unix_peek_fpl(scm->fp); } static void unix_destruct_scm(struct sk_buff *skb) diff --git a/net/unix/af_unix.h b/net/unix/af_unix.h index c4f1b2da363def..8119dbeef3a3c6 100644 --- a/net/unix/af_unix.h +++ b/net/unix/af_unix.h @@ -29,6 +29,7 @@ void unix_del_edges(struct scm_fp_list *fpl); void unix_update_edges(struct unix_sock *receiver); int unix_prepare_fpl(struct scm_fp_list *fpl); void unix_destroy_fpl(struct scm_fp_list *fpl); +void unix_peek_fpl(struct scm_fp_list *fpl); void unix_schedule_gc(struct user_struct *user); /* SOCK_DIAG */ diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 816e8fa2b06246..a7967a34582737 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -318,6 +318,25 @@ void unix_destroy_fpl(struct scm_fp_list *fpl) unix_free_vertices(fpl); } +static bool gc_in_progress; +static seqcount_t unix_peek_seq = SEQCNT_ZERO(unix_peek_seq); + +void unix_peek_fpl(struct scm_fp_list *fpl) +{ + static DEFINE_SPINLOCK(unix_peek_lock); + + if (!fpl || !fpl->count_unix) + return; + + if (!READ_ONCE(gc_in_progress)) + return; + + /* Invalidate the final refcnt check in unix_vertex_dead(). */ + spin_lock(&unix_peek_lock); + raw_write_seqcount_barrier(&unix_peek_seq); + spin_unlock(&unix_peek_lock); +} + static bool unix_vertex_dead(struct unix_vertex *vertex) { struct unix_edge *edge; @@ -351,6 +370,36 @@ static bool unix_vertex_dead(struct unix_vertex *vertex) return true; } +static LIST_HEAD(unix_visited_vertices); +static unsigned long unix_vertex_grouped_index = UNIX_VERTEX_INDEX_MARK2; + +static bool unix_scc_dead(struct list_head *scc, bool fast) +{ + struct unix_vertex *vertex; + bool scc_dead = true; + unsigned int seq; + + seq = read_seqcount_begin(&unix_peek_seq); + + list_for_each_entry_reverse(vertex, scc, scc_entry) { + /* Don't restart DFS from this vertex. */ + list_move_tail(&vertex->entry, &unix_visited_vertices); + + /* Mark vertex as off-stack for __unix_walk_scc(). */ + if (!fast) + vertex->index = unix_vertex_grouped_index; + + if (scc_dead) + scc_dead = unix_vertex_dead(vertex); + } + + /* If MSG_PEEK intervened, defer this SCC to the next round. */ + if (read_seqcount_retry(&unix_peek_seq, seq)) + return false; + + return scc_dead; +} + static void unix_collect_skb(struct list_head *scc, struct sk_buff_head *hitlist) { struct unix_vertex *vertex; @@ -404,9 +453,6 @@ static bool unix_scc_cyclic(struct list_head *scc) return false; } -static LIST_HEAD(unix_visited_vertices); -static unsigned long unix_vertex_grouped_index = UNIX_VERTEX_INDEX_MARK2; - static unsigned long __unix_walk_scc(struct unix_vertex *vertex, unsigned long *last_index, struct sk_buff_head *hitlist) @@ -474,9 +520,7 @@ static unsigned long __unix_walk_scc(struct unix_vertex *vertex, } if (vertex->index == vertex->scc_index) { - struct unix_vertex *v; struct list_head scc; - bool scc_dead = true; /* SCC finalised. * @@ -485,18 +529,7 @@ static unsigned long __unix_walk_scc(struct unix_vertex *vertex, */ __list_cut_position(&scc, &vertex_stack, &vertex->scc_entry); - list_for_each_entry_reverse(v, &scc, scc_entry) { - /* Don't restart DFS from this vertex in unix_walk_scc(). */ - list_move_tail(&v->entry, &unix_visited_vertices); - - /* Mark vertex as off-stack. */ - v->index = unix_vertex_grouped_index; - - if (scc_dead) - scc_dead = unix_vertex_dead(v); - } - - if (scc_dead) { + if (unix_scc_dead(&scc, false)) { unix_collect_skb(&scc, hitlist); } else { if (unix_vertex_max_scc_index < vertex->scc_index) @@ -550,19 +583,11 @@ static void unix_walk_scc_fast(struct sk_buff_head *hitlist) while (!list_empty(&unix_unvisited_vertices)) { struct unix_vertex *vertex; struct list_head scc; - bool scc_dead = true; vertex = list_first_entry(&unix_unvisited_vertices, typeof(*vertex), entry); list_add(&scc, &vertex->scc_entry); - list_for_each_entry_reverse(vertex, &scc, scc_entry) { - list_move_tail(&vertex->entry, &unix_visited_vertices); - - if (scc_dead) - scc_dead = unix_vertex_dead(vertex); - } - - if (scc_dead) { + if (unix_scc_dead(&scc, true)) { cyclic_sccs--; unix_collect_skb(&scc, hitlist); } @@ -577,8 +602,6 @@ static void unix_walk_scc_fast(struct sk_buff_head *hitlist) cyclic_sccs ? UNIX_GRAPH_CYCLIC : UNIX_GRAPH_NOT_CYCLIC); } -static bool gc_in_progress; - static void unix_gc(struct work_struct *work) { struct sk_buff_head hitlist; diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index 44bd88c9ea6649..50e8e19aa366c2 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -664,6 +664,7 @@ void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev) } spin_unlock_bh(&wdev->pmsr_lock); + cancel_work_sync(&wdev->pmsr_free_wk); if (found) cfg80211_pmsr_process_abort(wdev); diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 4ed346e682c7eb..dc1312ed5a0955 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -75,7 +75,10 @@ int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo) spin_lock_bh(&xfrm_input_afinfo_lock); if (likely(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family])) { - if (unlikely(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family] != afinfo)) + const struct xfrm_input_afinfo *cur; + + cur = rcu_access_pointer(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family]); + if (unlikely(cur != afinfo)) err = -EINVAL; else RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family], NULL); diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c index 050a82101ca518..97bc979e55baf9 100644 --- a/net/xfrm/xfrm_iptfs.c +++ b/net/xfrm/xfrm_iptfs.c @@ -901,6 +901,12 @@ static u32 iptfs_reassem_cont(struct xfrm_iptfs_data *xtfs, u64 seq, iptfs_skb_can_add_frags(newskb, fragwalk, data, copylen)) { iptfs_skb_add_frags(newskb, fragwalk, data, copylen); } else { + if (skb_linearize(newskb)) { + XFRM_INC_STATS(xs_net(xtfs->x), + LINUX_MIB_XFRMINBUFFERERROR); + goto abandon; + } + /* copy fragment data into newskb */ if (skb_copy_seq_read(st, data, skb_put(newskb, copylen), copylen)) { @@ -991,6 +997,11 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data, iplen = be16_to_cpu(iph->tot_len); iphlen = iph->ihl << 2; + if (iplen < iphlen || iphlen < sizeof(*iph)) { + XFRM_INC_STATS(net, + LINUX_MIB_XFRMINHDRERROR); + goto done; + } protocol = cpu_to_be16(ETH_P_IP); XFRM_MODE_SKB_CB(skbseq->root_skb)->tos = iph->tos; } else if (iph->version == 0x6) { @@ -2653,9 +2664,6 @@ static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig) if (!xtfs) return -ENOMEM; - x->mode_data = xtfs; - xtfs->x = x; - xtfs->ra_newskb = NULL; if (xtfs->cfg.reorder_win_size) { xtfs->w_saved = kzalloc_objs(*xtfs->w_saved, @@ -2666,6 +2674,9 @@ static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig) } } + x->mode_data = xtfs; + xtfs->x = x; + return 0; } diff --git a/net/xfrm/xfrm_nat_keepalive.c b/net/xfrm/xfrm_nat_keepalive.c index ebf95d48e86c14..1856beee0149bb 100644 --- a/net/xfrm/xfrm_nat_keepalive.c +++ b/net/xfrm/xfrm_nat_keepalive.c @@ -261,7 +261,7 @@ int __net_init xfrm_nat_keepalive_net_init(struct net *net) int xfrm_nat_keepalive_net_fini(struct net *net) { - cancel_delayed_work_sync(&net->xfrm.nat_keepalive_work); + disable_delayed_work_sync(&net->xfrm.nat_keepalive_work); return 0; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9e1a522ab1c527..362939aa56cfb7 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -4156,7 +4156,7 @@ void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo) int i; for (i = 0; i < ARRAY_SIZE(xfrm_policy_afinfo); i++) { - if (xfrm_policy_afinfo[i] != afinfo) + if (rcu_access_pointer(xfrm_policy_afinfo[i]) != afinfo) continue; RCU_INIT_POINTER(xfrm_policy_afinfo[i], NULL); break; @@ -4242,7 +4242,7 @@ static int __net_init xfrm_policy_init(struct net *net) net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0; htab = &net->xfrm.policy_bydst[dir]; - htab->table = xfrm_hash_alloc(sz); + rcu_assign_pointer(htab->table, xfrm_hash_alloc(sz)); if (!htab->table) goto out_bydst; htab->hmask = hmask; @@ -4269,7 +4269,7 @@ static int __net_init xfrm_policy_init(struct net *net) struct xfrm_policy_hash *htab; htab = &net->xfrm.policy_bydst[dir]; - xfrm_hash_free(htab->table, sz); + xfrm_hash_free(rcu_dereference_protected(htab->table, true), sz); } xfrm_hash_free(net->xfrm.policy_byidx, sz); out_byidx: @@ -4282,6 +4282,8 @@ static void xfrm_policy_fini(struct net *net) unsigned int sz; int dir; + disable_work_sync(&net->xfrm.policy_hthresh.work); + flush_work(&net->xfrm.policy_hash_work); #ifdef CONFIG_XFRM_SUB_POLICY xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false); @@ -4295,8 +4297,8 @@ static void xfrm_policy_fini(struct net *net) htab = &net->xfrm.policy_bydst[dir]; sz = (htab->hmask + 1) * sizeof(struct hlist_head); - WARN_ON(!hlist_empty(htab->table)); - xfrm_hash_free(htab->table, sz); + WARN_ON(!hlist_empty(rcu_dereference_protected(htab->table, true))); + xfrm_hash_free(rcu_dereference_protected(htab->table, true), sz); } sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 98b362d518363b..1748d374abcab3 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -53,7 +53,7 @@ static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task); static HLIST_HEAD(xfrm_state_gc_list); static HLIST_HEAD(xfrm_state_dev_gc_list); -static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x) +static inline bool xfrm_state_hold_rcu(struct xfrm_state *x) { return refcount_inc_not_zero(&x->refcnt); } @@ -870,7 +870,7 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid) for (i = 0; i <= net->xfrm.state_hmask; i++) { struct xfrm_state *x; - hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto) && (err = security_xfrm_state_delete(x)) != 0) { xfrm_audit_state_delete(x, 0, task_valid); @@ -891,7 +891,7 @@ xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool struct xfrm_state *x; struct xfrm_dev_offload *xso; - hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) { xso = &x->xso; if (xso->dev == dev && @@ -931,7 +931,7 @@ int xfrm_state_flush(struct net *net, u8 proto, bool task_valid) for (i = 0; i <= net->xfrm.state_hmask; i++) { struct xfrm_state *x; restart: - hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) { if (!xfrm_state_kern(x) && xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); @@ -973,7 +973,7 @@ int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_vali err = -ESRCH; for (i = 0; i <= net->xfrm.state_hmask; i++) { restart: - hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) { xso = &x->xso; if (!xfrm_state_kern(x) && xso->dev == dev) { @@ -1563,23 +1563,23 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, list_add(&x->km.all, &net->xfrm.state_all); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); XFRM_STATE_INSERT(bydst, &x->bydst, - net->xfrm.state_bydst + h, + xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, x->xso.type); h = xfrm_src_hash(net, daddr, saddr, encap_family); XFRM_STATE_INSERT(bysrc, &x->bysrc, - net->xfrm.state_bysrc + h, + xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h, x->xso.type); INIT_HLIST_NODE(&x->state_cache); if (x->id.spi) { h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family); XFRM_STATE_INSERT(byspi, &x->byspi, - net->xfrm.state_byspi + h, + xfrm_state_deref_prot(net->xfrm.state_byspi, net) + h, x->xso.type); } if (x->km.seq) { h = xfrm_seq_hash(net, x->km.seq); XFRM_STATE_INSERT(byseq, &x->byseq, - net->xfrm.state_byseq + h, + xfrm_state_deref_prot(net->xfrm.state_byseq, net) + h, x->xso.type); } x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; @@ -1652,7 +1652,7 @@ xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id, spin_lock_bh(&net->xfrm.xfrm_state_lock); h = xfrm_dst_hash(net, daddr, saddr, reqid, family); - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) { if (x->props.family == family && x->props.reqid == reqid && (mark & x->mark.m) == x->mark.v && @@ -1703,18 +1703,12 @@ static struct xfrm_state *xfrm_state_lookup_spi_proto(struct net *net, __be32 sp struct xfrm_state *x; unsigned int i; - rcu_read_lock(); for (i = 0; i <= net->xfrm.state_hmask; i++) { - hlist_for_each_entry_rcu(x, &net->xfrm.state_byspi[i], byspi) { - if (x->id.spi == spi && x->id.proto == proto) { - if (!xfrm_state_hold_rcu(x)) - continue; - rcu_read_unlock(); + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_byspi, net) + i, byspi) { + if (x->id.spi == spi && x->id.proto == proto) return x; - } } } - rcu_read_unlock(); return NULL; } @@ -1730,25 +1724,29 @@ static void __xfrm_state_insert(struct xfrm_state *x) h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); - XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h, + XFRM_STATE_INSERT(bydst, &x->bydst, + xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, x->xso.type); h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family); - XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h, + XFRM_STATE_INSERT(bysrc, &x->bysrc, + xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h, x->xso.type); if (x->id.spi) { h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); - XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h, + XFRM_STATE_INSERT(byspi, &x->byspi, + xfrm_state_deref_prot(net->xfrm.state_byspi, net) + h, x->xso.type); } if (x->km.seq) { h = xfrm_seq_hash(net, x->km.seq); - XFRM_STATE_INSERT(byseq, &x->byseq, net->xfrm.state_byseq + h, + XFRM_STATE_INSERT(byseq, &x->byseq, + xfrm_state_deref_prot(net->xfrm.state_byseq, net) + h, x->xso.type); } @@ -1775,7 +1773,7 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) u32 cpu_id = xnew->pcpu_num; h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family); - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) { if (x->props.family == family && x->props.reqid == reqid && x->if_id == if_id && @@ -1811,7 +1809,7 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_state *x; u32 mark = m->v & m->m; - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) { if (x->props.reqid != reqid || x->props.mode != mode || x->props.family != family || @@ -1868,10 +1866,12 @@ static struct xfrm_state *__find_acq_core(struct net *net, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL_SOFT); list_add(&x->km.all, &net->xfrm.state_all); - XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h, + XFRM_STATE_INSERT(bydst, &x->bydst, + xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, x->xso.type); h = xfrm_src_hash(net, daddr, saddr, family); - XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h, + XFRM_STATE_INSERT(bysrc, &x->bysrc, + xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h, x->xso.type); net->xfrm.state_num++; @@ -2091,7 +2091,7 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n if (m->reqid) { h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr, m->reqid, m->old_family); - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -2110,7 +2110,7 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n } else { h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr, m->old_family); - hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h, bysrc) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -2264,6 +2264,7 @@ int xfrm_state_update(struct xfrm_state *x) err = 0; x->km.state = XFRM_STATE_DEAD; + xfrm_dev_state_delete(x); __xfrm_state_put(x); } @@ -2312,7 +2313,7 @@ void xfrm_state_update_stats(struct net *net) spin_lock_bh(&net->xfrm.xfrm_state_lock); for (i = 0; i <= net->xfrm.state_hmask; i++) { - hlist_for_each_entry(x, net->xfrm.state_bydst + i, bydst) + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) xfrm_dev_state_update_stats(x); } spin_unlock_bh(&net->xfrm.xfrm_state_lock); @@ -2503,7 +2504,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s unsigned int h = xfrm_seq_hash(net, seq); struct xfrm_state *x; - hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_byseq, net) + h, byseq) { if (x->km.seq == seq && (mark & x->mark.m) == x->mark.v && x->pcpu_num == pcpu_num && @@ -2602,12 +2603,13 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high, if (!x0) { x->id.spi = newspi; h = xfrm_spi_hash(net, &x->id.daddr, newspi, x->id.proto, x->props.family); - XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h, x->xso.type); + XFRM_STATE_INSERT(byspi, &x->byspi, + xfrm_state_deref_prot(net->xfrm.state_byspi, net) + h, + x->xso.type); spin_unlock_bh(&net->xfrm.xfrm_state_lock); err = 0; goto unlock; } - xfrm_state_put(x0); spin_unlock_bh(&net->xfrm.xfrm_state_lock); next: @@ -3258,6 +3260,7 @@ EXPORT_SYMBOL(xfrm_init_state); int __net_init xfrm_state_init(struct net *net) { + struct hlist_head *ndst, *nsrc, *nspi, *nseq; unsigned int sz; if (net_eq(net, &init_net)) @@ -3268,18 +3271,25 @@ int __net_init xfrm_state_init(struct net *net) sz = sizeof(struct hlist_head) * 8; - net->xfrm.state_bydst = xfrm_hash_alloc(sz); - if (!net->xfrm.state_bydst) + ndst = xfrm_hash_alloc(sz); + if (!ndst) goto out_bydst; - net->xfrm.state_bysrc = xfrm_hash_alloc(sz); - if (!net->xfrm.state_bysrc) + rcu_assign_pointer(net->xfrm.state_bydst, ndst); + + nsrc = xfrm_hash_alloc(sz); + if (!nsrc) goto out_bysrc; - net->xfrm.state_byspi = xfrm_hash_alloc(sz); - if (!net->xfrm.state_byspi) + rcu_assign_pointer(net->xfrm.state_bysrc, nsrc); + + nspi = xfrm_hash_alloc(sz); + if (!nspi) goto out_byspi; - net->xfrm.state_byseq = xfrm_hash_alloc(sz); - if (!net->xfrm.state_byseq) + rcu_assign_pointer(net->xfrm.state_byspi, nspi); + + nseq = xfrm_hash_alloc(sz); + if (!nseq) goto out_byseq; + rcu_assign_pointer(net->xfrm.state_byseq, nseq); net->xfrm.state_cache_input = alloc_percpu(struct hlist_head); if (!net->xfrm.state_cache_input) @@ -3295,17 +3305,19 @@ int __net_init xfrm_state_init(struct net *net) return 0; out_state_cache_input: - xfrm_hash_free(net->xfrm.state_byseq, sz); + xfrm_hash_free(nseq, sz); out_byseq: - xfrm_hash_free(net->xfrm.state_byspi, sz); + xfrm_hash_free(nspi, sz); out_byspi: - xfrm_hash_free(net->xfrm.state_bysrc, sz); + xfrm_hash_free(nsrc, sz); out_bysrc: - xfrm_hash_free(net->xfrm.state_bydst, sz); + xfrm_hash_free(ndst, sz); out_bydst: return -ENOMEM; } +#define xfrm_state_deref_netexit(table) \ + rcu_dereference_protected((table), true /* netns is going away */) void xfrm_state_fini(struct net *net) { unsigned int sz; @@ -3318,17 +3330,17 @@ void xfrm_state_fini(struct net *net) WARN_ON(!list_empty(&net->xfrm.state_all)); for (i = 0; i <= net->xfrm.state_hmask; i++) { - WARN_ON(!hlist_empty(net->xfrm.state_byseq + i)); - WARN_ON(!hlist_empty(net->xfrm.state_byspi + i)); - WARN_ON(!hlist_empty(net->xfrm.state_bysrc + i)); - WARN_ON(!hlist_empty(net->xfrm.state_bydst + i)); + WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_byseq) + i)); + WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_byspi) + i)); + WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_bysrc) + i)); + WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_bydst) + i)); } sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head); - xfrm_hash_free(net->xfrm.state_byseq, sz); - xfrm_hash_free(net->xfrm.state_byspi, sz); - xfrm_hash_free(net->xfrm.state_bysrc, sz); - xfrm_hash_free(net->xfrm.state_bydst, sz); + xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_byseq), sz); + xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_byspi), sz); + xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_bysrc), sz); + xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_bydst), sz); free_percpu(net->xfrm.state_cache_input); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 403b5ecac2c544..1656b487f8334e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -35,6 +35,15 @@ #endif #include +static struct sock *xfrm_net_nlsk(const struct net *net, const struct sk_buff *skb) +{ + /* get the source of this request, see netlink_unicast_kernel */ + const struct sock *sk = NETLINK_CB(skb).sk; + + /* sk is refcounted, the netns stays alive and nlsk with it */ + return rcu_dereference_protected(net->xfrm.nlsk, sk->sk_net_refcnt); +} + static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type, struct netlink_ext_ack *extack) { @@ -1727,7 +1736,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_spdinfo(r_skb, net, sportid, seq, *flags); BUG_ON(err < 0); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); + return nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, sportid); } static inline unsigned int xfrm_sadinfo_msgsize(void) @@ -1787,7 +1796,7 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_sadinfo(r_skb, net, sportid, seq, *flags); BUG_ON(err < 0); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); + return nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, sportid); } static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1807,7 +1816,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); + err = nlmsg_unicast(xfrm_net_nlsk(net, skb), resp_skb, NETLINK_CB(skb).portid); } xfrm_state_put(x); out_noput: @@ -1850,6 +1859,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]); if (pcpu_num >= num_possible_cpus()) { err = -EINVAL; + NL_SET_ERR_MSG(extack, "pCPU number too big"); goto out_noput; } } @@ -1897,7 +1907,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, } } - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); + err = nlmsg_unicast(xfrm_net_nlsk(net, skb), resp_skb, NETLINK_CB(skb).portid); out: xfrm_state_put(x); @@ -2542,7 +2552,7 @@ static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh, r_up->out = net->xfrm.policy_default[XFRM_POLICY_OUT]; nlmsg_end(r_skb, r_nlh); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, portid); + return nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, portid); } static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2608,7 +2618,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, + err = nlmsg_unicast(xfrm_net_nlsk(net, skb), resp_skb, NETLINK_CB(skb).portid); } } else { @@ -2781,7 +2791,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_aevent(r_skb, x, &c); BUG_ON(err < 0); - err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid); + err = nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, NETLINK_CB(skb).portid); spin_unlock_bh(&x->lock); xfrm_state_put(x); return err; @@ -3001,8 +3011,10 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, if (attrs[XFRMA_SA_PCPU]) { x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]); err = -EINVAL; - if (x->pcpu_num >= num_possible_cpus()) + if (x->pcpu_num >= num_possible_cpus()) { + NL_SET_ERR_MSG(extack, "pCPU number too big"); goto free_state; + } } err = verify_newpolicy_info(&ua->policy, extack); @@ -3483,7 +3495,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, goto err; } - err = netlink_dump_start(net->xfrm.nlsk, skb, nlh, &c); + err = netlink_dump_start(xfrm_net_nlsk(net, skb), skb, nlh, &c); goto err; } @@ -3673,7 +3685,7 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x) } if (x->if_id) l += nla_total_size(sizeof(x->if_id)); - if (x->pcpu_num) + if (x->pcpu_num != UINT_MAX) l += nla_total_size(sizeof(x->pcpu_num)); /* Must count x->lastused as it may become non-zero behind our back. */ diff --git a/samples/landlock/sandboxer.c b/samples/landlock/sandboxer.c index e7af02f98208ba..9f21088c0855c8 100644 --- a/samples/landlock/sandboxer.c +++ b/samples/landlock/sandboxer.c @@ -299,7 +299,7 @@ static bool check_ruleset_scope(const char *const env_var, /* clang-format on */ -#define LANDLOCK_ABI_LAST 7 +#define LANDLOCK_ABI_LAST 8 #define XSTR(s) #s #define STR(s) XSTR(s) @@ -436,7 +436,8 @@ int main(const int argc, char *const argv[], char *const *const envp) /* Removes LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON for ABI < 7 */ supported_restrict_flags &= ~LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON; - + __attribute__((fallthrough)); + case 7: /* Must be printed for any ABI < LANDLOCK_ABI_LAST. */ fprintf(stderr, "Hint: You should update the running kernel " diff --git a/scripts/coccinelle/api/kmalloc_objs.cocci b/scripts/coccinelle/api/kmalloc_objs.cocci index db12b7be7247a4..e9a415b7b6f45a 100644 --- a/scripts/coccinelle/api/kmalloc_objs.cocci +++ b/scripts/coccinelle/api/kmalloc_objs.cocci @@ -122,3 +122,14 @@ fresh identifier ALLOC_OBJS = script:python(ALLOC_ARRAY) { alloc_array(ALLOC_ARR - ALLOC(struct_size_t(TYPE, FLEX, COUNT), GFP) + ALLOC_FLEX(TYPE, FLEX, COUNT, GFP) ) + +@drop_gfp_kernel depends on patch && !(file in "tools") && !(file in "samples")@ +identifier ALLOC = {kmalloc_obj,kmalloc_objs,kmalloc_flex, + kzalloc_obj,kzalloc_objs,kzalloc_flex, + kvmalloc_obj,kvmalloc_objs,kvmalloc_flex, + kvzalloc_obj,kvzalloc_objs,kvzalloc_flex}; +@@ + + ALLOC(... +- , GFP_KERNEL + ) diff --git a/scripts/kconfig/merge_config.sh b/scripts/kconfig/merge_config.sh index 735e1de450c6aa..f08e0863b7128a 100755 --- a/scripts/kconfig/merge_config.sh +++ b/scripts/kconfig/merge_config.sh @@ -151,6 +151,7 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do if ! "$AWK" -v prefix="$CONFIG_PREFIX" \ -v warnoverride="$WARNOVERRIDE" \ -v strict="$STRICT" \ + -v outfile="$TMP_FILE.new" \ -v builtin="$BUILTIN" \ -v warnredun="$WARNREDUN" ' BEGIN { @@ -195,7 +196,7 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do # First pass: read merge file, store all lines and index FILENAME == ARGV[1] { - mergefile = FILENAME + mergefile = FILENAME merge_lines[FNR] = $0 merge_total = FNR cfg = get_cfg($0) @@ -212,17 +213,17 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do # Not a config or not in merge file - keep it if (cfg == "" || !(cfg in merge_cfg)) { - print $0 >> ARGV[3] + print $0 >> outfile next } - prev_val = $0 + prev_val = $0 new_val = merge_cfg[cfg] # BUILTIN: do not demote y to m if (builtin == "true" && new_val ~ /=m$/ && prev_val ~ /=y$/) { warn_builtin(cfg, prev_val, new_val) - print $0 >> ARGV[3] + print $0 >> outfile skip_merge[merge_cfg_line[cfg]] = 1 next } @@ -235,7 +236,7 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do # "=n" is the same as "is not set" if (prev_val ~ /=n$/ && new_val ~ / is not set$/) { - print $0 >> ARGV[3] + print $0 >> outfile next } @@ -246,25 +247,20 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do } } - # output file, skip all lines - FILENAME == ARGV[3] { - nextfile - } - END { # Newline in case base file lacks trailing newline - print "" >> ARGV[3] + print "" >> outfile # Append merge file, skipping lines marked for builtin preservation for (i = 1; i <= merge_total; i++) { if (!(i in skip_merge)) { - print merge_lines[i] >> ARGV[3] + print merge_lines[i] >> outfile } } if (strict_violated) { exit 1 } }' \ - "$ORIG_MERGE_FILE" "$TMP_FILE" "$TMP_FILE.new"; then + "$ORIG_MERGE_FILE" "$TMP_FILE"; then # awk exited non-zero, strict mode was violated STRICT_MODE_VIOLATED=true fi @@ -381,7 +377,7 @@ END { STRICT_MODE_VIOLATED=true fi -if [ "$STRICT" == "true" ] && [ "$STRICT_MODE_VIOLATED" == "true" ]; then +if [ "$STRICT" = "true" ] && [ "$STRICT_MODE_VIOLATED" = "true" ]; then echo "Requested and effective config differ" exit 1 fi diff --git a/scripts/livepatch/klp-build b/scripts/livepatch/klp-build index 809e198a561d54..7b82c7503c2bf0 100755 --- a/scripts/livepatch/klp-build +++ b/scripts/livepatch/klp-build @@ -285,15 +285,14 @@ set_module_name() { # application from appending it with '+' due to a dirty git working tree. set_kernelversion() { local file="$SRC/scripts/setlocalversion" - local localversion + local kernelrelease stash_file "$file" - localversion="$(cd "$SRC" && make --no-print-directory kernelversion)" - localversion="$(cd "$SRC" && KERNELVERSION="$localversion" ./scripts/setlocalversion)" - [[ -z "$localversion" ]] && die "setlocalversion failed" + kernelrelease="$(cd "$SRC" && make syncconfig &>/dev/null && make -s kernelrelease)" + [[ -z "$kernelrelease" ]] && die "failed to get kernel version" - sed -i "2i echo $localversion; exit 0" scripts/setlocalversion + sed -i "2i echo $kernelrelease; exit 0" scripts/setlocalversion } get_patch_files() { diff --git a/security/landlock/domain.c b/security/landlock/domain.c index f5b78d4766cd86..f0d83f43afa1d2 100644 --- a/security/landlock/domain.c +++ b/security/landlock/domain.c @@ -94,8 +94,7 @@ static struct landlock_details *get_current_details(void) * allocate with GFP_KERNEL_ACCOUNT because it is independent from the * caller. */ - details = - kzalloc_flex(*details, exe_path, path_size); + details = kzalloc_flex(*details, exe_path, path_size); if (!details) return ERR_PTR(-ENOMEM); diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c index 31987358638521..73018dc8d6c7ec 100644 --- a/security/landlock/ruleset.c +++ b/security/landlock/ruleset.c @@ -32,9 +32,8 @@ static struct landlock_ruleset *create_ruleset(const u32 num_layers) { struct landlock_ruleset *new_ruleset; - new_ruleset = - kzalloc_flex(*new_ruleset, access_masks, num_layers, - GFP_KERNEL_ACCOUNT); + new_ruleset = kzalloc_flex(*new_ruleset, access_masks, num_layers, + GFP_KERNEL_ACCOUNT); if (!new_ruleset) return ERR_PTR(-ENOMEM); refcount_set(&new_ruleset->usage, 1); @@ -559,8 +558,8 @@ landlock_merge_ruleset(struct landlock_ruleset *const parent, if (IS_ERR(new_dom)) return new_dom; - new_dom->hierarchy = kzalloc_obj(*new_dom->hierarchy, - GFP_KERNEL_ACCOUNT); + new_dom->hierarchy = + kzalloc_obj(*new_dom->hierarchy, GFP_KERNEL_ACCOUNT); if (!new_dom->hierarchy) return ERR_PTR(-ENOMEM); diff --git a/security/landlock/tsync.c b/security/landlock/tsync.c index de01aa8997510b..4d4427ba8d9395 100644 --- a/security/landlock/tsync.c +++ b/security/landlock/tsync.c @@ -203,6 +203,40 @@ static struct tsync_work *tsync_works_provide(struct tsync_works *s, return ctx; } +/** + * tsync_works_trim - Put the last tsync_work element + * + * @s: TSYNC works to trim. + * + * Put the last task and decrement the size of @s. + * + * This helper does not cancel a running task, but just reset the last element + * to zero. + */ +static void tsync_works_trim(struct tsync_works *s) +{ + struct tsync_work *ctx; + + if (WARN_ON_ONCE(s->size <= 0)) + return; + + ctx = s->works[s->size - 1]; + + /* + * For consistency, remove the task from ctx so that it does not look like + * we handed it a task_work. + */ + put_task_struct(ctx->task); + *ctx = (typeof(*ctx)){}; + + /* + * Cancel the tsync_works_provide() change to recycle the reserved memory + * for the next thread, if any. This also ensures that cancel_tsync_works() + * and tsync_works_release() do not see any NULL task pointers. + */ + s->size--; +} + /* * tsync_works_grow_by - preallocates space for n more contexts in s * @@ -256,13 +290,14 @@ static int tsync_works_grow_by(struct tsync_works *s, size_t n, gfp_t flags) * tsync_works_contains - checks for presence of task in s */ static bool tsync_works_contains_task(const struct tsync_works *s, - struct task_struct *task) + const struct task_struct *task) { size_t i; for (i = 0; i < s->size; i++) if (s->works[i]->task == task) return true; + return false; } @@ -276,7 +311,7 @@ static void tsync_works_release(struct tsync_works *s) size_t i; for (i = 0; i < s->size; i++) { - if (!s->works[i]->task) + if (WARN_ON_ONCE(!s->works[i]->task)) continue; put_task_struct(s->works[i]->task); @@ -284,6 +319,7 @@ static void tsync_works_release(struct tsync_works *s) for (i = 0; i < s->capacity; i++) kfree(s->works[i]); + kfree(s->works); s->works = NULL; s->size = 0; @@ -295,7 +331,7 @@ static void tsync_works_release(struct tsync_works *s) */ static size_t count_additional_threads(const struct tsync_works *works) { - struct task_struct *thread, *caller; + const struct task_struct *caller, *thread; size_t n = 0; caller = current; @@ -334,7 +370,8 @@ static bool schedule_task_work(struct tsync_works *works, struct tsync_shared_context *shared_ctx) { int err; - struct task_struct *thread, *caller; + const struct task_struct *caller; + struct task_struct *thread; struct tsync_work *ctx; bool found_more_threads = false; @@ -379,16 +416,14 @@ static bool schedule_task_work(struct tsync_works *works, init_task_work(&ctx->work, restrict_one_thread_callback); err = task_work_add(thread, &ctx->work, TWA_SIGNAL); - if (err) { + if (unlikely(err)) { /* * task_work_add() only fails if the task is about to exit. We * checked that earlier, but it can happen as a race. Resume * without setting an error, as the task is probably gone in the - * next loop iteration. For consistency, remove the task from ctx - * so that it does not look like we handed it a task_work. + * next loop iteration. */ - put_task_struct(ctx->task); - ctx->task = NULL; + tsync_works_trim(works); atomic_dec(&shared_ctx->num_preparing); atomic_dec(&shared_ctx->num_unfinished); @@ -406,12 +441,15 @@ static bool schedule_task_work(struct tsync_works *works, * shared_ctx->num_preparing and shared_ctx->num_unfished and mark the two * completions if needed, as if the task was never scheduled. */ -static void cancel_tsync_works(struct tsync_works *works, +static void cancel_tsync_works(const struct tsync_works *works, struct tsync_shared_context *shared_ctx) { - int i; + size_t i; for (i = 0; i < works->size; i++) { + if (WARN_ON_ONCE(!works->works[i]->task)) + continue; + if (!task_work_cancel(works->works[i]->task, &works->works[i]->work)) continue; @@ -447,6 +485,16 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred, shared_ctx.new_cred = new_cred; shared_ctx.set_no_new_privs = task_no_new_privs(current); + /* + * Serialize concurrent TSYNC operations to prevent deadlocks when + * multiple threads call landlock_restrict_self() simultaneously. + * If the lock is already held, we gracefully yield by restarting the + * syscall. This allows the current thread to process pending + * task_works before retrying. + */ + if (!down_write_trylock(¤t->signal->exec_update_lock)) + return restart_syscall(); + /* * We schedule a pseudo-signal task_work for each of the calling task's * sibling threads. In the task work, each thread: @@ -527,24 +575,30 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred, -ERESTARTNOINTR); /* - * Cancel task works for tasks that did not start running yet, - * and decrement all_prepared and num_unfinished accordingly. + * Opportunistic improvement: try to cancel task + * works for tasks that did not start running + * yet. We do not have a guarantee that it + * cancels any of the enqueued task works + * because task_work_run() might already have + * dequeued them. */ cancel_tsync_works(&works, &shared_ctx); /* - * The remaining task works have started running, so waiting for - * their completion will finish. + * Break the loop with error. The cleanup code + * after the loop unblocks the remaining + * task_works. */ - wait_for_completion(&shared_ctx.all_prepared); + break; } } } while (found_more_threads && !atomic_read(&shared_ctx.preparation_error)); /* - * We now have all sibling threads blocking and in "prepared" state in the - * task work. Ask all threads to commit. + * We now have either (a) all sibling threads blocking and in "prepared" + * state in the task work, or (b) the preparation error is set. Ask all + * threads to commit (or abort). */ complete_all(&shared_ctx.ready_to_commit); @@ -556,6 +610,6 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred, wait_for_completion(&shared_ctx.all_finished); tsync_works_release(&works); - + up_write(¤t->signal->exec_update_lock); return atomic_read(&shared_ctx.preparation_error); } diff --git a/security/security.c b/security/security.c index 67af9228c4e941..a26c1474e2e499 100644 --- a/security/security.c +++ b/security/security.c @@ -61,6 +61,7 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX + 1] = { [LOCKDOWN_BPF_WRITE_USER] = "use of bpf to write user RAM", [LOCKDOWN_DBG_WRITE_KERNEL] = "use of kgdb/kdb to write kernel RAM", [LOCKDOWN_RTAS_ERROR_INJECTION] = "RTAS error injection", + [LOCKDOWN_XEN_USER_ACTIONS] = "Xen guest user action", [LOCKDOWN_INTEGRITY_MAX] = "integrity", [LOCKDOWN_KCORE] = "/proc/kcore access", [LOCKDOWN_KPROBES] = "use of kprobes", diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c index e9964f0e010aee..140907a41a70df 100644 --- a/sound/soc/samsung/i2s.c +++ b/sound/soc/samsung/i2s.c @@ -1360,10 +1360,10 @@ static int i2s_create_secondary_device(struct samsung_i2s_priv *priv) if (!pdev_sec) return -ENOMEM; - pdev_sec->driver_override = kstrdup("samsung-i2s", GFP_KERNEL); - if (!pdev_sec->driver_override) { + ret = device_set_driver_override(&pdev_sec->dev, "samsung-i2s"); + if (ret) { platform_device_put(pdev_sec); - return -ENOMEM; + return ret; } ret = platform_device_add(pdev_sec); diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index da5275d8eda63e..6673601246b382 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -740,7 +740,10 @@ #define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT) #define MSR_AMD64_SNP_SECURE_AVIC_BIT 18 #define MSR_AMD64_SNP_SECURE_AVIC BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT) -#define MSR_AMD64_SNP_RESV_BIT 19 +#define MSR_AMD64_SNP_RESERVED_BITS19_22 GENMASK_ULL(22, 19) +#define MSR_AMD64_SNP_IBPB_ON_ENTRY_BIT 23 +#define MSR_AMD64_SNP_IBPB_ON_ENTRY BIT_ULL(MSR_AMD64_SNP_IBPB_ON_ENTRY_BIT) +#define MSR_AMD64_SNP_RESV_BIT 24 #define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) #define MSR_AMD64_SAVIC_CONTROL 0xc0010138 #define MSR_AMD64_SAVIC_EN_BIT 0 diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 846a63215ce14b..0d4538fa6c31ab 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -476,6 +476,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7) #define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8) #define KVM_X86_QUIRK_IGNORE_GUEST_PAT (1 << 9) +#define KVM_X86_QUIRK_VMCS12_ALLOW_FREEZE_IN_SMM (1 << 10) #define KVM_STATE_NESTED_FORMAT_VMX 0 #define KVM_STATE_NESTED_FORMAT_SVM 1 diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 55d59ed507d541..643f707b8f1da1 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -162,8 +162,11 @@ static int load_xbc_file(const char *path, char **buf) if (fd < 0) return -errno; ret = fstat(fd, &stat); - if (ret < 0) - return -errno; + if (ret < 0) { + ret = -errno; + close(fd); + return ret; + } ret = load_xbc_fd(fd, buf, stat.st_size); diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h index ab2aa97bd8ce44..406923bd4846c3 100644 --- a/tools/include/linux/build_bug.h +++ b/tools/include/linux/build_bug.h @@ -32,7 +32,8 @@ /** * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied * error message. - * @condition: the condition which the compiler should know is false. + * @cond: the condition which the compiler should know is false. + * @msg: build-time error message * * See BUILD_BUG_ON for description. */ @@ -60,6 +61,7 @@ /** * static_assert - check integer constant expression at build time + * @expr: expression to be checked * * static_assert() is a wrapper for the C11 _Static_assert, with a * little macro magic to make the message optional (defaulting to the diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 65500f5db37992..80364d4dbebb0c 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -14,6 +14,10 @@ #include #include +#ifdef __KERNEL__ +#include +#endif + #define KVM_API_VERSION 12 /* @@ -1601,7 +1605,11 @@ struct kvm_stats_desc { __u16 size; __u32 offset; __u32 bucket_size; +#ifdef __KERNEL__ + char name[KVM_STATS_NAME_SIZE]; +#else char name[]; +#endif }; #define KVM_GET_STATS_FD _IO(KVMIO, 0xce) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 91b3ff4803cf27..b6765e8765074d 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2184,12 +2184,11 @@ static void mark_func_jump_tables(struct objtool_file *file, last = insn; /* - * Store back-pointers for unconditional forward jumps such + * Store back-pointers for forward jumps such * that find_jump_table() can back-track using those and * avoid some potentially confusing code. */ - if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest && - insn->offset > last->offset && + if (insn->jump_dest && insn->jump_dest->offset > insn->offset && !insn->jump_dest->first_jump_src) { diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 3da90686350d71..2ffe3ebfbe37c1 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -1189,7 +1188,7 @@ struct elf *elf_open_read(const char *name, int flags) struct elf *elf_create_file(GElf_Ehdr *ehdr, const char *name) { struct section *null, *symtab, *strtab, *shstrtab; - char *dir, *base, *tmp_name; + char *tmp_name; struct symbol *sym; struct elf *elf; @@ -1203,29 +1202,13 @@ struct elf *elf_create_file(GElf_Ehdr *ehdr, const char *name) INIT_LIST_HEAD(&elf->sections); - dir = strdup(name); - if (!dir) { - ERROR_GLIBC("strdup"); - return NULL; - } - - dir = dirname(dir); - - base = strdup(name); - if (!base) { - ERROR_GLIBC("strdup"); - return NULL; - } - - base = basename(base); - - tmp_name = malloc(256); + tmp_name = malloc(strlen(name) + 8); if (!tmp_name) { ERROR_GLIBC("malloc"); return NULL; } - snprintf(tmp_name, 256, "%s/%s.XXXXXX", dir, base); + sprintf(tmp_name, "%s.XXXXXX", name); elf->fd = mkstemp(tmp_name); if (elf->fd == -1) { diff --git a/tools/objtool/klp-diff.c b/tools/objtool/klp-diff.c index a3198a63c2f0d7..c2c4e4968bc264 100644 --- a/tools/objtool/klp-diff.c +++ b/tools/objtool/klp-diff.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -560,7 +561,7 @@ static struct symbol *__clone_symbol(struct elf *elf, struct symbol *patched_sym } if (!is_sec_sym(patched_sym)) - offset = sec_size(out_sec); + offset = ALIGN(sec_size(out_sec), out_sec->sh.sh_addralign); if (patched_sym->len || is_sec_sym(patched_sym)) { void *data = NULL; diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index da3aca87457fdc..31826621eebdb0 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -187,7 +187,6 @@ done check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include " -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))" -I"^#include "' check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include " -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"' check arch/x86/include/asm/amd/ibs.h '-I "^#include .*/msr-index.h"' -check arch/arm64/include/asm/cputype.h '-I "^#include [<\"]\(asm/\)*sysreg.h"' check include/linux/unaligned.h '-I "^#include " -I "^#include " -I "^#pragma GCC diagnostic"' check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"' check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"' diff --git a/tools/perf/util/kvm-stat-arch/kvm-stat-x86.c b/tools/perf/util/kvm-stat-arch/kvm-stat-x86.c index 43275d25b6cbcc..0f626db3a4392d 100644 --- a/tools/perf/util/kvm-stat-arch/kvm-stat-x86.c +++ b/tools/perf/util/kvm-stat-arch/kvm-stat-x86.c @@ -4,9 +4,9 @@ #include "../kvm-stat.h" #include "../evsel.h" #include "../env.h" -#include "../../arch/x86/include/uapi/asm/svm.h" -#include "../../arch/x86/include/uapi/asm/vmx.h" -#include "../../arch/x86/include/uapi/asm/kvm.h" +#include "../../../arch/x86/include/uapi/asm/svm.h" +#include "../../../arch/x86/include/uapi/asm/vmx.h" +#include "../../../arch/x86/include/uapi/asm/kvm.h" #include define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS); diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 46bf4dfeebc8c1..7e39d469111b29 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -1605,9 +1605,9 @@ bool metricgroup__has_metric_or_groups(const char *pmu, const char *metric_or_gr .metric_or_groups = metric_or_groups, }; - return pmu_metrics_table__for_each_metric(table, - metricgroup__has_metric_or_groups_callback, - &data) + return metricgroup__for_each_metric(table, + metricgroup__has_metric_or_groups_callback, + &data) ? true : false; } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index b9efb296bba510..7b4629625b1e6f 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1117,7 +1117,7 @@ static int config_attr(struct perf_event_attr *attr, static struct evsel_config_term *add_config_term(enum evsel_term_type type, struct list_head *head_terms, - bool weak) + bool weak, char *str, u64 val) { struct evsel_config_term *t; @@ -1128,8 +1128,62 @@ static struct evsel_config_term *add_config_term(enum evsel_term_type type, INIT_LIST_HEAD(&t->list); t->type = type; t->weak = weak; - list_add_tail(&t->list, head_terms); + switch (type) { + case EVSEL__CONFIG_TERM_PERIOD: + case EVSEL__CONFIG_TERM_FREQ: + case EVSEL__CONFIG_TERM_STACK_USER: + case EVSEL__CONFIG_TERM_USR_CHG_CONFIG: + case EVSEL__CONFIG_TERM_USR_CHG_CONFIG1: + case EVSEL__CONFIG_TERM_USR_CHG_CONFIG2: + case EVSEL__CONFIG_TERM_USR_CHG_CONFIG3: + case EVSEL__CONFIG_TERM_USR_CHG_CONFIG4: + t->val.val = val; + break; + case EVSEL__CONFIG_TERM_TIME: + t->val.time = val; + break; + case EVSEL__CONFIG_TERM_INHERIT: + t->val.inherit = val; + break; + case EVSEL__CONFIG_TERM_OVERWRITE: + t->val.overwrite = val; + break; + case EVSEL__CONFIG_TERM_MAX_STACK: + t->val.max_stack = val; + break; + case EVSEL__CONFIG_TERM_MAX_EVENTS: + t->val.max_events = val; + break; + case EVSEL__CONFIG_TERM_PERCORE: + t->val.percore = val; + break; + case EVSEL__CONFIG_TERM_AUX_OUTPUT: + t->val.aux_output = val; + break; + case EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE: + t->val.aux_sample_size = val; + break; + case EVSEL__CONFIG_TERM_CALLGRAPH: + case EVSEL__CONFIG_TERM_BRANCH: + case EVSEL__CONFIG_TERM_DRV_CFG: + case EVSEL__CONFIG_TERM_RATIO_TO_PREV: + case EVSEL__CONFIG_TERM_AUX_ACTION: + if (str) { + t->val.str = strdup(str); + if (!t->val.str) { + zfree(&t); + return NULL; + } + t->free_str = true; + } + break; + default: + t->val.val = val; + break; + } + + list_add_tail(&t->list, head_terms); return t; } @@ -1142,7 +1196,7 @@ static int get_config_terms(const struct parse_events_terms *head_config, struct evsel_config_term *new_term; enum evsel_term_type new_type; bool str_type = false; - u64 val; + u64 val = 0; switch (term->type_term) { case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: @@ -1234,20 +1288,15 @@ static int get_config_terms(const struct parse_events_terms *head_config, continue; } - new_term = add_config_term(new_type, head_terms, term->weak); + /* + * Note: Members evsel_config_term::val and + * parse_events_term::val are unions and endianness needs + * to be taken into account when changing such union members. + */ + new_term = add_config_term(new_type, head_terms, term->weak, + str_type ? term->val.str : NULL, val); if (!new_term) return -ENOMEM; - - if (str_type) { - new_term->val.str = strdup(term->val.str); - if (!new_term->val.str) { - zfree(&new_term); - return -ENOMEM; - } - new_term->free_str = true; - } else { - new_term->val.val = val; - } } return 0; } @@ -1277,10 +1326,9 @@ static int add_cfg_chg(const struct perf_pmu *pmu, if (bits) { struct evsel_config_term *new_term; - new_term = add_config_term(new_term_type, head_terms, false); + new_term = add_config_term(new_term_type, head_terms, false, NULL, bits); if (!new_term) return -ENOMEM; - new_term->val.cfg_chg = bits; } return 0; diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index d5acbeba03838f..65485967c96812 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -409,7 +409,7 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ CC="$(HOSTCC)" LD="$(HOSTLD)" AR="$(HOSTAR)" \ LIBBPF_INCLUDE=$(HOST_INCLUDE_DIR) \ EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' \ - HOSTPKG_CONFIG=$(PKG_CONFIG) \ + HOSTPKG_CONFIG='$(PKG_CONFIG)' \ OUTPUT=$(HOST_BUILD_DIR)/resolve_btfids/ BPFOBJ=$(HOST_BPFOBJ) # Get Clang's default includes on this system, as opposed to those seen by diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c index 8a0fdff899271d..9ea1353488d736 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_fail.c +++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c @@ -8,6 +8,11 @@ #include "bpf_experimental.h" extern void bpf_rcu_read_lock(void) __ksym; +extern void bpf_rcu_read_unlock(void) __ksym; +extern void bpf_preempt_disable(void) __ksym; +extern void bpf_preempt_enable(void) __ksym; +extern void bpf_local_irq_save(unsigned long *) __ksym; +extern void bpf_local_irq_restore(unsigned long *) __ksym; #define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8))) @@ -131,7 +136,7 @@ int reject_subprog_with_lock(void *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_rcu_read_lock-ed region") +__failure __msg("bpf_throw cannot be used inside bpf_rcu_read_lock-ed region") int reject_with_rcu_read_lock(void *ctx) { bpf_rcu_read_lock(); @@ -147,11 +152,13 @@ __noinline static int throwing_subprog(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_rcu_read_lock-ed region") +__failure __msg("bpf_throw cannot be used inside bpf_rcu_read_lock-ed region") int reject_subprog_with_rcu_read_lock(void *ctx) { bpf_rcu_read_lock(); - return throwing_subprog(ctx); + throwing_subprog(ctx); + bpf_rcu_read_unlock(); + return 0; } static bool rbless(struct bpf_rb_node *n1, const struct bpf_rb_node *n2) @@ -346,4 +353,47 @@ int reject_exception_throw_cb_diff(struct __sk_buff *ctx) return 0; } +__noinline static int always_throws(void) +{ + bpf_throw(0); + return 0; +} + +__noinline static int rcu_lock_then_throw(void) +{ + bpf_rcu_read_lock(); + bpf_throw(0); + return 0; +} + +SEC("?tc") +__failure __msg("bpf_throw cannot be used inside bpf_rcu_read_lock-ed region") +int reject_subprog_rcu_lock_throw(void *ctx) +{ + rcu_lock_then_throw(); + return 0; +} + +SEC("?tc") +__failure __msg("bpf_throw cannot be used inside bpf_preempt_disable-ed region") +int reject_subprog_throw_preempt_lock(void *ctx) +{ + bpf_preempt_disable(); + always_throws(); + bpf_preempt_enable(); + return 0; +} + +SEC("?tc") +__failure __msg("bpf_throw cannot be used inside bpf_local_irq_save-ed region") +int reject_subprog_throw_irq_lock(void *ctx) +{ + unsigned long flags; + + bpf_local_irq_save(&flags); + always_throws(); + bpf_local_irq_restore(&flags); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index e526315c718ad7..79a328276805de 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -2037,4 +2037,98 @@ __naked void signed_unsigned_intersection32_case2(void *ctx) : __clobber_all); } +SEC("socket") +__description("maybe_fork_scalars: OR with constant rejects OOB") +__failure __msg("invalid access to map value") +__naked void or_scalar_fork_rejects_oob(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r9 = r0; \ + r6 = *(u64*)(r9 + 0); \ + r6 s>>= 63; \ + r6 |= 8; \ + /* r6 is -1 (current) or 8 (pushed) */ \ + if r6 s< 0 goto l0_%=; \ + /* pushed path: r6 = 8, OOB for value_size=8 */ \ + r9 += r6; \ + r0 = *(u8*)(r9 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("maybe_fork_scalars: AND with constant still works") +__success __retval(0) +__naked void and_scalar_fork_still_works(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r9 = r0; \ + r6 = *(u64*)(r9 + 0); \ + r6 s>>= 63; \ + r6 &= 4; \ + /* \ + * r6 is 0 (pushed, 0&4==0) or 4 (current) \ + * both within value_size=8 \ + */ \ + if r6 s< 0 goto l0_%=; \ + r9 += r6; \ + r0 = *(u8*)(r9 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("maybe_fork_scalars: OR with constant allows in-bounds") +__success __retval(0) +__naked void or_scalar_fork_allows_inbounds(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r9 = r0; \ + r6 = *(u64*)(r9 + 0); \ + r6 s>>= 63; \ + r6 |= 4; \ + /* \ + * r6 is -1 (current) or 4 (pushed) \ + * pushed path: r6 = 4, within value_size=8 \ + */ \ + if r6 s< 0 goto l0_%=; \ + r9 += r6; \ + r0 = *(u8*)(r9 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_bswap.c b/tools/testing/selftests/bpf/progs/verifier_bswap.c index 4b779deee7672b..cffaf36192bc57 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bswap.c +++ b/tools/testing/selftests/bpf/progs/verifier_bswap.c @@ -91,6 +91,28 @@ BSWAP_RANGE_TEST(le32_range, "le32", 0x3f00, 0x3f0000) BSWAP_RANGE_TEST(le64_range, "le64", 0x3f00, 0x3f000000000000) #endif +SEC("socket") +__description("BSWAP, reset reg id") +__failure __msg("math between fp pointer and register with unbounded min value is not allowed") +__naked void bswap_reset_reg_id(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + r1 = r0; \ + r0 = be16 r0; \ + if r0 != 1 goto l0_%=; \ + r2 = r10; \ + r2 += -512; \ + r2 += r1; \ + *(u8 *)(r2 + 0) = 0; \ +l0_%=: \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + #else SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c b/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c index 7bf7dbfd237daa..f4f8a055af8a85 100644 --- a/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c +++ b/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c @@ -348,6 +348,114 @@ l0_%=: \ : __clobber_all); } +/* + * Test that sync_linked_regs() checks reg->id (the linked target register) + * for BPF_ADD_CONST32 rather than known_reg->id (the branch register). + */ +SEC("socket") +__success +__naked void scalars_alu32_zext_linked_reg(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + w6 = w0; /* r6 in [0, 0xFFFFFFFF] */ \ + r7 = r6; /* linked: same id as r6 */ \ + w7 += 1; /* alu32: r7.id |= BPF_ADD_CONST32 */ \ + r8 = 0xFFFFffff ll; \ + if r6 < r8 goto l0_%=; \ + /* r6 in [0xFFFFFFFF, 0xFFFFFFFF] */ \ + /* sync_linked_regs: known_reg=r6, reg=r7 */ \ + /* CPU: w7 = (u32)(0xFFFFFFFF + 1) = 0, zext -> r7 = 0 */ \ + /* With fix: r7 64-bit = [0, 0] (zext applied) */ \ + /* Without fix: r7 64-bit = [0x100000000] (no zext) */ \ + r7 >>= 32; \ + if r7 == 0 goto l0_%=; \ + r0 /= 0; /* unreachable with fix */ \ +l0_%=: \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +/* + * Test that sync_linked_regs() skips propagation when one register used + * alu32 (BPF_ADD_CONST32) and the other used alu64 (BPF_ADD_CONST64). + * The delta relationship doesn't hold across different ALU widths. + */ +SEC("socket") +__failure __msg("div by zero") +__naked void scalars_alu32_alu64_cross_type(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + w6 = w0; /* r6 in [0, 0xFFFFFFFF] */ \ + r7 = r6; /* linked: same id as r6 */ \ + w7 += 1; /* alu32: BPF_ADD_CONST32, delta = 1 */ \ + r8 = r6; /* linked: same id as r6 */ \ + r8 += 2; /* alu64: BPF_ADD_CONST64, delta = 2 */ \ + r9 = 0xFFFFffff ll; \ + if r7 < r9 goto l0_%=; \ + /* r7 = 0xFFFFFFFF */ \ + /* sync: known_reg=r7 (ADD_CONST32), reg=r8 (ADD_CONST64) */ \ + /* Without fix: r8 = zext(0xFFFFFFFF + 1) = 0 */ \ + /* With fix: r8 stays [2, 0x100000001] (r8 >= 2) */ \ + if r8 > 0 goto l1_%=; \ + goto l0_%=; \ +l1_%=: \ + r0 /= 0; /* div by zero */ \ +l0_%=: \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +/* + * Test that regsafe() prevents pruning when two paths reach the same program + * point with linked registers carrying different ADD_CONST flags (one + * BPF_ADD_CONST32 from alu32, another BPF_ADD_CONST64 from alu64). + */ +SEC("socket") +__failure __msg("div by zero") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void scalars_alu32_alu64_regsafe_pruning(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + w6 = w0; /* r6 in [0, 0xFFFFFFFF] */ \ + r7 = r6; /* linked: same id as r6 */ \ + /* Get another random value for the path branch */ \ + call %[bpf_get_prandom_u32]; \ + if r0 > 0 goto l_pathb_%=; \ + /* Path A: alu32 */ \ + w7 += 1; /* BPF_ADD_CONST32, delta = 1 */\ + goto l_merge_%=; \ +l_pathb_%=: \ + /* Path B: alu64 */ \ + r7 += 1; /* BPF_ADD_CONST64, delta = 1 */\ +l_merge_%=: \ + /* Merge point: regsafe() compares path B against cached path A. */ \ + /* Narrow r6 to trigger sync_linked_regs for r7 */ \ + r9 = 0xFFFFffff ll; \ + if r6 < r9 goto l0_%=; \ + /* r6 = 0xFFFFFFFF */ \ + /* sync: r7 = 0xFFFFFFFF + 1 = 0x100000000 */ \ + /* Path A: zext -> r7 = 0 */ \ + /* Path B: no zext -> r7 = 0x100000000 */ \ + r7 >>= 32; \ + if r7 == 0 goto l0_%=; \ + r0 /= 0; /* div by zero on path B */ \ +l0_%=: \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + SEC("socket") __success void alu32_negative_offset(void) diff --git a/tools/testing/selftests/bpf/progs/verifier_sdiv.c b/tools/testing/selftests/bpf/progs/verifier_sdiv.c index 148d2299e5b4c8..fd59d57e8e3763 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sdiv.c +++ b/tools/testing/selftests/bpf/progs/verifier_sdiv.c @@ -1209,6 +1209,64 @@ __naked void smod32_ri_divisor_neg_1(void) : __clobber_all); } +SEC("socket") +__description("SDIV32, INT_MIN divided by 2, imm") +__success __success_unpriv __retval(-1073741824) +__naked void sdiv32_int_min_div_2_imm(void) +{ + asm volatile (" \ + w0 = %[int_min]; \ + w0 s/= 2; \ + exit; \ +" : + : __imm_const(int_min, INT_MIN) + : __clobber_all); +} + +SEC("socket") +__description("SDIV32, INT_MIN divided by 2, reg") +__success __success_unpriv __retval(-1073741824) +__naked void sdiv32_int_min_div_2_reg(void) +{ + asm volatile (" \ + w0 = %[int_min]; \ + w1 = 2; \ + w0 s/= w1; \ + exit; \ +" : + : __imm_const(int_min, INT_MIN) + : __clobber_all); +} + +SEC("socket") +__description("SMOD32, INT_MIN modulo 2, imm") +__success __success_unpriv __retval(0) +__naked void smod32_int_min_mod_2_imm(void) +{ + asm volatile (" \ + w0 = %[int_min]; \ + w0 s%%= 2; \ + exit; \ +" : + : __imm_const(int_min, INT_MIN) + : __clobber_all); +} + +SEC("socket") +__description("SMOD32, INT_MIN modulo -2, imm") +__success __success_unpriv __retval(0) +__naked void smod32_int_min_mod_neg2_imm(void) +{ + asm volatile (" \ + w0 = %[int_min]; \ + w0 s%%= -2; \ + exit; \ +" : + : __imm_const(int_min, INT_MIN) + : __clobber_all); +} + + #else SEC("socket") diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile index 45a3e7ad3dcb8f..02d6f51d5a0658 100644 --- a/tools/testing/selftests/drivers/net/team/Makefile +++ b/tools/testing/selftests/drivers/net/team/Makefile @@ -3,6 +3,7 @@ TEST_PROGS := \ dev_addr_lists.sh \ + non_ether_header_ops.sh \ options.sh \ propagation.sh \ refleak.sh \ diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config index 558e1d0cf565bd..5d36a22ef0803e 100644 --- a/tools/testing/selftests/drivers/net/team/config +++ b/tools/testing/selftests/drivers/net/team/config @@ -1,7 +1,9 @@ +CONFIG_BONDING=y CONFIG_DUMMY=y CONFIG_IPV6=y CONFIG_MACVLAN=y CONFIG_NETDEVSIM=m +CONFIG_NET_IPGRE=y CONFIG_NET_TEAM=y CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=y CONFIG_NET_TEAM_MODE_LOADBALANCE=y diff --git a/tools/testing/selftests/drivers/net/team/non_ether_header_ops.sh b/tools/testing/selftests/drivers/net/team/non_ether_header_ops.sh new file mode 100755 index 00000000000000..948a43576bdc9d --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/non_ether_header_ops.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# shellcheck disable=SC2154 +# +# Reproduce the non-Ethernet header_ops confusion scenario with: +# g0 (gre) -> b0 (bond) -> t0 (team) +# +# Before the fix, direct header_ops inheritance in this stack could call +# callbacks with the wrong net_device context and crash. + +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/lib.sh + +trap cleanup_all_ns EXIT + +setup_ns ns1 + +ip -n "$ns1" link add d0 type dummy +ip -n "$ns1" addr add 10.10.10.1/24 dev d0 +ip -n "$ns1" link set d0 up + +ip -n "$ns1" link add g0 type gre local 10.10.10.1 +ip -n "$ns1" link add b0 type bond mode active-backup +ip -n "$ns1" link add t0 type team + +ip -n "$ns1" link set g0 master b0 +ip -n "$ns1" link set b0 master t0 + +ip -n "$ns1" link set g0 up +ip -n "$ns1" link set b0 up +ip -n "$ns1" link set t0 up + +# IPv6 address assignment triggers MLD join reports that call +# dev_hard_header() on t0, exercising the inherited header_ops path. +ip -n "$ns1" -6 addr add 2001:db8:1::1/64 dev t0 nodad +for i in $(seq 1 20); do + ip netns exec "$ns1" ping -6 -I t0 ff02::1 -c1 -W1 &>/dev/null || true +done + +echo "PASS: non-Ethernet header_ops stacking did not crash" +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h index 80ab609058654d..cdca912f3afd8e 100644 --- a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h +++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h @@ -6,8 +6,10 @@ #define __HID_BPF_HELPERS_H /* "undefine" structs and enums in vmlinux.h, because we "override" them below */ +#define bpf_wq bpf_wq___not_used #define hid_bpf_ctx hid_bpf_ctx___not_used #define hid_bpf_ops hid_bpf_ops___not_used +#define hid_device hid_device___not_used #define hid_report_type hid_report_type___not_used #define hid_class_request hid_class_request___not_used #define hid_bpf_attach_flags hid_bpf_attach_flags___not_used @@ -27,8 +29,10 @@ #include "vmlinux.h" +#undef bpf_wq #undef hid_bpf_ctx #undef hid_bpf_ops +#undef hid_device #undef hid_report_type #undef hid_class_request #undef hid_bpf_attach_flags @@ -55,6 +59,14 @@ enum hid_report_type { HID_REPORT_TYPES, }; +struct hid_device { + unsigned int id; +} __attribute__((preserve_access_index)); + +struct bpf_wq { + __u64 __opaque[2]; +}; + struct hid_bpf_ctx { struct hid_device *hid; __u32 allocated_size; diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index dc68371f76a332..6471fa214a9f95 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -206,6 +206,7 @@ TEST_GEN_PROGS_s390 += s390/ucontrol_test TEST_GEN_PROGS_s390 += s390/user_operexec TEST_GEN_PROGS_s390 += s390/keyop TEST_GEN_PROGS_s390 += rseq_test +TEST_GEN_PROGS_s390 += s390/irq_routing TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test diff --git a/tools/testing/selftests/kvm/s390/irq_routing.c b/tools/testing/selftests/kvm/s390/irq_routing.c new file mode 100644 index 00000000000000..7819a0af19a8eb --- /dev/null +++ b/tools/testing/selftests/kvm/s390/irq_routing.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * IRQ routing offset tests. + * + * Copyright IBM Corp. 2026 + * + * Authors: + * Janosch Frank + */ +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "kselftest.h" +#include "ucall_common.h" + +extern char guest_code[]; +asm("guest_code:\n" + "diag %r0,%r0,0\n" + "j .\n"); + +static void test(void) +{ + struct kvm_irq_routing *routing; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + vm_paddr_t mem; + int ret; + + struct kvm_irq_routing_entry ue = { + .type = KVM_IRQ_ROUTING_S390_ADAPTER, + .gsi = 1, + }; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + mem = vm_phy_pages_alloc(vm, 2, 4096 * 42, 0); + + routing = kvm_gsi_routing_create(); + routing->nr = 1; + routing->entries[0] = ue; + routing->entries[0].u.adapter.summary_addr = (uintptr_t)mem; + routing->entries[0].u.adapter.ind_addr = (uintptr_t)mem; + + routing->entries[0].u.adapter.summary_offset = 4096 * 8; + ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing); + ksft_test_result(ret == -1 && errno == EINVAL, "summary offset outside of page\n"); + + routing->entries[0].u.adapter.summary_offset -= 4; + ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing); + ksft_test_result(ret == 0, "summary offset inside of page\n"); + + routing->entries[0].u.adapter.ind_offset = 4096 * 8; + ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing); + ksft_test_result(ret == -1 && errno == EINVAL, "ind offset outside of page\n"); + + routing->entries[0].u.adapter.ind_offset -= 4; + ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing); + ksft_test_result(ret == 0, "ind offset inside of page\n"); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_has_cap(KVM_CAP_IRQ_ROUTING)); + + ksft_print_header(); + ksft_set_plan(4); + test(); + + ksft_finished(); /* Print results and exit() accordingly */ +} diff --git a/tools/testing/selftests/landlock/tsync_test.c b/tools/testing/selftests/landlock/tsync_test.c index 37ef0d2270db9d..2b9ad4f154f481 100644 --- a/tools/testing/selftests/landlock/tsync_test.c +++ b/tools/testing/selftests/landlock/tsync_test.c @@ -6,9 +6,10 @@ */ #define _GNU_SOURCE +#include #include +#include #include -#include #include "common.h" @@ -158,4 +159,92 @@ TEST(competing_enablement) EXPECT_EQ(0, close(ruleset_fd)); } +static void signal_nop_handler(int sig) +{ +} + +struct signaler_data { + pthread_t target; + volatile bool stop; +}; + +static void *signaler_thread(void *data) +{ + struct signaler_data *sd = data; + + while (!sd->stop) + pthread_kill(sd->target, SIGUSR1); + + return NULL; +} + +/* + * Number of idle sibling threads. This must be large enough that even on + * machines with many cores, the sibling threads cannot all complete their + * credential preparation in a single parallel wave, otherwise the signaler + * thread has no window to interrupt wait_for_completion_interruptible(). + * 200 threads on a 64-core machine yields ~3 serialized waves, giving the + * tight signal loop enough time to land an interruption. + */ +#define NUM_IDLE_THREADS 200 + +/* + * Exercises the tsync interruption and cancellation paths in tsync.c. + * + * When a signal interrupts the calling thread while it waits for sibling + * threads to finish their credential preparation + * (wait_for_completion_interruptible in landlock_restrict_sibling_threads), + * the kernel sets ERESTARTNOINTR, cancels queued task works that have not + * started yet (cancel_tsync_works), then waits for the remaining works to + * finish. On the error return, syscalls.c aborts the prepared credentials. + * The kernel automatically restarts the syscall, so userspace sees success. + */ +TEST(tsync_interrupt) +{ + size_t i; + pthread_t threads[NUM_IDLE_THREADS]; + pthread_t signaler; + struct signaler_data sd; + struct sigaction sa = {}; + const int ruleset_fd = create_ruleset(_metadata); + + disable_caps(_metadata); + + /* Install a no-op SIGUSR1 handler so the signal does not kill us. */ + sa.sa_handler = signal_nop_handler; + sigemptyset(&sa.sa_mask); + ASSERT_EQ(0, sigaction(SIGUSR1, &sa, NULL)); + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + + for (i = 0; i < NUM_IDLE_THREADS; i++) + ASSERT_EQ(0, pthread_create(&threads[i], NULL, idle, NULL)); + + /* + * Start a signaler thread that continuously sends SIGUSR1 to the + * calling thread. This maximizes the chance of interrupting + * wait_for_completion_interruptible() in the kernel's tsync path. + */ + sd.target = pthread_self(); + sd.stop = false; + ASSERT_EQ(0, pthread_create(&signaler, NULL, signaler_thread, &sd)); + + /* + * The syscall may be interrupted and transparently restarted by the + * kernel (ERESTARTNOINTR). From userspace, it should always succeed. + */ + EXPECT_EQ(0, landlock_restrict_self(ruleset_fd, + LANDLOCK_RESTRICT_SELF_TSYNC)); + + sd.stop = true; + ASSERT_EQ(0, pthread_join(signaler, NULL)); + + for (i = 0; i < NUM_IDLE_THREADS; i++) { + ASSERT_EQ(0, pthread_cancel(threads[i])); + ASSERT_EQ(0, pthread_join(threads[i], NULL)); + } + + EXPECT_EQ(0, close(ruleset_fd)); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index c5694cc4ddd26a..829f72c8ee070b 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -868,6 +868,64 @@ fib6_gc_test() check_rt_num 5 $($IP -6 route list |grep -v expires|grep 2001:20::|wc -l) log_test $ret 0 "ipv6 route garbage collection (replace with permanent)" + # Delete dummy_10 and remove all routes + $IP link del dev dummy_10 + + # rd6 is required for the next test. (ipv6toolkit) + if [ ! -x "$(command -v rd6)" ]; then + echo "SKIP: rd6 not found." + set +e + cleanup &> /dev/null + return + fi + + setup_ns ns2 + $IP link add veth1 type veth peer veth2 netns $ns2 + $IP link set veth1 up + ip -netns $ns2 link set veth2 up + $IP addr add fe80:dead::1/64 dev veth1 + ip -netns $ns2 addr add fe80:dead::2/64 dev veth2 + + # Add NTF_ROUTER neighbour to prevent rt6_age_examine_exception() + # from removing not-yet-expired exceptions. + ip -netns $ns2 link set veth2 address 00:11:22:33:44:55 + $IP neigh add fe80:dead::3 lladdr 00:11:22:33:44:55 dev veth1 router + + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_redirects=1 + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.forwarding=0 + + # Temporary routes + for i in $(seq 1 5); do + # Expire route after $EXPIRE seconds + $IP -6 route add 2001:10::$i \ + via fe80:dead::2 dev veth1 expires $EXPIRE + + ip netns exec $ns2 rd6 -i veth2 \ + -s fe80:dead::2 -d fe80:dead::1 \ + -r 2001:10::$i -t fe80:dead::3 -p ICMP6 + done + + check_rt_num 5 $($IP -6 route list | grep expires | grep 2001:10:: | wc -l) + + # Promote to permanent routes by "prepend" (w/o NLM_F_EXCL and NLM_F_REPLACE) + for i in $(seq 1 5); do + # -EEXIST, but the temporary route becomes the permanent route. + $IP -6 route append 2001:10::$i \ + via fe80:dead::2 dev veth1 2>/dev/null || true + done + + check_rt_num 5 $($IP -6 route list | grep -v expires | grep 2001:10:: | wc -l) + check_rt_num 5 $($IP -6 route list cache | grep 2001:10:: | wc -l) + + # Trigger GC instead of waiting $GC_WAIT_TIME. + # rt6_nh_dump_exceptions() just skips expired exceptions. + $NS_EXEC sysctl -wq net.ipv6.route.flush=1 + check_rt_num 0 $($IP -6 route list cache | grep 2001:10:: | wc -l) + log_test $ret 0 "ipv6 route garbage collection (promote to permanent routes)" + + $IP neigh del fe80:dead::3 lladdr 00:11:22:33:44:55 dev veth1 router + $IP link del veth1 + # ra6 is required for the next test. (ipv6toolkit) if [ ! -x "$(command -v ra6)" ]; then echo "SKIP: ra6 not found." @@ -876,9 +934,6 @@ fib6_gc_test() return fi - # Delete dummy_10 and remove all routes - $IP link del dev dummy_10 - # Create a pair of veth devices to send a RA message from one # device to another. $IP link add veth1 type veth peer name veth2 diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh index 394166f224a4b3..ffdc6ccc6511c2 100755 --- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh @@ -29,7 +29,8 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below -BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate insert_overlap" +BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate + insert_overlap load_flush_load4 load_flush_load8" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" @@ -432,6 +433,30 @@ race_repeat 0 perf_duration 0 " +TYPE_load_flush_load4=" +display reload with flush, 4bit groups +type_spec ipv4_addr . ipv4_addr +chain_spec ip saddr . ip daddr +dst addr4 +proto icmp + +race_repeat 0 + +perf_duration 0 +" + +TYPE_load_flush_load8=" +display reload with flush, 8bit groups +type_spec ipv4_addr . ipv4_addr +chain_spec ip saddr . ip daddr +dst addr4 +proto icmp + +race_repeat 0 + +perf_duration 0 +" + # Set template for all tests, types and rules are filled in depending on test set_template=' flush ruleset @@ -1997,6 +2022,49 @@ test_bug_insert_overlap() return 0 } +test_bug_load_flush_load4() +{ + local i + + setup veth send_"${proto}" set || return ${ksft_skip} + + for i in $(seq 0 255); do + local addelem="add element inet filter test" + local j + + for j in $(seq 0 20); do + echo "$addelem { 10.$j.0.$i . 10.$j.1.$i }" + echo "$addelem { 10.$j.0.$i . 10.$j.2.$i }" + done + done > "$tmp" + + nft -f "$tmp" || return 1 + + ( echo "flush set inet filter test";cat "$tmp") | nft -f - + [ $? -eq 0 ] || return 1 + + return 0 +} + +test_bug_load_flush_load8() +{ + local i + + setup veth send_"${proto}" set || return ${ksft_skip} + + for i in $(seq 1 100); do + echo "add element inet filter test { 10.0.0.$i . 10.0.1.$i }" + echo "add element inet filter test { 10.0.0.$i . 10.0.2.$i }" + done > "$tmp" + + nft -f "$tmp" || return 1 + + ( echo "flush set inet filter test";cat "$tmp") | nft -f - + [ $? -eq 0 ] || return 1 + + return 0 +} + test_reported_issues() { eval test_bug_"${subtest}" }