From bc74a7cbb77eeb2902a04b84d0c66b3dee6d0df6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Thu, 25 Sep 2025 21:04:12 +0200 Subject: [PATCH 01/11] smb: client: fix potential OOB in smb2_dump_detail() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-6795 cve CVE-2023-6610 commit-author Paulo Alcantara commit 567320c46a60a3c39b69aa1df802d753817a3f86 Validate SMB message with ->check_message() before calling ->calc_smb_size(). This fixes CVE-2023-6610. Reported-by: j51569436@gmail.com Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218219 Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara Signed-off-by: Steve French (cherry picked from commit 567320c46a60a3c39b69aa1df802d753817a3f86) Signed-off-by: Marcin Wcisło --- fs/cifs/smb2misc.c | 30 +++++++++++++++--------------- fs/cifs/smb2ops.c | 6 ++++-- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index d73e5672aac49..bbe424bff78fa 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -169,6 +169,21 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server) } mid = le64_to_cpu(shdr->MessageId); + if (check_smb2_hdr(shdr, mid)) + return 1; + + if (shdr->StructureSize != SMB2_HEADER_STRUCTURE_SIZE) { + cifs_dbg(VFS, "Invalid structure size %u\n", + le16_to_cpu(shdr->StructureSize)); + return 1; + } + + command = le16_to_cpu(shdr->Command); + if (command >= NUMBER_OF_SMB2_COMMANDS) { + cifs_dbg(VFS, "Invalid SMB2 command %d\n", command); + return 1; + } + if (len < pdu_size) { if ((len >= hdr_size) && (shdr->Status != 0)) { @@ -189,21 +204,6 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server) return 1; } - if (check_smb2_hdr(shdr, mid)) - return 1; - - if (shdr->StructureSize != SMB2_HEADER_STRUCTURE_SIZE) { - cifs_dbg(VFS, "Invalid structure size %u\n", - le16_to_cpu(shdr->StructureSize)); - return 1; - } - - command = le16_to_cpu(shdr->Command); - if (command >= NUMBER_OF_SMB2_COMMANDS) { - cifs_dbg(VFS, "Invalid SMB2 command %d\n", command); - return 1; - } - if (smb2_rsp_struct_sizes[command] != pdu->StructureSize2) { if (command != SMB2_OPLOCK_BREAK_HE && (shdr->Status == 0 || pdu->StructureSize2 != SMB2_ERROR_STRUCTURE_SIZE2_LE)) { diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 26ed194bfd5b7..2846207ec61c6 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -386,8 +386,10 @@ smb2_dump_detail(void *buf, struct TCP_Server_Info *server) cifs_server_dbg(VFS, "Cmd: %d Err: 0x%x Flags: 0x%x Mid: %llu Pid: %d\n", shdr->Command, shdr->Status, shdr->Flags, shdr->MessageId, shdr->Id.SyncId.ProcessId); - cifs_server_dbg(VFS, "smb buf %p len %u\n", buf, - server->ops->calc_smb_size(buf)); + if (!server->ops->check_message(buf, server->total_read, server)) { + cifs_server_dbg(VFS, "smb buf %p len %u\n", buf, + server->ops->calc_smb_size(buf)); + } #endif } From eac3216c0653790567d27883f8260d28fbca1d7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Thu, 25 Sep 2025 21:28:06 +0200 Subject: [PATCH 02/11] smb: client: fix OOB in smbCalcSize() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-6788 cve CVE-2023-6606 commit-author Paulo Alcantara commit b35858b3786ddbb56e1c35138ba25d6adf8d0bef Validate @smb->WordCount to avoid reading off the end of @smb and thus causing the following KASAN splat: BUG: KASAN: slab-out-of-bounds in smbCalcSize+0x32/0x40 [cifs] Read of size 2
at addr ffff88801c024ec5 by task cifsd/1328 CPU: 1 PID: 1328 Comm: cifsd Not tainted 6.7.0-rc5 #9 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 Call Trace: dump_stack_lvl+0x4a/0x80 print_report+0xcf/0x650 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? __phys_addr+0x46/0x90 kasan_report+0xd8/0x110 ? smbCalcSize+0x32/0x40 [cifs] ? smbCalcSize+0x32/0x40 [cifs] kasan_check_range+0x105/0x1b0 smbCalcSize+0x32/0x40 [cifs] checkSMB+0x162/0x370 [cifs] ? __pfx_checkSMB+0x10/0x10 [cifs] cifs_handle_standard+0xbc/0x2f0 [cifs] ? srso_alias_return_thunk+0x5/0xfbef5 cifs_demultiplex_thread+0xed1/0x1360 [cifs] ? __pfx_cifs_demultiplex_thread+0x10/0x10 [cifs] ? srso_alias_return_thunk+0x5/0xfbef5 ? lockdep_hardirqs_on_prepare+0x136/0x210 ? __pfx_lock_release+0x10/0x10 ? srso_alias_return_thunk+0x5/0xfbef5 ? mark_held_locks+0x1a/0x90 ? lockdep_hardirqs_on_prepare+0x136/0x210 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? __kthread_parkme+0xce/0xf0 ? __pfx_cifs_demultiplex_thread+0x10/0x10 [cifs] kthread+0x18d/0x1d0 ? kthread+0xdb/0x1d0 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x34/0x60 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 This fixes CVE-2023-6606. Reported-by: j51569436@gmail.com Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218218 Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French (cherry picked from commit b35858b3786ddbb56e1c35138ba25d6adf8d0bef) Signed-off-by: Marcin Wcisło --- fs/cifs/misc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 709e4408ac841..c3d489c3f12ee 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -349,6 +349,10 @@ checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server) cifs_dbg(VFS, "Length less than smb header size\n"); } return -EIO; + } else if (total_read < sizeof(*smb) + 2 * smb->WordCount) { + cifs_dbg(VFS, "%s: can't read BCC due to invalid WordCount(%u)\n", + __func__, smb->WordCount); + return -EIO; } /* otherwise, there is enough to get to the BCC */ From 89621d387a9d02ea656f390863c4cab52866e774 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Thu, 25 Sep 2025 22:00:47 +0200 Subject: [PATCH 03/11] RDMA/core: Refactor rdma_bind_addr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-6364 cve CVE-2023-2176 commit-author Patrisious Haddad commit 8d037973d48c026224ab285e6a06985ccac6f7bf Refactor rdma_bind_addr function so that it doesn't require that the cma destination address be changed before calling it. So now it will update the destination address internally only when it is really needed and after passing all the required checks, which in turn results in cleaner and more sensible call and error handling flows for the functions that call it directly or indirectly.
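To make the new ordering concrete, here is a minimal userspace sketch (hypothetical stand-in names, not the kernel API and not part of the upstream commit text) of the validate-first shape that rdma_bind_addr_dst() now has: a failed bind returns before any source or destination state is written.

    #include <string.h>
    #include <sys/socket.h>
    #include <netinet/in.h>

    struct toy_id {
            struct sockaddr_storage src;
            struct sockaddr_storage dst;
            int bound;
    };

    static size_t toy_addr_size(const struct sockaddr *a)
    {
            return a->sa_family == AF_INET6 ? sizeof(struct sockaddr_in6)
                                            : sizeof(struct sockaddr_in);
    }

    /* Mirrors rdma_bind_addr_dst(): every check runs before any write. */
    static int toy_bind_addr_dst(struct toy_id *id, const struct sockaddr *addr,
                                 const struct sockaddr *daddr)
    {
            if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
                    return -1;      /* -EAFNOSUPPORT analogue */
            if (id->bound)
                    return -1;      /* failed cma_comp_exch() analogue */

            /* Only now commit src and dst; an error above leaves id untouched. */
            memcpy(&id->src, addr, toy_addr_size(addr));
            if ((const void *)daddr != (const void *)&id->dst)
                    memcpy(&id->dst, daddr, toy_addr_size(daddr));
            id->dst.ss_family = addr->sa_family;
            id->bound = 1;
            return 0;
    }

    int main(void)
    {
            struct toy_id id = {0};
            struct sockaddr_in src = { .sin_family = AF_INET };
            struct sockaddr_in dst = { .sin_family = AF_INET };

            return toy_bind_addr_dst(&id, (struct sockaddr *)&src,
                                     (struct sockaddr *)&dst);
    }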
Signed-off-by: Patrisious Haddad Reported-by: Wei Chen Reviewed-by: Mark Zhang Link: https://lore.kernel.org/r/3d0e9a2fd62bc10ba02fed1c7c48a48638952320.1672819273.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky (cherry picked from commit 8d037973d48c026224ab285e6a06985ccac6f7bf) Signed-off-by: Marcin Wcisło --- drivers/infiniband/core/cma.c | 253 +++++++++++++++++----------------- 1 file changed, 130 insertions(+), 123 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index ff8821f79feca..e26391bf28d85 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -3493,121 +3493,6 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) return ret; } -static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - const struct sockaddr *dst_addr) -{ - struct sockaddr_storage zero_sock = {}; - - if (src_addr && src_addr->sa_family) - return rdma_bind_addr(id, src_addr); - - /* - * When the src_addr is not specified, automatically supply an any addr - */ - zero_sock.ss_family = dst_addr->sa_family; - if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) { - struct sockaddr_in6 *src_addr6 = - (struct sockaddr_in6 *)&zero_sock; - struct sockaddr_in6 *dst_addr6 = - (struct sockaddr_in6 *)dst_addr; - - src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; - if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - id->route.addr.dev_addr.bound_dev_if = - dst_addr6->sin6_scope_id; - } else if (dst_addr->sa_family == AF_IB) { - ((struct sockaddr_ib *)&zero_sock)->sib_pkey = - ((struct sockaddr_ib *)dst_addr)->sib_pkey; - } - return rdma_bind_addr(id, (struct sockaddr *)&zero_sock); -} - -/* - * If required, resolve the source address for bind and leave the id_priv in - * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior - * calls made by ULP, a previously bound ID will not be re-bound and src_addr is - * ignored. - */ -static int resolve_prepare_src(struct rdma_id_private *id_priv, - struct sockaddr *src_addr, - const struct sockaddr *dst_addr) -{ - int ret; - - memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { - /* For a well behaved ULP state will be RDMA_CM_IDLE */ - ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr); - if (ret) - goto err_dst; - if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, - RDMA_CM_ADDR_QUERY))) { - ret = -EINVAL; - goto err_dst; - } - } - - if (cma_family(id_priv) != dst_addr->sa_family) { - ret = -EINVAL; - goto err_state; - } - return 0; - -err_state: - cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); -err_dst: - memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); - return ret; -} - -int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - const struct sockaddr *dst_addr, unsigned long timeout_ms) -{ - struct rdma_id_private *id_priv = - container_of(id, struct rdma_id_private, id); - int ret; - - ret = resolve_prepare_src(id_priv, src_addr, dst_addr); - if (ret) - return ret; - - if (cma_any_addr(dst_addr)) { - ret = cma_resolve_loopback(id_priv); - } else { - if (dst_addr->sa_family == AF_IB) { - ret = cma_resolve_ib_addr(id_priv); - } else { - /* - * The FSM can return back to RDMA_CM_ADDR_BOUND after - * rdma_resolve_ip() is called, eg through the error - * path in addr_handler(). If this happens the existing - * request must be canceled before issuing a new one. 
- * Since canceling a request is a bit slow and this - * oddball path is rare, keep track once a request has - * been issued. The track turns out to be a permanent - * state since this is the only cancel as it is - * immediately before rdma_resolve_ip(). - */ - if (id_priv->used_resolve_ip) - rdma_addr_cancel(&id->route.addr.dev_addr); - else - id_priv->used_resolve_ip = 1; - ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr, - &id->route.addr.dev_addr, - timeout_ms, addr_handler, - false, id_priv); - } - } - if (ret) - goto err; - - return 0; -err: - cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); - return ret; -} -EXPORT_SYMBOL(rdma_resolve_addr); - int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) { struct rdma_id_private *id_priv; @@ -4010,27 +3895,26 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) } EXPORT_SYMBOL(rdma_listen); -int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) +static int rdma_bind_addr_dst(struct rdma_id_private *id_priv, + struct sockaddr *addr, const struct sockaddr *daddr) { - struct rdma_id_private *id_priv; + struct sockaddr *id_daddr; int ret; - struct sockaddr *daddr; if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && addr->sa_family != AF_IB) return -EAFNOSUPPORT; - id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) return -EINVAL; - ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); + ret = cma_check_linklocal(&id_priv->id.route.addr.dev_addr, addr); if (ret) goto err1; memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); if (!cma_any_addr(addr)) { - ret = cma_translate_addr(addr, &id->route.addr.dev_addr); + ret = cma_translate_addr(addr, &id_priv->id.route.addr.dev_addr); if (ret) goto err1; @@ -4050,8 +3934,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) } #endif } - daddr = cma_dst_addr(id_priv); - daddr->sa_family = addr->sa_family; + id_daddr = cma_dst_addr(id_priv); + if (daddr != id_daddr) + memcpy(id_daddr, daddr, rdma_addr_size(addr)); + id_daddr->sa_family = addr->sa_family; ret = cma_get_port(id_priv); if (ret) @@ -4067,6 +3953,127 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); return ret; } + +static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + const struct sockaddr *dst_addr) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + struct sockaddr_storage zero_sock = {}; + + if (src_addr && src_addr->sa_family) + return rdma_bind_addr_dst(id_priv, src_addr, dst_addr); + + /* + * When the src_addr is not specified, automatically supply an any addr + */ + zero_sock.ss_family = dst_addr->sa_family; + if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) { + struct sockaddr_in6 *src_addr6 = + (struct sockaddr_in6 *)&zero_sock; + struct sockaddr_in6 *dst_addr6 = + (struct sockaddr_in6 *)dst_addr; + + src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; + if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) + id->route.addr.dev_addr.bound_dev_if = + dst_addr6->sin6_scope_id; + } else if (dst_addr->sa_family == AF_IB) { + ((struct sockaddr_ib *)&zero_sock)->sib_pkey = + ((struct sockaddr_ib *)dst_addr)->sib_pkey; + } + return rdma_bind_addr_dst(id_priv, (struct sockaddr *)&zero_sock, dst_addr); +} + +/* + * If required, resolve the source address for bind and leave the id_priv in + * state RDMA_CM_ADDR_BOUND. 
This oddly uses the state to determine the prior + * calls made by ULP, a previously bound ID will not be re-bound and src_addr is + * ignored. + */ +static int resolve_prepare_src(struct rdma_id_private *id_priv, + struct sockaddr *src_addr, + const struct sockaddr *dst_addr) +{ + int ret; + + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { + /* For a well behaved ULP state will be RDMA_CM_IDLE */ + ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr); + if (ret) + return ret; + if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, + RDMA_CM_ADDR_QUERY))) + return -EINVAL; + + } + + if (cma_family(id_priv) != dst_addr->sa_family) { + ret = -EINVAL; + goto err_state; + } + return 0; + +err_state: + cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); + return ret; +} + +int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + const struct sockaddr *dst_addr, unsigned long timeout_ms) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + int ret; + + ret = resolve_prepare_src(id_priv, src_addr, dst_addr); + if (ret) + return ret; + + if (cma_any_addr(dst_addr)) { + ret = cma_resolve_loopback(id_priv); + } else { + if (dst_addr->sa_family == AF_IB) { + ret = cma_resolve_ib_addr(id_priv); + } else { + /* + * The FSM can return back to RDMA_CM_ADDR_BOUND after + * rdma_resolve_ip() is called, eg through the error + * path in addr_handler(). If this happens the existing + * request must be canceled before issuing a new one. + * Since canceling a request is a bit slow and this + * oddball path is rare, keep track once a request has + * been issued. The track turns out to be a permanent + * state since this is the only cancel as it is + * immediately before rdma_resolve_ip(). + */ + if (id_priv->used_resolve_ip) + rdma_addr_cancel(&id->route.addr.dev_addr); + else + id_priv->used_resolve_ip = 1; + ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr, + &id->route.addr.dev_addr, + timeout_ms, addr_handler, + false, id_priv); + } + } + if (ret) + goto err; + + return 0; +err: + cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); + return ret; +} +EXPORT_SYMBOL(rdma_resolve_addr); + +int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + + return rdma_bind_addr_dst(id_priv, addr, cma_dst_addr(id_priv)); +} EXPORT_SYMBOL(rdma_bind_addr); static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) From ab9fc805c2e168f415382f0eb82b19de91e68544 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Thu, 25 Sep 2025 22:02:37 +0200 Subject: [PATCH 04/11] RDMA/core: Update CMA destination address on rdma_resolve_addr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-6364 cve-bf CVE-2023-2176 commit-author Shiraz Saleem commit 0e15863015d97c1ee2cc29d599abcc7fa2dc3e95 8d037973d48c ("RDMA/core: Refactor rdma_bind_addr") introduces a regression on irdma devices on certain tests that use rdma CM, such as cmtime. No connections can be established, and the MAD QP experiences a fatal error on the active side. The cma destination address is not updated with the dst_addr when the ULP on the active side calls rdma_bind_addr followed by rdma_resolve_addr. The id_priv state is 'bound' in resolve_prepare_src and the update is skipped. This leaves the dgid passed into the irdma driver to create an Address Handle (AH) for the MAD QP at 0.
The created AH descriptor as well as the ARP cache entry is invalid, and the HW throws asynchronous events as a result. [ 1207.656888] resolve_prepare_src caller: ucma_resolve_addr+0xff/0x170 [rdma_ucm] daddr=200.0.4.28 id_priv->state=7 [....] [ 1207.680362] ice 0000:07:00.1 rocep7s0f1: caller: irdma_create_ah+0x3e/0x70 [irdma] ah_id=0 arp_idx=0 dest_ip=0.0.0.0 destMAC=00:00:64:ca:b7:52 ipvalid=1 raw=0000:0000:0000:0000:0000:ffff:0000:0000 [ 1207.682077] ice 0000:07:00.1 rocep7s0f1: abnormal ae_id = 0x401 bool qp=1 qp_id = 1, ae_src=5 [ 1207.691657] infiniband rocep7s0f1: Fatal error (1) on MAD QP (1) Fix this by updating the CMA destination address when the ULP calls a resolve address with the CM state already bound. Fixes: 8d037973d48c ("RDMA/core: Refactor rdma_bind_addr") Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230712234133.1343-1-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky (cherry picked from commit 0e15863015d97c1ee2cc29d599abcc7fa2dc3e95) Signed-off-by: Marcin Wcisło --- drivers/infiniband/core/cma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e26391bf28d85..80d7ae9d073e7 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4006,6 +4006,8 @@ static int resolve_prepare_src(struct rdma_id_private *id_priv, RDMA_CM_ADDR_QUERY))) return -EINVAL; + } else { + memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); } if (cma_family(id_priv) != dst_addr->sa_family) { From 7998c532c6f5be4f5826db28839a38cf62aaeec8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Fri, 26 Sep 2025 00:01:27 +0200 Subject: [PATCH 05/11] ipv4: igmp: fix refcnt uaf issue when receiving igmp query packet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-708 cve CVE-2023-6932 commit-author Zhengchao Shao commit e2b706c691905fe78468c361aaabc719d0a496f1 When I perform the following test operations: 1.ip link add br0 type bridge 2.brctl addif br0 eth0 3.ip addr add 239.0.0.1/32 dev eth0 4.ip addr add 239.0.0.1/32 dev br0 5.ip addr add 224.0.0.1/32 dev br0 6.while ((1)) do ifconfig br0 up ifconfig br0 down done 7.send IGMPv2 query packets to port eth0 continuously. For example, ./mausezahn ethX -c 0 "01 00 5e 00 00 01 00 72 19 88 aa 02 08 00 45 00 00 1c 00 01 00 00 01 02 0e 7f c0 a8 0a b7 e0 00 00 01 11 64 ee 9b 00 00 00 00" The preceding tests may trigger the refcnt uaf issue of the mc list. The stack is as follows: refcount_t: addition on 0; use-after-free.
WARNING: CPU: 21 PID: 144 at lib/refcount.c:25 refcount_warn_saturate (lib/refcount.c:25) CPU: 21 PID: 144 Comm: ksoftirqd/21 Kdump: loaded Not tainted 6.7.0-rc1-next-20231117-dirty #80 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:refcount_warn_saturate (lib/refcount.c:25) RSP: 0018:ffffb68f00657910 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff8a00c3bf96c0 RCX: ffff8a07b6160908 RDX: 00000000ffffffd8 RSI: 0000000000000027 RDI: ffff8a07b6160900 RBP: ffff8a00cba36862 R08: 0000000000000000 R09: 00000000ffff7fff R10: ffffb68f006577c0 R11: ffffffffb0fdcdc8 R12: ffff8a00c3bf9680 R13: ffff8a00c3bf96f0 R14: 0000000000000000 R15: ffff8a00d8766e00 FS: 0000000000000000(0000) GS:ffff8a07b6140000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055f10b520b28 CR3: 000000039741a000 CR4: 00000000000006f0 Call Trace: igmp_heard_query (net/ipv4/igmp.c:1068) igmp_rcv (net/ipv4/igmp.c:1132) ip_protocol_deliver_rcu (net/ipv4/ip_input.c:205) ip_local_deliver_finish (net/ipv4/ip_input.c:234) __netif_receive_skb_one_core (net/core/dev.c:5529) netif_receive_skb_internal (net/core/dev.c:5729) netif_receive_skb (net/core/dev.c:5788) br_handle_frame_finish (net/bridge/br_input.c:216) nf_hook_bridge_pre (net/bridge/br_input.c:294) __netif_receive_skb_core (net/core/dev.c:5423) __netif_receive_skb_list_core (net/core/dev.c:5606) __netif_receive_skb_list (net/core/dev.c:5674) netif_receive_skb_list_internal (net/core/dev.c:5764) napi_gro_receive (net/core/gro.c:609) e1000_clean_rx_irq (drivers/net/ethernet/intel/e1000/e1000_main.c:4467) e1000_clean (drivers/net/ethernet/intel/e1000/e1000_main.c:3805) __napi_poll (net/core/dev.c:6533) net_rx_action (net/core/dev.c:6735) __do_softirq (kernel/softirq.c:554) run_ksoftirqd (kernel/softirq.c:913) smpboot_thread_fn (kernel/smpboot.c:164) kthread (kernel/kthread.c:388) ret_from_fork (arch/x86/kernel/process.c:153) ret_from_fork_asm (arch/x86/entry/entry_64.S:250) The root causes are as follows: Thread A Thread B ... netif_receive_skb br_dev_stop ... br_multicast_leave_snoopers ... __ip_mc_dec_group ... __igmp_group_dropped igmp_rcv igmp_stop_timer igmp_heard_query //ref = 1 ip_ma_put igmp_mod_timer refcount_dec_and_test igmp_start_timer //ref = 0 ... refcount_inc //ref increases from 0 When the device receives an IGMPv2 Query message, it starts the timer immediately, regardless of whether the device is running. If the device is down and has left the multicast group, it will cause the mc list refcount uaf issue. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Zhengchao Shao Reviewed-by: Eric Dumazet Reviewed-by: Hangbin Liu Signed-off-by: David S. 
Miller (cherry picked from commit e2b706c691905fe78468c361aaabc719d0a496f1) Signed-off-by: Marcin Wcisło --- net/ipv4/igmp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 4b888c7acedc6..ebc7f203277b4 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -216,8 +216,10 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) int tv = prandom_u32() % max_delay; im->tm_running = 1; - if (!mod_timer(&im->timer, jiffies+tv+2)) - refcount_inc(&im->refcnt); + if (refcount_inc_not_zero(&im->refcnt)) { + if (mod_timer(&im->timer, jiffies + tv + 2)) + ip_ma_put(im); + } } static void igmp_gq_start_timer(struct in_device *in_dev) From 008fd2cc4bdfda540899103e4bc14c09fa378a7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Fri, 26 Sep 2025 00:12:57 +0200 Subject: [PATCH 06/11] perf: Fix perf_event_validate_size() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-700 cve CVE-2023-6931 commit-author Peter Zijlstra commit 382c27f4ed28f803b1f1473ac2d8db0afc795a1b Budimir noted that perf_event_validate_size() only checks the size of the newly added event, even though the sizes of all existing events can also change due to not all events having the same read_format. When we attach the new event, perf_group_attach(), we do re-compute the size for all events. Fixes: a723968c0ed3 ("perf: Fix u16 overflows") Reported-by: Budimir Markovic Signed-off-by: Peter Zijlstra (Intel) (cherry picked from commit 382c27f4ed28f803b1f1473ac2d8db0afc795a1b) Signed-off-by: Marcin Wcisło --- kernel/events/core.c | 61 +++++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index a273b8a17c0f9..9c2c6958210ad 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1806,31 +1806,34 @@ static inline void perf_event__state_init(struct perf_event *event) PERF_EVENT_STATE_INACTIVE; } -static void __perf_event_read_size(struct perf_event *event, int nr_siblings) +static int __perf_event_read_size(u64 read_format, int nr_siblings) { int entry = sizeof(u64); /* value */ int size = 0; int nr = 1; - if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) size += sizeof(u64); - if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) size += sizeof(u64); - if (event->attr.read_format & PERF_FORMAT_ID) + if (read_format & PERF_FORMAT_ID) entry += sizeof(u64); - if (event->attr.read_format & PERF_FORMAT_LOST) + if (read_format & PERF_FORMAT_LOST) entry += sizeof(u64); - if (event->attr.read_format & PERF_FORMAT_GROUP) { + if (read_format & PERF_FORMAT_GROUP) { nr += nr_siblings; size += sizeof(u64); } - size += entry * nr; - event->read_size = size; + /* + * Since perf_event_validate_size() limits this to 16k and inhibits + * adding more siblings, this will never overflow. 
+ */ + return size + nr * entry; } static void __perf_event_header_size(struct perf_event *event, u64 sample_type) @@ -1880,8 +1883,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type) */ static void perf_event__header_size(struct perf_event *event) { - __perf_event_read_size(event, - event->group_leader->nr_siblings); + event->read_size = + __perf_event_read_size(event->attr.read_format, + event->group_leader->nr_siblings); __perf_event_header_size(event, event->attr.sample_type); } @@ -1912,24 +1916,35 @@ static void perf_event__id_header_size(struct perf_event *event) event->id_header_size = size; } +/* + * Check that adding an event to the group does not result in anybody + * overflowing the 64k event limit imposed by the output buffer. + * + * Specifically, check that the read_size for the event does not exceed 16k, + * read_size being the one term that grows with groups size. Since read_size + * depends on per-event read_format, also (re)check the existing events. + * + * This leaves 48k for the constant size fields and things like callchains, + * branch stacks and register sets. + */ static bool perf_event_validate_size(struct perf_event *event) { - /* - * The values computed here will be over-written when we actually - * attach the event. - */ - __perf_event_read_size(event, event->group_leader->nr_siblings + 1); - __perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ); - perf_event__id_header_size(event); + struct perf_event *sibling, *group_leader = event->group_leader; - /* - * Sum the lot; should not exceed the 64k limit we have on records. - * Conservative limit to allow for callchains and other variable fields. - */ - if (event->read_size + event->header_size + - event->id_header_size + sizeof(struct perf_event_header) >= 16*1024) + if (__perf_event_read_size(event->attr.read_format, + group_leader->nr_siblings + 1) > 16*1024) return false; + if (__perf_event_read_size(group_leader->attr.read_format, + group_leader->nr_siblings + 1) > 16*1024) + return false; + + for_each_sibling_event(sibling, group_leader) { + if (__perf_event_read_size(sibling->attr.read_format, + group_leader->nr_siblings + 1) > 16*1024) + return false; + } + return true; } From bfdac78296670914d3481ffc2c9a58561a1dafff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Fri, 26 Sep 2025 00:14:39 +0200 Subject: [PATCH 07/11] perf: Fix perf_event_validate_size() lockdep splat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-700 cve-bf CVE-2023-6931 commit-author Mark Rutland commit 7e2c1e4b34f07d9aa8937fab88359d4a0fce468e When lockdep is enabled, the for_each_sibling_event(sibling, event) macro checks that event->ctx->mutex is held. When creating a new group leader event, we call perf_event_validate_size() on a partially initialized event where event->ctx is NULL, and so when for_each_sibling_event() attempts to check event->ctx->mutex, we get a splat, as reported by Lucas De Marchi: WARNING: CPU: 8 PID: 1471 at kernel/events/core.c:1950 __do_sys_perf_event_open+0xf37/0x1080 This only happens for a new event which is its own group_leader, and in this case there cannot be any sibling events. Thus it's safe to skip the check for siblings, which avoids having to make invasive and ugly changes to for_each_sibling_event(). Avoid the splat by bailing out early when the new event is its own group_leader. 
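The 16k checks involved here all reuse the same arithmetic. Restated as a stand-alone userspace program (flag values copied from include/uapi/linux/perf_event.h; the sibling count in main() is only an illustration, not from the upstream commit text), the read_size computation from the previous patch is:

    #include <stdint.h>
    #include <stdio.h>

    /* Flag values match include/uapi/linux/perf_event.h. */
    #define PERF_FORMAT_TOTAL_TIME_ENABLED (1ULL << 0)
    #define PERF_FORMAT_TOTAL_TIME_RUNNING (1ULL << 1)
    #define PERF_FORMAT_ID                 (1ULL << 2)
    #define PERF_FORMAT_GROUP              (1ULL << 3)
    #define PERF_FORMAT_LOST               (1ULL << 4)

    /* Same arithmetic as __perf_event_read_size() in the hunks above. */
    static int read_size(uint64_t read_format, int nr_siblings)
    {
            int entry = sizeof(uint64_t);   /* value */
            int size = 0;
            int nr = 1;

            if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
                    size += sizeof(uint64_t);
            if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
                    size += sizeof(uint64_t);
            if (read_format & PERF_FORMAT_ID)
                    entry += sizeof(uint64_t);
            if (read_format & PERF_FORMAT_LOST)
                    entry += sizeof(uint64_t);
            if (read_format & PERF_FORMAT_GROUP) {
                    nr += nr_siblings;
                    size += sizeof(uint64_t);
            }
            return size + nr * entry;
    }

    int main(void)
    {
            /* 683 value entries of 24 bytes plus the group header:
             * 16400 bytes, just over the 16*1024 cap checked above. */
            printf("%d\n", read_size(PERF_FORMAT_GROUP | PERF_FORMAT_ID |
                                     PERF_FORMAT_LOST, 682));
            return 0;
    }

With PERF_FORMAT_GROUP|ID|LOST each value entry is 24 bytes, so a group of roughly 680 siblings already pushes a single read past the 16k limit that perf_event_validate_size() enforces.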
Fixes: 382c27f4ed28f803 ("perf: Fix perf_event_validate_size()") Closes: https://lore.kernel.org/lkml/20231214000620.3081018-1-lucas.demarchi@intel.com/ Closes: https://lore.kernel.org/lkml/ZXpm6gQ%2Fd59jGsuW@xpf.sh.intel.com/ Reported-by: Lucas De Marchi Reported-by: Pengfei Xu Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20231215112450.3972309-1-mark.rutland@arm.com (cherry picked from commit 7e2c1e4b34f07d9aa8937fab88359d4a0fce468e) Signed-off-by: Marcin Wcisło --- kernel/events/core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 9c2c6958210ad..b64e28847c092 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1939,6 +1939,16 @@ static bool perf_event_validate_size(struct perf_event *event) group_leader->nr_siblings + 1) > 16*1024) return false; + /* + * When creating a new group leader, group_leader->ctx is initialized + * after the size has been validated, but we cannot safely use + * for_each_sibling_event() until group_leader->ctx is set. A new group + * leader cannot have any siblings yet, so we can safely skip checking + * the non-existent siblings. + */ + if (event == group_leader) + return true; + for_each_sibling_event(sibling, group_leader) { if (__perf_event_read_size(sibling->attr.read_format, group_leader->nr_siblings + 1) > 16*1024) From fa69b43f8a56b084a4132c97928145fd7555f68b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Fri, 26 Sep 2025 00:32:18 +0200 Subject: [PATCH 08/11] vc_screen: move load of struct vc_data pointer in vcs_read() to avoid UAF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-358 jira VULN-55184 cve CVE-2023-3567 cve CVE-2023-52973 commit-author George Kennedy commit 226fae124b2dac217ea5436060d623ff3385bc34 After a call to console_unlock() in vcs_read() the vc_data struct can be freed by vc_deallocate(). Because of that, the struct vc_data pointer load must be done at the top of the while loop in vcs_read() to avoid a UAF when vcs_size() is called. Syzkaller reported a UAF in vcs_size(). BUG: KASAN: use-after-free in vcs_size (drivers/tty/vt/vc_screen.c:215) Read of size 4 at addr ffff8881137479a8 by task 4a005ed81e27e65/1537 CPU: 0 PID: 1537 Comm: 4a005ed81e27e65 Not tainted 6.2.0-rc5 #1 Hardware name: Red Hat KVM, BIOS 1.15.0-2.module Call Trace: __asan_report_load4_noabort (mm/kasan/report_generic.c:350) vcs_size (drivers/tty/vt/vc_screen.c:215) vcs_read (drivers/tty/vt/vc_screen.c:415) vfs_read (fs/read_write.c:468 fs/read_write.c:450) ... Allocated by task 1191: ... kmalloc_trace (mm/slab_common.c:1069) vc_allocate (./include/linux/slab.h:580 ./include/linux/slab.h:720 drivers/tty/vt/vt.c:1128 drivers/tty/vt/vt.c:1108) con_install (drivers/tty/vt/vt.c:3383) tty_init_dev (drivers/tty/tty_io.c:1301 drivers/tty/tty_io.c:1413 drivers/tty/tty_io.c:1390) tty_open (drivers/tty/tty_io.c:2080 drivers/tty/tty_io.c:2126) chrdev_open (fs/char_dev.c:415) do_dentry_open (fs/open.c:883) vfs_open (fs/open.c:1014) ... Freed by task 1548: ... kfree (mm/slab_common.c:1021) vc_port_destruct (drivers/tty/vt/vt.c:1094) tty_port_destructor (drivers/tty/tty_port.c:296) tty_port_put (drivers/tty/tty_port.c:312) vt_disallocate_all (drivers/tty/vt/vt_ioctl.c:662 (discriminator 2)) vt_ioctl (drivers/tty/vt/vt_ioctl.c:903) tty_ioctl (drivers/tty/tty_io.c:2776) ...
The buggy address belongs to the object at ffff888113747800 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 424 bytes inside of 1024-byte region [ffff888113747800, ffff888113747c00) The buggy address belongs to the physical page: page:00000000b3fe6c7c refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x113740 head:00000000b3fe6c7c order:3 compound_mapcount:0 subpages_mapcount:0 compound_pincount:0 anon flags: 0x17ffffc0010200(slab|head|node=0|zone=2|lastcpupid=0x1fffff) raw: 0017ffffc0010200 ffff888100042dc0 0000000000000000 dead000000000001 raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888113747880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888113747900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb > ffff888113747980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888113747a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888113747a80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Disabling lock debugging due to kernel taint Fixes: ac751efa6a0d ("console: rename acquire/release_console_sem() to console_lock/unlock()") Reported-by: syzkaller Suggested-by: Jiri Slaby Signed-off-by: George Kennedy Link: https://lore.kernel.org/r/1674577014-12374-1-git-send-email-george.kennedy@oracle.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 226fae124b2dac217ea5436060d623ff3385bc34) Signed-off-by: Marcin Wcisło --- drivers/tty/vt/vc_screen.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c index 1850bacdb5b0e..f566eb1839dc5 100644 --- a/drivers/tty/vt/vc_screen.c +++ b/drivers/tty/vt/vc_screen.c @@ -386,10 +386,6 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) uni_mode = use_unicode(inode); attr = use_attributes(inode); - ret = -ENXIO; - vc = vcs_vc(inode, &viewed); - if (!vc) - goto unlock_out; ret = -EINVAL; if (pos < 0) @@ -407,6 +403,11 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) unsigned int this_round, skip = 0; int size; + ret = -ENXIO; + vc = vcs_vc(inode, &viewed); + if (!vc) + goto unlock_out; + /* Check whether we are above size each round, * as copy_to_user at the end of this loop * could sleep. From a09e234da306cf426ba064ce0707b9af1a368151 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Fri, 26 Sep 2025 00:32:28 +0200 Subject: [PATCH 09/11] vc_screen: modify vcs_size() handling in vcs_read() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-358 jira VULN-55184 cve-bf CVE-2023-3567 cve-bf CVE-2023-52973 commit-author George Kennedy commit 46d733d0efc79bc8430d63b57ab88011806d5180 Restore the vcs_size() handling in vcs_read() to what it had been in the previous version.
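Together with the following patch, the read loop converges on the shape sketched below (a stand-alone toy with hypothetical names, not the kernel code; the mid-loop deallocation that console_unlock() makes possible is simulated with a flag): a failure on a later iteration breaks out and reports the bytes already copied instead of the error.

    #include <stdio.h>

    static int console_alive = 1;   /* flips to 0 to simulate vc_deallocate() */

    static int lookup_size(void)    /* vcs_size() stand-in: < 0 on error */
    {
            return console_alive ? 100 : -1;
    }

    static long toy_read(long pos, long count)
    {
            long done = 0, ret = 0;

            while (count > 0) {
                    int size = lookup_size();       /* re-validated every round */
                    if (size < 0) {
                            ret = size;     /* "break", not "goto unlock_out", */
                            break;          /* so partial progress survives    */
                    }
                    if (pos >= size)
                            break;
                    long chunk = count < 10 ? count : 10;
                    pos += chunk;
                    done += chunk;
                    count -= chunk;
                    if (pos >= 50)
                            console_alive = 0;      /* object vanishes mid-read */
            }
            return done ? done : ret;
    }

    int main(void)
    {
            printf("%ld\n", toy_read(0, 100));      /* prints 50: a partial read */
            return 0;
    }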
Fixes: 226fae124b2d ("vc_screen: move load of struct vc_data pointer in vcs_read() to avoid UAF") Suggested-by: Jiri Slaby Signed-off-by: George Kennedy Signed-off-by: Linus Torvalds (cherry picked from commit 46d733d0efc79bc8430d63b57ab88011806d5180) Signed-off-by: Marcin Wcisło --- drivers/tty/vt/vc_screen.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c index f566eb1839dc5..c0a2273bb998b 100644 --- a/drivers/tty/vt/vc_screen.c +++ b/drivers/tty/vt/vc_screen.c @@ -414,10 +414,8 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) */ size = vcs_size(vc, attr, uni_mode); if (size < 0) { - if (read) - break; ret = size; - goto unlock_out; + break; } if (pos >= size) break; From 23ca0c7424a32be783cc6b85a8d202cb594a87b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Fri, 26 Sep 2025 00:33:13 +0200 Subject: [PATCH 10/11] vc_screen: don't clobber return value in vcs_read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-358 jira VULN-55184 cve-bf CVE-2023-3567 cve-bf CVE-2023-52973 commit-author Thomas Weißschuh commit ae3419fbac845b4d3f3a9fae4cc80c68d82cdf6e Commit 226fae124b2d ("vc_screen: move load of struct vc_data pointer in vcs_read() to avoid UAF") moved the call to vcs_vc() into the loop. While doing this it also moved the unconditional assignment of ret = -ENXIO; This unconditional assignment was valid outside the loop but within it, it clobbers the actual value of ret. To avoid this only assign "ret = -ENXIO" when actually needed. [ Also, the 'goto unlock_out' needs to be just a "break", so that it does the right thing when it exits on later iterations where partial success has happened - Linus ] Reported-by: Storm Dragon Link: https://lore.kernel.org/lkml/Y%2FKS6vdql2pIsCiI@hotmail.com/ Fixes: 226fae124b2d ("vc_screen: move load of struct vc_data pointer in vcs_read() to avoid UAF") Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/lkml/64981d94-d00c-4b31-9063-43ad0a384bde@t-8ch.de/ Signed-off-by: Linus Torvalds (cherry picked from commit ae3419fbac845b4d3f3a9fae4cc80c68d82cdf6e) Signed-off-by: Marcin Wcisło --- drivers/tty/vt/vc_screen.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c index c0a2273bb998b..1dc07f9214d57 100644 --- a/drivers/tty/vt/vc_screen.c +++ b/drivers/tty/vt/vc_screen.c @@ -403,10 +403,11 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) unsigned int this_round, skip = 0; int size; - ret = -ENXIO; vc = vcs_vc(inode, &viewed); - if (!vc) - goto unlock_out; + if (!vc) { + ret = -ENXIO; + break; + } /* Check whether we are above size each round, * as copy_to_user at the end of this loop From 94bacfb56e28f6b138052028968e0eacafb379d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Fri, 26 Sep 2025 00:45:51 +0200 Subject: [PATCH 11/11] ovl: fix use after free in struct ovl_aio_req MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-6260 cve CVE-2023-1252 commit-author yangerkun commit 9a254403760041528bc8f69fe2f5e1ef86950991 Example for triggering a use-after-free in an overlayfs-on-ext4 setup: aio_read ovl_read_iter vfs_iter_read ext4_file_read_iter ext4_dio_read_iter iomap_dio_rw -> -EIOCBQUEUED /* * Here IO is completed in a separate thread, * ovl_aio_cleanup_handler() frees aio_req which has iocb embedded */
file_accessed(iocb->ki_filp); /**BOOM**/ Fix by introducing a refcount in ovl_aio_req similarly to aio_kiocb. This guarantees that iocb is only freed after vfs_read/write_iter() returns on underlying fs. Fixes: 2406a307ac7d ("ovl: implement async IO routines") Signed-off-by: yangerkun Link: https://lore.kernel.org/r/20210930032228.3199690-3-yangerkun@huawei.com/ Cc: # v5.6 Signed-off-by: Miklos Szeredi (cherry picked from commit 9a254403760041528bc8f69fe2f5e1ef86950991) Signed-off-by: Marcin Wcisło --- fs/overlayfs/file.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index caaf5612b5c0b..2c7511d40e8aa 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -17,6 +17,7 @@ struct ovl_aio_req { struct kiocb iocb; + refcount_t ref; struct kiocb *orig_iocb; struct fd fd; }; @@ -252,6 +253,14 @@ static rwf_t ovl_iocb_to_rwf(int ifl) return flags; } +static inline void ovl_aio_put(struct ovl_aio_req *aio_req) +{ + if (refcount_dec_and_test(&aio_req->ref)) { + fdput(aio_req->fd); + kmem_cache_free(ovl_aio_request_cachep, aio_req); + } +} + static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) { struct kiocb *iocb = &aio_req->iocb; @@ -268,8 +277,7 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) } orig_iocb->ki_pos = iocb->ki_pos; - fdput(aio_req->fd); - kmem_cache_free(ovl_aio_request_cachep, aio_req); + ovl_aio_put(aio_req); } static void ovl_aio_rw_complete(struct kiocb *iocb, long res) @@ -313,7 +321,9 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) aio_req->orig_iocb = iocb; kiocb_clone(&aio_req->iocb, iocb, real.file); aio_req->iocb.ki_complete = ovl_aio_rw_complete; + refcount_set(&aio_req->ref, 2); ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter); + ovl_aio_put(aio_req); if (ret != -EIOCBQUEUED) ovl_aio_cleanup_handler(aio_req); } @@ -378,7 +388,9 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) kiocb_clone(&aio_req->iocb, iocb, real.file); aio_req->iocb.ki_flags = ifl; aio_req->iocb.ki_complete = ovl_aio_rw_complete; + refcount_set(&aio_req->ref, 2); ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter); + ovl_aio_put(aio_req); if (ret != -EIOCBQUEUED) ovl_aio_cleanup_handler(aio_req); }
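Both this fix and the igmp timer fix in PATCH 05 lean on the same lifetime idiom: take one reference per concurrent owner and let whoever drops the last one free the object. A stand-alone model of the two-owner variant used here (stand-in names, C11 atomics in place of the kernel's refcount_t, not the kernel code itself) is:

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct toy_req {
            atomic_int ref;
            long pos;
    };

    static void toy_put(struct toy_req *req)
    {
            /* refcount_dec_and_test() analogue: the last owner frees. */
            if (atomic_fetch_sub(&req->ref, 1) == 1) {
                    printf("freeing request\n");
                    free(req);
            }
    }

    static void toy_complete(struct toy_req *req)   /* ovl_aio_rw_complete analogue */
    {
            req->pos = 42;          /* completion-side work on the request */
            toy_put(req);
    }

    int main(void)
    {
            struct toy_req *req = malloc(sizeof(*req));

            if (!req)
                    return 1;
            atomic_init(&req->ref, 2);      /* one ref per concurrent owner */

            toy_complete(req);      /* may run before the submitter returns */
            /* Submitter path: its own reference keeps req valid even though
             * the completion has already run. */
            toy_put(req);           /* ovl_aio_put() after the vfs_* call */
            return 0;
    }

Starting the count at 2 -- one reference for the submitter, one for the completion handler -- is what lets the submission path keep touching the request after vfs_iocb_iter_read/write() returns -EIOCBQUEUED, no matter how early the completion fires; the last ovl_aio_put() performs the free.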