io_uring/af_unix: defer registered files gc to io_uring release

Instead of putting io_uring's registered files in unix_gc(), we want that
done by io_uring itself. The trick is to consider io_uring-registered
files for cycle detection without actually putting them down. Because
io_uring can't register other ring instances, killing all other files in
a cycle removes every ref to the ring file, triggering the ->release
path, which cleans up via io_ring_ctx_free().

Cc: stable@vger.kernel.org
Fixes: 6b06314c47 ("io_uring: add file set registration")
Reported-and-tested-by: David Bouman <dbouman03@gmail.com>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
[axboe: add kerneldoc comment to skb, fold in skb leak fix]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
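
For context, the cycle being broken looks like the following minimal
userspace sketch (illustrative only, not part of the patch; assumes
liburing and a hypothetical standalone reproducer): the ring pins a
registered AF_UNIX socket, and the socket's queue pins the ring fd, so
once userspace closes everything only unix_gc() can ever see the loop.

/* Hypothetical reproducer sketch, not from this commit: builds the
 * ring <-> registered-socket reference cycle described above. */
#include <liburing.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct io_uring ring;
	struct msghdr msg;
	struct cmsghdr *cmsg;
	char dummy = 'x';
	char cbuf[CMSG_SPACE(sizeof(int))];
	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
	int sv[2];

	if (io_uring_queue_init(8, &ring, 0))
		return 1;
	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv))
		return 1;

	/* The ring now pins the sockets via an SCM-accounted skb. */
	if (io_uring_register_files(&ring, sv, 2))
		return 1;

	/* Send the ring fd over one of the registered sockets: the
	 * socket now pins the ring, completing the cycle. */
	memset(&msg, 0, sizeof(msg));
	memset(cbuf, 0, sizeof(cbuf));
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof(cbuf);
	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &ring.ring_fd, sizeof(int));
	if (sendmsg(sv[0], &msg, 0) < 0)
		return 1;

	/* Drop every userspace reference; the kernel objects now only
	 * reference each other, and it is up to unix_gc() to notice. */
	close(sv[0]);
	close(sv[1]);
	io_uring_queue_exit(&ring);
	return 0;
}

With this change, unix_gc() still flags the cycle but requeues the
io_uring-originated skb instead of purging it; putting the non-io_uring
files drops the last refs to the ring file, and io_uring's ->release()
then puts the registered files via io_ring_ctx_free().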
commit 0091bfc817 (parent d7cce96c44)
---
 include/linux/skbuff.h |  2 ++
 io_uring/rsrc.c        |  1 +
 net/unix/garbage.c     | 20 ++++++++++++++++++++
 3 files changed, 23 insertions(+)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -803,6 +803,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@csum_level: indicates the number of consecutive checksums found in
  *		the packet minus one that have been verified as
  *		CHECKSUM_UNNECESSARY (max 3)
+ *	@scm_io_uring: SKB holds io_uring registered files
  *	@dst_pending_confirm: need to confirm neighbour
  *	@decrypted: Decrypted SKB
  *	@slow_gro: state present at GRO time, slower prepare step required
@@ -982,6 +983,7 @@ struct sk_buff {
 #endif
 	__u8			slow_gro:1;
 	__u8			csum_not_inet:1;
+	__u8			scm_io_uring:1;
 
 #ifdef CONFIG_NET_SCHED
 	__u16			tc_index;	/* traffic control index */

diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -855,6 +855,7 @@ int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file)
 		UNIXCB(skb).fp = fpl;
 		skb->sk = sk;
+		skb->scm_io_uring = 1;
 		skb->destructor = unix_destruct_scm;
 		refcount_add(skb->truesize, &sk->sk_wmem_alloc);
 	}

diff --git a/net/unix/garbage.c b/net/unix/garbage.c
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -204,6 +204,7 @@ void wait_for_unix_gc(void)
 /* The external entry point: unix_gc() */
 void unix_gc(void)
 {
+	struct sk_buff *next_skb, *skb;
 	struct unix_sock *u;
 	struct unix_sock *next;
 	struct sk_buff_head hitlist;
@@ -297,11 +298,30 @@ void unix_gc(void)
 	spin_unlock(&unix_gc_lock);
 
+	/* We need io_uring to clean its registered files, ignore all io_uring
+	 * originated skbs. It's fine as io_uring doesn't keep references to
+	 * other io_uring instances and so killing all other files in the cycle
+	 * will put all io_uring references forcing it to go through normal
+	 * release path eventually putting registered files.
+	 */
+	skb_queue_walk_safe(&hitlist, skb, next_skb) {
+		if (skb->scm_io_uring) {
+			__skb_unlink(skb, &hitlist);
+			skb_queue_tail(&skb->sk->sk_receive_queue, skb);
+		}
+	}
+
 	/* Here we are. Hitlist is filled. Die. */
 	__skb_queue_purge(&hitlist);
 
 	spin_lock(&unix_gc_lock);
 
+	/* There could be io_uring registered files, just push them back to
+	 * the inflight list
+	 */
+	list_for_each_entry_safe(u, next, &gc_candidates, link)
+		list_move_tail(&u->link, &gc_inflight_list);
+
 	/* All candidates should have been detached by now. */
 	BUG_ON(!list_empty(&gc_candidates));
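
One detail worth calling out in the hunk above: the walk uses
skb_queue_walk_safe() rather than skb_queue_walk() because it unlinks
skbs mid-iteration; __skb_unlink() rewrites the victim's next/prev
pointers, so the plain walker would lose its place. A minimal sketch of
that pattern (illustrative, not from the patch; the helper name is
hypothetical):

/* Hypothetical helper, not in the patch: moves flagged skbs from one
 * private queue to another. skb_queue_walk_safe() caches the next
 * pointer before the loop body runs, which is what makes the
 * __skb_unlink() below safe. */
#include <linux/skbuff.h>

static void move_flagged_skbs(struct sk_buff_head *from,
			      struct sk_buff_head *to)
{
	struct sk_buff *skb, *next;

	skb_queue_walk_safe(from, skb, next) {
		if (skb->scm_io_uring) {
			/* Both queues are private here, so the unlocked
			 * __skb_unlink()/__skb_queue_tail() pair suffices. */
			__skb_unlink(skb, from);
			__skb_queue_tail(to, skb);
		}
	}
}

The patch itself requeues onto each skb's own sk->sk_receive_queue,
which is shared state, hence the locked skb_queue_tail() there.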