blkcg: Restructure blkg_conf_prep() and friends
We want to support lazy init of rq-qos policies so that iolatency is enabled lazily on configuration instead of gendisk initialization. The way blkg config helpers are structured now is a bit awkward for that. Let's restructure: * blkcg_conf_open_bdev() is renamed to blkg_conf_open_bdev(). The blkcg_ prefix was used because the bdev opening step is blkg-independent. However, the distinction is too subtle and confuses more than helps. Let's switch to blkg prefix so that it's consistent with the type and other helper names. * struct blkg_conf_ctx now remembers the original input string and is always initialized by the new blkg_conf_init(). * blkg_conf_open_bdev() is updated to take a pointer to blkg_conf_ctx like blkg_conf_prep() and can be called multiple times safely. Instead of modifying the double pointer to input string directly, blkg_conf_open_bdev() now sets blkg_conf_ctx->body. * blkg_conf_finish() is renamed to blkg_conf_exit() for symmetry and now must be called on all blkg_conf_ctx's which were initialized with blkg_conf_init(). Combined, this allows the users to either open the bdev first or do it altogether with blkg_conf_prep() which will help implementing lazy init of rq-qos policies. blkg_conf_init/exit() will also be used implement synchronization against device removal. This is necessary because iolat / iocost are configured through cgroupfs instead of one of the files under /sys/block/DEVICE. As cgroupfs operations aren't synchronized with block layer, the lazy init and other configuration operations may race against device removal. This patch makes blkg_conf_init/exit() used consistently for all cgroup-orginating configurations making them a good place to implement explicit synchronization. Users are updated accordingly. No behavior change is intended by this patch. v2: bfq wasn't updated in v1 causing a build error. Fixed. v3: Update the description to include future use of blkg_conf_init/exit() as synchronization points. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Josef Bacik <josef@toxicpanda.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Yu Kuai <yukuai1@huaweicloud.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Link: https://lore.kernel.org/r/20230413000649.115785-3-tj@kernel.org Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
83462a6c97
commit
faffaab289
|
@ -1105,9 +1105,11 @@ static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
|
|||
struct bfq_group *bfqg;
|
||||
u64 v;
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, buf, &ctx);
|
||||
blkg_conf_init(&ctx, buf);
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, &ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
|
||||
if (sscanf(ctx.body, "%llu", &v) == 1) {
|
||||
/* require "default" on dfl */
|
||||
|
@ -1129,7 +1131,7 @@ static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
|
|||
ret = 0;
|
||||
}
|
||||
out:
|
||||
blkg_conf_finish(&ctx);
|
||||
blkg_conf_exit(&ctx);
|
||||
return ret ?: nbytes;
|
||||
}
|
||||
|
||||
|
|
|
@ -653,69 +653,93 @@ u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
|
|||
EXPORT_SYMBOL_GPL(__blkg_prfill_u64);
|
||||
|
||||
/**
|
||||
* blkcg_conf_open_bdev - parse and open bdev for per-blkg config update
|
||||
* @inputp: input string pointer
|
||||
* blkg_conf_init - initialize a blkg_conf_ctx
|
||||
* @ctx: blkg_conf_ctx to initialize
|
||||
* @input: input string
|
||||
*
|
||||
* Parse the device node prefix part, MAJ:MIN, of per-blkg config update
|
||||
* from @input and get and return the matching bdev. *@inputp is
|
||||
* updated to point past the device node prefix. Returns an ERR_PTR()
|
||||
* value on error.
|
||||
*
|
||||
* Use this function iff blkg_conf_prep() can't be used for some reason.
|
||||
* Initialize @ctx which can be used to parse blkg config input string @input.
|
||||
* Once initialized, @ctx can be used with blkg_conf_open_bdev() and
|
||||
* blkg_conf_prep(), and must be cleaned up with blkg_conf_exit().
|
||||
*/
|
||||
struct block_device *blkcg_conf_open_bdev(char **inputp)
|
||||
void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input)
|
||||
{
|
||||
char *input = *inputp;
|
||||
*ctx = (struct blkg_conf_ctx){ .input = input };
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_conf_init);
|
||||
|
||||
/**
|
||||
* blkg_conf_open_bdev - parse and open bdev for per-blkg config update
|
||||
* @ctx: blkg_conf_ctx initialized with blkg_conf_init()
|
||||
*
|
||||
* Parse the device node prefix part, MAJ:MIN, of per-blkg config update from
|
||||
* @ctx->input and get and store the matching bdev in @ctx->bdev. @ctx->body is
|
||||
* set to point past the device node prefix.
|
||||
*
|
||||
* This function may be called multiple times on @ctx and the extra calls become
|
||||
* NOOPs. blkg_conf_prep() implicitly calls this function. Use this function
|
||||
* explicitly if bdev access is needed without resolving the blkcg / policy part
|
||||
* of @ctx->input. Returns -errno on error.
|
||||
*/
|
||||
int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
|
||||
{
|
||||
char *input = ctx->input;
|
||||
unsigned int major, minor;
|
||||
struct block_device *bdev;
|
||||
int key_len;
|
||||
|
||||
if (ctx->bdev)
|
||||
return 0;
|
||||
|
||||
if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
|
||||
return ERR_PTR(-EINVAL);
|
||||
return -EINVAL;
|
||||
|
||||
input += key_len;
|
||||
if (!isspace(*input))
|
||||
return ERR_PTR(-EINVAL);
|
||||
return -EINVAL;
|
||||
input = skip_spaces(input);
|
||||
|
||||
bdev = blkdev_get_no_open(MKDEV(major, minor));
|
||||
if (!bdev)
|
||||
return ERR_PTR(-ENODEV);
|
||||
return -ENODEV;
|
||||
if (bdev_is_partition(bdev)) {
|
||||
blkdev_put_no_open(bdev);
|
||||
return ERR_PTR(-ENODEV);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
*inputp = input;
|
||||
return bdev;
|
||||
ctx->body = input;
|
||||
ctx->bdev = bdev;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_conf_prep - parse and prepare for per-blkg config update
|
||||
* @blkcg: target block cgroup
|
||||
* @pol: target policy
|
||||
* @input: input string
|
||||
* @ctx: blkg_conf_ctx to be filled
|
||||
* @ctx: blkg_conf_ctx initialized with blkg_conf_init()
|
||||
*
|
||||
* Parse per-blkg config update from @input and initialize @ctx with the
|
||||
* result. @ctx->blkg points to the blkg to be updated and @ctx->body the
|
||||
* part of @input following MAJ:MIN. This function returns with queue lock
|
||||
* held and must be paired with blkg_conf_finish().
|
||||
* Parse per-blkg config update from @ctx->input and initialize @ctx
|
||||
* accordingly. On success, @ctx->body points to the part of @ctx->input
|
||||
* following MAJ:MIN, @ctx->bdev points to the target block device and
|
||||
* @ctx->blkg to the blkg being configured.
|
||||
*
|
||||
* blkg_conf_open_bdev() may be called on @ctx beforehand. On success, this
|
||||
* function returns with queue lock held and must be followed by
|
||||
* blkg_conf_exit().
|
||||
*/
|
||||
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
|
||||
char *input, struct blkg_conf_ctx *ctx)
|
||||
struct blkg_conf_ctx *ctx)
|
||||
__acquires(&bdev->bd_queue->queue_lock)
|
||||
{
|
||||
struct block_device *bdev;
|
||||
struct gendisk *disk;
|
||||
struct request_queue *q;
|
||||
struct blkcg_gq *blkg;
|
||||
int ret;
|
||||
|
||||
bdev = blkcg_conf_open_bdev(&input);
|
||||
if (IS_ERR(bdev))
|
||||
return PTR_ERR(bdev);
|
||||
disk = bdev->bd_disk;
|
||||
ret = blkg_conf_open_bdev(ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
disk = ctx->bdev->bd_disk;
|
||||
q = disk->queue;
|
||||
|
||||
/*
|
||||
|
@ -793,9 +817,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
|
|||
}
|
||||
success:
|
||||
blk_queue_exit(q);
|
||||
ctx->bdev = bdev;
|
||||
ctx->blkg = blkg;
|
||||
ctx->body = input;
|
||||
return 0;
|
||||
|
||||
fail_preloaded:
|
||||
|
@ -805,7 +827,6 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
|
|||
fail_exit_queue:
|
||||
blk_queue_exit(q);
|
||||
fail:
|
||||
blkdev_put_no_open(bdev);
|
||||
/*
|
||||
* If queue was bypassing, we should retry. Do so after a
|
||||
* short msleep(). It isn't strictly necessary but queue
|
||||
|
@ -821,19 +842,27 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
|
|||
EXPORT_SYMBOL_GPL(blkg_conf_prep);
|
||||
|
||||
/**
|
||||
* blkg_conf_finish - finish up per-blkg config update
|
||||
* @ctx: blkg_conf_ctx initialized by blkg_conf_prep()
|
||||
* blkg_conf_exit - clean up per-blkg config update
|
||||
* @ctx: blkg_conf_ctx initialized with blkg_conf_init()
|
||||
*
|
||||
* Finish up after per-blkg config update. This function must be paired
|
||||
* with blkg_conf_prep().
|
||||
* Clean up after per-blkg config update. This function must be called on all
|
||||
* blkg_conf_ctx's initialized with blkg_conf_init().
|
||||
*/
|
||||
void blkg_conf_finish(struct blkg_conf_ctx *ctx)
|
||||
void blkg_conf_exit(struct blkg_conf_ctx *ctx)
|
||||
__releases(&ctx->bdev->bd_queue->queue_lock)
|
||||
{
|
||||
spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock);
|
||||
blkdev_put_no_open(ctx->bdev);
|
||||
if (ctx->blkg) {
|
||||
spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock);
|
||||
ctx->blkg = NULL;
|
||||
}
|
||||
|
||||
if (ctx->bdev) {
|
||||
blkdev_put_no_open(ctx->bdev);
|
||||
ctx->body = NULL;
|
||||
ctx->bdev = NULL;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_conf_finish);
|
||||
EXPORT_SYMBOL_GPL(blkg_conf_exit);
|
||||
|
||||
static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
|
||||
{
|
||||
|
|
|
@ -206,15 +206,17 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
|
|||
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
|
||||
|
||||
struct blkg_conf_ctx {
|
||||
char *input;
|
||||
char *body;
|
||||
struct block_device *bdev;
|
||||
struct blkcg_gq *blkg;
|
||||
char *body;
|
||||
};
|
||||
|
||||
struct block_device *blkcg_conf_open_bdev(char **inputp);
|
||||
void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input);
|
||||
int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx);
|
||||
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
|
||||
char *input, struct blkg_conf_ctx *ctx);
|
||||
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
|
||||
struct blkg_conf_ctx *ctx);
|
||||
void blkg_conf_exit(struct blkg_conf_ctx *ctx);
|
||||
|
||||
/**
|
||||
* bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
|
||||
|
|
|
@ -3106,9 +3106,11 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
|
|||
return nbytes;
|
||||
}
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_iocost, buf, &ctx);
|
||||
blkg_conf_init(&ctx, buf);
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_iocost, &ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto err;
|
||||
|
||||
iocg = blkg_to_iocg(ctx.blkg);
|
||||
|
||||
|
@ -3127,12 +3129,14 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
|
|||
weight_updated(iocg, &now);
|
||||
spin_unlock(&iocg->ioc->lock);
|
||||
|
||||
blkg_conf_finish(&ctx);
|
||||
blkg_conf_exit(&ctx);
|
||||
return nbytes;
|
||||
|
||||
einval:
|
||||
blkg_conf_finish(&ctx);
|
||||
return -EINVAL;
|
||||
ret = -EINVAL;
|
||||
err:
|
||||
blkg_conf_exit(&ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
|
@ -3189,19 +3193,22 @@ static const match_table_t qos_tokens = {
|
|||
static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
|
||||
size_t nbytes, loff_t off)
|
||||
{
|
||||
struct block_device *bdev;
|
||||
struct blkg_conf_ctx ctx;
|
||||
struct gendisk *disk;
|
||||
struct ioc *ioc;
|
||||
u32 qos[NR_QOS_PARAMS];
|
||||
bool enable, user;
|
||||
char *p;
|
||||
char *body, *p;
|
||||
int ret;
|
||||
|
||||
bdev = blkcg_conf_open_bdev(&input);
|
||||
if (IS_ERR(bdev))
|
||||
return PTR_ERR(bdev);
|
||||
blkg_conf_init(&ctx, input);
|
||||
|
||||
disk = bdev->bd_disk;
|
||||
ret = blkg_conf_open_bdev(&ctx);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
body = ctx.body;
|
||||
disk = ctx.bdev->bd_disk;
|
||||
if (!queue_is_mq(disk->queue)) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto err;
|
||||
|
@ -3223,7 +3230,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
|
|||
enable = ioc->enabled;
|
||||
user = ioc->user_qos_params;
|
||||
|
||||
while ((p = strsep(&input, " \t\n"))) {
|
||||
while ((p = strsep(&body, " \t\n"))) {
|
||||
substring_t args[MAX_OPT_ARGS];
|
||||
char buf[32];
|
||||
int tok;
|
||||
|
@ -3313,7 +3320,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
|
|||
blk_mq_unquiesce_queue(disk->queue);
|
||||
blk_mq_unfreeze_queue(disk->queue);
|
||||
|
||||
blkdev_put_no_open(bdev);
|
||||
blkg_conf_exit(&ctx);
|
||||
return nbytes;
|
||||
einval:
|
||||
spin_unlock_irq(&ioc->lock);
|
||||
|
@ -3323,7 +3330,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
|
|||
|
||||
ret = -EINVAL;
|
||||
err:
|
||||
blkdev_put_no_open(bdev);
|
||||
blkg_conf_exit(&ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -3376,19 +3383,22 @@ static const match_table_t i_lcoef_tokens = {
|
|||
static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
|
||||
size_t nbytes, loff_t off)
|
||||
{
|
||||
struct block_device *bdev;
|
||||
struct blkg_conf_ctx ctx;
|
||||
struct request_queue *q;
|
||||
struct ioc *ioc;
|
||||
u64 u[NR_I_LCOEFS];
|
||||
bool user;
|
||||
char *p;
|
||||
char *body, *p;
|
||||
int ret;
|
||||
|
||||
bdev = blkcg_conf_open_bdev(&input);
|
||||
if (IS_ERR(bdev))
|
||||
return PTR_ERR(bdev);
|
||||
blkg_conf_init(&ctx, input);
|
||||
|
||||
q = bdev_get_queue(bdev);
|
||||
ret = blkg_conf_open_bdev(&ctx);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
body = ctx.body;
|
||||
q = bdev_get_queue(ctx.bdev);
|
||||
if (!queue_is_mq(q)) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto err;
|
||||
|
@ -3396,7 +3406,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
|
|||
|
||||
ioc = q_to_ioc(q);
|
||||
if (!ioc) {
|
||||
ret = blk_iocost_init(bdev->bd_disk);
|
||||
ret = blk_iocost_init(ctx.bdev->bd_disk);
|
||||
if (ret)
|
||||
goto err;
|
||||
ioc = q_to_ioc(q);
|
||||
|
@ -3409,7 +3419,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
|
|||
memcpy(u, ioc->params.i_lcoefs, sizeof(u));
|
||||
user = ioc->user_cost_model;
|
||||
|
||||
while ((p = strsep(&input, " \t\n"))) {
|
||||
while ((p = strsep(&body, " \t\n"))) {
|
||||
substring_t args[MAX_OPT_ARGS];
|
||||
char buf[32];
|
||||
int tok;
|
||||
|
@ -3456,7 +3466,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
|
|||
blk_mq_unquiesce_queue(q);
|
||||
blk_mq_unfreeze_queue(q);
|
||||
|
||||
blkdev_put_no_open(bdev);
|
||||
blkg_conf_exit(&ctx);
|
||||
return nbytes;
|
||||
|
||||
einval:
|
||||
|
@ -3467,7 +3477,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
|
|||
|
||||
ret = -EINVAL;
|
||||
err:
|
||||
blkdev_put_no_open(bdev);
|
||||
blkg_conf_exit(&ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -836,9 +836,11 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
|
|||
u64 oldval;
|
||||
int ret;
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
|
||||
blkg_conf_init(&ctx, buf);
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, &ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
|
||||
iolat = blkg_to_lat(ctx.blkg);
|
||||
p = ctx.body;
|
||||
|
@ -874,7 +876,7 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
|
|||
iolatency_clear_scaling(blkg);
|
||||
ret = 0;
|
||||
out:
|
||||
blkg_conf_finish(&ctx);
|
||||
blkg_conf_exit(&ctx);
|
||||
return ret ?: nbytes;
|
||||
}
|
||||
|
||||
|
|
|
@ -1368,9 +1368,11 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
|
|||
int ret;
|
||||
u64 v;
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
|
||||
blkg_conf_init(&ctx, buf);
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, &ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out_finish;
|
||||
|
||||
ret = -EINVAL;
|
||||
if (sscanf(ctx.body, "%llu", &v) != 1)
|
||||
|
@ -1389,7 +1391,7 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
|
|||
tg_conf_updated(tg, false);
|
||||
ret = 0;
|
||||
out_finish:
|
||||
blkg_conf_finish(&ctx);
|
||||
blkg_conf_exit(&ctx);
|
||||
return ret ?: nbytes;
|
||||
}
|
||||
|
||||
|
@ -1561,9 +1563,11 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
|
|||
int ret;
|
||||
int index = of_cft(of)->private;
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
|
||||
blkg_conf_init(&ctx, buf);
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, &ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out_finish;
|
||||
|
||||
tg = blkg_to_tg(ctx.blkg);
|
||||
tg_update_carryover(tg);
|
||||
|
@ -1662,7 +1666,7 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
|
|||
tg->td->limit_valid[LIMIT_LOW]);
|
||||
ret = 0;
|
||||
out_finish:
|
||||
blkg_conf_finish(&ctx);
|
||||
blkg_conf_exit(&ctx);
|
||||
return ret ?: nbytes;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue