md: When assembling the array, consult the superblock of the freshest device
[ Upstream commit dc1cc22ed58f11d58d8553c5ec5f11cbfc3e3039 ]

Upon assembling the array, both the kernel and mdadm allow the devices to have an event counter difference of 1 and still consider them up-to-date. However, a device whose event count is behind by 1 may in fact not be up-to-date, and array resync with such a device may cause data corruption. To avoid this, consult the superblock of the freshest device about the status of a device whose event counter is behind by 1.

Signed-off-by: Alex Lyakas <alex.lyakas@zadara.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/1702470271-16073-1-git-send-email-alex.lyakas@zadara.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
parent
8ae4201900
commit
fd9a2c7003
|
@ -1145,6 +1145,7 @@ struct super_type {
|
|||
struct md_rdev *refdev,
|
||||
int minor_version);
|
||||
int (*validate_super)(struct mddev *mddev,
|
||||
struct md_rdev *freshest,
|
||||
struct md_rdev *rdev);
|
||||
void (*sync_super)(struct mddev *mddev,
|
||||
struct md_rdev *rdev);
|
||||
|
@ -1282,8 +1283,9 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
|
|||
|
||||
/*
|
||||
* validate_super for 0.90.0
|
||||
* note: we are not using "freshest" for 0.9 superblock
|
||||
*/
|
||||
static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
|
||||
static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, struct md_rdev *rdev)
|
||||
{
|
||||
mdp_disk_t *desc;
|
||||
mdp_super_t *sb = page_address(rdev->sb_page);
|
||||
|
@ -1795,7 +1797,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
|
||||
static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struct md_rdev *rdev)
|
||||
{
|
||||
struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
|
||||
__u64 ev1 = le64_to_cpu(sb->events);
|
||||
|
@ -1891,13 +1893,15 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
|
|||
}
|
||||
} else if (mddev->pers == NULL) {
|
||||
/* Insist of good event counter while assembling, except for
|
||||
* spares (which don't need an event count) */
|
||||
++ev1;
|
||||
* spares (which don't need an event count).
|
||||
* Similar to mdadm, we allow event counter difference of 1
|
||||
* from the freshest device.
|
||||
*/
|
||||
if (rdev->desc_nr >= 0 &&
|
||||
rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
|
||||
(le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
|
||||
le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
|
||||
if (ev1 < mddev->events)
|
||||
if (ev1 + 1 < mddev->events)
|
||||
return -EINVAL;
|
||||
} else if (mddev->bitmap) {
|
||||
/* If adding to array with a bitmap, then we can accept an
|
||||
|
@ -1918,8 +1922,38 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
|
|||
rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
|
||||
role = MD_DISK_ROLE_SPARE;
|
||||
rdev->desc_nr = -1;
|
||||
} else
|
||||
} else if (mddev->pers == NULL && freshest && ev1 < mddev->events) {
|
||||
/*
|
||||
* If we are assembling, and our event counter is smaller than the
|
||||
* highest event counter, we cannot trust our superblock about the role.
|
||||
* It could happen that our rdev was marked as Faulty, and all other
|
||||
* superblocks were updated with +1 event counter.
|
||||
* Then, before the next superblock update, which typically happens when
|
||||
* remove_and_add_spares() removes the device from the array, there was
|
||||
* a crash or reboot.
|
||||
* If we allow current rdev without consulting the freshest superblock,
|
||||
* we could cause data corruption.
|
||||
* Note that in this case our event counter is smaller by 1 than the
|
||||
* highest, otherwise, this rdev would not be allowed into array;
|
||||
* both kernel and mdadm allow event counter difference of 1.
|
||||
*/
|
||||
struct mdp_superblock_1 *freshest_sb = page_address(freshest->sb_page);
|
||||
u32 freshest_max_dev = le32_to_cpu(freshest_sb->max_dev);
|
||||
|
||||
if (rdev->desc_nr >= freshest_max_dev) {
|
||||
/* this is unexpected, better not proceed */
|
||||
pr_warn("md: %s: rdev[%pg]: desc_nr(%d) >= freshest(%pg)->sb->max_dev(%u)\n",
|
||||
mdname(mddev), rdev->bdev, rdev->desc_nr,
|
||||
freshest->bdev, freshest_max_dev);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
role = le16_to_cpu(freshest_sb->dev_roles[rdev->desc_nr]);
|
||||
pr_debug("md: %s: rdev[%pg]: role=%d(0x%x) according to freshest %pg\n",
|
||||
mdname(mddev), rdev->bdev, role, role, freshest->bdev);
|
||||
} else {
|
||||
role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
|
||||
}
|
||||
switch(role) {
|
||||
case MD_DISK_ROLE_SPARE: /* spare */
|
||||
break;
|
||||
|
@ -2861,7 +2895,7 @@ static int add_bound_rdev(struct md_rdev *rdev)
|
|||
* and should be added immediately.
|
||||
*/
|
||||
super_types[mddev->major_version].
|
||||
validate_super(mddev, rdev);
|
||||
validate_super(mddev, NULL/*freshest*/, rdev);
|
||||
if (add_journal)
|
||||
mddev_suspend(mddev);
|
||||
err = mddev->pers->hot_add_disk(mddev, rdev);
|
||||
|
@ -3775,7 +3809,7 @@ static int analyze_sbs(struct mddev *mddev)
|
|||
}
|
||||
|
||||
super_types[mddev->major_version].
|
||||
validate_super(mddev, freshest);
|
||||
validate_super(mddev, NULL/*freshest*/, freshest);
|
||||
|
||||
i = 0;
|
||||
rdev_for_each_safe(rdev, tmp, mddev) {
|
||||
|
@ -3790,7 +3824,7 @@ static int analyze_sbs(struct mddev *mddev)
|
|||
}
|
||||
if (rdev != freshest) {
|
||||
if (super_types[mddev->major_version].
|
||||
validate_super(mddev, rdev)) {
|
||||
validate_super(mddev, freshest, rdev)) {
|
||||
pr_warn("md: kicking non-fresh %pg from array!\n",
|
||||
rdev->bdev);
|
||||
md_kick_rdev_from_array(rdev);
|
||||
|
@ -6804,7 +6838,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
|
|||
rdev->saved_raid_disk = rdev->raid_disk;
|
||||
} else
|
||||
super_types[mddev->major_version].
|
||||
validate_super(mddev, rdev);
|
||||
validate_super(mddev, NULL/*freshest*/, rdev);
|
||||
if ((info->state & (1<<MD_DISK_SYNC)) &&
|
||||
rdev->raid_disk != info->raid_disk) {
|
||||
/* This was a hot-add request, but events doesn't
|
||||
|
|
Loading…
Reference in New Issue