btrfs-progs: scrub: Fix scrub cancel/resume not to skip most of the disk

When a scrub completes or is cancelled, statistics are updated for
reporting in a later btrfs scrub status command and for resuming the
scrub. Most statistics (such as bytes scrubbed) are additive so scrub
adds the statistics from the current run to the saved statistics.

However, the last_physical statistic is not additive. The value from the
current run should replace the saved value. The current code incorrectly
adds the last_physical from the current run to the previous saved value.

This bug causes the resume point to be incorrectly recorded, so large
areas of the disk are skipped when the scrub resumes. As an example,
assume a disk had 1000000 bytes and scrub was cancelled and resumed each
time 10% (100000 bytes) had been scrubbed.

Run | Start byte | bytes scrubbed | kernel last_physical | saved last_physical
  1 |          0 |         100000 |               100000 |              100000
  2 |     100000 |         100000 |               200000 |              300000
  3 |     300000 |         100000 |               400000 |              700000
  4 |     700000 |         100000 |               800000 |             1500000
  5 |    1500000 |              0 | immediately completes| completed

In this example, only 40% of the disk is actually scrubbed.

This patch changes the saved/displayed last_physical to track the last
reported value from the kernel.

Signed-off-by: Graham R. Cobb <g.btrfs@cobb.uk.net>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Graham R. Cobb 2019-06-18 09:08:25 +01:00 committed by David Sterba
parent 82cb226f51
commit bb1b07848e
1 changed file with 8 additions and 2 deletions

View File

@ -203,6 +203,10 @@ static void print_scrub_summary(struct btrfs_scrub_progress *p, struct scrub_sta
fs_stat->p.name += p->name; \
} while (0)
/*
 * Copy (overwrite) a single progress field from the current run into the
 * accumulated fs-wide stats. Unlike _SCRUB_FS_STAT, which adds the value,
 * this replaces it — needed for non-additive fields such as last_physical,
 * where the kernel's latest reported value is the correct saved value.
 */
#define _SCRUB_FS_STAT_COPY(p, name, fs_stat) do { \
fs_stat->p.name = p->name; \
} while (0)
#define _SCRUB_FS_STAT_MIN(ss, name, fs_stat) \
do { \
if (fs_stat->s.name > ss->name) { \
@ -241,7 +245,7 @@ static void add_to_fs_stat(struct btrfs_scrub_progress *p,
_SCRUB_FS_STAT(p, malloc_errors, fs_stat);
_SCRUB_FS_STAT(p, uncorrectable_errors, fs_stat);
_SCRUB_FS_STAT(p, corrected_errors, fs_stat);
_SCRUB_FS_STAT(p, last_physical, fs_stat);
_SCRUB_FS_STAT_COPY(p, last_physical, fs_stat);
_SCRUB_FS_STAT_ZMIN(ss, t_start, fs_stat);
_SCRUB_FS_STAT_ZMIN(ss, t_resumed, fs_stat);
_SCRUB_FS_STAT_ZMAX(ss, duration, fs_stat);
@ -711,6 +715,8 @@ static int scrub_writev(int fd, char *buf, int max, const char *fmt, ...)
/*
 * Combine a resumed scrub's saved progress field with the current run's
 * field by addition — correct for additive counters (bytes scrubbed,
 * error counts, etc.).
 */
#define _SCRUB_SUM(dest, data, name) dest->scrub_args.progress.name = \
data->resumed->p.name + data->scrub_args.progress.name
/*
 * Take the current run's progress field as-is, discarding the saved value
 * from the resumed state — for non-additive fields (last_physical), where
 * summing with the prior value would overshoot the real resume point.
 */
#define _SCRUB_COPY(dest, data, name) dest->scrub_args.progress.name = \
data->scrub_args.progress.name
static struct scrub_progress *scrub_resumed_stats(struct scrub_progress *data,
struct scrub_progress *dest)
@ -731,7 +737,7 @@ static struct scrub_progress *scrub_resumed_stats(struct scrub_progress *data,
_SCRUB_SUM(dest, data, malloc_errors);
_SCRUB_SUM(dest, data, uncorrectable_errors);
_SCRUB_SUM(dest, data, corrected_errors);
_SCRUB_SUM(dest, data, last_physical);
_SCRUB_COPY(dest, data, last_physical);
dest->stats.canceled = data->stats.canceled;
dest->stats.finished = data->stats.finished;
dest->stats.t_resumed = data->stats.t_start;