Sun, 07 Jun 2020 04:13:13 -0700
Fix 42f56b7a4a7d ("ext4: mballoc: introduce pcpu seqcnt for freeing PA
to improve ENOSPC handling") by 1) removing the percpu discard_pa_seq
counter and 2) adding an s_mb_pa_seq counter to struct ext4_sb_info,
with the net result that the sequence counter is no longer percpu but
per superblock.
The new counter is modeled on the seqcount in <linux/seqlock.h>: writers
bump it whenever preallocated blocks are marked used, freed or discarded,
and readers sample it before an allocation attempt, with explicit
smp_mb() barriers pairing the two sides.
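
In outline, the counter drives the ENOSPC retry logic like this (a
simplified sketch of the flow in ext4_mb_new_blocks(); try_allocation()
is a hypothetical stand-in for the real allocation path, which goes
through preallocation, request normalization and the regular allocator):

	unsigned int seq = ext4_mb_sample_pa_seq(sb); /* read, then smp_mb() */

repeat:
	if (try_allocation(ac))		/* hypothetical stand-in */
		return block;
	/*
	 * Writers bump s_mb_pa_seq (with a barrier) in mb_mark_used(),
	 * mb_free_blocks() and after a successful group discard. If the
	 * counter moved since we sampled it, blocks may have been freed
	 * behind our back, so retry instead of returning ENOSPC.
	 */
	if (ext4_mb_discard_preallocations_should_retry(sb, ac, &seq))
		goto repeat;
	return -ENOSPC;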
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1501,6 +1501,8 @@ struct ext4_sb_info {
atomic_t s_mb_preallocated;
atomic_t s_mb_discarded;
atomic_t s_lock_busy;
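+ /* bumped when blocks are marked used/free or PAs are discarded */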
+ unsigned int s_mb_pa_seq;
/* locality groups */
struct ext4_locality_group __percpu *s_locality_groups;
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -351,35 +351,6 @@ static void ext4_mb_generate_from_freeli
ext4_group_t group);
static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);
-/*
- * The algorithm using this percpu seq counter goes below:
- * 1. We sample the percpu discard_pa_seq counter before trying for block
- * allocation in ext4_mb_new_blocks().
- * 2. We increment this percpu discard_pa_seq counter when we either allocate
- * or free these blocks i.e. while marking those blocks as used/free in
- * mb_mark_used()/mb_free_blocks().
- * 3. We also increment this percpu seq counter when we successfully identify
- * that the bb_prealloc_list is not empty and hence proceed for discarding
- * of those PAs inside ext4_mb_discard_group_preallocations().
- *
- * Now to make sure that the regular fast path of block allocation is not
- * affected, as a small optimization we only sample the percpu seq counter
- * on that cpu. Only when the block allocation fails and when freed blocks
- * found were 0, that is when we sample percpu seq counter for all cpus using
- * below function ext4_get_discard_pa_seq_sum(). This happens after making
- * sure that all the PAs on grp->bb_prealloc_list got freed or if it's empty.
- */
-static DEFINE_PER_CPU(u64, discard_pa_seq);
-static inline u64 ext4_get_discard_pa_seq_sum(void)
-{
- int __cpu;
- u64 __seq = 0;
-
- for_each_possible_cpu(__cpu)
- __seq += per_cpu(discard_pa_seq, __cpu);
- return __seq;
-}
-
static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{
#if BITS_PER_LONG == 64
@@ -1471,6 +1442,20 @@ static void mb_buddy_mark_free(struct ex
}
}
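+/*
+ * Bump the superblock PA seq counter. Pairs with the smp_mb()s in
+ * ext4_mb_sample_pa_seq() and ext4_mb_retry_pa_seq() so that a failed
+ * allocation can notice that blocks were freed or marked used after it
+ * sampled the counter, and retry instead of returning ENOSPC early.
+ */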
+static inline void ext4_mb_update_pa_seq(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ sbi->s_mb_pa_seq++;
+ smp_mb();
+}
+
static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
int first, int count)
{
@@ -1491,7 +1469,7 @@ static void mb_free_blocks(struct inode
mb_check_buddy(e4b);
mb_free_blocks_double(inode, e4b, first, count);
- this_cpu_inc(discard_pa_seq);
+ ext4_mb_update_pa_seq(sb);
e4b->bd_info->bb_free += count;
if (first < e4b->bd_info->bb_first_free)
e4b->bd_info->bb_first_free = first;
@@ -1633,7 +1611,6 @@ static int mb_mark_used(struct ext4_budd
mb_check_buddy(e4b);
mb_mark_used_double(e4b, start, len);
- this_cpu_inc(discard_pa_seq);
e4b->bd_info->bb_free -= len;
if (e4b->bd_info->bb_first_free == start)
e4b->bd_info->bb_first_free += len;
@@ -1684,6 +1661,7 @@ static int mb_mark_used(struct ext4_budd
e4b->bd_info->bb_counters[ord]++;
}
mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
+ ext4_mb_update_pa_seq(e4b->bd_sb);
ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
mb_check_buddy(e4b);
@@ -4025,7 +4003,6 @@ ext4_mb_discard_group_preallocations(str
INIT_LIST_HEAD(&list);
repeat:
ext4_lock_group(sb, group);
- this_cpu_inc(discard_pa_seq);
list_for_each_entry_safe(pa, tmp,
&grp->bb_prealloc_list, pa_group_list) {
spin_lock(&pa->pa_lock);
@@ -4091,6 +4068,9 @@ out:
out_dbg:
mb_debug(sb, "discarded (%d) blocks preallocated for group %u bb_free (%d)\n",
free, group, grp->bb_free);
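+ /* let racing allocators know that this group's PAs were discarded */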
+ if (free)
+ ext4_mb_update_pa_seq(sb);
return free;
}
@@ -4607,30 +4586,64 @@ static int ext4_mb_discard_preallocation
return freed;
}
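+/*
+ * Re-sample the PA seq counter after a failed allocation attempt and
+ * report whether it moved since *__seq was last sampled, i.e. whether
+ * any preallocations were freed or discarded in the meantime. The
+ * smp_mb() pairs with the one in ext4_mb_update_pa_seq().
+ */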
+static inline bool ext4_mb_retry_pa_seq(struct super_block *sb,
+ unsigned int *__seq)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ unsigned int seq;
+ bool retry;
+
+ smp_mb();
+ seq = sbi->s_mb_pa_seq;
+ retry = seq != *__seq;
+ *__seq = seq;
+ return retry;
+}
+
static bool ext4_mb_discard_preallocations_should_retry(struct super_block *sb,
- struct ext4_allocation_context *ac, u64 *seq)
+ struct ext4_allocation_context *ac, unsigned int *seq)
{
int freed;
- u64 seq_retry = 0;
bool ret = false;
+ bool retry = ext4_mb_retry_pa_seq(sb, seq);
freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
if (freed) {
ret = true;
goto out_dbg;
}
- seq_retry = ext4_get_discard_pa_seq_sum();
- if (!(ac->ac_flags & EXT4_MB_STRICT_CHECK) || seq_retry != *seq) {
+ if (!(ac->ac_flags & EXT4_MB_STRICT_CHECK)) {
ac->ac_flags |= EXT4_MB_STRICT_CHECK;
- *seq = seq_retry;
ret = true;
- }
+ } else if (retry) {
+ ret = true;
+ }
out_dbg:
mb_debug(sb, "freed %d, retry ? %s\n", freed, ret ? "yes" : "no");
return ret;
}
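+/*
+ * Sample the PA seq counter before trying the allocation. The smp_mb()
+ * orders the read of s_mb_pa_seq before the reads done by the allocation
+ * attempt, pairing with the barrier in ext4_mb_update_pa_seq().
+ */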
+static inline unsigned int ext4_mb_sample_pa_seq(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ unsigned int seq = sbi->s_mb_pa_seq;
+
+ smp_mb();
+ return seq;
+}
+
/*
* Main entry point into mballoc to allocate blocks
* it tries to use preallocation first, then falls back
@@ -4645,7 +4644,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
ext4_fsblk_t block = 0;
unsigned int inquota = 0;
unsigned int reserv_clstrs = 0;
- u64 seq;
+ unsigned int seq;
might_sleep();
sb = ar->inode->i_sb;
@@ -4707,8 +4706,8 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
goto out;
}
+ seq = ext4_mb_sample_pa_seq(sb);
ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
- seq = *this_cpu_ptr(&discard_pa_seq);
if (!ext4_mb_use_preallocated(ac)) {
ac->ac_op = EXT4_MB_HISTORY_ALLOC;
ext4_mb_normalize_request(ac, ar);