Tue, 02 Jun 2020 04:20:16 -0700
Fix commit 42f56b7a4a7d ("ext4: mballoc: introduce pcpu seqcnt for freeing PA
to improve ENOSPC handling") by redefining discard_pa_seq as a plain
sequence counter, which removes the need for per-CPU operations. Memory
barriers are instead added around the counter accesses to mimic the
seqcount in linux/seqlock.h.
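
To illustrate the idea, here is a minimal userspace sketch of the retry
pattern (not the patch itself): a shared counter is bumped whenever block
state changes and sampled by the allocating path, with a full barrier on
each side so the counter access is ordered against the surrounding state
updates. atomic_thread_fence() stands in for the kernel's smp_mb(), and
free_blocks()/try_alloc() are hypothetical stand-ins for the mballoc
freeing and allocation paths (the sketch only bumps the counter on free).

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * Plain variable plus explicit fences mirrors the kernel's approach
 * (plain accesses + smp_mb()); strict C11 code shared between threads
 * would use atomics instead. This sketch is single-threaded anyway.
 */
static unsigned int discard_seq;
static int free_blocks_available;

/* Reader side: full fence, then sample (kernel: smp_mb() before the load). */
static inline unsigned int sample_discard_seq(void)
{
	atomic_thread_fence(memory_order_seq_cst);
	return discard_seq;
}

/* Writer side: bump, then full fence (kernel: smp_mb() after the store). */
static inline void inc_discard_seq(void)
{
	discard_seq++;
	atomic_thread_fence(memory_order_seq_cst);
}

/* Hypothetical stand-in for freeing preallocated blocks. */
static void free_blocks(int n)
{
	free_blocks_available += n;
	inc_discard_seq();
}

/* Hypothetical stand-in for one block allocation attempt. */
static bool try_alloc(void)
{
	if (free_blocks_available > 0) {
		free_blocks_available--;
		return true;
	}
	return false;
}

int main(void)
{
	unsigned int seq, now;

	seq = sample_discard_seq();
	while (!try_alloc()) {
		/* Pretend another task freed some PAs in the meantime. */
		free_blocks(8);

		now = sample_discard_seq();
		if (now == seq) {
			/* Counter unchanged since our sample: nothing was
			 * freed behind our back, so give up with ENOSPC. */
			puts("ENOSPC");
			return 1;
		}
		seq = now;	/* something changed, retry the allocation */
	}
	puts("allocated");
	return 0;
}

The same check drives the retry in the patched
ext4_mb_discard_preallocations_should_retry(): if the counter moved between
the two samples, some PA was freed or discarded concurrently, so the
allocation is retried with EXT4_MB_STRICT_CHECK set instead of returning
ENOSPC.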
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -352,32 +352,35 @@ static void ext4_mb_generate_from_freeli
static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);
 
/*
- * The algorithm using this percpu seq counter goes below:
- * 1. We sample the percpu discard_pa_seq counter before trying for block
- * allocation in ext4_mb_new_blocks().
- * 2. We increment this percpu discard_pa_seq counter when we either allocate
- * or free these blocks i.e. while marking those blocks as used/free in
+ * Here a simple sequence counter is used. The algorithm goes as below:
+ * 1. We sample the discard_pa_seq counter before trying for block allocation
+ * in ext4_mb_new_blocks().
+ * 2. We increment the counter when we either allocate or free these blocks
+ * i.e. while marking those blocks as used/free in
* mb_mark_used()/mb_free_blocks().
- * 3. We also increment this percpu seq counter when we successfully identify
- * that the bb_prealloc_list is not empty and hence proceed for discarding
- * of those PAs inside ext4_mb_discard_group_preallocations().
+ * 3. We also increment it when we successfully identify that the
+ * bb_prealloc_list is not empty and hence proceed for discarding of those
+ * PAs inside ext4_mb_discard_group_preallocations().
*
* Now to make sure that the regular fast path of block allocation is not
- * affected, as a small optimization we only sample the percpu seq counter
- * on that cpu. Only when the block allocation fails and when freed blocks
- * found were 0, that is when we sample percpu seq counter for all cpus using
- * below function ext4_get_discard_pa_seq_sum(). This happens after making
- * sure that all the PAs on grp->bb_prealloc_list got freed or if it's empty.
+ * affected, we sample the seq counter only once, before trying the
+ * allocation. Only when the block allocation fails and the freed blocks
+ * found were 0 do we sample the counter again via the function
+ * ext4_sample_discard_pa_seq() below. This happens after making sure that
+ * all the PAs on grp->bb_prealloc_list got freed or if it's empty.
*/
-static DEFINE_PER_CPU(u64, discard_pa_seq);
-static inline u64 ext4_get_discard_pa_seq_sum(void)
+static unsigned int discard_pa_seq;
+
+static inline unsigned int ext4_sample_discard_pa_seq(void)
{
- int __cpu;
- u64 __seq = 0;
+ smp_mb();
+ return discard_pa_seq;
+}
 
- for_each_possible_cpu(__cpu)
- __seq += per_cpu(discard_pa_seq, __cpu);
- return __seq;
+static inline void ext4_inc_discard_pa_seq(void)
+{
+ discard_pa_seq++;
+ smp_mb();
}
 
static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
@@ -1491,7 +1494,7 @@ static void mb_free_blocks(struct inode
mb_check_buddy(e4b);
mb_free_blocks_double(inode, e4b, first, count);
- this_cpu_inc(discard_pa_seq);
+ ext4_inc_discard_pa_seq();
e4b->bd_info->bb_free += count;
if (first < e4b->bd_info->bb_first_free)
e4b->bd_info->bb_first_free = first;
@@ -1633,7 +1636,7 @@ static int mb_mark_used(struct ext4_budd
mb_check_buddy(e4b);
mb_mark_used_double(e4b, start, len);
- this_cpu_inc(discard_pa_seq);
+ ext4_inc_discard_pa_seq();
e4b->bd_info->bb_free -= len;
if (e4b->bd_info->bb_first_free == start)
e4b->bd_info->bb_first_free += len;
@@ -4025,7 +4028,7 @@ ext4_mb_discard_group_preallocations(str
INIT_LIST_HEAD(&list);
repeat:
ext4_lock_group(sb, group);
- this_cpu_inc(discard_pa_seq);
+ ext4_inc_discard_pa_seq();
list_for_each_entry_safe(pa, tmp,
&grp->bb_prealloc_list, pa_group_list) {
spin_lock(&pa->pa_lock);
@@ -4608,21 +4611,21 @@ static int ext4_mb_discard_preallocation
}
static bool ext4_mb_discard_preallocations_should_retry(struct super_block *sb,
- struct ext4_allocation_context *ac, u64 *seq)
+ struct ext4_allocation_context *ac, unsigned int *seq)
{
int freed;
- u64 seq_retry = 0;
bool ret = false;
+ unsigned int seq_sample;
freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
if (freed) {
ret = true;
goto out_dbg;
}
- seq_retry = ext4_get_discard_pa_seq_sum();
- if (!(ac->ac_flags & EXT4_MB_STRICT_CHECK) || seq_retry != *seq) {
+ seq_sample = ext4_sample_discard_pa_seq();
+ if (!(ac->ac_flags & EXT4_MB_STRICT_CHECK) || seq_sample != *seq) {
ac->ac_flags |= EXT4_MB_STRICT_CHECK;
- *seq = seq_retry;
+ *seq = seq_sample;
ret = true;
}
@@ -4645,7 +4648,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
ext4_fsblk_t block = 0;
unsigned int inquota = 0;
unsigned int reserv_clstrs = 0;
- u64 seq;
+ unsigned int seq;
might_sleep();
sb = ar->inode->i_sb;
@@ -4708,7 +4711,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
}
ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
- seq = *this_cpu_ptr(&discard_pa_seq);
+ seq = ext4_sample_discard_pa_seq();
if (!ext4_mb_use_preallocated(ac)) {
ac->ac_op = EXT4_MB_HISTORY_ALLOC;
ext4_mb_normalize_request(ac, ar);
--