[PATCH -mm] swsusp: Freeze filesystems during suspend

Rafael J. Wysocki

unread,

Nov 1, 2006, 6:02:37 AM11/1/06

to Andrew Morton

Freeze all filesystems during the suspend by calling freeze_bdev() for each of
them and thaw them during the resume using thaw_bdev().

This is needed by swsusp, because some filesystems (eg. XFS) use work queues
and worker_threads run with PF_NOFREEZE set, so they can cause some writes
to be performed after the suspend image has been created which may corrupt
the filesystem. The additional benefit of it is that if the resume fails, the
filesystems will be in a consistent state and there won't be any journal replays
needed.

The freezing of filesystems is carried out when processes are being frozen, so
on the majority of architectures it also will happen during a suspend to RAM.

Signed-off-by: Nigel Cunningham <ni...@suspend2.net>
Signed-off-by: Rafael J. Wysocki <r...@sisk.pl>
---
fs/buffer.c | 44 ++++++++++++++++++++++++++++++++++++++
include/linux/buffer_head.h | 2 +
include/linux/fs.h | 1
kernel/power/process.c | 50 +++++++++++++++++++++++++++++---------------
4 files changed, 80 insertions(+), 17 deletions(-)

Index: linux-2.6.19-rc4-mm1/kernel/power/process.c
===================================================================
--- linux-2.6.19-rc4-mm1.orig/kernel/power/process.c 2006-10-31 22:40:40.000000000 +0100
+++ linux-2.6.19-rc4-mm1/kernel/power/process.c 2006-10-31 22:41:03.000000000 +0100
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/freezer.h>
+#include <linux/buffer_head.h>

/*
* Timeout for stopping processes
@@ -119,7 +120,7 @@ int freeze_processes(void)
read_unlock(&tasklist_lock);
todo += nr_user;
if (!user_frozen && !nr_user) {
- sys_sync();
+ freeze_filesystems();
start_time = jiffies;
}
user_frozen = !nr_user;
@@ -156,28 +157,43 @@ int freeze_processes(void)
void thaw_some_processes(int all)
{
struct task_struct *g, *p;
- int pass = 0; /* Pass 0 = Kernel space, 1 = Userspace */

printk("Restarting tasks... ");
read_lock(&tasklist_lock);
- do {
- do_each_thread(g, p) {
- /*
- * is_user = 0 if kernel thread or borrowed mm,
- * 1 otherwise.
- */
- int is_user = !!(p->mm && !(p->flags & PF_BORROWED_MM));
- if (!freezeable(p) || (is_user != pass))
- continue;
- if (!thaw_process(p))
- printk(KERN_INFO
- "Strange, %s not stopped\n", p->comm);
- } while_each_thread(g, p);

- pass++;
- } while (pass < 2 && all);
+ do_each_thread(g, p) {
+ if (!freezeable(p))
+ continue;
+
+ /* Don't thaw userland processes, for now */
+ if (p->mm && !(p->flags & PF_BORROWED_MM))
+ continue;
+
+ if (!thaw_process(p))
+ printk(KERN_INFO " Strange, %s not stopped\n", p->comm );
+ } while_each_thread(g, p);
+
+ read_unlock(&tasklist_lock);
+ if (!all)
+ goto Exit;
+
+ thaw_filesystems();
+ read_lock(&tasklist_lock);
+
+ do_each_thread(g, p) {
+ if (!freezeable(p))
+ continue;
+
+ /* Kernel threads should have been thawed already */
+ if (!p->mm || (p->flags & PF_BORROWED_MM))
+ continue;
+
+ if (!thaw_process(p))
+ printk(KERN_INFO " Strange, %s not stopped\n", p->comm );
+ } while_each_thread(g, p);

read_unlock(&tasklist_lock);
+Exit:
schedule();
printk("done.\n");
}
Index: linux-2.6.19-rc4-mm1/include/linux/buffer_head.h
===================================================================
--- linux-2.6.19-rc4-mm1.orig/include/linux/buffer_head.h 2006-10-31 22:40:36.000000000 +0100
+++ linux-2.6.19-rc4-mm1/include/linux/buffer_head.h 2006-10-31 22:41:03.000000000 +0100
@@ -170,6 +170,8 @@ wait_queue_head_t *bh_waitq_head(struct
int fsync_bdev(struct block_device *);
struct super_block *freeze_bdev(struct block_device *);
void thaw_bdev(struct block_device *, struct super_block *);
+void freeze_filesystems(void);
+void thaw_filesystems(void);
int fsync_super(struct super_block *);
int fsync_no_super(struct block_device *);
struct buffer_head *__find_get_block(struct block_device *, sector_t, int);
Index: linux-2.6.19-rc4-mm1/include/linux/fs.h
===================================================================
--- linux-2.6.19-rc4-mm1.orig/include/linux/fs.h 2006-10-31 22:40:36.000000000 +0100
+++ linux-2.6.19-rc4-mm1/include/linux/fs.h 2006-10-31 22:41:03.000000000 +0100
@@ -120,6 +120,7 @@ extern int dir_notify_enable;
#define MS_PRIVATE (1<<18) /* change to private */
#define MS_SLAVE (1<<19) /* change to slave */
#define MS_SHARED (1<<20) /* change to shared */
+#define MS_FROZEN (1<<21) /* Frozen by freeze_filesystems() */
#define MS_ACTIVE (1<<30)
#define MS_NOUSER (1<<31)

Index: linux-2.6.19-rc4-mm1/fs/buffer.c
===================================================================
--- linux-2.6.19-rc4-mm1.orig/fs/buffer.c 2006-10-31 22:40:35.000000000 +0100
+++ linux-2.6.19-rc4-mm1/fs/buffer.c 2006-10-31 23:20:09.000000000 +0100
@@ -244,6 +244,50 @@ void thaw_bdev(struct block_device *bdev
}
EXPORT_SYMBOL(thaw_bdev);

+/**
+ * freeze_filesystems - lock all filesystems and force them into a consistent
+ * state
+ */
+void freeze_filesystems(void)
+{
+ struct super_block *sb;
+
+ lockdep_off();
+ /*
+ * Freeze in reverse order so filesystems dependant upon others are
+ * frozen in the right order (eg. loopback on ext3).
+ */
+ list_for_each_entry_reverse(sb, &super_blocks, s_list) {
+ if (!sb->s_root || !sb->s_bdev ||
+ (sb->s_frozen == SB_FREEZE_TRANS) ||
+ (sb->s_flags & MS_RDONLY) ||
+ (sb->s_flags & MS_FROZEN))
+ continue;
+
+ freeze_bdev(sb->s_bdev);
+ sb->s_flags |= MS_FROZEN;
+ }
+ lockdep_on();
+}
+
+/**
+ * thaw_filesystems - unlock all filesystems
+ */
+void thaw_filesystems(void)
+{
+ struct super_block *sb;
+
+ lockdep_off();
+
+ list_for_each_entry(sb, &super_blocks, s_list)
+ if (sb->s_flags & MS_FROZEN) {
+ sb->s_flags &= ~MS_FROZEN;
+ thaw_bdev(sb->s_bdev, sb);
+ }
+
+ lockdep_on();
+}
+
/*
* Various filesystems appear to want __find_get_block to be non-blocking.
* But it's the page lock which protects the buffers. To get around this,
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Pavel Machek

unread,

Nov 1, 2006, 6:47:48 AM11/1/06

to Rafael J. Wysocki

Hi!

> Freeze all filesystems during the suspend by calling freeze_bdev() for each of
> them and thaw them during the resume using thaw_bdev().
>
> This is needed by swsusp, because some filesystems (eg. XFS) use work queues
> and worker_threads run with PF_NOFREEZE set, so they can cause some writes
> to be performed after the suspend image has been created which may corrupt
> the filesystem. The additional benefit of it is that if the resume fails, the
> filesystems will be in a consistent state and there won't be any journal replays
> needed.
>
> The freezing of filesystems is carried out when processes are being frozen, so
> on the majority of architectures it also will happen during a
> suspend to RAM.

> @@ -119,7 +120,7 @@ int freeze_processes(void)
> read_unlock(&tasklist_lock);
> todo += nr_user;
> if (!user_frozen && !nr_user) {
> - sys_sync();
> + freeze_filesystems();
> start_time = jiffies;
> }
> user_frozen = !nr_user;

Do all filesystems implement freeze? If not, we may want to keep that
sync...

Could we do without the code duplication?

>
> +/**
> + * freeze_filesystems - lock all filesystems and force them into a consistent
> + * state
> + */
> +void freeze_filesystems(void)
> +{
> + struct super_block *sb;
> +
> + lockdep_off();

You should not just turn off lockdep because you don't like its
output.

Perhaps tasklist_lock does not nest with whatever freeze_bdev needs?

Pavel
--
Thanks, Sharp!

Rafael J. Wysocki

unread,

Nov 1, 2006, 7:08:59 AM11/1/06

to Pavel Machek

Hi,

On Wednesday, 1 November 2006 12:47, Pavel Machek wrote:
> Hi!
>
> > Freeze all filesystems during the suspend by calling freeze_bdev() for each of
> > them and thaw them during the resume using thaw_bdev().
> >
> > This is needed by swsusp, because some filesystems (eg. XFS) use work queues
> > and worker_threads run with PF_NOFREEZE set, so they can cause some writes
> > to be performed after the suspend image has been created which may corrupt
> > the filesystem. The additional benefit of it is that if the resume fails, the
> > filesystems will be in a consistent state and there won't be any journal replays
> > needed.
> >
> > The freezing of filesystems is carried out when processes are being frozen, so
> > on the majority of architectures it also will happen during a
> > suspend to RAM.
>
>
> > @@ -119,7 +120,7 @@ int freeze_processes(void)
> > read_unlock(&tasklist_lock);
> > todo += nr_user;
> > if (!user_frozen && !nr_user) {
> > - sys_sync();
> > + freeze_filesystems();
> > start_time = jiffies;
> > }
> > user_frozen = !nr_user;
>
>
> Do all filesystems implement freeze?

I think so.

> If not, we may want to keep that sync...

But the sync() won't hurt anyway I think.

Okay, I'll move the loop(s) into a separate function.

> > +/**
> > + * freeze_filesystems - lock all filesystems and force them into a consistent
> > + * state
> > + */
> > +void freeze_filesystems(void)
> > +{
> > + struct super_block *sb;
> > +
> > + lockdep_off();
>
> You should not just turn off lockdep because you don't like its
> output.
>
> Perhaps tasklist_lock does not nest with whatever freeze_bdev needs?

The locks taken in one call to freeze_bdev() nest with analogous locks
taken in the other calls to freeze_bdev(). Actually we take several locks of
the same (I think) class in a row and keep them all until thaw_filesystems()
is called, which is quite unusual. I don't think there's any way in which we
can convince lockdep that it's all okay other than switching it off.

Greetings,
Rafael

--
You never change things by fighting the existing reality.
R. Buckminster Fuller

Rafael J. Wysocki

unread,

Nov 1, 2006, 12:55:11 PM11/1/06

to Pavel Machek

Freeze all filesystems during the suspend by calling freeze_bdev() for each of
them and thaw them during the resume using thaw_bdev().

This is needed by swsusp, because some filesystems (eg. XFS) use work queues

and worker_threads run with PF_NOFREEZE set, so if these filesystems are not
frozen, they can cause some writes to be performed after the suspend image has
been created, which may lead to filesystem corruption.

The additional benefit of it is that if the resume fails, the filesystems will
be in a consistent state and there won't be any journal replays needed.

The freezing of filesystems is carried out when processes are being frozen, so
on the majority of architectures it also will happen during a suspend to RAM.

Signed-off-by: Nigel Cunningham <ni...@suspend2.net>

Signed-off-by: Rafael J. Wysocki <r...@sisk.pl>
---
fs/buffer.c | 44 +++++++++++++++++++++++++++++++++++

include/linux/buffer_head.h | 2 +
include/linux/freezer.h | 7 +----
include/linux/fs.h | 1
kernel/power/process.c | 54 +++++++++++++++++++++++++++++---------------
5 files changed, 85 insertions(+), 23 deletions(-)

Index: linux-2.6.19-rc4-mm1/kernel/power/process.c
===================================================================
--- linux-2.6.19-rc4-mm1.orig/kernel/power/process.c 2006-10-31 22:40:40.000000000 +0100

+++ linux-2.6.19-rc4-mm1/kernel/power/process.c 2006-11-01 17:53:18.000000000 +0100

@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/freezer.h>
+#include <linux/buffer_head.h>

/*
* Timeout for stopping processes

@@ -120,6 +121,7 @@ int freeze_processes(void)

todo += nr_user;
if (!user_frozen && !nr_user) {

sys_sync();
+ freeze_filesystems();
start_time = jiffies;
}
user_frozen = !nr_user;

@@ -153,31 +155,47 @@ int freeze_processes(void)
return 0;
}

-void thaw_some_processes(int all)
+#define FREEZER_KERNEL_THREADS 0
+#define FREEZER_USER_SPACE 1
+
+static void __thaw_tasks(int user_space)

{
struct task_struct *g, *p;
- int pass = 0; /* Pass 0 = Kernel space, 1 = Userspace */

- printk("Restarting tasks... ");

read_lock(&tasklist_lock);
- do {
- do_each_thread(g, p) {
- /*
- * is_user = 0 if kernel thread or borrowed mm,
- * 1 otherwise.
- */
- int is_user = !!(p->mm && !(p->flags & PF_BORROWED_MM));
- if (!freezeable(p) || (is_user != pass))

+ do_each_thread(g, p) {
+ if (!freezeable(p))
+ continue;
+

+ if (p->mm && !(p->flags & PF_BORROWED_MM)) {

+ /* It's a user space process */
+ if (!user_space)

continue;
- if (!thaw_process(p))
- printk(KERN_INFO
- "Strange, %s not stopped\n", p->comm);
- } while_each_thread(g, p);

+ } else {
+ /* It's a kernel thread */
+ if (user_space)

+ continue;
+ }
+ if (!thaw_process(p))
+ printk(KERN_INFO " Strange, %s not stopped\n", p->comm );
+ } while_each_thread(g, p);

+ read_unlock(&tasklist_lock);
+}

- pass++;
- } while (pass < 2 && all);

+void thaw_processes(void)
+{
+ printk("Restarting tasks ... ");
+ __thaw_tasks(FREEZER_KERNEL_THREADS);
+ thaw_filesystems();
+ __thaw_tasks(FREEZER_USER_SPACE);
+ schedule();
+ printk("done.\n");
+}

- read_unlock(&tasklist_lock);
+void thaw_kernel_threads(void)
+{
+ printk("Restarting kernel threads ... ");
+ __thaw_tasks(FREEZER_KERNEL_THREADS);

schedule();
printk("done.\n");
}

+/**
+ * freeze_filesystems - lock all filesystems and force them into a consistent
+ * state
+ */
+void freeze_filesystems(void)
+{
+ struct super_block *sb;
+
+ lockdep_off();

+ /*
+ * Freeze in reverse order so filesystems dependant upon others are
+ * frozen in the right order (eg. loopback on ext3).
+ */
+ list_for_each_entry_reverse(sb, &super_blocks, s_list) {
+ if (!sb->s_root || !sb->s_bdev ||
+ (sb->s_frozen == SB_FREEZE_TRANS) ||
+ (sb->s_flags & MS_RDONLY) ||
+ (sb->s_flags & MS_FROZEN))
+ continue;
+
+ freeze_bdev(sb->s_bdev);
+ sb->s_flags |= MS_FROZEN;
+ }
+ lockdep_on();
+}
+
+/**
+ * thaw_filesystems - unlock all filesystems
+ */
+void thaw_filesystems(void)

+{
+ struct super_block *sb;
+
+ lockdep_off();

+
+ list_for_each_entry(sb, &super_blocks, s_list)
+ if (sb->s_flags & MS_FROZEN) {
+ sb->s_flags &= ~MS_FROZEN;
+ thaw_bdev(sb->s_bdev, sb);
+ }
+
+ lockdep_on();
+}
+
/*
* Various filesystems appear to want __find_get_block to be non-blocking.
* But it's the page lock which protects the buffers. To get around this,

Index: linux-2.6.19-rc4-mm1/include/linux/freezer.h
===================================================================
--- linux-2.6.19-rc4-mm1.orig/include/linux/freezer.h 2006-11-01 17:47:50.000000000 +0100
+++ linux-2.6.19-rc4-mm1/include/linux/freezer.h 2006-11-01 17:55:00.000000000 +0100
@@ -1,8 +1,5 @@
/* Freezer declarations */

-#define FREEZER_KERNEL_THREADS 0
-#define FREEZER_ALL_THREADS 1
-
#ifdef CONFIG_PM
/*
* Check if a process has been frozen
@@ -60,8 +57,8 @@ static inline void frozen_process(struct

extern void refrigerator(void);
extern int freeze_processes(void);
-#define thaw_processes() do { thaw_some_processes(FREEZER_ALL_THREADS); } while(0)
-#define thaw_kernel_threads() do { thaw_some_processes(FREEZER_KERNEL_THREADS); } while(0)
+extern void thaw_processes(void);
+extern void thaw_kernel_threads(void);

static inline int try_to_freeze(void)
{

Andrew Morton

unread,

Nov 1, 2006, 2:46:34 PM11/1/06

to Rafael J. Wysocki

On Wed, 1 Nov 2006 18:53:07 +0100
"Rafael J. Wysocki" <r...@sisk.pl> wrote:

> +void thaw_processes(void)
> +{
> + printk("Restarting tasks ... ");
> + __thaw_tasks(FREEZER_KERNEL_THREADS);
> + thaw_filesystems();
> + __thaw_tasks(FREEZER_USER_SPACE);
> + schedule();
> + printk("done.\n");
> +}
>
> - read_unlock(&tasklist_lock);
> +void thaw_kernel_threads(void)
> +{
> + printk("Restarting kernel threads ... ");
> + __thaw_tasks(FREEZER_KERNEL_THREADS);
> schedule();
> printk("done.\n");
> }

what do these random-looking schedule()s do??

Andrew Morton

unread,

Nov 1, 2006, 2:55:39 PM11/1/06

to Rafael J. Wysocki

On Wed, 1 Nov 2006 18:53:07 +0100
"Rafael J. Wysocki" <r...@sisk.pl> wrote:

argh.

The uncommented, unchangelogged lockdep_off() calls are completely
mysterious right now, even before the patch is merged. They will not
become less mysterious over time.

Please, take pity upon the readers of your code. Add a comment.

Rafael J. Wysocki

unread,

Nov 1, 2006, 3:32:21 PM11/1/06

to Andrew Morton

Of course. Sorry.

> Please, take pity upon the readers of your code. Add a comment.

OK (on top of the previous patch)

Signed-off-by: Rafael J. Wysocki <r...@sisk.pl>
---

fs/buffer.c | 9 +++++++++
1 file changed, 9 insertions(+)

Index: linux-2.6.19-rc4-mm1/fs/buffer.c
===================================================================
--- linux-2.6.19-rc4-mm1.orig/fs/buffer.c

+++ linux-2.6.19-rc4-mm1/fs/buffer.c
@@ -252,6 +252,11 @@ void freeze_filesystems(void)
{
struct super_block *sb;

+ /*
+ * We are going to take several locks of the same class in a row
+ * without releasing them until thaw_filesystems() is called and
+ * lockdep won't know this is all OK.
+ */
lockdep_off();
/*

* Freeze in reverse order so filesystems dependant upon others are

@@ -277,6 +282,10 @@ void thaw_filesystems(void)
{
struct super_block *sb;

+ /*
+ * We are going to release several locks of the same class in a row
+ * and lockdep would complain about it, unnecessarily.
+ */
lockdep_off();

list_for_each_entry(sb, &super_blocks, s_list)

Rafael J. Wysocki

unread,

Nov 1, 2006, 3:32:45 PM11/1/06

to Andrew Morton

On Wednesday, 1 November 2006 20:45, Andrew Morton wrote:
> On Wed, 1 Nov 2006 18:53:07 +0100
> "Rafael J. Wysocki" <r...@sisk.pl> wrote:
>
> > +void thaw_processes(void)
> > +{
> > + printk("Restarting tasks ... ");
> > + __thaw_tasks(FREEZER_KERNEL_THREADS);
> > + thaw_filesystems();
> > + __thaw_tasks(FREEZER_USER_SPACE);
> > + schedule();
> > + printk("done.\n");
> > +}
> >
> > - read_unlock(&tasklist_lock);
> > +void thaw_kernel_threads(void)
> > +{
> > + printk("Restarting kernel threads ... ");
> > + __thaw_tasks(FREEZER_KERNEL_THREADS);
> > schedule();
> > printk("done.\n");
> > }
>
> what do these random-looking schedule()s do??

My understanding is that they allow the thawed tasks to actually exit
the refrigerator, because __thaw_tasks() only changes their states.

Andrew Morton

unread,

Nov 1, 2006, 4:21:57 PM11/1/06

to Rafael J. Wysocki

On Wed, 1 Nov 2006 21:27:17 +0100

"Rafael J. Wysocki" <r...@sisk.pl> wrote:

> On Wednesday, 1 November 2006 20:45, Andrew Morton wrote:
> > On Wed, 1 Nov 2006 18:53:07 +0100
> > "Rafael J. Wysocki" <r...@sisk.pl> wrote:
> >
> > > +void thaw_processes(void)
> > > +{
> > > + printk("Restarting tasks ... ");
> > > + __thaw_tasks(FREEZER_KERNEL_THREADS);
> > > + thaw_filesystems();
> > > + __thaw_tasks(FREEZER_USER_SPACE);
> > > + schedule();
> > > + printk("done.\n");
> > > +}
> > >
> > > - read_unlock(&tasklist_lock);
> > > +void thaw_kernel_threads(void)
> > > +{
> > > + printk("Restarting kernel threads ... ");
> > > + __thaw_tasks(FREEZER_KERNEL_THREADS);
> > > schedule();
> > > printk("done.\n");
> > > }
> >
> > what do these random-looking schedule()s do??
>
> My understanding is that they allow the thawed tasks to actually exit
> the refrigerator, because __thaw_tasks() only changes their states.

I'd be surprised if this is doing what we think it's doing. Calling
schedule() in state TASK_RUNNING is usually a no-op. It'll only actually
switch to another task if the scheduler decides that this task has expired
its timeslice, or another higher-priority task has become runnable, etc.

Rafael J. Wysocki

unread,

Nov 2, 2006, 3:56:18 PM11/2/06

to Andrew Morton

This actually can happen, it seems, because __thaw_tasks() calls
wake_up_process() for each frozen task, which may in turn call resched_task()
for current.