Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

[RFC] ata port runtime pm

3 views
Skip to first unread message

Lin Ming

unread,
Oct 27, 2011, 11:30:01 PM10/27/11
to
Hi, all

I send this out early to get feedback how to do ata port runtime pm.

There was some discussion early this year trying to add runtime pm
support to sata_mv controller driver.
http://marc.info/?l=linux-ide&m=129403126729115&w=2

Here I focus on ata port runtime pm, not controller.

In below conceptual patch, I did

0. Set autosuspend delay to 3 minutes for ata port
1. Split a new function ata_port_request_pm from ata_host_request_pm
2. Add device_type "ata_port_type" which implements callbacks for
runtime pm core
3. Resume port in ata_scsi_queuecmd if needed
4. Request auto suspend in ata_scsi_queuecmd

CAUTION: this patch DOES NOT work at all.
I just threw it out for discussion.

Any idea?
Thanks.

Lin Ming
---
drivers/ata/libata-core.c | 126 +++++++++++++++++++++++++++++++++------------
drivers/ata/libata-scsi.c | 6 ++
2 files changed, 99 insertions(+), 33 deletions(-)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 4a3a5ae..e0c1a15 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -66,6 +66,7 @@
#include <asm/byteorder.h>
#include <linux/cdrom.h>
#include <linux/ratelimit.h>
+#include <linux/pm_runtime.h>

#include "libata.h"
#include "libata-transport.h"
@@ -5234,51 +5235,62 @@ bool ata_link_offline(struct ata_link *link)
}

#ifdef CONFIG_PM
-static int ata_host_request_pm(struct ata_host *host, pm_message_t mesg,
+static int ata_port_request_pm(struct ata_port *ap, pm_message_t mesg,
unsigned int action, unsigned int ehi_flags,
int wait)
{
+ struct ata_link *link;
unsigned long flags;
- int i, rc;
+ int rc;

- for (i = 0; i < host->n_ports; i++) {
- struct ata_port *ap = host->ports[i];
- struct ata_link *link;
+ /* Previous resume operation might still be in
+ * progress. Wait for PM_PENDING to clear.
+ */
+ if (ap->pflags & ATA_PFLAG_PM_PENDING) {
+ ata_port_wait_eh(ap);
+ WARN_ON(ap->pflags & ATA_PFLAG_PM_PENDING);
+ }

- /* Previous resume operation might still be in
- * progress. Wait for PM_PENDING to clear.
- */
- if (ap->pflags & ATA_PFLAG_PM_PENDING) {
- ata_port_wait_eh(ap);
- WARN_ON(ap->pflags & ATA_PFLAG_PM_PENDING);
- }
+ /* request PM ops to EH */
+ spin_lock_irqsave(ap->lock, flags);

- /* request PM ops to EH */
- spin_lock_irqsave(ap->lock, flags);
+ ap->pm_mesg = mesg;
+ if (wait) {
+ rc = 0;
+ ap->pm_result = &rc;
+ }

- ap->pm_mesg = mesg;
- if (wait) {
- rc = 0;
- ap->pm_result = &rc;
- }
+ ap->pflags |= ATA_PFLAG_PM_PENDING;
+ ata_for_each_link(link, ap, HOST_FIRST) {
+ link->eh_info.action |= action;
+ link->eh_info.flags |= ehi_flags;
+ }

- ap->pflags |= ATA_PFLAG_PM_PENDING;
- ata_for_each_link(link, ap, HOST_FIRST) {
- link->eh_info.action |= action;
- link->eh_info.flags |= ehi_flags;
- }
+ ata_port_schedule_eh(ap);

- ata_port_schedule_eh(ap);
+ spin_unlock_irqrestore(ap->lock, flags);

- spin_unlock_irqrestore(ap->lock, flags);
+ /* wait and check result */
+ if (wait) {
+ ata_port_wait_eh(ap);
+ WARN_ON(ap->pflags & ATA_PFLAG_PM_PENDING);
+ }

- /* wait and check result */
- if (wait) {
- ata_port_wait_eh(ap);
- WARN_ON(ap->pflags & ATA_PFLAG_PM_PENDING);
- if (rc)
- return rc;
- }
+ return rc;
+}
+
+static int ata_host_request_pm(struct ata_host *host, pm_message_t mesg,
+ unsigned int action, unsigned int ehi_flags,
+ int wait)
+{
+ int i, rc;
+
+ for (i = 0; i < host->n_ports; i++) {
+ struct ata_port *ap = host->ports[i];
+
+ rc = ata_port_request_pm(ap, mesg, action, ehi_flags, wait);
+ if (rc)
+ return rc;
}

return 0;
@@ -5874,6 +5886,45 @@ void ata_host_init(struct ata_host *host, struct device *dev,
host->ops = ops;
}

+#define to_ata_port(d) container_of(d, struct ata_port, tdev)
+
+static int ata_port_runtime_suspend(struct device *dev)
+{
+ struct ata_port *ap = to_ata_port(dev);
+ struct Scsi_Host *shost = ap->scsi_host;
+ int rc;
+
+ /* TODO: sync with hardware access */
+
+ if (shost->host_busy)
+ return -EBUSY;
+
+ rc = ata_port_request_pm(ap, PMSG_SUSPEND, 0, ATA_EHI_QUIET, 1);
+ return rc;
+}
+
+static int ata_port_runtime_resume(struct device *dev)
+{
+ struct ata_port *ap = to_ata_port(dev);
+ int rc;
+
+ rc = ata_port_request_pm(ap, PMSG_ON, ATA_EH_RESET,
+ ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET, 1);
+ return rc;
+}
+
+static const struct dev_pm_ops ata_port_pm_ops = {
+ .runtime_suspend = ata_port_runtime_suspend,
+ .runtime_resume = ata_port_runtime_resume,
+};
+
+static struct device_type ata_port_type = {
+ .name = "ata_port",
+#ifdef CONFIG_PM
+ .pm = &ata_port_pm_ops,
+#endif
+};
+
int ata_port_probe(struct ata_port *ap)
{
int rc = 0;
@@ -5903,6 +5954,15 @@ int ata_port_probe(struct ata_port *ap)
rc = ata_bus_probe(ap);
DPRINTK("ata%u: bus probe end\n", ap->print_id);
}
+
+ ap->tdev.type = &ata_port_type;
+ pm_runtime_set_active(&ap->tdev);
+ pm_runtime_use_autosuspend(&ap->tdev);
+ /* 3 minutes idle to auto suspend */
+ pm_runtime_set_autosuspend_delay(&ap->tdev, 180*1000);
+ pm_runtime_enable(&ap->tdev);
+ pm_request_autosuspend(&ap->tdev);
+
return rc;
}

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 46d087f..88fc7fe 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -48,6 +48,7 @@
#include <linux/hdreg.h>
#include <linux/uaccess.h>
#include <linux/suspend.h>
+#include <linux/pm_runtime.h>
#include <asm/unaligned.h>

#include "libata.h"
@@ -3208,6 +3209,11 @@ int ata_scsi_queuecmd(struct Scsi_Host *shost, struct scsi_cmnd *cmd)

ap = ata_shost_to_port(shost);

+ if (pm_runtime_suspended(&ap->tdev))
+ pm_runtime_resume(&ap->tdev);
+ pm_runtime_mark_last_busy(&ap->tdev);
+ pm_request_autosuspend(&ap->tdev);
+
spin_lock_irqsave(ap->lock, irq_flags);

ata_scsi_dump_cdb(ap, cmd);


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Lin Ming

unread,
Oct 27, 2011, 11:30:01 PM10/27/11
to

Jeff Garzik

unread,
Oct 27, 2011, 11:40:01 PM10/27/11
to
On 10/27/2011 11:21 PM, Lin Ming wrote:
> @@ -3208,6 +3209,11 @@ int ata_scsi_queuecmd(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
>
> ap = ata_shost_to_port(shost);
>
> + if (pm_runtime_suspended(&ap->tdev))
> + pm_runtime_resume(&ap->tdev);
> + pm_runtime_mark_last_busy(&ap->tdev);
> + pm_request_autosuspend(&ap->tdev);
> +
> spin_lock_irqsave(ap->lock, irq_flags);
>


Putting this into the core command dispatch fast-path is rather
disappointing. That's at least one additional lock, plus some atomic
instructions and tests.

Jeff

Lin Ming

unread,
Oct 28, 2011, 1:40:01 AM10/28/11
to
On Fri, 2011-10-28 at 11:37 +0800, Jeff Garzik wrote:
> On 10/27/2011 11:21 PM, Lin Ming wrote:
> > @@ -3208,6 +3209,11 @@ int ata_scsi_queuecmd(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
> >
> > ap = ata_shost_to_port(shost);
> >
> > + if (pm_runtime_suspended(&ap->tdev))
> > + pm_runtime_resume(&ap->tdev);
> > + pm_runtime_mark_last_busy(&ap->tdev);
> > + pm_request_autosuspend(&ap->tdev);
> > +
> > spin_lock_irqsave(ap->lock, irq_flags);
> >
>
>
> Putting this into the core command dispatch fast-path is rather
> disappointing. That's at least one additional lock, plus some atomic
> instructions and tests.

Maybe move suspend request to the resume function, as below.

static int ata_port_runtime_resume(struct device *dev)
{
struct ata_port *ap = to_ata_port(dev);
int rc;

rc = ata_port_request_pm(ap, PMSG_ON, ATA_EH_RESET,
ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET, 1);

pm_runtime_mark_last_busy(dev);
pm_request_autosuspend(dev);

return rc;
}

Then the fast-path looks like,

@@ -3208,6 +3209,9 @@ int ata_scsi_queuecmd(struct Scsi_Host *shost,
struct scsi_cmnd *cmd)

ap = ata_shost_to_port(shost);

+ if (pm_runtime_suspended(&ap->tdev))
+ pm_runtime_resume(&ap->tdev);
+
spin_lock_irqsave(ap->lock, irq_flags);

ata_scsi_dump_cdb(ap, cmd);

What do you think?

Thanks,
Lin Ming

Rafael J. Wysocki

unread,
Oct 28, 2011, 1:30:01 PM10/28/11
to
Please CC PM-related patches to linu...@vger.kernel.org (now added).

Thanks,
Rafael

Alan Stern

unread,
Oct 28, 2011, 3:00:01 PM10/28/11
to
On Fri, 28 Oct 2011, Rafael J. Wysocki wrote:

> On Friday, October 28, 2011, Lin Ming wrote:
> > On Fri, 2011-10-28 at 11:37 +0800, Jeff Garzik wrote:
> > > On 10/27/2011 11:21 PM, Lin Ming wrote:
> > > > @@ -3208,6 +3209,11 @@ int ata_scsi_queuecmd(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
> > > >
> > > > ap = ata_shost_to_port(shost);
> > > >
> > > > + if (pm_runtime_suspended(&ap->tdev))
> > > > + pm_runtime_resume(&ap->tdev);
> > > > + pm_runtime_mark_last_busy(&ap->tdev);
> > > > + pm_request_autosuspend(&ap->tdev);
> > > > +
> > > > spin_lock_irqsave(ap->lock, irq_flags);
> > > >
> > >
> > >
> > > Putting this into the core command dispatch fast-path is rather
> > > disappointing. That's at least one additional lock, plus some atomic
> > > instructions and tests.

And it calls pm_runtime_resume(), which requires process context, from
within a SCSI queuecmd routine, which runs in interrupt context.

> > Maybe move suspend request to the resume function, as below.
> >
> > static int ata_port_runtime_resume(struct device *dev)
> > {
> > struct ata_port *ap = to_ata_port(dev);
> > int rc;
> >
> > rc = ata_port_request_pm(ap, PMSG_ON, ATA_EH_RESET,
> > ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET, 1);
> >
> > pm_runtime_mark_last_busy(dev);
> > pm_request_autosuspend(dev);
> >
> > return rc;
> > }
> >
> > Then the fast-path looks like,
> >
> > @@ -3208,6 +3209,9 @@ int ata_scsi_queuecmd(struct Scsi_Host *shost,
> > struct scsi_cmnd *cmd)
> >
> > ap = ata_shost_to_port(shost);
> >
> > + if (pm_runtime_suspended(&ap->tdev))
> > + pm_runtime_resume(&ap->tdev);
> > +

Unfortunately that won't work. It's got a race; the device might be
active when the pm_runtime_suspended() test runs and then it might
suspend immediately after.

> > spin_lock_irqsave(ap->lock, irq_flags);
> >
> > ata_scsi_dump_cdb(ap, cmd);
> >
> > What do you think?

See the example code in section 9 of
Documentation/power/runtime_pm.txt for the outline of a general
approach.

Alan Stern

Lin Ming

unread,
Nov 1, 2011, 4:20:01 AM11/1/11
to
On Sat, 2011-10-29 at 02:51 +0800, Alan Stern wrote:
> On Fri, 28 Oct 2011, Rafael J. Wysocki wrote:
>
> > On Friday, October 28, 2011, Lin Ming wrote:
> > > On Fri, 2011-10-28 at 11:37 +0800, Jeff Garzik wrote:
> > > > On 10/27/2011 11:21 PM, Lin Ming wrote:
> > > > > @@ -3208,6 +3209,11 @@ int ata_scsi_queuecmd(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
> > > > >
> > > > > ap = ata_shost_to_port(shost);
> > > > >
> > > > > + if (pm_runtime_suspended(&ap->tdev))
> > > > > + pm_runtime_resume(&ap->tdev);
> > > > > + pm_runtime_mark_last_busy(&ap->tdev);
> > > > > + pm_request_autosuspend(&ap->tdev);
> > > > > +
> > > > > spin_lock_irqsave(ap->lock, irq_flags);
> > > > >
> > > >
> > > >
> > > > Putting this into the core command dispatch fast-path is rather
> > > > disappointing. That's at least one additional lock, plus some atomic
> > > > instructions and tests.
>
> And it calls pm_runtime_resume(), which requires process context, from
> within a SCSI queuecmd routine, which runs in interrupt context.

Hi,

Thanks to point this out. I change the code to do ata port runtime
suspend/resume through scsi layer.

scsi host runtime suspend/resume framework is already there(scsi_pm.c).
So I only need to insert hooks for ata port in
scsi_runtime_suspend/resume(...).

But I found a live lock when testing my patch.

<scsi host runtime suspend>
scsi_autopm_put_host
pm_runtime_put_sync
<scsi_host runtime pm status updated to RPM_SUSPENDING>
......
<call libata hook to do suspend>
<wake up scsi EH to handle suspend>
<wait for scsi EH ...>

<scsi EH wake up>
scsi_error_handler
<resume scsi host>
scsi_autopm_get_host
pm_runtime_get_sync
.....
<sleep to wait for the ongoing scsi host suspend>

libata schedules scsi EH to handle suspend, then dead lock happens
because scsi EH in turn waits for the ongoing suspend.

Any idea how to resolve this dead lock?

Thanks,
Lin Ming

Alan Stern

unread,
Nov 1, 2011, 3:40:02 PM11/1/11
to
This is a nasty problem. I've known for a long time that the
scsi_autopm_get_host() call in the error handler was going to lead to
problems.

For now, it seems best to assume that when the error handler starts,
the device will still be active. Therefore the scsi_autopm_get_host()
should be replaced by something that calls pm_runtime_get_noresume()
instead of pm_runtime_get_sync().

You can try replacing one function call with the other, or you can
define a new scsi_autopm_get_host_noresume() routine.

Alan Stern
0 new messages