Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

[PATCH 1/3] iommu/amd: Add logic to decode AMD IOMMU event flag

157 views
Skip to first unread message

suravee.su...@amd.com

unread,
Mar 27, 2013, 8:00:01 PM3/27/13
to
From: Suravee Suthikulpanit <suravee.su...@amd.com>

Add logic to decode AMD IOMMU event flag based on information from AMD IOMMU specification.
This should simplify debugging IOMMU errors. Also, dump DTE information in additional cases.

Signed-off-by: Suravee Suthikulpanit <suravee.su...@amd.com>
---
drivers/iommu/amd_iommu.c | 161 +++++++++++++++++++++++++++++++++++++--------
1 file changed, 134 insertions(+), 27 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 98f555d..477cfbb 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -592,7 +592,6 @@ static void amd_iommu_stats_init(void)
amd_iommu_stats_add(&invalidate_iotlb_all);
amd_iommu_stats_add(&pri_requests);
}
-
#endif

/****************************************************************************
@@ -601,6 +600,99 @@ static void amd_iommu_stats_init(void)
*
****************************************************************************/

+struct _event_log_flags {
+ u32 gn:1, /* 16 */
+ nx:1, /* 17 */
+ us:1, /* 18 */
+ i:1, /* 19 */
+ pr:1, /* 20 */
+ rw:1, /* 21 */
+ pe:1, /* 22 */
+ rz:1, /* 23 */
+ tr:1, /* 24 */
+ type:3, /* [27:25] */
+ _reserved_:20; /* Reserved */
+};
+
+static const char * const _invalid_transaction_desc[] = {
+ /* 000 */"Read request or non-posted write in the interrupt "
+ "addres range",
+ /* 001 */"Pretranslated transaction received from an I/O device "
+ "that has I=0 or V=0 in DTE",
+ /* 010 */"Port I/O space transaction received from an I/O device "
+ "that has IoCtl=00b in DTE",
+ /* 011 */"Posted write to invalid address range",
+ /* 100 */"Invalid read request or non-posted write",
+ /* 101 */"Posted write to the interrupt/EOI range from an I/O "
+ "device that has IntCtl=00b in DTE",
+ /* 110 */"Posted write to a reserved interrupt address range",
+ /* 111 */"Invalid transaction to the system management address range",
+};
+
+static const char * const _invalid_translation_desc[] = {
+ /* 000 */"Translation request received from an I/O device that has "
+ "I=0, or has V=0, or has V=1 and TV=0 in DTE",
+ /* 001 */"Translation request in invalid address range",
+ /* 010 */"Invalid translation request",
+ /* 011 */"Reserved",
+ /* 100 */"Reserved",
+ /* 101 */"Reserved",
+ /* 110 */"Reserved",
+ /* 111 */"Reserved",
+};
+
+static void dump_flags(int flags, int ev_type)
+{
+ struct _event_log_flags *p = (struct _event_log_flags *) &flags;
+ u32 err_type = p->type;
+
+ pr_err("AMD-Vi: Flags details:\n");
+ pr_err("AMD-Vi: Guest / Nested : %u\n", p->gn);
+ pr_err("AMD-Vi: No Execute : %u\n", p->nx);
+ pr_err("AMD-Vi: User-Supervisor : %u\n", p->us);
+ pr_err("AMD-Vi: Interrupt : %u\n", p->i);
+ pr_err("AMD-Vi: Present : %u\n", p->pr);
+ pr_err("AMD-Vi: Read / Write : %u\n", p->rw);
+ pr_err("AMD-Vi: Permission : %u\n", p->pe);
+ pr_err("AMD-Vi: Reserv bit not zero / Illegal level encoding : %u\n",
+ p->rz);
+ pr_err("AMD-Vi: Translation / Transaction : %u\n",
+ p->tr);
+ pr_err("AMD-Vi: Type of error : (0x%x) ", err_type);
+
+ if ((ev_type == EVENT_TYPE_DEV_TAB_ERR) ||
+ (ev_type == EVENT_TYPE_PAGE_TAB_ERR) ||
+ (ev_type == EVENT_TYPE_CMD_HARD_ERR)) {
+ /* Only supports up to 2 bits */
+ err_type &= 0x3;
+ switch (err_type) {
+ case 0:
+ pr_err("Reserved\n");
+ break;
+ case 1:
+ pr_err("Master Abort\n");
+ break;
+ case 2:
+ pr_err("Target Abort\n");
+ break;
+ case 3:
+ pr_err("Data Error\n");
+ break;
+ }
+ } else if (ev_type == EVENT_TYPE_INV_DEV_REQ) {
+ if (p->tr == 0) {
+ if (err_type < ARRAY_SIZE(_invalid_translation_desc))
+ printk("%s\n",
+ _invalid_translation_desc[err_type]);
+ } else {
+ if (err_type < ARRAY_SIZE(_invalid_transaction_desc))
+ printk("%s\n",
+ _invalid_transaction_desc[err_type]);
+ }
+ }
+ pr_err("AMD-Vi: (Note: Please refer to AMD IOMMU specification for details.)\n");
+}
+
static void dump_dte_entry(u16 devid)
{
int i;
@@ -619,31 +711,10 @@ static void dump_command(unsigned long phys_addr)
pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]);
}

-static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
+static void iommu_print_event(int type, int devid, int domid,
+ int flags, u64 address)
{
- int type, devid, domid, flags;
- volatile u32 *event = __evt;
- int count = 0;
- u64 address;
-
-retry:
- type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK;
- devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
- domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK;
- flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
- address = (u64)(((u64)event[3]) << 32) | event[2];
-
- if (type == 0) {
- /* Did we hit the erratum? */
- if (++count == LOOP_TIMEOUT) {
- pr_err("AMD-Vi: No event written to event log\n");
- return;
- }
- udelay(1);
- goto retry;
- }
-
- printk(KERN_ERR "AMD-Vi: Event logged [");
+ pr_err("AMD-Vi: Event logged [");

switch (type) {
case EVENT_TYPE_ILL_DEV:
@@ -651,6 +722,7 @@ retry:
"address=0x%016llx flags=0x%04x]\n",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
address, flags);
+ dump_flags(flags, type);
dump_dte_entry(devid);
break;
case EVENT_TYPE_IO_FAULT:
@@ -658,18 +730,22 @@ retry:
"domain=0x%04x address=0x%016llx flags=0x%04x]\n",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
domid, address, flags);
+ dump_flags(flags, type);
+ dump_dte_entry(devid);
break;
case EVENT_TYPE_DEV_TAB_ERR:
printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
"address=0x%016llx flags=0x%04x]\n",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
address, flags);
+ dump_flags(flags, type);
break;
case EVENT_TYPE_PAGE_TAB_ERR:
printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
"domain=0x%04x address=0x%016llx flags=0x%04x]\n",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
domid, address, flags);
+ dump_flags(flags, type);
break;
case EVENT_TYPE_ILL_CMD:
printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
@@ -678,6 +754,7 @@ retry:
case EVENT_TYPE_CMD_HARD_ERR:
printk("COMMAND_HARDWARE_ERROR address=0x%016llx "
"flags=0x%04x]\n", address, flags);
+ dump_flags(flags, type);
break;
case EVENT_TYPE_IOTLB_INV_TO:
printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
@@ -690,11 +767,40 @@ retry:
"address=0x%016llx flags=0x%04x]\n",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
address, flags);
+ dump_flags(flags, type);
+ dump_dte_entry(devid);
break;
default:
- printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type);
+ printk("UNKNOWN type=0x%02x]\n", type);
+ }
+}
+
+static void iommu_handle_event(struct amd_iommu *iommu, void *__evt)
+{
+ int type, devid, domid, flags;
+ u32 *event = __evt;
+ int count = 0;
+ u64 address;
+
+retry:
+ type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK;
+ devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
+ domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK;
+ flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
+ address = (u64)(((u64)event[3]) << 32) | event[2];
+
+ if (type == 0) {
+ /* Did we hit the erratum? */
+ if (++count == LOOP_TIMEOUT) {
+ pr_err("AMD-Vi: No event written to event log\n");
+ return;
+ }
+ udelay(1);
+ goto retry;
}

+ iommu_print_event(type, devid, domid, flags, address);
+
memset(__evt, 0, 4 * sizeof(u32));
}

@@ -709,7 +815,7 @@ static void iommu_poll_events(struct amd_iommu *iommu)
tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);

while (head != tail) {
- iommu_print_event(iommu, iommu->evt_buf + head);
+ iommu_handle_event(iommu, iommu->evt_buf + head);
head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
}

@@ -3270,6 +3376,7 @@ static int __init alloc_passthrough_domain(void)

return 0;
}
+
static int amd_iommu_domain_init(struct iommu_domain *dom)
{
struct protection_domain *domain;
--
1.7.10.4


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Suravee Suthikulanit

unread,
Apr 1, 2013, 9:50:02 AM4/1/13
to
Hi Joerg,

Do you have any comments or feedback about this patch set?

Thank you,

Suravee

Joerg Roedel

unread,
Apr 2, 2013, 10:40:01 AM4/2/13
to
Hi Suravee,

On Wed, Mar 27, 2013 at 06:51:23PM -0500, suravee.su...@amd.com wrote:
> From: Suravee Suthikulpanit <suravee.su...@amd.com>
>
> Add logic to decode AMD IOMMU event flag based on information from AMD IOMMU specification.
> This should simplify debugging IOMMU errors. Also, dump DTE information in additional cases.

The patch in general makes sense to have, but I have a couple of
comments.

> +static void dump_flags(int flags, int ev_type)
> +{
> + struct _event_log_flags *p = (struct _event_log_flags *) &flags;
> + u32 err_type = p->type;
> +
> + pr_err("AMD-Vi: Flags details:\n");
> + pr_err("AMD-Vi: Guest / Nested : %u\n", p->gn);
> + pr_err("AMD-Vi: No Execute : %u\n", p->nx);
> + pr_err("AMD-Vi: User-Supervisor : %u\n", p->us);
> + pr_err("AMD-Vi: Interrupt : %u\n", p->i);
> + pr_err("AMD-Vi: Present : %u\n", p->pr);
> + pr_err("AMD-Vi: Read / Write : %u\n", p->rw);
> + pr_err("AMD-Vi: Permission : %u\n", p->pe);
> + pr_err("AMD-Vi: Reserv bit not zero / Illegal level encoding : %u\n",
> + p->rz);
> + pr_err("AMD-Vi: Translation / Transaction : %u\n",
> + p->tr);
> + pr_err("AMD-Vi: Type of error : (0x%x) ", err_type);

Printing the flags multi-line is much too noisy for IOMMU events. Just
print a char-shortcut for each flag set on the same line.

> +
> + if ((ev_type == EVENT_TYPE_DEV_TAB_ERR) ||
> + (ev_type == EVENT_TYPE_PAGE_TAB_ERR) ||
> + (ev_type == EVENT_TYPE_CMD_HARD_ERR)) {
> + /* Only supports up to 2 bits */
> + err_type &= 0x3;
> + switch (err_type) {
> + case 0:
> + pr_err("Reserved\n");
> + break;
> + case 1:
> + pr_err("Master Abort\n");
> + break;
> + case 2:
> + pr_err("Target Abort\n");
> + break;
> + case 3:
> + pr_err("Data Error\n");
> + break;
> + }

Why do you create string-arrays for other type-encodings but not for
this one?

> + } else if (ev_type == EVENT_TYPE_INV_DEV_REQ) {
> + if (p->tr == 0) {
> + if (err_type < ARRAY_SIZE(_invalid_translation_desc))
> + printk("%s\n",
> + _invalid_translation_desc[err_type]);
> + } else {
> + if (err_type < ARRAY_SIZE(_invalid_transaction_desc))
> + printk("%s\n",
> + _invalid_transaction_desc[err_type]);

pr_cont instead of printk.


Joerg

Suravee Suthikulanit

unread,
Apr 2, 2013, 10:40:01 AM4/2/13
to
I will make the changes and send in for new patch for review.
>
>> +
>> + if ((ev_type == EVENT_TYPE_DEV_TAB_ERR) ||
>> + (ev_type == EVENT_TYPE_PAGE_TAB_ERR) ||
>> + (ev_type == EVENT_TYPE_CMD_HARD_ERR)) {
>> + /* Only supports up to 2 bits */
>> + err_type &= 0x3;
>> + switch (err_type) {
>> + case 0:
>> + pr_err("Reserved\n");
>> + break;
>> + case 1:
>> + pr_err("Master Abort\n");
>> + break;
>> + case 2:
>> + pr_err("Target Abort\n");
>> + break;
>> + case 3:
>> + pr_err("Data Error\n");
>> + break;
>> + }
> Why do you create string-arrays for other type-encodings but not for
> this one?
I can do the same way if that's preferred.

Thanks,

Suravee

Borislav Petkov

unread,
Apr 2, 2013, 10:50:02 AM4/2/13
to
On Tue, Apr 02, 2013 at 04:33:36PM +0200, Joerg Roedel wrote:
> Hi Suravee,
>
> On Wed, Mar 27, 2013 at 06:51:23PM -0500, suravee.su...@amd.com wrote:
> > From: Suravee Suthikulpanit <suravee.su...@amd.com>
> >
> > Add logic to decode AMD IOMMU event flag based on information from AMD IOMMU specification.
> > This should simplify debugging IOMMU errors. Also, dump DTE information in additional cases.
>
> The patch in general makes sense to have, but I have a couple of
> comments.

While you guys are at it, can someone fix this too pls (ASUS board with
a PD on it).

[ 0.220342] [Firmware Bug]: AMD-Vi: IOAPIC[9] not in IVRS table
[ 0.220398] [Firmware Bug]: AMD-Vi: IOAPIC[10] not in IVRS table
[ 0.220451] [Firmware Bug]: AMD-Vi: No southbridge IOAPIC found in IVRS table
[ 0.220506] AMD-Vi: Disabling interrupt remapping due to BIOS Bug(s)

Thanks.

--
Regards/Gruss,
Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

Joerg Roedel

unread,
Apr 2, 2013, 11:10:02 AM4/2/13
to
On Tue, Apr 02, 2013 at 04:40:37PM +0200, Borislav Petkov wrote:
> While you guys are at it, can someone fix this too pls (ASUS board with
> a PD on it).
>
> [ 0.220342] [Firmware Bug]: AMD-Vi: IOAPIC[9] not in IVRS table
> [ 0.220398] [Firmware Bug]: AMD-Vi: IOAPIC[10] not in IVRS table
> [ 0.220451] [Firmware Bug]: AMD-Vi: No southbridge IOAPIC found in IVRS table
> [ 0.220506] AMD-Vi: Disabling interrupt remapping due to BIOS Bug(s)

That is actually a BIOS problem. I wonder whether it would help to turn this
into a WARN_ON to get the board vendors to release working BIOSes.
Opinions?


Joerg

Borislav Petkov

unread,
Apr 2, 2013, 11:40:01 AM4/2/13
to
On Tue, Apr 02, 2013 at 05:03:04PM +0200, Joerg Roedel wrote:
> On Tue, Apr 02, 2013 at 04:40:37PM +0200, Borislav Petkov wrote:
> > While you guys are at it, can someone fix this too pls (ASUS board with
> > a PD on it).
> >
> > [ 0.220342] [Firmware Bug]: AMD-Vi: IOAPIC[9] not in IVRS table
> > [ 0.220398] [Firmware Bug]: AMD-Vi: IOAPIC[10] not in IVRS table
> > [ 0.220451] [Firmware Bug]: AMD-Vi: No southbridge IOAPIC found in IVRS table
> > [ 0.220506] AMD-Vi: Disabling interrupt remapping due to BIOS Bug(s)
>
> That is actually a BIOS problem. I wonder whether it would help to turn this
> into a WARN_ON to get the board vendors to release working BIOSes.
> Opinions?

Good luck trying to get ASUS to fix anything in their BIOS :(.

Can't we detect the SB IOAPIC some other way in this case?

--
Regards/Gruss,
Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

Suravee Suthikulpanit

unread,
Apr 2, 2013, 11:50:01 AM4/2/13
to
On 4/2/2013 10:29 AM, Borislav Petkov wrote:
> On Tue, Apr 02, 2013 at 05:03:04PM +0200, Joerg Roedel wrote:
>> On Tue, Apr 02, 2013 at 04:40:37PM +0200, Borislav Petkov wrote:
>>> While you guys are at it, can someone fix this too pls (ASUS board with
>>> a PD on it).
>>>
>>> [ 0.220342] [Firmware Bug]: AMD-Vi: IOAPIC[9] not in IVRS table
>>> [ 0.220398] [Firmware Bug]: AMD-Vi: IOAPIC[10] not in IVRS table
>>> [ 0.220451] [Firmware Bug]: AMD-Vi: No southbridge IOAPIC found in IVRS table
>>> [ 0.220506] AMD-Vi: Disabling interrupt remapping due to BIOS Bug(s)
>> That is actually a BIOS problem. I wonder whether it would help to turn this
>> into a WARN_ON to get the board vendors to release working BIOSes.
>> Opinions?
> Good luck trying to get ASUS to fix anything in their BIOS :(.
I have tried to contact Asus in the past to have them fix the issue, but
I got no luck. Once it is out in the field, it's very difficult to get
them to make changes. I am also addressing this issue with the BIOS
team for the future hardware.

Turning this into WARN_ON() at this point might break a lot of systems
currently out in the field. However, users can always switching to use
"intremap=off" but this might not be obvious.

Suravee

> Can't we detect the SB IOAPIC some other way in this case?
>


--

Joerg Roedel

unread,
Apr 2, 2013, 12:10:03 PM4/2/13
to
On Tue, Apr 02, 2013 at 05:29:56PM +0200, Borislav Petkov wrote:
> On Tue, Apr 02, 2013 at 05:03:04PM +0200, Joerg Roedel wrote:

> Good luck trying to get ASUS to fix anything in their BIOS :(.

Hmm...

> Can't we detect the SB IOAPIC some other way in this case?

I can certainly write a patch that works around your particular BIOS
bug. The problem is that such a fix will most certainly break other
systems.

Unfortunatly there is no reliable way to fixup the IO-APIC-ID->DEVID
mapping at runtime when the BIOS messed it up. The only thing I can do
is to check for potential problems and disable the intremap feature
then, so that the system will at least boot.


Joerg

Joerg Roedel

unread,
Apr 2, 2013, 12:10:03 PM4/2/13
to
On Tue, Apr 02, 2013 at 10:41:25AM -0500, Suthikulpanit, Suravee wrote:
> Turning this into WARN_ON() at this point might break a lot of
> systems currently out in the field. However, users can always
> switching to use "intremap=off" but this might not be obvious.

A WARN_ON doesn't break systems, it just creates more noise. Probably
enough noise to get board vendors to fix their stuff up. But there is
more to consider before making more noise, of course :)


Joerg

Borislav Petkov

unread,
Apr 2, 2013, 12:20:03 PM4/2/13
to
On Tue, Apr 02, 2013 at 06:04:00PM +0200, Joerg Roedel wrote:
> I can certainly write a patch that works around your particular BIOS
> bug. The problem is that such a fix will most certainly break other
> systems.
>
> Unfortunatly there is no reliable way to fixup the IO-APIC-ID->DEVID
> mapping at runtime when the BIOS messed it up. The only thing I can do
> is to check for potential problems and disable the intremap feature
> then, so that the system will at least boot.

Yeah, that could work:

* do not issue message but try to fixup the mapping
* if it works, fine
* if it doesn't, then give up and disable intremap.

And yes, I'm very sceptical about having a WARN_ON and it starts
screaming on machines all over the place. Good luck explaining to
people that you actually wanted to prod BIOS vendors to fix their
monkey-on-crack code but they weren't listening in the first place.

--
Regards/Gruss,
Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

Joerg Roedel

unread,
Apr 2, 2013, 12:40:03 PM4/2/13
to
On Tue, Apr 02, 2013 at 06:17:57PM +0200, Borislav Petkov wrote:
> On Tue, Apr 02, 2013 at 06:04:00PM +0200, Joerg Roedel wrote:
> > I can certainly write a patch that works around your particular BIOS
> > bug. The problem is that such a fix will most certainly break other
> > systems.
> >
> > Unfortunatly there is no reliable way to fixup the IO-APIC-ID->DEVID
> > mapping at runtime when the BIOS messed it up. The only thing I can do
> > is to check for potential problems and disable the intremap feature
> > then, so that the system will at least boot.
>
> Yeah, that could work:
>
> * do not issue message but try to fixup the mapping
> * if it works, fine
> * if it doesn't, then give up and disable intremap.

I can't find out in the driver whether the fix works or not. It will be
noticed later when the x86 code tries to setup the timers and finds out
that they don't work, which causes a kernel panic.

Okay, in theory I could implement a feedback loop between timer-setup
and intremap code and try fixups until it works. But that seems not to
be worth it to work around a buggy BIOS.

What I actually thought about was providing an IVRS-override on the
kernel command line. So that you can specify the IOAPIC_ID->DEVID
mapping there and make it work this way. What do you think?

> And yes, I'm very sceptical about having a WARN_ON and it starts
> screaming on machines all over the place. Good luck explaining to
> people that you actually wanted to prod BIOS vendors to fix their
> monkey-on-crack code but they weren't listening in the first place.

Yeah, that's my fear too. So we leave it better as it is...


Joerg

suravee.su...@amd.com

unread,
Apr 2, 2013, 3:10:02 PM4/2/13
to
From: Suravee Suthikulpanit <suravee.su...@amd.com>

Add logic to decode AMD IOMMU event flag based on information from AMD IOMMU specification.
This should simplify debugging IOMMU errors. Also, dump DTE information in additional cases.

Signed-off-by: Suravee Suthikulpanit <suravee.su...@amd.com>
---
Changelog:
V2:
* Fix printing format to reduce noise
* Use string table instead of switch/case
* Use pr_cont instead of printk

drivers/iommu/amd_iommu.c | 170 ++++++++++++++++++++++++++++++++++++---------
1 file changed, 136 insertions(+), 34 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index b287ca3..3362678 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -601,6 +601,93 @@ static void amd_iommu_stats_init(void)
*
****************************************************************************/

+struct _event_log_flags {
+ u32 gn:1, /* 16 */
+ nx:1, /* 17 */
+ us:1, /* 18 */
+ i:1, /* 19 */
+ pr:1, /* 20 */
+ rw:1, /* 21 */
+ pe:1, /* 22 */
+ rz:1, /* 23 */
+ tr:1, /* 24 */
+ type:3, /* [27:25] */
+ _reserved_:20; /* Reserved */
+};
+
+static const char * const _type_field_encodings[] = {
+ /* 00 */"Reserved",
+ /* 01 */"Master Abort",
+ /* 10 */"Target Abort",
+ /* 11 */"Data Error",
+static void dump_flags(int flags, int ev_type)
+{
+ struct _event_log_flags *p = (struct _event_log_flags *) &flags;
+ u32 err_type = p->type;
+
+ pr_err("AMD-Vi: Flags details: %s, NX:%u, %s, %s, %s, %s, %s, %s, %s\n",
+ (p->gn ? "Use guest address" : "Use nested address"),
+ (p->nx),
+ (p->us ? "User privileges" : "Supervisor privileges"),
+ (p->i ? "Interrupt request" : "memory request"),
+ (p->pr ? "Page present or Interrupt remapped" :
+ "Page not present or Interrupt blocked"),
+ (p->rw ? "Write transaction" : "Read transaction"),
+ (p->pe ? "Does not have permission" : "Has permission"),
+ (p->rz ? "Reserv bit not zero" : "Illegal level encoding"),
+ (p->tr ? "Translation request" : "Transaction request"));
+
+ pr_err("AMD-Vi: Type of error: (0x%x) ", err_type);
+ if ((ev_type == EVENT_TYPE_DEV_TAB_ERR) ||
+ (ev_type == EVENT_TYPE_PAGE_TAB_ERR) ||
+ (ev_type == EVENT_TYPE_CMD_HARD_ERR)) {
+ if (err_type < ARRAY_SIZE(_type_field_encodings)) {
+ pr_cont("%s\n",
+ _type_field_encodings[err_type]);
+ }
+ } else if (ev_type == EVENT_TYPE_INV_DEV_REQ) {
+ if (p->tr == 0) {
+ if (err_type < ARRAY_SIZE(_invalid_translation_desc))
+ pr_cont("%s\n",
+ _invalid_translation_desc[err_type]);
+ } else {
+ if (err_type < ARRAY_SIZE(_invalid_transaction_desc))
+ pr_cont("%s\n",
+ _invalid_transaction_desc[err_type]);
+ }
+ }
+ pr_err("AMD-Vi: (Note: Please refer to AMD IOMMU specification for details.)\n");
+}
+
static void dump_dte_entry(u16 devid)
{
int i;
@@ -619,81 +706,95 @@ static void dump_command(unsigned long phys_addr)
pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]);
}

-static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
+void amd_iommu_print_event(int type, int devid, int domid,
- printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x "
+ pr_cont("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x "
"address=0x%016llx flags=0x%04x]\n",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
address, flags);
+ dump_flags(flags, type);
dump_dte_entry(devid);
break;
case EVENT_TYPE_IO_FAULT:
- printk("IO_PAGE_FAULT device=%02x:%02x.%x "
+ pr_cont("IO_PAGE_FAULT device=%02x:%02x.%x "
"domain=0x%04x address=0x%016llx flags=0x%04x]\n",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
domid, address, flags);
+ dump_flags(flags, type);
+ dump_dte_entry(devid);
break;
case EVENT_TYPE_DEV_TAB_ERR:
- printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
+ pr_cont("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
"address=0x%016llx flags=0x%04x]\n",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
address, flags);
+ dump_flags(flags, type);
break;
case EVENT_TYPE_PAGE_TAB_ERR:
- printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
+ pr_cont("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
"domain=0x%04x address=0x%016llx flags=0x%04x]\n",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
domid, address, flags);
+ dump_flags(flags, type);
break;
case EVENT_TYPE_ILL_CMD:
- printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
+ pr_cont("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
dump_command(address);
break;
case EVENT_TYPE_CMD_HARD_ERR:
- printk("COMMAND_HARDWARE_ERROR address=0x%016llx "
+ pr_cont("COMMAND_HARDWARE_ERROR address=0x%016llx "
"flags=0x%04x]\n", address, flags);
+ dump_flags(flags, type);
break;
case EVENT_TYPE_IOTLB_INV_TO:
- printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
+ pr_cont("IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
"address=0x%016llx]\n",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
address);
break;
case EVENT_TYPE_INV_DEV_REQ:
- printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
+ pr_cont("INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
"address=0x%016llx flags=0x%04x]\n",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
address, flags);
+ dump_flags(flags, type);
+ dump_dte_entry(devid);
break;
default:
- printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type);
+ pr_cont("UNKNOWN type=0x%02x]\n", type);
}
+}
+
+static void iommu_handle_event(struct amd_iommu *iommu, void *__evt)
+{
+ int type, devid, domid, flags;
+ u32 *event = __evt;
+ int count = 0;
+ u64 address;
+
+retry:
+ type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK;
+ devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
+ domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK;
+ flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
+ address = (u64)(((u64)event[3]) << 32) | event[2];
+
+ if (type == 0) {
+ /* Did we hit the erratum? */
+ if (++count == LOOP_TIMEOUT) {
+ pr_err("AMD-Vi: No event written to event log\n");
+ return;
+ }
+ udelay(1);
+ goto retry;
+ }
+
+ amd_iommu_print_event(type, devid, domid, flags, address);

memset(__evt, 0, 4 * sizeof(u32));
}
@@ -709,7 +810,7 @@ static void iommu_poll_events(struct amd_iommu *iommu)
tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);

while (head != tail) {
- iommu_print_event(iommu, iommu->evt_buf + head);
+ iommu_handle_event(iommu, iommu->evt_buf + head);
head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
}

@@ -3268,6 +3369,7 @@ static int __init alloc_passthrough_domain(void)

return 0;
}
+
static int amd_iommu_domain_init(struct iommu_domain *dom)
{
struct protection_domain *domain;
--
1.7.10.4

Borislav Petkov

unread,
Apr 2, 2013, 3:40:03 PM4/2/13
to
On Tue, Apr 02, 2013 at 06:33:18PM +0200, Joerg Roedel wrote:
> I can't find out in the driver whether the fix works or not. It will
> be noticed later when the x86 code tries to setup the timers and finds
> out that they don't work, which causes a kernel panic.
>
> Okay, in theory I could implement a feedback loop between timer-setup
> and intremap code and try fixups until it works. But that seems not to
> be worth it to work around a buggy BIOS.

Yeah, same here. It's not like we really need intremap to work - we're
only trying to fix the annoying error message currently. :-)

> What I actually thought about was providing an IVRS-override on the
> kernel command line. So that you can specify the IOAPIC_ID->DEVID
> mapping there and make it work this way. What do you think?

I guess that is workable. I can imagine people wanting this if they want
to do the intremap thing on such b0rked BIOSen. So how do I specify this
IOAPIC_ID->DEVID mapping on the cmdline exactly?

> > And yes, I'm very sceptical about having a WARN_ON and it starts
> > screaming on machines all over the place. Good luck explaining to
> > people that you actually wanted to prod BIOS vendors to fix their
> > monkey-on-crack code but they weren't listening in the first place.
>
> Yeah, that's my fear too. So we leave it better as it is...

Hohumm.

--
Regards/Gruss,
Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

Joerg Roedel

unread,
Apr 2, 2013, 5:00:02 PM4/2/13
to
On Tue, Apr 02, 2013 at 09:32:40PM +0200, Borislav Petkov wrote:
> On Tue, Apr 02, 2013 at 06:33:18PM +0200, Joerg Roedel wrote:

> > Okay, in theory I could implement a feedback loop between timer-setup
> > and intremap code and try fixups until it works. But that seems not to
> > be worth it to work around a buggy BIOS.
>
> Yeah, same here. It's not like we really need intremap to work - we're
> only trying to fix the annoying error message currently. :-)

Right :)

> > What I actually thought about was providing an IVRS-override on the
> > kernel command line. So that you can specify the IOAPIC_ID->DEVID
> > mapping there and make it work this way. What do you think?
>
> I guess that is workable. I can imagine people wanting this if they want
> to do the intremap thing on such b0rked BIOSen. So how do I specify this
> IOAPIC_ID->DEVID mapping on the cmdline exactly?


Don't know yet, probably something like ivrs_ioapic[<apicid>]=0:14.2 and
ivrs_hpet[<hpet-id>]=0:14.2. But not entierly sure yet, at least parsing
shouldn't require lex and yacc ;)

> > Yeah, that's my fear too. So we leave it better as it is...
>
> Hohumm.

Thus shall it be!


Joerg

Joerg Roedel

unread,
Apr 2, 2013, 6:10:02 PM4/2/13
to
On Tue, Apr 02, 2013 at 02:05:14PM -0500, Suthikulpanit, Suravee wrote:
> From: Suravee Suthikulpanit <suravee.su...@amd.com>
> +static const char * const _type_field_encodings[] = {
> + /* 00 */"Reserved",
> + /* 01 */"Master Abort",
> + /* 10 */"Target Abort",
> + /* 11 */"Data Error",

In these arrays, please put the comments at the end of the line and
align them nicely like this:

static const char * const _type_field_encodings[] = {
"Reserved", /* 00 */
"Master Abort", /* 01 */
"Target Abort", /* 10 */
"Data Error", /* 11 */
};

This improves readability a lot.

> +static void dump_flags(int flags, int ev_type)
> +{
> + struct _event_log_flags *p = (struct _event_log_flags *) &flags;
> + u32 err_type = p->type;
> +
> + pr_err("AMD-Vi: Flags details: %s, NX:%u, %s, %s, %s, %s, %s, %s, %s\n",
> + (p->gn ? "Use guest address" : "Use nested address"),
> + (p->nx),
> + (p->us ? "User privileges" : "Supervisor privileges"),
> + (p->i ? "Interrupt request" : "memory request"),
> + (p->pr ? "Page present or Interrupt remapped" :
> + "Page not present or Interrupt blocked"),
> + (p->rw ? "Write transaction" : "Read transaction"),
> + (p->pe ? "Does not have permission" : "Has permission"),
> + (p->rz ? "Reserv bit not zero" : "Illegal level encoding"),
> + (p->tr ? "Translation request" : "Transaction request"));

These strings are too long, please just print useful shortcuts for them,
like
"US" : "SV" instead of
"User privileges" : "Supervisor privileges"

Also the commas between the strings are not needed then.

As a general rule, in the default configuration an event-log entry
should not take more than a single line in dmesg (which is also not too
long). If you want to print more information in some cases you can
enable that by a command line parameter like 'amd_iommu=verbose' or
something like that.

Joerg

suravee.su...@amd.com

unread,
Apr 2, 2013, 8:10:02 PM4/2/13
to
From: Suravee Suthikulpanit <suravee.su...@amd.com>

Add logic to decode AMD IOMMU event flag based on information from AMD IOMMU specification.
This should simplify debugging IOMMU errors. Also, dump DTE information in additional cases.

This is an example:
AMD-Vi: Event logged [IO_PAGE_FAULT device=51:00.0 domain=0x0000 address=0x0000000000000000 flags=0x0fff]
AMD-Vi: Flags details: Guest NX=1 User Intr Present Write No-Perm Rsrv-Bit Translation
AMD-Vi: Type of error: (0x7)
AMD-Vi: (Note: Please refer to AMD IOMMU specification for details.)
AMD-Vi: DTE[0]: 6000003fa75e2403
AMD-Vi: DTE[1]: 0000000000000014
AMD-Vi: DTE[2]: 2000003fa5e09011
AMD-Vi: DTE[3]: 0000000000000000

Signed-off-by: Suravee Suthikulpanit <suravee.su...@amd.com>
---
Changelog:
V3:
* Move comments to end of line
* Shorten the print out to be within one line
V2:
* Fix printing format to reduce noise
* Use string table instead of switch/case
* Use pr_cont instead of printk

drivers/iommu/amd_iommu.c | 171 ++++++++++++++++++++++++++++++++++++---------
1 file changed, 137 insertions(+), 34 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index b287ca3..593a1a3 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -601,6 +601,94 @@ static void amd_iommu_stats_init(void)
*
****************************************************************************/

+struct _event_log_flags {
+ u32 gn:1, /* 16 */
+ nx:1, /* 17 */
+ us:1, /* 18 */
+ i:1, /* 19 */
+ pr:1, /* 20 */
+ rw:1, /* 21 */
+ pe:1, /* 22 */
+ rz:1, /* 23 */
+ tr:1, /* 24 */
+ type:3, /* [27:25] */
+ _reserved_:20; /* Reserved */
+};
+
+static const char * const _type_field_encodings[] = {
+ "Reserved", /* 00 */
+ "Master Abort", /* 01 */
+ "Target Abort", /* 10 */
+ "Data Error", /* 11 */
+};
+
+static const char * const _invalid_transaction_desc[] = {
+ "Read request or non-posted write in the interrupt "
+ "addres range", /* 000 */
+ "Pretranslated transaction received from an "
+ "I/O device that has I=0 or V=0 in DTE", /* 001 */
+ "Port I/O space transaction received from an "
+ "I/O device that has IoCtl=00b in DTE", /* 010 */
+ "Posted write to invalid address range", /* 011 */
+ "Invalid read request or non-posted write", /* 100 */
+ "Posted write to the interrupt/EOI range from an "
+ "I/O device that has IntCtl=00b in DTE", /* 101 */
+ "Posted write to a reserved interrupt address range", /* 110 */
+ "Invalid transaction to the system management "
+ "address range", /* 111 */
+};
+
+static const char * const _invalid_translation_desc[] = {
+ "Translation request received from an I/O device "
+ "that has I=0, or has V=0, or has V=1 and "
+ "TV=0 in DTE", /* 000 */
+ "Translation request in invalid address range", /* 001 */
+ "Invalid translation request", /* 010 */
+ "Reserved", /* 011 */
+ "Reserved", /* 100 */
+ "Reserved", /* 101 */
+ "Reserved", /* 110 */
+ "Reserved", /* 111 */
+};
+
+static void dump_flags(int flags, int ev_type)
+{
+ struct _event_log_flags *p = (struct _event_log_flags *) &flags;
+ u32 err_type = p->type;
+
+ pr_err("AMD-Vi: Flags details: %s NX=%u %s %s %s %s %s %s %s\n",
+ (p->gn ? "Guest" : "Nested"),
+ (p->nx),
+ (p->us ? "User" : "Super"),
+ (p->i ? "Intr" : "Mem"),
+ (p->pr ? "Present" : "Absent"),
+ (p->rw ? "Write" : "Read"),
+ (p->pe ? "No-Perm" : "Has-Perm"),
+ (p->rz ? "Rsrv-Bit" : "Ill-Level"),
+ (p->tr ? "Translation" : "Transaction"));
@@ -619,81 +707,95 @@ static void dump_command(unsigned long phys_addr)
@@ -709,7 +811,7 @@ static void iommu_poll_events(struct amd_iommu *iommu)
tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);

while (head != tail) {
- iommu_print_event(iommu, iommu->evt_buf + head);
+ iommu_handle_event(iommu, iommu->evt_buf + head);
head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
}

@@ -3268,6 +3370,7 @@ static int __init alloc_passthrough_domain(void)

return 0;
}
+
static int amd_iommu_domain_init(struct iommu_domain *dom)
{
struct protection_domain *domain;
--
1.7.10.4


Suthikulpanit, Suravee

unread,
Apr 8, 2013, 10:40:02 AM4/8/13
to
Joerg,

Do you have any more feedback about this patch?

Thanks,

Suravee
________________________________________
From: suravee.su...@amd.com [suravee.su...@amd.com]
Sent: Tuesday, April 02, 2013 7:06 PM
To: io...@lists.linux-foundation.org; jo...@8bytes.org
Cc: linux-...@vger.kernel.org; Suthikulpanit, Suravee
Subject: [PATCH V3] iommu/amd: Add logic to decode AMD IOMMU event flag

Borislav Petkov

unread,
Apr 8, 2013, 11:00:02 AM4/8/13
to
On Mon, Apr 08, 2013 at 02:33:32PM +0000, Suthikulpanit, Suravee wrote:
> Joerg,
>
> Do you have any more feedback about this patch?
>
> Thanks,
>
> Suravee
> ________________________________________
> From: suravee.su...@amd.com [suravee.su...@amd.com]
> Sent: Tuesday, April 02, 2013 7:06 PM
> To: io...@lists.linux-foundation.org; jo...@8bytes.org
> Cc: linux-...@vger.kernel.org; Suthikulpanit, Suravee
> Subject: [PATCH V3] iommu/amd: Add logic to decode AMD IOMMU event flag
>
> From: Suravee Suthikulpanit <suravee.su...@amd.com>
>
> Add logic to decode AMD IOMMU event flag based on information from AMD IOMMU specification.
> This should simplify debugging IOMMU errors. Also, dump DTE information in additional cases.
>
> This is an example:
> AMD-Vi: Event logged [IO_PAGE_FAULT device=51:00.0 domain=0x0000 address=0x0000000000000000 flags=0x0fff]
> AMD-Vi: Flags details: Guest NX=1 User Intr Present Write No-Perm Rsrv-Bit Translation
> AMD-Vi: Type of error: (0x7)
> AMD-Vi: (Note: Please refer to AMD IOMMU specification for details.)

Independent from Joerg's feedback on this, I have only one question:
you're not seriously considering on dumping this "Note:..." line above
on *every* IO-PF, right?

I very positively assume that people who stare at that output should, as
a first prerequisite, know where to find those fields' descriptions. :-)

Thanks.

--
Regards/Gruss,
Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

Borislav Petkov

unread,
Apr 8, 2013, 2:50:02 PM4/8/13
to
On Mon, Apr 08, 2013 at 10:43:45AM -0500, Suravee Suthikulanit wrote:
> On 4/8/2013 9:50 AM, Borislav Petkov wrote:
> >Independent from Joerg's feedback on this, I have only one question:
> >you're not seriously considering on dumping this "Note:..." line above
> >on*every* IO-PF, right?
> If you think that is obvious, I can get rid of this also.

I don't know whether it is obvious or not, but this thing doesn't belong
there. I'm sure you can think of a much better fitting location to point
to the documentation. And while you do that, you can simply add a link
to the pdf version of the spec so that people don't have to look for it
at all.

Joerg Roedel

unread,
Apr 9, 2013, 5:50:01 AM4/9/13
to
On Tue, Apr 02, 2013 at 07:06:50PM -0500, Suthikulpanit, Suravee wrote:
> From: Suravee Suthikulpanit <suravee.su...@amd.com>
>
> Add logic to decode AMD IOMMU event flag based on information from AMD IOMMU specification.
> This should simplify debugging IOMMU errors. Also, dump DTE information in additional cases.
>
> This is an example:
> AMD-Vi: Event logged [IO_PAGE_FAULT device=51:00.0 domain=0x0000 address=0x0000000000000000 flags=0x0fff]
> AMD-Vi: Flags details: Guest NX=1 User Intr Present Write No-Perm Rsrv-Bit Translation
> AMD-Vi: Type of error: (0x7)
> AMD-Vi: (Note: Please refer to AMD IOMMU specification for details.)
> AMD-Vi: DTE[0]: 6000003fa75e2403
> AMD-Vi: DTE[1]: 0000000000000014
> AMD-Vi: DTE[2]: 2000003fa5e09011
> AMD-Vi: DTE[3]: 0000000000000000

This example should look like this instead:

AMD-Vi: Event logged [IO_PAGE_FAULT device=51:00.0 domain=0x0000 address=0x0 flags: G NX US I P Wr Rsvd Tr]

by default. When the user passes amd_iommu=verbose on the cmd-line then
you can print additional information like "type of error" or the dump
the DTE.


Joerg

Suravee Suthikulanit

unread,
Apr 9, 2013, 11:20:02 AM4/9/13
to
On 4/9/2013 4:41 AM, Joerg Roedel wrote:
> On Tue, Apr 02, 2013 at 07:06:50PM -0500, Suthikulpanit, Suravee wrote:
>> From: Suravee Suthikulpanit <suravee.su...@amd.com>
>>
>> Add logic to decode AMD IOMMU event flag based on information from AMD IOMMU specification.
>> This should simplify debugging IOMMU errors. Also, dump DTE information in additional cases.
>>
>> This is an example:
>> AMD-Vi: Event logged [IO_PAGE_FAULT device=51:00.0 domain=0x0000 address=0x0000000000000000 flags=0x0fff]
>> AMD-Vi: Flags details: Guest NX=1 User Intr Present Write No-Perm Rsrv-Bit Translation
>> AMD-Vi: Type of error: (0x7)
>> AMD-Vi: (Note: Please refer to AMD IOMMU specification for details.)
>> AMD-Vi: DTE[0]: 6000003fa75e2403
>> AMD-Vi: DTE[1]: 0000000000000014
>> AMD-Vi: DTE[2]: 2000003fa5e09011
>> AMD-Vi: DTE[3]: 0000000000000000
> This example should look like this instead:
>
> AMD-Vi: Event logged [IO_PAGE_FAULT device=51:00.0 domain=0x0000 address=0x0 flags: G NX US I P Wr Rsvd Tr]
>
> by default. When the user passes amd_iommu=verbose on the cmd-line then
> you can print additional information like "type of error" or the dump
> the DTE.
>
>
> Joerg
Joerg,

- I can fit DTE dump all in one line (to reduce the number of lines).
But it is necessary for debugging certain events.
- The "type of error" is also part of the flag translation for certain
event. It will only print if it is necessary.
- I can remove the "Note: ... " per Boris's request.
- I can shorten the flag details even more.

Here are some new examples:
[ 156.177883] AMD-Vi: Event logged [IO_PAGE_FAULT device=51:00.0
domain=0x0000 address=0x0000000000000000]
[ 156.177895] AMD-Vi: Flags: Gst NX Usr Int P W N-Perm Rsrv Trnslt
[ 156.177898] AMD-Vi: Type of error: (0x7)
[ 156.177899] AMD-Vi: DTE[0 .. 3]: 6000007fa764f403 0000000000000014
2000003fabb41811 0000000000000000

[ 197.353900] AMD-Vi: Event logged [PAGE_TAB_HARDWARE_ERROR
device=51:00.0 domain=0x0000 address=0x0000000000000000]
[ 197.353917] AMD-Vi: Flags: Gst NX Usr Int P W N-Perm Rsrv Trnslt

These messages are not "high volumn". What I am trying to achieve is
for users to be able to just send the errors to developers once they are
encountered without having to tell them to go back and reboot with the
"amd_iommu=verbose". In facts, they often difficult to reproduce from my
experience. This should reduce the amount of communications and efforts
required to debug the issue.

If you are ok with this, I will send out V4 in the next email.

Thank you,

Suravee

suravee.su...@amd.com

unread,
Apr 9, 2013, 3:10:03 PM4/9/13
to
From: Suravee Suthikulpanit <suravee.su...@amd.com>

Add logic to decode AMD IOMMU event flag based on information from AMD IOMMU specification.
This should simplify debugging IOMMU errors. Also, dump DTE information in additional cases.

This is an example:
AMD-Vi: Event logged [INVALID_DEVICE_REQUEST device=51:00.0 address=0x0000000000000000]
AMD-Vi: Flags details: Gst NX Usr Int P W N-Perm Rsrv Trnslt
AMD-Vi: Type of error: (0x7) Invalid transaction to the system management address range
AMD-Vi: DTE[0..3]: 600000bfa74e8403 0000000000000014 2000005fa718cc11 0000000000000000

Signed-off-by: Suravee Suthikulpanit <suravee.su...@amd.com>
---
V4:
* Change print out format to reduce noise
* Remove "(Note:...)"
V3:
* Move comments to end of line
* Shorten the print out to be within one line
V2:
* Fix printing format to reduce noise
* Use string table instead of switch/case
* Use pr_cont instead of printk

drivers/iommu/amd_iommu.c | 198 ++++++++++++++++++++++++++++++++++-----------
1 file changed, 151 insertions(+), 47 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index b287ca3..1f8e5fd 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -601,13 +601,101 @@ static void amd_iommu_stats_init(void)
+ pr_err("AMD-Vi: Flags details: %s %s %s %s %s %s %s %s %s\n",
+ (p->gn ? "Gst" : "Nst"),
+ (p->nx ? "NX" : "EX"),
+ (p->us ? "Usr" : "Sup"),
+ (p->i ? "Int" : "Mem"),
+ (p->pr ? "P" : "NP"),
+ (p->rw ? "W" : "R"),
+ (p->pe ? "N-Perm" : "Perm"),
+ (p->rz ? "Rsrv" : "Ill"),
+ (p->tr ? "Trnslt" : "Trnsact"));
+
+ pr_err("AMD-Vi: Type of error: (0x%x) ", err_type);
+ if ((ev_type == EVENT_TYPE_DEV_TAB_ERR) ||
+ (ev_type == EVENT_TYPE_PAGE_TAB_ERR) ||
+ (ev_type == EVENT_TYPE_CMD_HARD_ERR)) {
+ if (err_type < ARRAY_SIZE(_type_field_encodings)) {
+ pr_cont("%s\n",
+ _type_field_encodings[err_type]);
+ }
+ } else if (ev_type == EVENT_TYPE_INV_DEV_REQ) {
+ if (p->tr == 0) {
+ if (err_type < ARRAY_SIZE(_invalid_translation_desc))
+ pr_cont("%s\n",
+ _invalid_translation_desc[err_type]);
+ } else {
+ if (err_type < ARRAY_SIZE(_invalid_transaction_desc))
+ pr_cont("%s\n",
+ _invalid_transaction_desc[err_type]);
+ }
+ }
+}
+
static void dump_dte_entry(u16 devid)
{
int i;

+ pr_err("AMD-Vi: DTE[0..3]:");
for (i = 0; i < 4; ++i)
- pr_err("AMD-Vi: DTE[%d]: %016llx\n", i,
- amd_iommu_dev_table[devid].data[i]);
+ pr_cont(" %016llx", amd_iommu_dev_table[devid].data[i]);
+ pr_cont("\n");
}

static void dump_command(unsigned long phys_addr)
@@ -619,82 +707,97 @@ static void dump_command(unsigned long phys_addr)
- "address=0x%016llx flags=0x%04x]\n",
+ pr_cont("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x "
+ "address=0x%016llx]\n",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
- address, flags);
+ address);
+ dump_flags(flags, type);
dump_dte_entry(devid);
break;
case EVENT_TYPE_IO_FAULT:
- printk("IO_PAGE_FAULT device=%02x:%02x.%x "
- "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+ pr_cont("IO_PAGE_FAULT device=%02x:%02x.%x "
+ "domain=0x%04x address=0x%016llx]\n",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
- domid, address, flags);
+ domid, address);
+ dump_flags(flags, type);
+ dump_dte_entry(devid);
break;
case EVENT_TYPE_DEV_TAB_ERR:
- printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
- "address=0x%016llx flags=0x%04x]\n",
+ pr_cont("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
+ "address=0x%016llx]\n",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
- address, flags);
+ address);
+ dump_flags(flags, type);
break;
case EVENT_TYPE_PAGE_TAB_ERR:
- printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
- "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+ pr_cont("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
+ "domain=0x%04x address=0x%016llx]\n",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
- domid, address, flags);
+ domid, address);
+ dump_flags(flags, type);
break;
case EVENT_TYPE_ILL_CMD:
- printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
+ pr_cont("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n",
+ address);
dump_command(address);
break;
case EVENT_TYPE_CMD_HARD_ERR:
- printk("COMMAND_HARDWARE_ERROR address=0x%016llx "
- "flags=0x%04x]\n", address, flags);
+ pr_cont("COMMAND_HARDWARE_ERROR address=0x%016llx]\n",
+ address);
+ dump_flags(flags, type);
break;
case EVENT_TYPE_IOTLB_INV_TO:
- printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
+ pr_cont("IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
"address=0x%016llx]\n",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
address);
break;
case EVENT_TYPE_INV_DEV_REQ:
- printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
- "address=0x%016llx flags=0x%04x]\n",
+ pr_cont("INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
+ "address=0x%016llx]\n",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
- address, flags);
+ address);
+ amd_iommu_print_event(type, devid, domid, flags, address);
+
memset(__evt, 0, 4 * sizeof(u32));
}

@@ -709,7 +812,7 @@ static void iommu_poll_events(struct amd_iommu *iommu)
tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);

while (head != tail) {
- iommu_print_event(iommu, iommu->evt_buf + head);
+ iommu_handle_event(iommu, iommu->evt_buf + head);
head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
}

@@ -3268,6 +3371,7 @@ static int __init alloc_passthrough_domain(void)

return 0;
}
+
static int amd_iommu_domain_init(struct iommu_domain *dom)
{
struct protection_domain *domain;
--
1.7.10.4


Joerg Roedel

unread,
Apr 9, 2013, 4:00:02 PM4/9/13
to
On Tue, Apr 09, 2013 at 10:12:13AM -0500, Suthikulpanit, Suravee wrote:
> These messages are not "high volumn". What I am trying to achieve
> is for users to be able to just send the errors to developers once
> they are encountered without having to tell them to go back and
> reboot with the "amd_iommu=verbose". In facts, they often difficult
> to reproduce from my experience. This should reduce the amount of
> communications and efforts required to debug the issue.

More than one line per io-page-fault _is_ high volume. The current code
that prints only one line was sufficient to debug all related AMD IOMMU
driver problems in the past 5 years I am working on this driver.

What makes sense is decoding the flags field in the kernel. Doing this
by hand always costs some time that could be saved this way. If your
patch does more than that by default I am not going to merge it.


Joerg

suravee.su...@amd.com

unread,
Apr 10, 2013, 12:00:02 PM4/10/13
to
From: Suravee Suthikulpanit <suravee.su...@amd.com>

Add logic to decode AMD IOMMU event flag based on information from AMD IOMMU specification.
This should simplify debugging IOMMU errors. Also, dump DTE information in some additional
cases.

Example (default): The flags is now decoded.
AMD-Vi: Event logged [INVALID_DEVICE_REQUEST device=51:00.0 address=0x0000000000000000 flags:G Nx Usr I P W N-Pm Rsv Tl type(0x7)]
AMD-Vi: DTE[0..3]: 600000dfa760b403 0000000000000014 200000dfaba60c11 0000000000000000

Example (amd-iommu=verbose): The flags and error types are decoded
AMD-Vi: Event logged [INVALID_DEVICE_REQUEST device=51:00.0 address=0x0000000000000000 flags:G Nx Usr I P W N-Pm Rsv Tl]
AMD-Vi: Error type details: (0x7) Invalid transaction to the system management address range
AMD-Vi: DTE[0..3]: 600000dfa760b403 0000000000000014 200000dfaba60c11 0000000000000000

Signed-off-by: Suravee Suthikulpanit <suravee.su...@amd.com>
V5:
* Keeping flag decode in 1 line (as requested by Joerg)
* Use "amd-iommu=verbose" for additional printing
V4:
* Change print out format to reduce noise
* Remove "(Note:...)"
V3:
* Move comments to end of line
* Shorten the print out to be within one line
V2:
* Fix printing format to reduce noise
* Use string table instead of switch/case
* Use pr_cont instead of printk

---
drivers/iommu/amd_iommu.c | 215 +++++++++++++++++++++++++++++++++++----------
1 file changed, 168 insertions(+), 47 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index b287ca3..beb61dd 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -601,13 +601,118 @@ static void amd_iommu_stats_init(void)
*
****************************************************************************/

+struct _event_log_flags {
+ u32 gn:1, /* 16 */
+ nx:1, /* 17 */
+ us:1, /* 18 */
+ i:1, /* 19 */
+ pr:1, /* 20 */
+ rw:1, /* 21 */
+ pe:1, /* 22 */
+ rz:1, /* 23 */
+ tr:1, /* 24 */
+ type:3, /* [27:25] */
+ _reserved_:20; /* Reserved */
+};
+
+static const char * const _type_field_encodings[] = {
+ "Reserved", /* 00 */
+ "Master Abort", /* 01 */
+ "Target Abort", /* 10 */
+ "Data Error", /* 11 */
+};
+
+static const char * const _invalid_trnsac_desc[] = {
+ "Read request or non-posted write in the interrupt "
+ "addres range", /* 000 */
+ "Pretranslated transaction received from an "
+ "I/O device that has I=0 or V=0 in DTE", /* 001 */
+ "Port I/O space transaction received from an "
+ "I/O device that has IoCtl=00b in DTE", /* 010 */
+ "Posted write to invalid address range", /* 011 */
+ "Invalid read request or non-posted write", /* 100 */
+ "Posted write to the interrupt/EOI range from an "
+ "I/O device that has IntCtl=00b in DTE", /* 101 */
+ "Posted write to a reserved interrupt address range", /* 110 */
+ "Invalid transaction to the system management "
+ "address range", /* 111 */
+};
+
+static const char * const _invalid_trnslt_desc[] = {
+ "Translation request received from an I/O device "
+ "that has I=0, or has V=0, or has V=1 and "
+ "TV=0 in DTE", /* 000 */
+ "Translation request in invalid address range", /* 001 */
+ "Invalid translation request", /* 010 */
+ "Reserved", /* 011 */
+ "Reserved", /* 100 */
+ "Reserved", /* 101 */
+ "Reserved", /* 110 */
+ "Reserved", /* 111 */
+};
+
+static void dump_detail_error(struct _event_log_flags *p, int ev_type)
+{
+ u32 err_type = p->type;
+
+ pr_err("AMD-Vi: Error type details: (0x%x) ", err_type);
+ if ((ev_type == EVENT_TYPE_DEV_TAB_ERR) ||
+ (ev_type == EVENT_TYPE_PAGE_TAB_ERR) ||
+ (ev_type == EVENT_TYPE_CMD_HARD_ERR)) {
+ if (err_type < ARRAY_SIZE(_type_field_encodings)) {
+ pr_cont("%s\n",
+ _type_field_encodings[err_type]);
+ }
+ } else if (ev_type == EVENT_TYPE_INV_DEV_REQ) {
+ if (p->tr == 0) {
+ if (err_type < ARRAY_SIZE(_invalid_trnslt_desc))
+ pr_cont("%s\n",
+ _invalid_trnslt_desc[err_type]);
+ } else {
+ if (err_type < ARRAY_SIZE(_invalid_trnsac_desc))
+ pr_cont("%s\n",
+ _invalid_trnsac_desc[err_type]);
+ }
+ }
+}
+
+static void dump_flags(int flags, int ev_type)
+{
+ struct _event_log_flags *p = (struct _event_log_flags *) &flags;
+ u32 err_type = p->type;
+
+ pr_cont(" flags:%s %s %s %s %s %s %s %s %s",
+ (p->gn ? "G" : "N"),
+ (p->nx ? "Nx" : "Ex"),
+ (p->us ? "Usr" : "Sup"),
+ (p->i ? "I" : "M"),
+ (p->pr ? "P" : "NP"),
+ (p->rw ? "W" : "R"),
+ (p->pe ? "N-Pm" : "Pm"),
+ (p->rz ? "Rsv" : "Ill"),
+ (p->tr ? "Tl" : "Ta"));
+
+ /* Error type only needed for certain events */
+ if (!amd_iommu_verbose) {
+ if ((ev_type == EVENT_TYPE_DEV_TAB_ERR) ||
+ (ev_type == EVENT_TYPE_PAGE_TAB_ERR) ||
+ (ev_type == EVENT_TYPE_CMD_HARD_ERR) ||
+ (ev_type == EVENT_TYPE_INV_DEV_REQ))
+ pr_cont(" type(0x%x)]\n", err_type);
+ } else {
+ pr_cont("]\n");
+ dump_detail_error(p, ev_type);
+ }
+}
+
static void dump_dte_entry(u16 devid)
{
int i;

+ pr_err("AMD-Vi: DTE[0..3]:");
for (i = 0; i < 4; ++i)
- pr_err("AMD-Vi: DTE[%d]: %016llx\n", i,
- amd_iommu_dev_table[devid].data[i]);
+ pr_cont(" %016llx", amd_iommu_dev_table[devid].data[i]);
+ pr_cont("\n");
}

static void dump_command(unsigned long phys_addr)
@@ -619,81 +724,96 @@ static void dump_command(unsigned long phys_addr)
+ "address=0x%016llx",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
- address, flags);
+ address);
+ dump_flags(flags, type);
dump_dte_entry(devid);
break;
case EVENT_TYPE_IO_FAULT:
- printk("IO_PAGE_FAULT device=%02x:%02x.%x "
- "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+ pr_cont("IO_PAGE_FAULT device=%02x:%02x.%x "
+ "domain=0x%04x address=0x%016llx",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
- domid, address, flags);
+ domid, address);
+ dump_flags(flags, type);
+ dump_dte_entry(devid);
break;
case EVENT_TYPE_DEV_TAB_ERR:
- printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
- "address=0x%016llx flags=0x%04x]\n",
+ pr_cont("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
+ "address=0x%016llx",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
- address, flags);
+ address);
+ dump_flags(flags, type);
break;
case EVENT_TYPE_PAGE_TAB_ERR:
- printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
- "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+ pr_cont("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
+ "domain=0x%04x address=0x%016llx",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
- domid, address, flags);
+ domid, address);
+ dump_flags(flags, type);
break;
case EVENT_TYPE_ILL_CMD:
- printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
+ pr_cont("ILLEGAL_COMMAND_ERROR address=0x%016llx\n",
+ address);
dump_command(address);
break;
case EVENT_TYPE_CMD_HARD_ERR:
- printk("COMMAND_HARDWARE_ERROR address=0x%016llx "
- "flags=0x%04x]\n", address, flags);
+ pr_cont("COMMAND_HARDWARE_ERROR address=0x%016llx",
+ address);
+ dump_flags(flags, type);
break;
case EVENT_TYPE_IOTLB_INV_TO:
- printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
+ pr_cont("IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
"address=0x%016llx]\n",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
address);
break;
case EVENT_TYPE_INV_DEV_REQ:
- printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
- "address=0x%016llx flags=0x%04x]\n",
+ pr_cont("INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
+ "address=0x%016llx",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
- address, flags);
+ address);
+ dump_flags(flags, type);
+ dump_dte_entry(devid);
break;
default:
- printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type);
+ pr_cont("UNKNOWN type=0x%02x\n", type);
}
+}
+
+static void iommu_handle_event(struct amd_iommu *iommu, void *__evt)
+{
+ int type, devid, domid, flags;
+ u32 *event = __evt;
+ int count = 0;
+ u64 address;
+
+retry:
+ type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK;
+ devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
+ domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK;
+ flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
+ address = (u64)(((u64)event[3]) << 32) | event[2];
+
+ if (type == 0) {
+ /* Did we hit the erratum? */
+ if (++count == LOOP_TIMEOUT) {
+ pr_err("AMD-Vi: No event written to event log\n");
+ return;
+ }
+ udelay(1);
+ goto retry;
+ }
+
+ amd_iommu_print_event(type, devid, domid, flags, address);

memset(__evt, 0, 4 * sizeof(u32));
}
@@ -709,7 +829,7 @@ static void iommu_poll_events(struct amd_iommu *iommu)
tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);

while (head != tail) {
- iommu_print_event(iommu, iommu->evt_buf + head);
+ iommu_handle_event(iommu, iommu->evt_buf + head);
head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
}

@@ -3268,6 +3388,7 @@ static int __init alloc_passthrough_domain(void)

return 0;
}
+
static int amd_iommu_domain_init(struct iommu_domain *dom)
{
struct protection_domain *domain;
--
1.7.10.4


Shuah Khan

unread,
Apr 10, 2013, 12:30:02 PM4/10/13
to
Good feature. Do you also plan to add decode logic for these flags.
For example, RZ is only meaningful when PR=1, RW is only meaningful
when
PR=1, TR=0, and I=0, and so on? This additional logic will be useful.

Reviewed-by: Shuah Khan <shua...@gmail.com>

-- Shuah

Shuah Khan

unread,
Apr 10, 2013, 12:30:02 PM4/10/13
to
On Wed, Apr 10, 2013 at 10:27 AM, Suravee Suthikulanit
<suravee.su...@amd.com> wrote:
> On 4/10/2013 11:21 AM, Shuah Khan wrote:
>>
>> Good feature. Do you also plan to add decode logic for these flags.
>> For example, RZ is only meaningful when PR=1, RW is only meaningful
>> when
>> PR=1, TR=0, and I=0, and so on? This additional logic will be useful.
>>
>> Reviewed-by: Shuah Khan<shua...@gmail.com>
>>
>> -- Shuah
>
> Additional filtering logic can also be added in the future. This will also
> be important if we are planning on handling IOMMU errors.
>

Correct. Additional logic isn't necessary in this patch.

-- Shuah

Suravee Suthikulanit

unread,
Apr 10, 2013, 12:50:02 PM4/10/13
to
On 4/10/2013 11:21 AM, Shuah Khan wrote:
> Good feature. Do you also plan to add decode logic for these flags.
> For example, RZ is only meaningful when PR=1, RW is only meaningful
> when
> PR=1, TR=0, and I=0, and so on? This additional logic will be useful.
>
> Reviewed-by: Shuah Khan<shua...@gmail.com>
>
> -- Shuah
Additional filtering logic can also be added in the future. This will
also be important if we are planning on handling IOMMU errors.

Suravee

Suravee Suthikulanit

unread,
Apr 18, 2013, 12:40:02 PM4/18/13
to
Joerg,

Do you have any more concerns about this patch?

Thank you,

Suravee
0 new messages