- Thread 1 loops through and calls irq_to_desc_alloc_node with new=0x66.
- Thread 2 has exited the loop with irq=0x66 and calls dynamic_irq_init(0x66)
setting desc->chip_data = NULL
- Thread 1 then dereferences NULL via desc_new->chip_data->vector
Fix by moving holding vector_lock until after the dynamic_irq_init().
BUG: unable to handle kernel NULL pointer dereference at 0000000000000088
IP: [<ffffffff8101df32>] create_irq_nr+0x62/0x100
PGD 23dc24067 PUD 23dc72067 PMD 0
Oops: 0000 [#1] SMP
last sysfs file: /sys/devices/pci0000:00/0000:00:1c.0/0000:08:00.0/net/eth2/type
CPU 12
Modules linked in: i2c_i801 igb(+) iTCO_wdt ixgbe(+) ioatdma(+) e1000e mptctl mdio usb_storage iTCO_vendor_support dca ses button sg pcspkr enclosure container ac usbhid uhci_hcd ehci_hcd usbcore sd_mod edd fan processor ide_pci_generic ide_core ata_generic ata_piix libata lpfc scsi_transport_fc scsi_tgt mptsas mptscsih mptbase scsi_transport_sas megaraid_sas scsi_mod thermal thermal_sys
Supported: Yes
Pid: 1684, comm: modprobe Not tainted 2.6.32.3-0.3-default #1 PRIMERGY RX300 S5
RIP: 0010:[<ffffffff8101df32>] [<ffffffff8101df32>] create_irq_nr+0x62/0x100
RSP: 0018:ffff88013ce0fc18 EFLAGS: 00010086
RAX: ffff88023e11ee00 RBX: 0000000000000066 RCX: 00000000000000c2
RDX: 00000000000000c2 RSI: 00000000ffffffff RDI: 0000000000000066
RBP: 0000000000000000 R08: ffffffff81767a85 R09: 000000000000000a
R10: 0000000000000000 R11: 0000000000000000 R12: 00000000ffffffff
R13: 0000000000000206 R14: ffff88013f381000 R15: 0000000000000080
FS: 00007f16d181e700(0000) GS:ffff880143d00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000000000000088 CR3: 000000023d26c000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process modprobe (pid: 1684, threadinfo ffff88013ce0e000, task ffff88013d080340)
Stack:
0000000000000001 0000000000000000 ffff88023d2d8740 0000000000000064
<0> 0000000000000007 ffffffff8101f2ce 0000000900000009 ffff88013f381810
<0> ffffffff3f381000 0000000000000048 0000000000000009 ffff88013f381000
Call Trace:
[<ffffffff8101f2ce>] arch_setup_msi_irqs+0xce/0x190
[<ffffffff812574b9>] msix_capability_init+0x189/0x2f0
[<ffffffffa032b4a4>] igb_set_interrupt_capability+0xe4/0x1e0 [igb]
[<ffffffffa033634e>] igb_probe+0x3de/0xd15 [igb]
[<ffffffff8124d212>] local_pci_probe+0x12/0x20
[<ffffffff8124d4c0>] __pci_device_probe+0xe0/0xf0
[<ffffffff8124e3d3>] pci_device_probe+0x33/0x60
[<ffffffff812e72f7>] really_probe+0x77/0x230
[<ffffffff812e751a>] driver_probe_device+0x6a/0xc0
[<ffffffff812e7603>] __driver_attach+0x93/0xa0
[<ffffffff812e6928>] bus_for_each_dev+0x58/0x80
[<ffffffff812e6115>] bus_add_driver+0x195/0x2f0
[<ffffffff812e7919>] driver_register+0x79/0x170
[<ffffffff8124e648>] __pci_register_driver+0x58/0xe0
[<ffffffff810001e5>] do_one_initcall+0x35/0x190
[<ffffffff8108af34>] sys_init_module+0xe4/0x270
[<ffffffff81002f7b>] system_call_fastpath+0x16/0x1b
[<00007f16d13b234a>] 0x7f16d13b234a
Code: 2e 0f 1f 84 00 00 00 00 00 83 c3 01 39 1d e7 e2 9f 00 76 7d 44 89 e6 89 df e8 2b 2a 3d 00 48 85 c0 0f 84 8a 00 00 00 48 8b 68 40 <80> bd 88 00 00 00 00 75 d5 44 89 e6 48 89 c7 e8 6a 5c 09 00 49
RIP [<ffffffff8101df32>] create_irq_nr+0x62/0x100
RSP <ffff88013ce0fc18>
CR2: 0000000000000088
Signed-off-by: Brandon Philips <bphi...@suse.de>
---
arch/x86/kernel/apic/io_apic.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
Index: linux-2.6.32-SLE11-SP1/arch/x86/kernel/apic/io_apic.c
===================================================================
--- linux-2.6.32-SLE11-SP1.orig/arch/x86/kernel/apic/io_apic.c
+++ linux-2.6.32-SLE11-SP1/arch/x86/kernel/apic/io_apic.c
@@ -3212,7 +3212,6 @@ unsigned int create_irq_nr(unsigned int
irq = new;
break;
}
- spin_unlock_irqrestore(&vector_lock, flags);
if (irq > 0) {
dynamic_irq_init(irq);
@@ -3220,6 +3219,8 @@ unsigned int create_irq_nr(unsigned int
if (desc_new)
desc_new->chip_data = cfg_new;
}
+ spin_unlock_irqrestore(&vector_lock, flags);
+
return irq;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
two threads get same irq?
YH
can you check if
fix your problem with 2.6.32?
From 37ef2a3029fde884808ff1b369677abc7dd9a79a Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yin...@kernel.org>
Date: Sat, 21 Nov 2009 00:23:37 -0800
Subject: [PATCH] x86: Re-get cfg_new in case reuse/move irq_desc
When irq_desc is moved, we need to make sure to use the right cfg_new.
Signed-off-by: Yinghai Lu <yin...@kernel.org>
LKML-Reference: <4B07A739...@kernel.org>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---
arch/x86/kernel/apic/io_apic.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ff23719..085e60e 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3186,6 +3186,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
continue;
desc_new = move_irq_desc(desc_new, node);
+ cfg_new = desc_new->chip_data;
if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
irq = new;
--
1.6.6.1
This race happened when two drivers were setting up MSI-X at the same
time via pci_enable_msix(). See this dmesg excerpt:
[ 85.170610] ixgbe 0000:02:00.1: irq 97 for MSI/MSI-X
[ 85.170611] alloc irq_desc for 99 on node -1
[ 85.170613] igb 0000:08:00.1: irq 98 for MSI/MSI-X
[ 85.170614] alloc kstat_irqs on node -1
[ 85.170616] alloc irq_2_iommu on node -1
[ 85.170617] alloc irq_desc for 100 on node -1
[ 85.170619] alloc kstat_irqs on node -1
[ 85.170621] alloc irq_2_iommu on node -1
[ 85.170625] ixgbe 0000:02:00.1: irq 99 for MSI/MSI-X
[ 85.170626] alloc irq_desc for 101 on node -1
[ 85.170628] igb 0000:08:00.1: irq 100 for MSI/MSI-X
[ 85.170630] alloc kstat_irqs on node -1
[ 85.170631] alloc irq_2_iommu on node -1
[ 85.170635] alloc irq_desc for 102 on node -1
[ 85.170636] alloc kstat_irqs on node -1
[ 85.170639] alloc irq_2_iommu on node -1
[ 85.170646] BUG: unable to handle kernel NULL pointer dereference
at 0000000000000088
As you can see igb and ixgbe are both alternating on create_irq_nr()
via pci_enable_msix() in their probe function. So, let me rewrite my
explanation using this example:
ixgbe: While looping through irq_desc_ptrs[] via create_irq_nr() ixgbe
choses irq_desc_ptrs[102] and exits the loop, drops vector_lock and
calls dynamic_irq_init. Then it sets irq_desc_ptrs[102]->chip_data =
NULL via dynamic_irq_init().
igb: Grabs the vector_lock now and starts looping over irq_desc_ptrs[]
via create_irq_nr(). It gets to irq_desc_ptrs[102] and does this:
cfg_new = irq_desc_ptrs[102]->chip_data;
if (cfg_new->vector != 0)
continue;
This hits the NULL deref.
Does that make sense? It is sort of a rare thing to reproduce- took
40+ reboots.
Thanks,
Brandon
please try following patch in addition to
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 7edafc7..14099ba 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3280,12 +3280,9 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
}
spin_unlock_irqrestore(&vector_lock, flags);
- if (irq > 0) {
- dynamic_irq_init(irq);
- /* restore it, in case dynamic_irq_init clear it */
- if (desc_new)
- desc_new->chip_data = cfg_new;
- }
+ if (irq > 0)
+ dynamic_irq_init_keep_chip_data(irq);
+
return irq;
}
diff --git a/include/linux/irq.h b/include/linux/irq.h
index d13492d..cd6b870 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -400,6 +400,7 @@ static inline int irq_has_action(unsigned int irq)
/* Dynamic irq helper functions */
extern void dynamic_irq_init(unsigned int irq);
+void dynamic_irq_init_keep_chip_data(unsigned int irq);
extern void dynamic_irq_cleanup(unsigned int irq);
/* Set/get chip/data for an IRQ: */
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index ecc3fa2..370dbc4 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -22,7 +22,7 @@
* dynamic_irq_init - initialize a dynamically allocated irq
* @irq: irq number to initialize
*/
-void dynamic_irq_init(unsigned int irq)
+static void dynamic_irq_init_x(unsigned int irq, bool keep_chip_data)
{
struct irq_desc *desc;
unsigned long flags;
@@ -41,7 +41,8 @@ void dynamic_irq_init(unsigned int irq)
desc->depth = 1;
desc->msi_desc = NULL;
desc->handler_data = NULL;
- desc->chip_data = NULL;
+ if (!keep_chip_data)
+ desc->chip_data = NULL;
desc->action = NULL;
desc->irq_count = 0;
desc->irqs_unhandled = 0;
@@ -54,6 +55,16 @@ void dynamic_irq_init(unsigned int irq)
raw_spin_unlock_irqrestore(&desc->lock, flags);
}
+void dynamic_irq_init(unsigned int irq)
+{
+ dynamic_irq_init_x(irq, false);
+}
+
+void dynamic_irq_init_keep_chip_data(unsigned int irq)
+{
+ dynamic_irq_init_x(irq, true);
+}
+
/**
* dynamic_irq_cleanup - cleanup a dynamically allocated irq
* @irq: irq number to initialize
How is this commit related to this bug? The NULL deref I am hitting is
from this bit in create_irq_nr():
if (cfg_new->vector != 0)
continue;
Which comes before the assignment of cfg_new. I don't see how it is
related. Plus, node == -1 in this case so move_irq_desc() is a no-op.
> diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
> index 7edafc7..14099ba 100644
> --- a/arch/x86/kernel/apic/io_apic.c
> +++ b/arch/x86/kernel/apic/io_apic.c
> @@ -3280,12 +3280,9 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
> }
> spin_unlock_irqrestore(&vector_lock, flags);
>
> - if (irq > 0) {
> - dynamic_irq_init(irq);
> - /* restore it, in case dynamic_irq_init clear it */
> - if (desc_new)
> - desc_new->chip_data = cfg_new;
> - }
> + if (irq > 0)
> + dynamic_irq_init_keep_chip_data(irq);
> +
> return irq;
> }
That would solve it too but I don't think it is a great
solution. Keeping the vector_lock until we are completely done setting
up the irq is more straightforward and won't cost much time at all.
I am hesitant to have it tested since it is a really small race
window, reproducing took 40+ reboots initially and looks technically
correct.
Thanks,
Brandon
Brodon found:
race happened when two drivers were setting up MSI-X at the same
time via pci_enable_msix(). See this dmesg excerpt:
[ 85.170610] ixgbe 0000:02:00.1: irq 97 for MSI/MSI-X
[ 85.170611] alloc irq_desc for 99 on node -1
[ 85.170613] igb 0000:08:00.1: irq 98 for MSI/MSI-X
[ 85.170614] alloc kstat_irqs on node -1
[ 85.170616] alloc irq_2_iommu on node -1
[ 85.170617] alloc irq_desc for 100 on node -1
[ 85.170619] alloc kstat_irqs on node -1
[ 85.170621] alloc irq_2_iommu on node -1
[ 85.170625] ixgbe 0000:02:00.1: irq 99 for MSI/MSI-X
[ 85.170626] alloc irq_desc for 101 on node -1
[ 85.170628] igb 0000:08:00.1: irq 100 for MSI/MSI-X
[ 85.170630] alloc kstat_irqs on node -1
[ 85.170631] alloc irq_2_iommu on node -1
[ 85.170635] alloc irq_desc for 102 on node -1
[ 85.170636] alloc kstat_irqs on node -1
[ 85.170639] alloc irq_2_iommu on node -1
[ 85.170646] BUG: unable to handle kernel NULL pointer dereference
at 0000000000000088
As you can see igb and ixgbe are both alternating on create_irq_nr()
via pci_enable_msix() in their probe function.
ixgbe: While looping through irq_desc_ptrs[] via create_irq_nr() ixgbe
choses irq_desc_ptrs[102] and exits the loop, drops vector_lock and
calls dynamic_irq_init. Then it sets irq_desc_ptrs[102]->chip_data =
NULL via dynamic_irq_init().
igb: Grabs the vector_lock now and starts looping over irq_desc_ptrs[]
via create_irq_nr(). It gets to irq_desc_ptrs[102] and does this:
cfg_new = irq_desc_ptrs[102]->chip_data;
if (cfg_new->vector != 0)
continue;
This hits the NULL deref.
so let remove the save and restore code.
just don't clear it in that path
Reported-and-analyzed-by: Brandon Philips <bphi...@suse.de>
Signed-off-by: Yinghai Lu <yin...@kernel.org>
Cc: sta...@kernel.org
---
arch/x86/kernel/apic/io_apic.c | 9 +++------
include/linux/irq.h | 1 +
kernel/irq/chip.c | 15 +++++++++++++--
3 files changed, 17 insertions(+), 8 deletions(-)
Index: linux-2.6/arch/x86/kernel/apic/io_apic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/apic/io_apic.c
+++ linux-2.6/arch/x86/kernel/apic/io_apic.c
@@ -3280,12 +3280,9 @@ unsigned int create_irq_nr(unsigned int
}
spin_unlock_irqrestore(&vector_lock, flags);
- if (irq > 0) {
- dynamic_irq_init(irq);
- /* restore it, in case dynamic_irq_init clear it */
- if (desc_new)
- desc_new->chip_data = cfg_new;
- }
+ if (irq > 0)
+ dynamic_irq_init_keep_chip_data(irq);
+
return irq;
}
Index: linux-2.6/include/linux/irq.h
===================================================================
--- linux-2.6.orig/include/linux/irq.h
+++ linux-2.6/include/linux/irq.h
@@ -400,6 +400,7 @@ static inline int irq_has_action(unsigne
/* Dynamic irq helper functions */
extern void dynamic_irq_init(unsigned int irq);
+void dynamic_irq_init_keep_chip_data(unsigned int irq);
extern void dynamic_irq_cleanup(unsigned int irq);
/* Set/get chip/data for an IRQ: */
Index: linux-2.6/kernel/irq/chip.c
===================================================================
--- linux-2.6.orig/kernel/irq/chip.c
+++ linux-2.6/kernel/irq/chip.c
Nearly every function in kernel/irq/chip.c takes the desc->lock when
manipulating the fields of the irq_desc including chip_data. Should
create_irq_nr() do the same when getting the chip_data field?
I am just a bit confused on what protects the chip_data field now.
Actually, while looking at your patch there is a related race in
destroy_irq() that I just noticed. This race could happen via
pci_disable_msix() in a driver or in the number of error paths that
call free_msi_irqs():
destroy_irq()
dynamic_irq_cleanup() which sets desc->chip_data = NULL
...race window...
desc->chip_data = cfg;
It could race with create_irq_nr() in the same way in the irq destroy
path.
So, I will reply after this with a combined patch fixing this
potential race along with the minor things below.
Cheers,
Brandon
>
> Index: linux-2.6/include/linux/irq.h
> ===================================================================
> --- linux-2.6.orig/include/linux/irq.h
> +++ linux-2.6/include/linux/irq.h
> @@ -400,6 +400,7 @@ static inline int irq_has_action(unsigne
>
> /* Dynamic irq helper functions */
> extern void dynamic_irq_init(unsigned int irq);
> +void dynamic_irq_init_keep_chip_data(unsigned int irq);
> extern void dynamic_irq_cleanup(unsigned int irq);
Missing extern?
> /* Set/get chip/data for an IRQ: */
> Index: linux-2.6/kernel/irq/chip.c
> ===================================================================
> --- linux-2.6.orig/kernel/irq/chip.c
> +++ linux-2.6/kernel/irq/chip.c
> @@ -22,7 +22,7 @@
> * dynamic_irq_init - initialize a dynamically allocated irq
> * @irq: irq number to initialize
Update kerndoc?
Another possible race exists via pci_disable_msix() in a driver or in
the number of error paths that call free_msi_irqs():
destroy_irq()
dynamic_irq_cleanup() which sets desc->chip_data = NULL
...race window...
desc->chip_data = cfg;
Remove the save and restore code for cfg in create_irq_nr() and
destroy_irq() and take the desc->lock when checking the irq_cfg.
Reported-and-analyzed-by: Brandon Philips <bphi...@suse.de>
Signed-off-by: Yinghai Lu <yin...@kernel.org>
Signed-off-by: Brandon Phiilps <bphi...@suse.de>
Cc: sta...@kernel.org
---
arch/x86/kernel/apic/io_apic.c | 14 +++--------
include/linux/irq.h | 2 +
kernel/irq/chip.c | 52 +++++++++++++++++++++++++++++++++--------
3 files changed, 49 insertions(+), 19 deletions(-)
Index: linux-2.6/arch/x86/kernel/apic/io_apic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/apic/io_apic.c
+++ linux-2.6/arch/x86/kernel/apic/io_apic.c
@@ -3228,12 +3228,9 @@ unsigned int create_irq_nr(unsigned int
}
spin_unlock_irqrestore(&vector_lock, flags);
- if (irq > 0) {
- dynamic_irq_init(irq);
- /* restore it, in case dynamic_irq_init clear it */
- if (desc_new)
- desc_new->chip_data = cfg_new;
- }
+ if (irq > 0)
+ dynamic_irq_init_keep_chip_data(irq);
+
return irq;
}
@@ -3260,10 +3257,7 @@ void destroy_irq(unsigned int irq)
/* store it, in case dynamic_irq_cleanup clear it */
desc = irq_to_desc(irq);
- cfg = desc->chip_data;
- dynamic_irq_cleanup(irq);
- /* connect back irq_cfg */
- desc->chip_data = cfg;
+ dynamic_irq_cleanup_keep_chip_data(irq);
free_irte(irq);
spin_lock_irqsave(&vector_lock, flags);
Index: linux-2.6/include/linux/irq.h
===================================================================
--- linux-2.6.orig/include/linux/irq.h
+++ linux-2.6/include/linux/irq.h
@@ -400,7 +400,9 @@ static inline int irq_has_action(unsigne
/* Dynamic irq helper functions */
extern void dynamic_irq_init(unsigned int irq);
+extern void dynamic_irq_init_keep_chip_data(unsigned int irq);
extern void dynamic_irq_cleanup(unsigned int irq);
+extern void dynamic_irq_cleanup_keep_chip_data(unsigned int irq);
/* Set/get chip/data for an IRQ: */
extern int set_irq_chip(unsigned int irq, struct irq_chip *chip);
Index: linux-2.6/kernel/irq/chip.c
===================================================================
--- linux-2.6.orig/kernel/irq/chip.c
+++ linux-2.6/kernel/irq/chip.c
@@ -18,11 +18,7 @@
#include "internals.h"
-/**
- * dynamic_irq_init - initialize a dynamically allocated irq
- * @irq: irq number to initialize
- */
-void dynamic_irq_init(unsigned int irq)
+static void dynamic_irq_init_x(unsigned int irq, bool keep_chip_data)
{
struct irq_desc *desc;
unsigned long flags;
@@ -41,7 +37,8 @@ void dynamic_irq_init(unsigned int irq)
desc->depth = 1;
desc->msi_desc = NULL;
desc->handler_data = NULL;
- desc->chip_data = NULL;
+ if (!keep_chip_data)
+ desc->chip_data = NULL;
desc->action = NULL;
desc->irq_count = 0;
desc->irqs_unhandled = 0;
@@ -55,10 +52,26 @@ void dynamic_irq_init(unsigned int irq)
}
/**
- * dynamic_irq_cleanup - cleanup a dynamically allocated irq
+ * dynamic_irq_init - initialize a dynamically allocated irq
* @irq: irq number to initialize
*/
-void dynamic_irq_cleanup(unsigned int irq)
+void dynamic_irq_init(unsigned int irq)
+{
+ dynamic_irq_init_x(irq, false);
+}
+
+/**
+ * dynamic_irq_init_keep_chip_data - initialize a dynamically allocated irq
+ * @irq: irq number to initialize
+ *
+ * does not set irq_to_desc(irq)->chip_data to NULL
+ */
+void dynamic_irq_init_keep_chip_data(unsigned int irq)
+{
+ dynamic_irq_init_x(irq, true);
+}
+
+static void dynamic_irq_cleanup_x(unsigned int irq, bool keep_chip_data)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
@@ -77,7 +90,8 @@ void dynamic_irq_cleanup(unsigned int ir
}
desc->msi_desc = NULL;
desc->handler_data = NULL;
- desc->chip_data = NULL;
+ if (!keep_chip_data)
+ desc->chip_data = NULL;
desc->handle_irq = handle_bad_irq;
desc->chip = &no_irq_chip;
desc->name = NULL;
@@ -85,6 +99,26 @@ void dynamic_irq_cleanup(unsigned int ir
raw_spin_unlock_irqrestore(&desc->lock, flags);
}
+/**
+ * dynamic_irq_cleanup - cleanup a dynamically allocated irq
+ * @irq: irq number to initialize
+ */
+void dynamic_irq_cleanup(unsigned int irq)
+{
+ dynamic_irq_init_x(irq, false);
+}
+
+/**
+ * dynamic_irq_cleanup_keep_chip_data - cleanup a dynamically allocated irq
+ * @irq: irq number to initialize
+ *
+ * does not set irq_to_desc(irq)->chip_data to NULL
+ */
+void dynamic_irq_cleanup_keep_chip_data(unsigned int irq)
+{
+ dynamic_irq_init_x(irq, true);
+}
+
/**
* set_irq_chip - set the irq chip for an irq
Quite the opposite -- function prototypes don't need "extern", and
putting "extern" on them is generally considered bad style.
-hpa
should be dynamic_irq_cleanup_x here.
> +}
> +
> +/**
> + * dynamic_irq_cleanup_keep_chip_data - cleanup a dynamically allocated irq
> + * @irq: irq number to initialize
> + *
> + * does not set irq_to_desc(irq)->chip_data to NULL
> + */
> +void dynamic_irq_cleanup_keep_chip_data(unsigned int irq)
> +{
> + dynamic_irq_init_x(irq, true);
should be dynamic_irq_cleanup_x
> +}
> +
>
> /**
> * set_irq_chip - set the irq chip for an irq
YH
Oops, right. I will fix this up along with the externs as hpa
suggested and send again.
What are your thoughts on locking? Does it look OK as is?
Thanks,
Brandon
ok to me.
YH
destroy_irq() to avoid the NULL.
+void dynamic_irq_init_keep_chip_data(unsigned int irq);
extern void dynamic_irq_cleanup(unsigned int irq);
+void dynamic_irq_cleanup_keep_chip_data(unsigned int irq);
/* Set/get chip/data for an IRQ: */
extern int set_irq_chip(unsigned int irq, struct irq_chip *chip);
Index: linux-2.6/kernel/irq/chip.c
===================================================================
--- linux-2.6.orig/kernel/irq/chip.c
+++ linux-2.6/kernel/irq/chip.c
@@ -18,11 +18,7 @@
#include "internals.h"
-/**
- * dynamic_irq_init - initialize a dynamically allocated irq
- * @irq: irq number to initialize
- */
-void dynamic_irq_init(unsigned int irq)
+static void dynamic_irq_init_x(unsigned int irq, bool keep_chip_data)
{
struct irq_desc *desc;
unsigned long flags;
@@ -41,7 +37,8 @@ void dynamic_irq_init(unsigned int irq)
desc->depth = 1;
desc->msi_desc = NULL;
desc->handler_data = NULL;
- desc->chip_data = NULL;
+ if (!keep_chip_data)
+ desc->chip_data = NULL;
desc->action = NULL;
desc->irq_count = 0;
desc->irqs_unhandled = 0;
@@ -55,10 +52,26 @@ void dynamic_irq_init(unsigned int irq)
}
/**
- * dynamic_irq_cleanup - cleanup a dynamically allocated irq
+ * dynamic_irq_init - initialize a dynamically allocated irq
* @irq: irq number to initialize
*/
-void dynamic_irq_cleanup(unsigned int irq)
+void dynamic_irq_init(unsigned int irq)
+{
+ dynamic_irq_init_x(irq, false);
+}
+
+/**
+ * dynamic_irq_init_keep_chip_data - initialize a dynamically allocated irq
+ * @irq: irq number to initialize
+ *
+ * does not set irq_to_desc(irq)->chip_data to NULL
+ */
+void dynamic_irq_init_keep_chip_data(unsigned int irq)
+{
+ dynamic_irq_init_x(irq, true);
+}
+
+static void dynamic_irq_cleanup_x(unsigned int irq, bool keep_chip_data)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
@@ -77,7 +90,8 @@ void dynamic_irq_cleanup(unsigned int ir
}
desc->msi_desc = NULL;
desc->handler_data = NULL;
- desc->chip_data = NULL;
+ if (!keep_chip_data)
+ desc->chip_data = NULL;
desc->handle_irq = handle_bad_irq;
desc->chip = &no_irq_chip;
desc->name = NULL;
@@ -85,6 +99,26 @@ void dynamic_irq_cleanup(unsigned int ir
raw_spin_unlock_irqrestore(&desc->lock, flags);
}
+/**
+ * dynamic_irq_cleanup - cleanup a dynamically allocated irq
+ * @irq: irq number to initialize
+ */
+void dynamic_irq_cleanup(unsigned int irq)
+{
+ dynamic_irq_cleanup_x(irq, false);
+}
+
+/**
+ * dynamic_irq_cleanup_keep_chip_data - cleanup a dynamically allocated irq
+ * @irq: irq number to initialize
+ *
+ * does not set irq_to_desc(irq)->chip_data to NULL
+ */
+void dynamic_irq_cleanup_keep_chip_data(unsigned int irq)
+{
+ dynamic_irq_cleanup_x(irq, true);
+}
+
/**
* set_irq_chip - set the irq chip for an irq
destroy_irq() and take the desc->lock when checking the irq_cfg.
Reported-and-analyzed-by: Brandon Philips <bphi...@suse.de>
Signed-off-by: Yinghai Lu <yin...@kernel.org>
Signed-off-by: Brandon Phiilps <bphi...@suse.de>
Cc: sta...@kernel.org
---
arch/x86/kernel/apic/io_apic.c | 17 +++----------
include/linux/irq.h | 2 +
kernel/irq/chip.c | 52 +++++++++++++++++++++++++++++++++--------
3 files changed, 50 insertions(+), 21 deletions(-)
Index: linux-2.6/arch/x86/kernel/apic/io_apic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/apic/io_apic.c
+++ linux-2.6/arch/x86/kernel/apic/io_apic.c
@@ -3228,12 +3228,9 @@ unsigned int create_irq_nr(unsigned int
}
spin_unlock_irqrestore(&vector_lock, flags);
- if (irq > 0) {
- dynamic_irq_init(irq);
- /* restore it, in case dynamic_irq_init clear it */
- if (desc_new)
- desc_new->chip_data = cfg_new;
- }
+ if (irq > 0)
+ dynamic_irq_init_keep_chip_data(irq);
+
return irq;
}
@@ -3255,19 +3252,15 @@ int create_irq(void)
void destroy_irq(unsigned int irq)
{
unsigned long flags;
- struct irq_cfg *cfg;
struct irq_desc *desc;
/* store it, in case dynamic_irq_cleanup clear it */
desc = irq_to_desc(irq);
- cfg = desc->chip_data;
- dynamic_irq_cleanup(irq);
- /* connect back irq_cfg */
- desc->chip_data = cfg;
+ dynamic_irq_cleanup_keep_chip_data(irq);
free_irte(irq);
spin_lock_irqsave(&vector_lock, flags);
- __clear_irq_vector(irq, cfg);
+ __clear_irq_vector(irq, desc->chip_data);
spin_unlock_irqrestore(&vector_lock, flags);
==>
@@ -3308,17 +3305,12 @@ void destroy_irq(unsigned int irq)
{
unsigned long flags;
struct irq_cfg *cfg;
- struct irq_desc *desc;
- /* store it, in case dynamic_irq_cleanup clear it */
- desc = irq_to_desc(irq);
- cfg = desc->chip_data;
- dynamic_irq_cleanup(irq);
- /* connect back irq_cfg */
- desc->chip_data = cfg;
+ dynamic_irq_cleanup_keep_chip_data(irq);
free_irte(irq);
spin_lock_irqsave(&vector_lock, flags);
+ cfg = irq_to_desc(irq)->chip_data;
__clear_irq_vector(irq, cfg);
spin_unlock_irqrestore(&vector_lock, flags);
}
Yinghai
Or even two lines better!
void destroy_irq(unsigned int irq)
{
unsigned long flags;
- struct irq_cfg *cfg;
- struct irq_desc *desc;
- /* store it, in case dynamic_irq_cleanup clear it */
- desc = irq_to_desc(irq);
- cfg = desc->chip_data;
- dynamic_irq_cleanup(irq);
- /* connect back irq_cfg */
- desc->chip_data = cfg;
+ dynamic_irq_cleanup_keep_chip_data(irq);
free_irte(irq);
spin_lock_irqsave(&vector_lock, flags);
- __clear_irq_vector(irq, cfg);
+ __clear_irq_vector(irq, get_irq_chip_data(irq));
spin_unlock_irqrestore(&vector_lock, flags);
}
Sigh... I guess I will send the patch again :D
Cheers,
Brandon
---
arch/x86/kernel/apic/io_apic.c | 20 +++------------
include/linux/irq.h | 2 +
kernel/irq/chip.c | 52 +++++++++++++++++++++++++++++++++--------
3 files changed, 50 insertions(+), 24 deletions(-)
Index: linux-2.6/arch/x86/kernel/apic/io_apic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/apic/io_apic.c
+++ linux-2.6/arch/x86/kernel/apic/io_apic.c
@@ -3228,12 +3228,9 @@ unsigned int create_irq_nr(unsigned int
}
spin_unlock_irqrestore(&vector_lock, flags);
- if (irq > 0) {
- dynamic_irq_init(irq);
- /* restore it, in case dynamic_irq_init clear it */
- if (desc_new)
- desc_new->chip_data = cfg_new;
- }
+ if (irq > 0)
+ dynamic_irq_init_keep_chip_data(irq);
+
return irq;
}
@@ -3255,19 +3252,12 @@ int create_irq(void)
void destroy_irq(unsigned int irq)
{
unsigned long flags;
- struct irq_cfg *cfg;
- struct irq_desc *desc;
- /* store it, in case dynamic_irq_cleanup clear it */
- desc = irq_to_desc(irq);
- cfg = desc->chip_data;
- dynamic_irq_cleanup(irq);
- /* connect back irq_cfg */
- desc->chip_data = cfg;
+ dynamic_irq_cleanup_keep_chip_data(irq);
free_irte(irq);
spin_lock_irqsave(&vector_lock, flags);
- __clear_irq_vector(irq, cfg);
+ __clear_irq_vector(irq, get_irq_chip_data(irq));
x86, irq: Keep chip_data in create_irq_nr and destroy_irq
LKML-Reference: <2010020721...@jenkins.home.ifup.org>
Signed-off-by: Brandon Phiilps <bphi...@suse.de>
Cc: sta...@kernel.org
Signed-off-by: H. Peter Anvin <h...@zytor.com>
---
arch/x86/kernel/apic/io_apic.c | 20 ++++-----------
include/linux/irq.h | 2 +
kernel/irq/chip.c | 52 +++++++++++++++++++++++++++++++++-------
3 files changed, 50 insertions(+), 24 deletions(-)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 5e4cce2..e93a76b 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3278,12 +3278,9 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
}
spin_unlock_irqrestore(&vector_lock, flags);
- if (irq > 0) {
- dynamic_irq_init(irq);
- /* restore it, in case dynamic_irq_init clear it */
- if (desc_new)
- desc_new->chip_data = cfg_new;
- }
+ if (irq > 0)
+ dynamic_irq_init_keep_chip_data(irq);
+
return irq;
}
@@ -3305,19 +3302,12 @@ int create_irq(void)
void destroy_irq(unsigned int irq)
{
unsigned long flags;
- struct irq_cfg *cfg;
- struct irq_desc *desc;
- /* store it, in case dynamic_irq_cleanup clear it */
- desc = irq_to_desc(irq);
- cfg = desc->chip_data;
- dynamic_irq_cleanup(irq);
- /* connect back irq_cfg */
- desc->chip_data = cfg;
+ dynamic_irq_cleanup_keep_chip_data(irq);
free_irte(irq);
spin_lock_irqsave(&vector_lock, flags);
- __clear_irq_vector(irq, cfg);
+ __clear_irq_vector(irq, get_irq_chip_data(irq));
spin_unlock_irqrestore(&vector_lock, flags);
}
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 451481c..4d9b26e 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -400,7 +400,9 @@ static inline int irq_has_action(unsigned int irq)
/* Dynamic irq helper functions */
extern void dynamic_irq_init(unsigned int irq);
+void dynamic_irq_init_keep_chip_data(unsigned int irq);
extern void dynamic_irq_cleanup(unsigned int irq);
+void dynamic_irq_cleanup_keep_chip_data(unsigned int irq);
/* Set/get chip/data for an IRQ: */
extern int set_irq_chip(unsigned int irq, struct irq_chip *chip);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index ecc3fa2..d70394f 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -77,7 +90,8 @@ void dynamic_irq_cleanup(unsigned int irq)
}
desc->msi_desc = NULL;
desc->handler_data = NULL;
- desc->chip_data = NULL;
+ if (!keep_chip_data)
+ desc->chip_data = NULL;
desc->handle_irq = handle_bad_irq;
desc->chip = &no_irq_chip;
desc->name = NULL;
@@ -85,6 +99,26 @@ void dynamic_irq_cleanup(unsigned int irq)
x86: apic: Fix mismerge, add arch_probe_nr_irqs() again
Merge commit aef55d4922 mis-merged io_apic.c so we lost the
arch_probe_nr_irqs() method.
This caused subtle boot breakages (udev confusion likely
due to missing drivers) with certain configs.
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Yinghai Lu <yin...@kernel.org>
LKML-Reference: <2010020721...@jenkins.home.ifup.org>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---
arch/x86/kernel/apic/io_apic.c | 22 ++++++++++++++++++++++
1 files changed, 22 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 527390c..c13ab8f 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3874,6 +3874,28 @@ void __init probe_nr_irqs_gsi(void)
printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
}
+#ifdef CONFIG_SPARSE_IRQ
+int __init arch_probe_nr_irqs(void)
+{
+ int nr;
+
+ if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
+ nr_irqs = NR_VECTORS * nr_cpu_ids;
+
+ nr = nr_irqs_gsi + 8 * nr_cpu_ids;
+#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
+ /*
+ * for MSI and HT dyn irq
+ */
+ nr += nr_irqs_gsi * 16;
+#endif
+ if (nr < nr_irqs)
+ nr_irqs = nr;
+
+ return 0;
+}
+#endif
+
static int __io_apic_set_pci_routing(struct device *dev, int irq,
struct io_apic_irq_attr *irq_attr)
{
for x86, with radix tree based irq_to_desc(),
removing arch_probe_nr_irqs is intentional. so we get more irq that could be used.
wonder if the udev for some of your test system have irq number limitation?
YH
was ancient udev: udev-095-17.fc6.
Ingo
> * Yinghai Lu <yin...@kernel.org> wrote:
>
>>
>> for x86, with radix tree based irq_to_desc(),
>> removing arch_probe_nr_irqs is intentional. so we get more irq that could be used.
>>
>> wonder if the udev for some of your test system have irq number limitation?
>
> was ancient udev: udev-095-17.fc6.
Something doesn't add up. Nowhere in the udev source is there a
single mention of irq.
gsi have fixed interrupt numbers so that would not change.
The dynamic irqs are allocated starting from the high gsi
and working up.
The irq numbers that get allocated should not have changed,
unless this was actually a bug fix in this configuration.
The other possibility is that somehow arch_probe_nr_irqs()
was returning a number greater than NR_IRQS.
Ingo do you have any idea what NR_IRQS or nr_irqs were/are on
that failing machine?
Eric
> Ingo Molnar <mi...@elte.hu> writes:
>
> > * Yinghai Lu <yin...@kernel.org> wrote:
> >
> >>
> >> for x86, with radix tree based irq_to_desc(),
> >> removing arch_probe_nr_irqs is intentional. so we get more irq that could be used.
> >>
> >> wonder if the udev for some of your test system have irq number limitation?
> >
> > was ancient udev: udev-095-17.fc6.
>
> Something doesn't add up. Nowhere in the udev source is there a
> single mention of irq.
>
> gsi have fixed interrupt numbers so that would not change.
>
> The dynamic irqs are allocated starting from the high gsi
> and working up.
>
> The irq numbers that get allocated should not have changed,
> unless this was actually a bug fix in this configuration.
>
> The other possibility is that somehow arch_probe_nr_irqs()
> was returning a number greater than NR_IRQS.
>
> Ingo do you have any idea what NR_IRQS or nr_irqs were/are on
> that failing machine?
Sorry, not - and the merge window doesnt leave much time to revisit the
problem right now.
But the failures were very real and 100% caused by this: they resulted in
non-existent /dev/sda* nodes and resulting fsck failure by rc.
Thanks,
Ingo
>> Ingo do you have any idea what NR_IRQS or nr_irqs were/are on
>> that failing machine?
>
> Sorry, not - and the merge window doesnt leave much time to revisit the
> problem right now.
>
> But the failures were very real and 100% caused by this: they resulted in
> non-existent /dev/sda* nodes and resulting fsck failure by rc.
I have looked it over a second time and I have convinced myself
that arch_probe_nr_irqs will in the worst case reduce nr_irqs,
and never increase it beyond NR_IRQS. So this revert (keeping
arch_probe_nr_irqs) is safe.
It makes little sense that a larger nr_irqs would be a problem,
but clearly there are assumptions somewhere that we still need
to remove.
Eric
x86: apic: Fix mismerge, add arch_probe_nr_irqs() again
Merge commit aef55d4922 mis-merged io_apic.c so we lost the
arch_probe_nr_irqs() method.
This caused subtle boot breakages (udev confusion likely
due to missing drivers) with certain configs.
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Yinghai Lu <yin...@kernel.org>
LKML-Reference: <2010020721...@jenkins.home.ifup.org>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---
arch/x86/kernel/apic/io_apic.c | 22 ++++++++++++++++++++++
1 files changed, 22 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 9795898..72ac2a3 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3876,6 +3876,28 @@ void __init probe_nr_irqs_gsi(void)
This sounds interesting for the Xen dom0 use case where we can have
essentially arbitrary numbers of interrupt sources.
Is there a tree somewhere that I can be looking at?
Ian
--
Ian Campbell
Current Noise: Rammstein - Morgenstern
Money will say more in one moment than the most eloquent lover can in years.
> On Fri, 2010-02-26 at 10:19 -0800, Yinghai Lu wrote:
>>
>>
>> for x86, with radix tree based irq_to_desc(),
>> removing arch_probe_nr_irqs is intentional. so we get more irq that
>> could be used.
>
> This sounds interesting for the Xen dom0 use case where we can have
> essentially arbitrary numbers of interrupt sources.
>
> Is there a tree somewhere that I can be looking at?
Ingo's x86 tip tree I imagine. I think this patch is slated for coming
in sometime this merge window so you should be able to see it
in 2.6.34-rc1.
This is a truly frustrating question. At this point all of the
practical limitations are in the Xen code itself, and it has been that
way for the last couple of years. Looking at the x86_64 tree it looks
like the core work went in at the end of 2006 about 2.6.19. Last I
looked the Xen dom0 kernel was stuck at the kernel just before the irq
scaling changes went in. Every time I look the Xen code is doing
something stupid and unnecessary that makes scaling to large numbers
of irqs hard.
The changes YH is working on are the last couple of changes so that
we can seriously remove NR_IRQs and not have to pay a penalty of
a large static array on small machines when we have a kernel built
to support large machines and a large number of interrupts.
As of 2.6.33 the limitations in DomU support are:
- xen_evtchn_do_upcall starts with the irq number instead of
the irq_desc, and happens to unnecessarily call into arch
specific code.
- Xen has an array irq_info[NR_IRQS] one of the last static arrays
sized at NR_IRQs in the entire kernel.
One of the big reasons Xen dom0 irq support was not merged was because
merging it would effectively be a revert of the irq scaling work.
If you can fix the Xen code so it isn't dragging the rest of the
kernel down when it comes to large numbers of irqs more power to you.
For now I will continue to write little patches every once in a while
that bonk Xen domU on the head when something Xen domU is doing becomes a
bottleneck for the rest of the kernel.
Eric
I saw a patch to fix this one recently, "xen: Remove unnecessary arch
specific xen irq functions.", right?
> - Xen has an array irq_info[NR_IRQS] one of the last static arrays
> sized at NR_IRQs in the entire kernel.
Hopefully the same info as is in that array could (and indeed should) be
instead stored in irq_desc->chip_data. Would you object to
arch_init_copy_chip_data and arch_free_chip_data becoming function
pointers within the struct irq_chip?
> If you can fix the Xen code so it isn't dragging the rest of the
> kernel down when it comes to large numbers of irqs more power to you.
If you know of other aspects of the Xen code where this is the case (or
find them in the future) please let me know, I'll do my best to fix
them.
Ian.
--
Ian Campbell
To the systems programmer, users and applications serve only to provide a
test load.
> On Mon, 2010-03-01 at 10:34 -0800, Eric W. Biederman wrote:
>>
>> As of 2.6.33 the limitations in DomU support are:
>> - xen_evtchn_do_upcall starts with the irq number instead of
>> the irq_desc, and happens to unnecessarily call into arch
>> specific code.
>
> I saw a patch to fix this one recently, "xen: Remove unnecessary arch
> specific xen irq functions.", right?
Yes.
You probably want to modify evtnchn_to_irq to return an irq_desc.
It is going to take a bit but our next big step for the irq methods
is to make them all take struct irq_desc pointers instead of unsigned
int irq, so we don't have to repeat the lookups.
>> - Xen has an array irq_info[NR_IRQS] one of the last static arrays
>> sized at NR_IRQs in the entire kernel.
>
> Hopefully the same info as is in that array could (and indeed should) be
> instead stored in irq_desc->chip_data. Would you object to
> arch_init_copy_chip_data and arch_free_chip_data becoming function
> pointers within the struct irq_chip?
No objections. Now that I see those methods it looks like they always
should have been in irq_chip.
>> If you can fix the Xen code so it isn't dragging the rest of the
>> kernel down when it comes to large numbers of irqs more power to you.
>
> If you know of other aspects of the Xen code where this is the case (or
> find them in the future) please let me know, I'll do my best to fix
> them.
Good to hear.
Right now I am being a bit of a jack in the box reviewer. I don't have
time to work on the irq code right now but I occasionally pop out of
my box and review the code and try to keep it from descending into an
unmaintainable disaster.
Yes, I was just about to suggest something like this. We just need a
mechanism of storing some per-irq info, and attaching it to the existing
desc structure is much cleaner than maintaining some parallel structure.
You're right about the chip_data lifetime functions; it seems very odd
that most of irq_chip is nicely factored out into a set of ops, but the
per-chip data management is per-architecture.
With that in place, we could eliminate any dependency on irq numbers.
J
That should hit 2.6.35.
> >> - Xen has an array irq_info[NR_IRQS] one of the last static arrays
> >> sized at NR_IRQs in the entire kernel.
> >
> > Hopefully the same info as is in that array could (and indeed should) be
> > instead stored in irq_desc->chip_data. Would you object to
> > arch_init_copy_chip_data and arch_free_chip_data becoming function
> > pointers within the struct irq_chip?
>
> No objections. Now that I see those methods it looks like they always
> should have been in irq_chip.
Agreed.
Thanks,
tglx
The following changes since commit c0a2d57e753717cc893b4b38bfb351c7f19469c3:
Ingo Molnar (1):
Merge branch 'perf/urgent'
are available in the git repository at:
git://xenbits.xensource.com/people/ianc/linux-2.6.git for-x86/irq
Ian Campbell (3):
x86: namespace some I/O APIC related structures and functions.
irq: move some interrupt arch_* functions into struct irq_chip.
x86: irq_desc->chip_data is always correct whether or not SPARSE_IRQ is enabled.
arch/powerpc/kernel/irq.c | 2 +-
arch/x86/include/asm/hw_irq.h | 25 +++--
arch/x86/kernel/apic/io_apic.c | 233 ++++++++++++++++++++++++---------------
arch/x86/kernel/uv_irq.c | 19 ++--
include/linux/interrupt.h | 2 +-
include/linux/irq.h | 12 ++-
kernel/irq/handle.c | 2 +-
kernel/irq/numa_migrate.c | 12 ++-
kernel/softirq.c | 3 +-
9 files changed, 195 insertions(+), 115 deletions(-)
--
Ian Campbell
Current Noise: Mistress - Lord Worm
Nothing is easier than to denounce the evildoer; nothing is more difficult
than to understand him.
-- Fyodor Dostoevski
Move arch_init_copy_chip_data and arch_free_chip_data into function
pointers in struct irq_chip since they operate on irq_desc->chip_data.
arch_init_chip_data cannot be moved into struct irq_chip at this time
because irq_desc->chip is not known at the time the irq_desc is
setup. For now rename arch_init_chip_data to arch_init_irq_desc (for
PowerPC, the only other user, whose usage better matches the new name)
and on x86 convert arch_init_chip_data to ioapic_init_chip_data and
call this whenever the IO APIC code allocates a new IRQ.
I've retained the chip_data behaviour for uv_irq although it isn't
clear to me if these interrupt types support migration or how closely
related to the APIC modes they really are. If it weren't for this the
ioapic_{init,copy,free}_chip_data functions could be static to
io_apic.c.
I've tested by booting on a 64 bit system, but it's not clear to me
what actions I need to take to actually exercise some of these code
paths.
Signed-off-by: Ian Campbell <ian.ca...@citrix.com>
Cc: Thomas Gleixner <tg...@linutronix.de>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Eric W. Biederman <ebie...@xmission.com>
Cc: Yinghai Lu <yin...@kernel.org>
Cc: Jeremy Fitzhardinge <jer...@goop.org>
Cc: Benjamin Herrenschmidt <be...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: x...@kernel.org
Cc: linuxp...@ozlabs.org
Cc: linux-...@vger.kernel.org
---
arch/powerpc/kernel/irq.c | 2 +-
arch/x86/include/asm/hw_irq.h | 11 +++++++-
arch/x86/kernel/apic/io_apic.c | 58 +++++++++++++++++++++++++++++++++++++---
arch/x86/kernel/uv_irq.c | 5 +++
include/linux/interrupt.h | 2 +-
include/linux/irq.h | 12 +++++---
kernel/irq/handle.c | 2 +-
kernel/irq/numa_migrate.c | 12 +++++++-
kernel/softirq.c | 3 +-
9 files changed, 91 insertions(+), 16 deletions(-)
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 64f6f20..002d87f 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -1088,7 +1088,7 @@ int arch_early_irq_init(void)
return 0;
}
-int arch_init_chip_data(struct irq_desc *desc, int node)
+int arch_init_irq_desc(struct irq_desc *desc, int node)
{
desc->status |= IRQ_NOREQUEST;
return 0;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 0642186..b9d7310 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -20,9 +20,9 @@
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/smp.h>
+#include <linux/irq.h>
#include <asm/atomic.h>
-#include <asm/irq.h>
#include <asm/sections.h>
/* Interrupt handlers registered during init_IRQ */
@@ -61,6 +61,15 @@ extern void init_VISWS_APIC_irqs(void);
extern void setup_IO_APIC(void);
extern void disable_IO_APIC(void);
+extern int ioapic_init_chip_data(struct irq_desc *desc, int node);
+
+#ifdef CONFIG_SPARSE_IRQ
+extern void ioapic_copy_chip_data(struct irq_desc *old_desc,
+ struct irq_desc *desc, int node);
+extern void ioapic_free_chip_data(struct irq_desc *old_desc,
+ struct irq_desc *desc);
+#endif
+
struct io_apic_irq_attr {
int ioapic;
int ioapic_pin;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index a57d974..74d5d96 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -211,7 +211,7 @@ static struct ioapic_irq_cfg *get_one_free_irq_cfg(int node)
return cfg;
}
-int arch_init_chip_data(struct irq_desc *desc, int node)
+int ioapic_init_chip_data(struct irq_desc *desc, int node)
{
struct ioapic_irq_cfg *cfg;
@@ -289,8 +289,8 @@ static void free_irq_2_pin(struct ioapic_irq_cfg *old_cfg,
old_cfg->irq_2_pin = NULL;
}
-void arch_init_copy_chip_data(struct irq_desc *old_desc,
- struct irq_desc *desc, int node)
+void ioapic_copy_chip_data(struct irq_desc *old_desc,
+ struct irq_desc *desc, int node)
{
struct ioapic_irq_cfg *cfg;
struct ioapic_irq_cfg *old_cfg;
@@ -314,7 +314,7 @@ static void free_irq_cfg(struct ioapic_irq_cfg *old_cfg)
kfree(old_cfg);
}
-void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
+void ioapic_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
{
struct ioapic_irq_cfg *old_cfg, *cfg;
@@ -338,6 +338,11 @@ struct ioapic_irq_cfg *ioapic_irq_cfg(unsigned int irq)
return irq < nr_irqs ? irq_cfgx + irq : NULL;
}
+int ioapic_init_chip_data(struct irq_desc *desc, int node)
+{
+ return 0;
+}
+
#endif
struct io_apic {
@@ -1526,6 +1531,7 @@ static void __init setup_IO_APIC_irqs(void)
printk(KERN_INFO "can not get irq_desc for %d\n", irq);
continue;
}
+ ioapic_init_chip_data(desc, node);
cfg = desc->chip_data;
add_pin_to_irq_node(cfg, node, apic_id, pin);
/*
@@ -1576,6 +1582,7 @@ void setup_IO_APIC_irq_extra(u32 gsi)
printk(KERN_INFO "can not get irq_desc for %d\n", irq);
return;
}
+ ioapic_init_chip_data(desc, node);
cfg = desc->chip_data;
add_pin_to_irq_node(cfg, node, apic_id, pin);
@@ -2746,6 +2753,11 @@ static struct irq_chip ioapic_chip __read_mostly = {
.set_affinity = set_ioapic_affinity_irq,
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = ioapic_copy_chip_data,
+ .free_chip_data = ioapic_free_chip_data,
+#endif
};
static struct irq_chip ir_ioapic_chip __read_mostly = {
@@ -2761,6 +2773,11 @@ static struct irq_chip ir_ioapic_chip __read_mostly = {
#endif
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = ioapic_copy_chip_data,
+ .free_chip_data = ioapic_free_chip_data,
+#endif
};
static inline void init_IO_APIC_traps(void)
@@ -3268,6 +3285,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
printk(KERN_INFO "can not get irq_desc for %d\n", new);
continue;
}
+ ioapic_init_chip_data(desc_new, node);
cfg_new = desc_new->chip_data;
if (cfg_new->vector != 0)
@@ -3474,6 +3492,11 @@ static struct irq_chip msi_chip = {
.set_affinity = set_msi_irq_affinity,
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = ioapic_copy_chip_data,
+ .free_chip_data = ioapic_free_chip_data,
+#endif
};
static struct irq_chip msi_ir_chip = {
@@ -3487,6 +3510,11 @@ static struct irq_chip msi_ir_chip = {
#endif
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = ioapic_copy_chip_data,
+ .free_chip_data = ioapic_free_chip_data,
+#endif
};
/*
@@ -3646,6 +3674,11 @@ static struct irq_chip dmar_msi_type = {
.set_affinity = dmar_msi_set_affinity,
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = ioapic_copy_chip_data,
+ .free_chip_data = ioapic_free_chip_data,
+#endif
};
int arch_setup_dmar_msi(unsigned int irq)
@@ -3703,6 +3736,11 @@ static struct irq_chip ir_hpet_msi_type = {
#endif
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = ioapic_copy_chip_data,
+ .free_chip_data = ioapic_free_chip_data,
+#endif
};
static struct irq_chip hpet_msi_type = {
@@ -3714,6 +3752,11 @@ static struct irq_chip hpet_msi_type = {
.set_affinity = hpet_msi_set_affinity,
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = ioapic_copy_chip_data,
+ .free_chip_data = ioapic_free_chip_data,
+#endif
};
int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
@@ -3800,6 +3843,11 @@ static struct irq_chip ht_irq_chip = {
.set_affinity = set_ht_irq_affinity,
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = ioapic_copy_chip_data,
+ .free_chip_data = ioapic_free_chip_data,
+#endif
};
int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
@@ -3927,6 +3975,7 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq,
printk(KERN_INFO "can not get irq_desc %d\n", irq);
return 0;
}
+ ioapic_init_chip_data(desc, node);
pin = irq_attr->ioapic_pin;
trigger = irq_attr->trigger;
@@ -4321,6 +4370,7 @@ void __init pre_init_apic_IRQ0(void)
phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
#endif
desc = irq_to_desc_alloc_node(0, 0);
+ ioapic_init_chip_data(desc, 0);
setup_local_APIC();
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
index 3520564..96020cb 100644
--- a/arch/x86/kernel/uv_irq.c
+++ b/arch/x86/kernel/uv_irq.c
@@ -55,6 +55,11 @@ struct irq_chip uv_irq_chip = {
.eoi = uv_ack_apic,
.end = uv_noop,
.set_affinity = uv_set_irq_affinity,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = ioapic_copy_chip_data,
+ .free_chip_data = ioapic_free_chip_data,
+#endif
};
/*
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 75f3f00..cc4cd22 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -611,6 +611,6 @@ struct irq_desc;
extern int early_irq_init(void);
extern int arch_probe_nr_irqs(void);
extern int arch_early_irq_init(void);
-extern int arch_init_chip_data(struct irq_desc *desc, int node);
+extern int arch_init_irq_desc(struct irq_desc *desc, int node);
#endif
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 707ab12..558bd2d 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -131,6 +131,14 @@ struct irq_chip {
void (*bus_lock)(unsigned int irq);
void (*bus_sync_unlock)(unsigned int irq);
+ /* for move_irq_desc */
+#ifdef CONFIG_SPARSE_IRQ
+ void (*copy_chip_data)(struct irq_desc *old_desc,
+ struct irq_desc *desc, int node);
+ void (*free_chip_data)(struct irq_desc *old_desc,
+ struct irq_desc *desc);
+#endif
+
/* Currently used only by UML, might disappear one day.*/
#ifdef CONFIG_IRQ_RELEASE_METHOD
void (*release)(unsigned int irq, void *dev_id);
@@ -208,10 +216,6 @@ struct irq_desc {
const char *name;
} ____cacheline_internodealigned_in_smp;
-extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
- struct irq_desc *desc, int node);
-extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc);
-
#ifndef CONFIG_SPARSE_IRQ
extern struct irq_desc irq_desc[NR_IRQS];
#endif
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 76d5a67..168e2f8 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -120,7 +120,6 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
BUG_ON(1);
}
init_desc_masks(desc);
- arch_init_chip_data(desc, node);
}
/*
@@ -228,6 +227,7 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
BUG_ON(1);
}
init_one_irq_desc(irq, desc, node);
+ arch_init_irq_desc(desc, node);
set_irq_desc(irq, desc);
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 963559d..9ea09c9 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -47,7 +47,8 @@ static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids);
init_copy_desc_masks(old_desc, desc);
- arch_init_copy_chip_data(old_desc, desc, node);
+ if (desc->chip->copy_chip_data)
+ desc->chip->copy_chip_data(old_desc, desc, node);
return true;
}
@@ -55,7 +56,8 @@ static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
{
free_kstat_irqs(old_desc, desc);
free_desc_masks(old_desc, desc);
- arch_free_chip_data(old_desc, desc);
+ if (desc->chip->free_chip_data)
+ desc->chip->free_chip_data(old_desc, desc);
}
static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
@@ -107,9 +109,15 @@ out_unlock:
struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
{
+
/* those static or target node is -1, do not move them */
if (desc->irq < NR_IRQS_LEGACY || node == -1)
return desc;
+ /* IRQ chip does not support movement */
+ if (desc->chip_data &&
+ (desc->chip->copy_chip_data == NULL ||
+ desc->chip->free_chip_data == NULL))
+ return desc;
if (desc->node != node)
desc = __real_move_irq_desc(desc, node);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 7c1a67e..3f4b80e 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -895,8 +895,7 @@ int __init __weak arch_early_irq_init(void)
{
return 0;
}
-
-int __weak arch_init_chip_data(struct irq_desc *desc, int node)
+int __weak arch_init_irq_desc(struct irq_desc *desc, int node)
{
return 0;
}
--
1.5.6.5
arch_early_irq_init ensures that in the non-SPARSE_IRQ case that
chip_data is only set for irq < NR_IRQS.
Signed-off-by: Ian Campbell <ian.ca...@citrix.com>
Cc: Thomas Gleixner <tg...@linutronix.de>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Eric W. Biederman <ebie...@xmission.com>
Cc: Yinghai Lu <yin...@kernel.org>
Cc: Jeremy Fitzhardinge <jer...@goop.org>
Cc: x...@kernel.org
Cc: linux-...@vger.kernel.org
---
arch/x86/kernel/apic/io_apic.c | 9 ++-------
1 files changed, 2 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 74d5d96..64a93c8 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -178,7 +178,6 @@ int __init arch_early_irq_init(void)
return 0;
}
-#ifdef CONFIG_SPARSE_IRQ
struct ioapic_irq_cfg *ioapic_irq_cfg(unsigned int irq)
{
struct ioapic_irq_cfg *cfg = NULL;
@@ -191,6 +190,8 @@ struct ioapic_irq_cfg *ioapic_irq_cfg(unsigned int irq)
return cfg;
}
+#ifdef CONFIG_SPARSE_IRQ
+
static struct ioapic_irq_cfg *get_one_free_irq_cfg(int node)
{
struct ioapic_irq_cfg *cfg;
@@ -333,16 +334,10 @@ void ioapic_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
/* end for move_irq_desc */
#else
-struct ioapic_irq_cfg *ioapic_irq_cfg(unsigned int irq)
-{
- return irq < nr_irqs ? irq_cfgx + irq : NULL;
-}
-
int ioapic_init_chip_data(struct irq_desc *desc, int node)
{
return 0;
}
-
#endif
struct io_apic {
--
1.5.6.5
It is not obvious that certain functions relate specifically to the
I/O APIC. Rename structures and functions as follows:
struct irq_cfg -> struct ioapic_irq_cfg
irq_cfg() -> ioapic_irq_cfg()
assign_irq_vector() -> ioapic_assign_irq_vector()
send_cleanup_vector() -> ioapic_send_cleanup_vector()
There is a slight preference towards ioapic vs io_apic in the current
code (563 occurances vs 146) so I went with that.
Signed-off-by: Ian Campbell <ian.ca...@citrix.com>
Cc: Thomas Gleixner <tg...@linutronix.de>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Eric W. Biederman <ebie...@xmission.com>
Cc: Yinghai Lu <yin...@kernel.org>
Cc: Jeremy Fitzhardinge <jer...@goop.org>
Cc: x...@kernel.org
Cc: linux-...@vger.kernel.org
---
arch/x86/include/asm/hw_irq.h | 14 ++--
arch/x86/kernel/apic/io_apic.c | 174 +++++++++++++++++++++-------------------
arch/x86/kernel/uv_irq.c | 14 ++--
3 files changed, 106 insertions(+), 96 deletions(-)
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index a929c9e..0642186 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -83,7 +83,7 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
*
* Most irqs are mapped 1:1 with pins.
*/
-struct irq_cfg {
+struct ioapic_irq_cfg {
struct irq_pin_list *irq_2_pin;
cpumask_var_t domain;
cpumask_var_t old_domain;
@@ -91,13 +91,15 @@ struct irq_cfg {
u8 move_in_progress : 1;
};
-extern struct irq_cfg *irq_cfg(unsigned int);
-extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
-extern void send_cleanup_vector(struct irq_cfg *);
+extern struct ioapic_irq_cfg *ioapic_irq_cfg(unsigned int);
+extern int ioapic_assign_irq_vector(int, struct ioapic_irq_cfg *,
+ const struct cpumask *);
+extern void ioapic_send_cleanup_vector(struct ioapic_irq_cfg *);
struct irq_desc;
-extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *,
- unsigned int *dest_id);
+extern unsigned int ioapic_set_desc_affinity(struct irq_desc *,
+ const struct cpumask *,
+ unsigned int *dest_id);
extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr);
extern void setup_ioapic_dest(void);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index e4e0ddc..a57d974 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -138,14 +138,14 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node)
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
#ifdef CONFIG_SPARSE_IRQ
-static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
+static struct ioapic_irq_cfg irq_cfgx[NR_IRQS_LEGACY];
#else
-static struct irq_cfg irq_cfgx[NR_IRQS];
+static struct ioapic_irq_cfg irq_cfgx[NR_IRQS];
#endif
int __init arch_early_irq_init(void)
{
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
struct irq_desc *desc;
int count;
int node;
@@ -179,9 +179,9 @@ int __init arch_early_irq_init(void)
}
#ifdef CONFIG_SPARSE_IRQ
-struct irq_cfg *irq_cfg(unsigned int irq)
+struct ioapic_irq_cfg *ioapic_irq_cfg(unsigned int irq)
{
- struct irq_cfg *cfg = NULL;
+ struct ioapic_irq_cfg *cfg = NULL;
struct irq_desc *desc;
desc = irq_to_desc(irq);
@@ -191,9 +191,9 @@ struct irq_cfg *irq_cfg(unsigned int irq)
return cfg;
}
-static struct irq_cfg *get_one_free_irq_cfg(int node)
+static struct ioapic_irq_cfg *get_one_free_irq_cfg(int node)
{
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
if (cfg) {
@@ -213,7 +213,7 @@ static struct irq_cfg *get_one_free_irq_cfg(int node)
int arch_init_chip_data(struct irq_desc *desc, int node)
{
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
cfg = desc->chip_data;
if (!cfg) {
@@ -229,7 +229,8 @@ int arch_init_chip_data(struct irq_desc *desc, int node)
/* for move_irq_desc */
static void
-init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node)
+init_copy_irq_2_pin(struct ioapic_irq_cfg *old_cfg,
+ struct ioapic_irq_cfg *cfg, int node)
{
struct irq_pin_list *old_entry, *head, *tail, *entry;
@@ -270,7 +271,8 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node)
cfg->irq_2_pin = head;
}
-static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
+static void free_irq_2_pin(struct ioapic_irq_cfg *old_cfg,
+ struct ioapic_irq_cfg *cfg)
{
struct irq_pin_list *entry, *next;
@@ -290,8 +292,8 @@ static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
void arch_init_copy_chip_data(struct irq_desc *old_desc,
struct irq_desc *desc, int node)
{
- struct irq_cfg *cfg;
- struct irq_cfg *old_cfg;
+ struct ioapic_irq_cfg *cfg;
+ struct ioapic_irq_cfg *old_cfg;
cfg = get_one_free_irq_cfg(node);
@@ -302,19 +304,19 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc,
old_cfg = old_desc->chip_data;
- memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
+ memcpy(cfg, old_cfg, sizeof(struct ioapic_irq_cfg));
init_copy_irq_2_pin(old_cfg, cfg, node);
}
-static void free_irq_cfg(struct irq_cfg *old_cfg)
+static void free_irq_cfg(struct ioapic_irq_cfg *old_cfg)
{
kfree(old_cfg);
}
void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
{
- struct irq_cfg *old_cfg, *cfg;
+ struct ioapic_irq_cfg *old_cfg, *cfg;
old_cfg = old_desc->chip_data;
cfg = desc->chip_data;
@@ -331,7 +333,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
/* end for move_irq_desc */
#else
-struct irq_cfg *irq_cfg(unsigned int irq)
+struct ioapic_irq_cfg *ioapic_irq_cfg(unsigned int irq)
{
return irq < nr_irqs ? irq_cfgx + irq : NULL;
}
@@ -387,7 +389,7 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
writel(value, &io_apic->data);
}
-static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
+static bool io_apic_level_ack_pending(struct ioapic_irq_cfg *cfg)
{
struct irq_pin_list *entry;
unsigned long flags;
@@ -472,7 +474,8 @@ static void ioapic_mask_entry(int apic, int pin)
* fast in the common case, and fast for shared ISA-space IRQs.
*/
static int
-add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin)
+add_pin_to_irq_node_nopanic(struct ioapic_irq_cfg *cfg, int node,
+ int apic, int pin)
{
struct irq_pin_list **last, *entry;
@@ -497,7 +500,8 @@ add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin)
return 0;
}
-static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
+static void add_pin_to_irq_node(struct ioapic_irq_cfg *cfg, int node,
+ int apic, int pin)
{
if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin))
panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
@@ -506,7 +510,7 @@ static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin
/*
* Reroute an IRQ to a different pin.
*/
-static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
+static void __init replace_pin_at_irq_node(struct ioapic_irq_cfg *cfg, int node,
int oldapic, int oldpin,
int newapic, int newpin)
{
@@ -540,7 +544,7 @@ static void __io_apic_modify_irq(struct irq_pin_list *entry,
final(entry);
}
-static void io_apic_modify_irq(struct irq_cfg *cfg,
+static void io_apic_modify_irq(struct ioapic_irq_cfg *cfg,
int mask_and, int mask_or,
void (*final)(struct irq_pin_list *entry))
{
@@ -562,7 +566,7 @@ static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry)
IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
}
-static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
+static void __unmask_IO_APIC_irq(struct ioapic_irq_cfg *cfg)
{
io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
}
@@ -578,14 +582,14 @@ static void io_apic_sync(struct irq_pin_list *entry)
readl(&io_apic->data);
}
-static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
+static void __mask_IO_APIC_irq(struct ioapic_irq_cfg *cfg)
{
io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
}
static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
{
- struct irq_cfg *cfg = desc->chip_data;
+ struct ioapic_irq_cfg *cfg = desc->chip_data;
unsigned long flags;
BUG_ON(!cfg);
@@ -597,7 +601,7 @@ static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
{
- struct irq_cfg *cfg = desc->chip_data;
+ struct ioapic_irq_cfg *cfg = desc->chip_data;
unsigned long flags;
raw_spin_lock_irqsave(&ioapic_lock, flags);
@@ -1124,7 +1128,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
void lock_vector_lock(void)
{
/* Used to the online set of cpus does not change
- * during assign_irq_vector.
+ * during ioapic_assign_irq_vector.
*/
raw_spin_lock(&vector_lock);
}
@@ -1135,7 +1139,8 @@ void unlock_vector_lock(void)
}
static int
-__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+__ioapic_assign_irq_vector(int irq, struct ioapic_irq_cfg *cfg,
+ const struct cpumask *mask)
{
/*
* NOTE! The local APIC isn't very good at handling
@@ -1214,18 +1219,19 @@ next:
return err;
}
-int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+int ioapic_assign_irq_vector(int irq, struct ioapic_irq_cfg *cfg,
+ const struct cpumask *mask)
{
int err;
unsigned long flags;
raw_spin_lock_irqsave(&vector_lock, flags);
- err = __assign_irq_vector(irq, cfg, mask);
+ err = __ioapic_assign_irq_vector(irq, cfg, mask);
raw_spin_unlock_irqrestore(&vector_lock, flags);
return err;
}
-static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
+static void __clear_irq_vector(int irq, struct ioapic_irq_cfg *cfg)
{
int cpu, vector;
@@ -1256,7 +1262,7 @@ void __setup_vector_irq(int cpu)
{
/* Initialize vector_irq on a new cpu */
int irq, vector;
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
struct irq_desc *desc;
/*
@@ -1279,7 +1285,7 @@ void __setup_vector_irq(int cpu)
if (irq < 0)
continue;
- cfg = irq_cfg(irq);
+ cfg = ioapic_irq_cfg(irq);
if (!cpumask_test_cpu(cpu, cfg->domain))
per_cpu(vector_irq, cpu)[vector] = -1;
}
@@ -1424,7 +1430,7 @@ int setup_ioapic_entry(int apic_id, int irq,
static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc,
int trigger, int polarity)
{
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
struct IO_APIC_route_entry entry;
unsigned int dest;
@@ -1441,7 +1447,7 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain))
apic->vector_allocation_domain(0, cfg->domain);
- if (assign_irq_vector(irq, cfg, apic->target_cpus()))
+ if (ioapic_assign_irq_vector(irq, cfg, apic->target_cpus()))
return;
dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
@@ -1477,7 +1483,7 @@ static void __init setup_IO_APIC_irqs(void)
int apic_id, pin, idx, irq;
int notcon = 0;
struct irq_desc *desc;
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
int node = cpu_to_node(boot_cpu_id);
apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
@@ -1545,7 +1551,7 @@ void setup_IO_APIC_irq_extra(u32 gsi)
int apic_id = 0, pin, idx, irq;
int node = cpu_to_node(boot_cpu_id);
struct irq_desc *desc;
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
/*
* Convert 'gsi' to 'ioapic.pin'.
@@ -1631,7 +1637,7 @@ __apicdebuginit(void) print_IO_APIC(void)
union IO_APIC_reg_02 reg_02;
union IO_APIC_reg_03 reg_03;
unsigned long flags;
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
struct irq_desc *desc;
unsigned int irq;
@@ -2243,7 +2249,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
{
int was_pending = 0;
unsigned long flags;
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
raw_spin_lock_irqsave(&ioapic_lock, flags);
if (irq < legacy_pic->nr_legacy_irqs) {
@@ -2251,7 +2257,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
if (legacy_pic->irq_pending(irq))
was_pending = 1;
}
- cfg = irq_cfg(irq);
+ cfg = ioapic_irq_cfg(irq);
__unmask_IO_APIC_irq(cfg);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -2261,7 +2267,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
static int ioapic_retrigger_irq(unsigned int irq)
{
- struct irq_cfg *cfg = irq_cfg(irq);
+ struct ioapic_irq_cfg *cfg = ioapic_irq_cfg(irq);
unsigned long flags;
raw_spin_lock_irqsave(&vector_lock, flags);
@@ -2281,7 +2287,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
*/
#ifdef CONFIG_SMP
-void send_cleanup_vector(struct irq_cfg *cfg)
+void ioapic_send_cleanup_vector(struct ioapic_irq_cfg *cfg)
{
cpumask_var_t cleanup_mask;
@@ -2297,7 +2303,8 @@ void send_cleanup_vector(struct irq_cfg *cfg)
cfg->move_in_progress = 0;
}
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest,
+ struct ioapic_irq_cfg *cfg)
{
int apic, pin;
struct irq_pin_list *entry;
@@ -2327,10 +2334,10 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
* leaves desc->affinity untouched.
*/
unsigned int
-set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask,
+ioapic_set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask,
unsigned int *dest_id)
{
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
unsigned int irq;
if (!cpumask_intersects(mask, cpu_online_mask))
@@ -2338,7 +2345,7 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask,
irq = desc->irq;
cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
+ if (ioapic_assign_irq_vector(irq, cfg, mask))
return -1;
cpumask_copy(desc->affinity, mask);
@@ -2350,7 +2357,7 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask,
static int
set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
{
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
unsigned long flags;
unsigned int dest;
unsigned int irq;
@@ -2360,7 +2367,7 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
cfg = desc->chip_data;
raw_spin_lock_irqsave(&ioapic_lock, flags);
- ret = set_desc_affinity(desc, mask, &dest);
+ ret = ioapic_set_desc_affinity(desc, mask, &dest);
if (!ret) {
/* Only the high 8 bits are valid. */
dest = SET_APIC_LOGICAL_ID(dest);
@@ -2397,7 +2404,7 @@ set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
static int
migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
{
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
struct irte irte;
unsigned int dest;
unsigned int irq;
@@ -2411,7 +2418,7 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
return ret;
cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
+ if (ioapic_assign_irq_vector(irq, cfg, mask))
return ret;
dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
@@ -2425,7 +2432,7 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
modify_irte(irq, &irte);
if (cfg->move_in_progress)
- send_cleanup_vector(cfg);
+ ioapic_send_cleanup_vector(cfg);
cpumask_copy(desc->affinity, mask);
@@ -2468,7 +2475,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
unsigned int irq;
unsigned int irr;
struct irq_desc *desc;
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
irq = __get_cpu_var(vector_irq)[vector];
if (irq == -1)
@@ -2478,7 +2485,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
if (!desc)
continue;
- cfg = irq_cfg(irq);
+ cfg = ioapic_irq_cfg(irq);
raw_spin_lock(&desc->lock);
/*
@@ -2514,7 +2521,7 @@ unlock:
static void __irq_complete_move(struct irq_desc **descp, unsigned vector)
{
struct irq_desc *desc = *descp;
- struct irq_cfg *cfg = desc->chip_data;
+ struct ioapic_irq_cfg *cfg = desc->chip_data;
unsigned me;
if (likely(!cfg->move_in_progress))
@@ -2523,7 +2530,7 @@ static void __irq_complete_move(struct irq_desc **descp, unsigned vector)
me = smp_processor_id();
if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
- send_cleanup_vector(cfg);
+ ioapic_send_cleanup_vector(cfg);
}
static void irq_complete_move(struct irq_desc **descp)
@@ -2534,7 +2541,7 @@ static void irq_complete_move(struct irq_desc **descp)
void irq_force_complete_move(int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg = desc->chip_data;
+ struct ioapic_irq_cfg *cfg = desc->chip_data;
__irq_complete_move(&desc, cfg->vector);
}
@@ -2569,7 +2576,7 @@ atomic_t irq_mis_count;
* Otherwise, we simulate the EOI message manually by changing the trigger
* mode to edge and then back to level, with RTE being masked during this.
*/
-static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+static void __eoi_ioapic_irq(unsigned int irq, struct ioapic_irq_cfg *cfg)
{
struct irq_pin_list *entry;
@@ -2594,7 +2601,7 @@ static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
static void eoi_ioapic_irq(struct irq_desc *desc)
{
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
unsigned long flags;
unsigned int irq;
@@ -2611,7 +2618,7 @@ static void ack_apic_level(unsigned int irq)
struct irq_desc *desc = irq_to_desc(irq);
unsigned long v;
int i;
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
int do_unmask_irq = 0;
irq_complete_move(&desc);
@@ -2760,7 +2767,7 @@ static inline void init_IO_APIC_traps(void)
{
int irq;
struct irq_desc *desc;
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
/*
* NOTE! The local APIC isn't very good at handling
@@ -2928,7 +2935,7 @@ int timer_through_8259 __initdata;
static inline void __init check_timer(void)
{
struct irq_desc *desc = irq_to_desc(0);
- struct irq_cfg *cfg = desc->chip_data;
+ struct ioapic_irq_cfg *cfg = desc->chip_data;
int node = cpu_to_node(boot_cpu_id);
int apic1, pin1, apic2, pin2;
unsigned long flags;
@@ -2940,7 +2947,7 @@ static inline void __init check_timer(void)
* get/set the timer IRQ vector:
*/
legacy_pic->chip->mask(0);
- assign_irq_vector(0, cfg, apic->target_cpus());
+ ioapic_assign_irq_vector(0, cfg, apic->target_cpus());
/*
* As IRQ0 is to be enabled in the 8259A, the virtual
@@ -3247,7 +3254,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
unsigned int irq;
unsigned int new;
unsigned long flags;
- struct irq_cfg *cfg_new = NULL;
+ struct ioapic_irq_cfg *cfg_new = NULL;
struct irq_desc *desc_new = NULL;
irq = 0;
@@ -3269,7 +3276,8 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
desc_new = move_irq_desc(desc_new, node);
cfg_new = desc_new->chip_data;
- if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
+ if (__ioapic_assign_irq_vector(new, cfg_new,
+ apic->target_cpus()) == 0)
irq = new;
break;
}
@@ -3315,15 +3323,15 @@ void destroy_irq(unsigned int irq)
static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
struct msi_msg *msg, u8 hpet_id)
{
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
int err;
unsigned dest;
if (disable_apic)
return -ENXIO;
- cfg = irq_cfg(irq);
- err = assign_irq_vector(irq, cfg, apic->target_cpus());
+ cfg = ioapic_irq_cfg(irq);
+ err = ioapic_assign_irq_vector(irq, cfg, apic->target_cpus());
if (err)
return err;
@@ -3392,11 +3400,11 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
struct msi_msg msg;
unsigned int dest;
- if (set_desc_affinity(desc, mask, &dest))
+ if (ioapic_set_desc_affinity(desc, mask, &dest))
return -1;
cfg = desc->chip_data;
@@ -3421,14 +3429,14 @@ static int
ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg = desc->chip_data;
+ struct ioapic_irq_cfg *cfg = desc->chip_data;
unsigned int dest;
struct irte irte;
if (get_irte(irq, &irte))
return -1;
- if (set_desc_affinity(desc, mask, &dest))
+ if (ioapic_set_desc_affinity(desc, mask, &dest))
return -1;
irte.vector = cfg->vector;
@@ -3445,7 +3453,7 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
* vector allocation.
*/
if (cfg->move_in_progress)
- send_cleanup_vector(cfg);
+ ioapic_send_cleanup_vector(cfg);
return 0;
}
@@ -3606,11 +3614,11 @@ void arch_teardown_msi_irq(unsigned int irq)
static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
struct msi_msg msg;
unsigned int dest;
- if (set_desc_affinity(desc, mask, &dest))
+ if (ioapic_set_desc_affinity(desc, mask, &dest))
return -1;
cfg = desc->chip_data;
@@ -3661,11 +3669,11 @@ int arch_setup_dmar_msi(unsigned int irq)
static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
struct msi_msg msg;
unsigned int dest;
- if (set_desc_affinity(desc, mask, &dest))
+ if (ioapic_set_desc_affinity(desc, mask, &dest))
return -1;
cfg = desc->chip_data;
@@ -3768,10 +3776,10 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
unsigned int dest;
- if (set_desc_affinity(desc, mask, &dest))
+ if (ioapic_set_desc_affinity(desc, mask, &dest))
return -1;
cfg = desc->chip_data;
@@ -3796,14 +3804,14 @@ static struct irq_chip ht_irq_chip = {
int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
{
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
int err;
if (disable_apic)
return -ENXIO;
- cfg = irq_cfg(irq);
- err = assign_irq_vector(irq, cfg, apic->target_cpus());
+ cfg = ioapic_irq_cfg(irq);
+ err = ioapic_assign_irq_vector(irq, cfg, apic->target_cpus());
if (!err) {
struct ht_irq_msg msg;
unsigned dest;
@@ -3897,7 +3905,7 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq,
struct io_apic_irq_attr *irq_attr)
{
struct irq_desc *desc;
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
int node;
int ioapic, pin;
int trigger, polarity;
@@ -4305,7 +4313,7 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
/* Enable IOAPIC early just for system timer */
void __init pre_init_apic_IRQ0(void)
{
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
struct irq_desc *desc;
printk(KERN_INFO "Early APIC setup for system timer0\n");
@@ -4316,7 +4324,7 @@ void __init pre_init_apic_IRQ0(void)
setup_local_APIC();
- cfg = irq_cfg(0);
+ cfg = ioapic_irq_cfg(0);
add_pin_to_irq_node(cfg, 0, 0, 0);
set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
index ece73d8..3520564 100644
--- a/arch/x86/kernel/uv_irq.c
+++ b/arch/x86/kernel/uv_irq.c
@@ -144,7 +144,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
{
const struct cpumask *eligible_cpu = cpumask_of(cpu);
struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg;
+ struct ioapic_irq_cfg *cfg;
int mmr_pnode;
unsigned long mmr_value;
struct uv_IO_APIC_route_entry *entry;
@@ -153,9 +153,9 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
sizeof(unsigned long));
- cfg = irq_cfg(irq);
+ cfg = ioapic_irq_cfg(irq);
- err = assign_irq_vector(irq, cfg, eligible_cpu);
+ err = ioapic_assign_irq_vector(irq, cfg, eligible_cpu);
if (err != 0)
return err;
@@ -181,7 +181,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
if (cfg->move_in_progress)
- send_cleanup_vector(cfg);
+ ioapic_send_cleanup_vector(cfg);
return irq;
}
@@ -208,14 +208,14 @@ static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg = desc->chip_data;
+ struct ioapic_irq_cfg *cfg = desc->chip_data;
unsigned int dest;
unsigned long mmr_value;
struct uv_IO_APIC_route_entry *entry;
unsigned long mmr_offset;
unsigned mmr_pnode;
- if (set_desc_affinity(desc, mask, &dest))
+ if (ioapic_set_desc_affinity(desc, mask, &dest))
return -1;
mmr_value = 0;
@@ -236,7 +236,7 @@ static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
if (cfg->move_in_progress)
- send_cleanup_vector(cfg);
+ ioapic_send_cleanup_vector(cfg);
return 0;
}
--
1.5.6.5
One idea I had to improve this was to add a struct irq_chip * as a
parameter to irq_to_desc_alloc_node. The new parameter potentially could
be NULL for current behaviour. Does that sound like a reasonable
approach?
Ian.
can you just add another pointer field in irq_desc?
some kind of *irq_info etc.
YH
I think I don't understand what you are suggesting.
There is already a pointer for irq_chip specific use i.e.
irq_desc->chip_data. This patchset is just about ensuring that the field
really is available to any chip implementation rather than just assuming
it is always used for the acpi chip types (on x86 at least).
Does adding a second pointer with the same (intended?) semantics as the
existing one buy us anything?
Ian.
> From: Ian Campbell <ian.ca...@citrix.com>
>
> It is not obvious that certain functions relate specifically to the
> I/O APIC. Rename structures and functions as follows:
>
> struct irq_cfg -> struct ioapic_irq_cfg
> irq_cfg() -> ioapic_irq_cfg()
> assign_irq_vector() -> ioapic_assign_irq_vector()
> send_cleanup_vector() -> ioapic_send_cleanup_vector()
>
> There is a slight preference towards ioapic vs io_apic in the current
> code (563 occurances vs 146) so I went with that.
The rename is wrong. Only the optional irq_2_pin list in struct
irq_cfg is specific to ioapics.
The rest of the fields are all about the architectural vector based
interrupt delivery system of x86. Branding that structure with ioapic
is strongly misleading.
x86_irq_cfg might be an appropriate rename.
Eric
> On Wed, 2010-03-10 at 10:55 +0000, i...@hellion.org.uk wrote:
>>
>> arch_init_chip_data cannot be moved into struct irq_chip at this time
>> because irq_desc->chip is not known at the time the irq_desc is
>> setup. For now rename arch_init_chip_data to arch_init_irq_desc (for
>> PowerPC, the only other user, whose usage better matches the new name)
>> and on x86 convert arch_init_chip_data to ioapic_init_chip_data and
>> call this whenever the IO APIC code allocates a new IRQ.
>
> One idea I had to improve this was to add a struct irq_chip * as a
> parameter to irq_to_desc_alloc_node. The new parameter potentially could
> be NULL for current behaviour. Does that sound like a reasonable
> approach?
I don't follow why we have the restriction that irq_to_desc_alloc_node
must call arch_init_chip_data. Assuming that requirement to call arch_init_chip_data
is valid, passing something into init_one_irq_desc seems appropriate.
Eric
> On Wed, 2010-03-10 at 12:06 +0000, Yinghai Lu wrote:
>> On Wed, Mar 10, 2010 at 2:55 AM, <i...@hellion.org.uk> wrote:
>> > From: Ian Campbell <ian.ca...@citrix.com>
>> >
>> > Move arch_init_copy_chip_data and arch_free_chip_data into function
>> > pointers in struct irq_chip since they operate on irq_desc->chip_data.
>> >
>> > arch_init_chip_data cannot be moved into struct irq_chip at this time
>> > because irq_desc->chip is not known at the time the irq_desc is
>> > setup. For now rename arch_init_chip_data to arch_init_irq_desc (for
>> > PowerPC, the only other user, whose usage better matches the new name)
>> > and on x86 convert arch_init_chip_data to ioapic_init_chip_data and
>> > call this whenever the IO APIC code allocates a new IRQ.
>> >
>> > I've retained the chip_data behaviour for uv_irq although it isn't
>> > clear to me if these interrupt types support migration or how closely
>> > related to the APIC modes they really are. If it weren't for this the
>> > ioapic_{init,copy,free}_chip_data functions could be static to
>> > io_apic.c.
>> >
>> > I've tested by booting on a 64 bit system, but it's not clear to me
>> > what actions I need to take to actually exercise some of these code
>> > paths.
>> >
>>
>> can you just add another pointer field in irq_desc?
>>
>> some kind of *irq_info etc.
>
> I think I don't understand what you are suggesting.
YH another field doesn't make much sense. Xen is a bizarre subarch
with an incompatible irq model. Xen simply needs the ability to
handle the entire lifetime of an irq_chip.
All we need between the Xen and the rest of x86 is a convention
so that we never manage the same irqs. At least for domU we are
in an either/or situation so I don't see even that being a problem.
> There is already a pointer for irq_chip specific use i.e.
> irq_desc->chip_data. This patchset is just about ensuring that the field
> really is available to any chip implementation rather than just assuming
> it is always used for the acpi chip types (on x86 at least).
Ian Xen in this sense is simply not x86. irq_cfg is not acpi or ioapic
or anything but x86 specific. It has everything to do with having a per
cpu vector table of 256 entries and architecturally receiving a vector
number when an interrupt is fired.
It totally makes sense for Xen to do something different because
architecturally it has a completely different irq subsystem.
At the same time let's not pretend that the reason for this is anything
except that Xen has a completely different notion of interrupt delivery
than the rest of x86 and so it is it's own bizarre subarch.
This is not a case where you simply need a driver because something is
a bit different but fits into the existing model.
So the best solution here seems to be a parameter that we pass into
irq_to_desc_alloc_node that does what is needed. The second best
would be to have arch_init_chip_data to call something like
platfrom_init_chip_data(). But I think we can avoid that in
this case.
Eric
Yes, I suspect that could also be made to work.
The lifecycle of the irq_desc and chip_data isn't really clear to me --
I guess once allocated an irq_desc never gets freed (at least
currently)? The associated chip_data can be freed on migrate and
replaced with a new one, but is not freed otherwise.
My concern is that if the caller asks for an IRQ which already exists
(is that valid?) then you will get that existing irq_desc back,
including its existing chip_data, which potentially leaks the new one
which was passed in. Or is it the case that the only way this could
happen would be for legacy IRQs? In which case perhaps it is simply
invalid to pass a new chip data in for such an IRQ.
Ian.
OK, so that sounds like you would like the same patchset but without the
irq_cfg renaming? or potentially with renaming to x86_blah instead (I'll
rework to your preference).
Ian.
> On Wed, 2010-03-10 at 17:42 +0000, Eric W. Biederman wrote:
>>
>>
>> Ian Xen in this sense is simply not x86. irq_cfg is not acpi or
>> ioapic or anything but x86 specific. It has everything to do with
>> having a per cpu vector table of 256 entries and architecturally
>> receiving a vector number when an interrupt is fired.
>>
>> It totally makes sense for Xen to do something different because
>> architecturally it has a completely different irq subsystem.
>
> OK, so that sounds like you would like the same patchset but without the
> irq_cfg renaming? or potentially with renaming to x86_blah instead (I'll
> rework to your preference).
Currently the renaming really makes it unclear what you are doing and for
some reason the description of the renaming rubbed me the wrong way.
Eric
> On Wed, 2010-03-10 at 17:18 +0000, Eric W. Biederman wrote:
>> Ian Campbell <Ian.Ca...@citrix.com> writes:
>>
>> > On Wed, 2010-03-10 at 10:55 +0000, i...@hellion.org.uk wrote:
>> >>
>> >> arch_init_chip_data cannot be moved into struct irq_chip at this time
>> >> because irq_desc->chip is not known at the time the irq_desc is
>> >> setup. For now rename arch_init_chip_data to arch_init_irq_desc (for
>> >> PowerPC, the only other user, whose usage better matches the new name)
>> >> and on x86 convert arch_init_chip_data to ioapic_init_chip_data and
>> >> call this whenever the IO APIC code allocates a new IRQ.
>> >
>> > One idea I had to improve this was to add a struct irq_chip * as a
>> > parameter to irq_to_desc_alloc_node. The new parameter potentially could
>> > be NULL for current behaviour. Does that sound like a reasonable
>> > approach?
>>
>> I don't follow why we have the restriction that irq_to_desc_alloc_node
>> must call arch_init_chip_data. Assuming that requirement to call arch_init_chip_data
>> is valid, passing something into init_one_irq_desc seems appropriate.
>
> Yes, I suspect that could also be made to work.
>
> The lifecycle of the irq_desc and chip_data isn't really clear to me --
> I guess once allocated an irq_desc never gets freed (at least
> currently)? The associated chip_data can be freed on migrate and
> replaced with a new one, but is not freed otherwise.
Yes. That actually looks like a bug.
> My concern is that if the caller asks for an IRQ which already exists
> (is that valid?) then you will get that existing irq_desc back,
> including its existing chip_data, which potentially leaks the new one
> which was passed in. Or is it the case that the only way this could
> happen would be for legacy IRQs? In which case perhaps it is simply
> invalid to pass a new chip data in for such an IRQ.
The only irqs that should be allocated/freed are probably the msi
irqs as those are the only ones that dynamically come and go in the
system.
Unfortunately there appears to be a bigger mess here than first appeared.
Eric
Sorry, I started off a bit confused and then totally misunderstood what
related to what and I think that came through in the description.
I'll respin without the first patch.
Ian.
Dom0 too. This is part of the work implementing what we discussed a
while back - Xen now completely owns the local and IO apics, so dom0
only deals with Xen, not the hardware. Xen has a completely different
interrupt setup path, but at least it isn't a mishmash of Xen stuff and
native APIC stuff.
J
#ifdef CONFIG_INTR_REMAP
struct irq_2_iommu *irq_2_iommu;
#endif
struct irq_chip *chip;
struct msi_desc *msi_desc;
we already have that for irq_2_iommu and msi_desc
YH
Those are at different levels of the hierarchy. Adding another pointer
for Xen is like having a different iommu and so adding another pointer
to handle that kind of iommu.
Eric
Ack on the name change, it should be called arch_init_irq_desc(), the
existing name clearly comes from the fact that sparse IRQ was
implemented first on x86, and on x86 that routine init's the chip data
for a new irq_desc.
But semantically arch_init_irq_desc() is the right name, I was just too
lazy to change it when I enabled sparse IRQ for powerpc.
Can't comment on the rest of the patch.
cheers
Signed-off-by: Ian Campbell <ian.ca...@citrix.com>
Cc: Thomas Gleixner <tg...@linutronix.de>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Eric W. Biederman <ebie...@xmission.com>
Cc: Yinghai Lu <yin...@kernel.org>
Cc: Jeremy Fitzhardinge <jer...@goop.org>
Cc: x...@kernel.org
Cc: linux-...@vger.kernel.org
---
arch/x86/kernel/apic/io_apic.c | 9 ++-------
1 files changed, 2 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 12e5cf4..afe81d6 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -178,7 +178,6 @@ int __init arch_early_irq_init(void)
return 0;
}
-#ifdef CONFIG_SPARSE_IRQ
struct irq_cfg *irq_cfg(unsigned int irq)
{
struct irq_cfg *cfg = NULL;
@@ -191,6 +190,8 @@ struct irq_cfg *irq_cfg(unsigned int irq)
return cfg;
}
+#ifdef CONFIG_SPARSE_IRQ
+
static struct irq_cfg *get_one_free_irq_cfg(int node)
{
struct irq_cfg *cfg;
@@ -331,16 +332,10 @@ void x86_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
/* end for move_irq_desc */
#else
-struct irq_cfg *irq_cfg(unsigned int irq)
-{
- return irq < nr_irqs ? irq_cfgx + irq : NULL;
-}
-
int x86_init_chip_data(struct irq_desc *desc, int node)
{
return 0;
}
-
#endif
struct io_apic {
--
1.5.6.5
--
There is an outstanding issue wrt when/how the chip_data field is
initialised. I am continuing to investigate this but the solution is not
turning out as easy/low-impact as expected.
Since last time I've dropped the renaming portion of the series since it
was basically wrong, the functions I'd implicated as ioapic specific are
not at all.
Ian.
The following changes since commit 1ebbdcc83e75697c0d75eb091df172b7d93c84c1:
Ingo Molnar (1):
Merge branch 'perf/urgent'
are available in the git repository at:
git://xenbits.xensource.com/people/ianc/linux-2.6.git for-x86/irq
Ian Campbell (2):
irq: move some interrupt arch_* functions into struct irq_chip.
x86: irq_desc->chip_data is always correct whether or not SPARSE_IRQ is enabled.
arch/powerpc/kernel/irq.c | 2 +-
arch/x86/include/asm/hw_irq.h | 11 ++++++-
arch/x86/kernel/apic/io_apic.c | 61 ++++++++++++++++++++++++++++++++++-----
arch/x86/kernel/uv_irq.c | 5 +++
include/linux/interrupt.h | 2 +-
include/linux/irq.h | 12 +++++--
kernel/irq/handle.c | 2 +-
kernel/irq/numa_migrate.c | 12 ++++++-
kernel/softirq.c | 3 +-
9 files changed, 90 insertions(+), 20 deletions(-)
arch_init_chip_data cannot be moved into struct irq_chip at this time
because irq_desc->chip is not known at the time the irq_desc is
setup. For now rename arch_init_chip_data to arch_init_irq_desc (for
PowerPC, the only other user, whose usage better matches the new name)
and on x86 convert arch_init_chip_data to x86_init_chip_data and
call this whenever a new struct irq_desc is allocated.
I've retained the chip_data behaviour for uv_irq although it isn't
clear to me if these interrupt types support migration or how closely
related to the APIC modes they really are. If it weren't for this the
x86_{init,copy,free}_chip_data functions could be static to
io_apic.c.
I've tested by booting on a 64 bit system, but it's not clear to me
what actions I need to take to actually exercise some of these code
paths.
Signed-off-by: Ian Campbell <ian.ca...@citrix.com>
Acked-by: Michael Ellerman <mic...@ellerman.id.au> [PowerPC rename portion]
Cc: Thomas Gleixner <tg...@linutronix.de>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Eric W. Biederman <ebie...@xmission.com>
Cc: Yinghai Lu <yin...@kernel.org>
Cc: Jeremy Fitzhardinge <jer...@goop.org>
Cc: Benjamin Herrenschmidt <be...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: x...@kernel.org
Cc: linuxp...@ozlabs.org
Cc: linux-...@vger.kernel.org
---
arch/powerpc/kernel/irq.c | 2 +-
arch/x86/include/asm/hw_irq.h | 11 +++++++-
arch/x86/kernel/apic/io_apic.c | 58 +++++++++++++++++++++++++++++++++++++---
arch/x86/kernel/uv_irq.c | 5 +++
include/linux/interrupt.h | 2 +-
include/linux/irq.h | 12 +++++---
kernel/irq/handle.c | 2 +-
kernel/irq/numa_migrate.c | 12 +++++++-
kernel/softirq.c | 3 +-
9 files changed, 91 insertions(+), 16 deletions(-)
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 64f6f20..002d87f 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -1088,7 +1088,7 @@ int arch_early_irq_init(void)
return 0;
}
-int arch_init_chip_data(struct irq_desc *desc, int node)
+int arch_init_irq_desc(struct irq_desc *desc, int node)
{
desc->status |= IRQ_NOREQUEST;
return 0;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index a929c9e..1bc7063 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -20,9 +20,9 @@
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/smp.h>
+#include <linux/irq.h>
#include <asm/atomic.h>
-#include <asm/irq.h>
#include <asm/sections.h>
/* Interrupt handlers registered during init_IRQ */
@@ -61,6 +61,15 @@ extern void init_VISWS_APIC_irqs(void);
extern void setup_IO_APIC(void);
extern void disable_IO_APIC(void);
+extern int x86_init_chip_data(struct irq_desc *desc, int node);
+
+#ifdef CONFIG_SPARSE_IRQ
+extern void x86_copy_chip_data(struct irq_desc *old_desc,
+ struct irq_desc *desc, int node);
+extern void x86_free_chip_data(struct irq_desc *old_desc,
+ struct irq_desc *desc);
+#endif
+
struct io_apic_irq_attr {
int ioapic;
int ioapic_pin;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index e4e0ddc..12e5cf4 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -211,7 +211,7 @@ static struct irq_cfg *get_one_free_irq_cfg(int node)
return cfg;
}
-int arch_init_chip_data(struct irq_desc *desc, int node)
+int x86_init_chip_data(struct irq_desc *desc, int node)
{
struct irq_cfg *cfg;
@@ -287,8 +287,8 @@ static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
old_cfg->irq_2_pin = NULL;
}
-void arch_init_copy_chip_data(struct irq_desc *old_desc,
- struct irq_desc *desc, int node)
+void x86_copy_chip_data(struct irq_desc *old_desc,
+ struct irq_desc *desc, int node)
{
struct irq_cfg *cfg;
struct irq_cfg *old_cfg;
@@ -312,7 +312,7 @@ static void free_irq_cfg(struct irq_cfg *old_cfg)
kfree(old_cfg);
}
-void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
+void x86_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
{
struct irq_cfg *old_cfg, *cfg;
@@ -336,6 +336,11 @@ struct irq_cfg *irq_cfg(unsigned int irq)
return irq < nr_irqs ? irq_cfgx + irq : NULL;
}
+int x86_init_chip_data(struct irq_desc *desc, int node)
+{
+ return 0;
+}
+
#endif
struct io_apic {
@@ -1520,6 +1525,7 @@ static void __init setup_IO_APIC_irqs(void)
printk(KERN_INFO "can not get irq_desc for %d\n", irq);
continue;
}
+ x86_init_chip_data(desc, node);
cfg = desc->chip_data;
add_pin_to_irq_node(cfg, node, apic_id, pin);
/*
@@ -1570,6 +1576,7 @@ void setup_IO_APIC_irq_extra(u32 gsi)
printk(KERN_INFO "can not get irq_desc for %d\n", irq);
return;
}
+ x86_init_chip_data(desc, node);
cfg = desc->chip_data;
add_pin_to_irq_node(cfg, node, apic_id, pin);
@@ -2739,6 +2746,11 @@ static struct irq_chip ioapic_chip __read_mostly = {
.set_affinity = set_ioapic_affinity_irq,
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
+#endif
};
static struct irq_chip ir_ioapic_chip __read_mostly = {
@@ -2754,6 +2766,11 @@ static struct irq_chip ir_ioapic_chip __read_mostly = {
#endif
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
+#endif
};
static inline void init_IO_APIC_traps(void)
@@ -3261,6 +3278,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
printk(KERN_INFO "can not get irq_desc for %d\n", new);
continue;
}
+ x86_init_chip_data(desc_new, node);
cfg_new = desc_new->chip_data;
if (cfg_new->vector != 0)
@@ -3466,6 +3484,11 @@ static struct irq_chip msi_chip = {
.set_affinity = set_msi_irq_affinity,
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
+#endif
};
static struct irq_chip msi_ir_chip = {
@@ -3479,6 +3502,11 @@ static struct irq_chip msi_ir_chip = {
#endif
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
+#endif
};
/*
@@ -3638,6 +3666,11 @@ static struct irq_chip dmar_msi_type = {
.set_affinity = dmar_msi_set_affinity,
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
+#endif
};
int arch_setup_dmar_msi(unsigned int irq)
@@ -3695,6 +3728,11 @@ static struct irq_chip ir_hpet_msi_type = {
#endif
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
+#endif
};
static struct irq_chip hpet_msi_type = {
@@ -3706,6 +3744,11 @@ static struct irq_chip hpet_msi_type = {
.set_affinity = hpet_msi_set_affinity,
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
+#endif
};
int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
@@ -3792,6 +3835,11 @@ static struct irq_chip ht_irq_chip = {
.set_affinity = set_ht_irq_affinity,
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
+#endif
};
int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
@@ -3919,6 +3967,7 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq,
printk(KERN_INFO "can not get irq_desc %d\n", irq);
return 0;
}
+ x86_init_chip_data(desc, node);
pin = irq_attr->ioapic_pin;
trigger = irq_attr->trigger;
@@ -4313,6 +4362,7 @@ void __init pre_init_apic_IRQ0(void)
phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
#endif
desc = irq_to_desc_alloc_node(0, 0);
+ x86_init_chip_data(desc, 0);
setup_local_APIC();
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
index ece73d8..df2c6d6 100644
--- a/arch/x86/kernel/uv_irq.c
+++ b/arch/x86/kernel/uv_irq.c
@@ -55,6 +55,11 @@ struct irq_chip uv_irq_chip = {
.eoi = uv_ack_apic,
.end = uv_noop,
.set_affinity = uv_set_irq_affinity,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
-int __weak arch_init_chip_data(struct irq_desc *desc, int node)
+int __weak arch_init_irq_desc(struct irq_desc *desc, int node)
{
return 0;
}
--
1.5.6.5
you could put one dummy x86_copy_chip_data and x86_free_chip_data here too.
you move out init_chip_data for x86 out of irq_to_desc_alloc_node
current init_chip_data in irq_to_desc_alloc_node is protected by sparse_irq_lock.
wonder if you could let arch_init_irq_desc for x86 to call x86_init_chip_data?
or let add another irq_to_desc_alloc_node_f to take extra func call.
like
typedef int (*init_chip_data_f)(struct irq_desc *desc, int node);
struct irq_desc * __ref irq_to_desc_alloc_node_f(unsigned int irq, int node, init_chip_data_f *f);
struct irq_desc * __ref irq_to_desc_alloc_node_f(unsigned int irq, int node)
{
irq_to_desc_alloc_node(irq, node, NULL);
}
then for x86
int arch_init_irq_desc(struct irq_desc *desc, int node, init_chip_data_f *f)
{
if (!f)
return x86_init_chip_data(desc, node);
f(desc, node);
}
after that xen could use
irq_to_desc_alloc_node_f(irq, node, xen_init_chip_data);
as need...
at last we don't need to call x86_init_chip_data everywhere.
Thanks
Yinghai
Either that or we could have an arch specific wrapper say
"x86_irq_to_desc_alloc_node" that simply wraps irq_to_desc_alloc_node,
with the little extras we need. This assumes that none of the
initialization has to happen inside of the lock an arch specific
wrapper makes sense.
Let's evaluate this set of patches on it's merits. What this patchset
allows is a Xen implementation whose irq functions are fully decoupled
from the rest of x86 that supports SPARSE_IRQ. That is a huge
maintenance improvement that far outweighs the worst damage this
patch can do to maintenance.
So trying to evaluate races. The worse case for this particular piece
of code appears to be create_irq_nr. As this is the only place where
we are setting up irqs and possibly repurposing the structure. Today
we figure out if an irq has been assigned by looking at irq_cfg->vector,
and if it is non-zero the irq has been assigned.
The logic in x86_init_chip_data is correct we only assign desc->chip_data
if the generic layers are above it. However we need a lock to ensure that
two paths don't race in that comparison and that assignment. There is
no lock in x86_init_chip_data. Which unfortunately means as it stands
this patchset is buggy.
Eric
This was one of the things I was considering. It seems like one of the
easiest solutions to make work correctly with the current locking in
e.g. irq_to_desc_alloc_node since the callback would always happen under
the lock taken in that function.
> So trying to evaluate races. The worse case for this particular piece
> of code appears to be create_irq_nr. As this is the only place where
> we are setting up irqs and possibly repurposing the structure.
Yes, create_irq_nr was one of the functions I was struggling to solve
cleanly. There is a similar construct in the Xen code as well.
Part of the problem I'm having is the combination of lookup and allocate
in irq_to_desc_alloc_node but also the kind of "implicit" repurposing is
tricky to deal with. (by implicit I just mean that I can't find where
the previous user explicitly says they are finished with it, if you see
what I mean)
What do you think of adding an explicit free operation for the irq_desc
structs? (does one already exist? I couldn't find it). This would go
along with some tracking of allocation state, trivial in the sparse case
where you can treat a NULL node in the radix tree as unallocated, I
guess a flag would suffice in the static array non-sparse case?
Going further could we split the alloc and lookup functions into
separate operations instead of combining them in irq_to_desc_alloc_node?
We already have irq_to_desc for the lookup portion so this would largely
involve changes to the semantics of irq_to_desc_alloc_node, perhaps
returning ERR_PTR(-EBUSY) if the node was already allocated.
Having a variant which found a free IRQ rather than operating on a
specific requested IRQ could also be useful for create_irq_nr as well as
find_unbound_irq on the Xen side. I'm not convinced irq_alloc_virt on
powerpc isn't implementing broadly the same concept as well, although it
seems to work very differently from the other two.
> Today
> we figure out if an irq has been assigned by looking at irq_cfg->vector,
> and if it is non-zero the irq has been assigned.
Which is tricky to move into generic code hence my suggestions of
explicitly freeing the irq_desc and tracking the allocation status in
the generic code.
> The logic in x86_init_chip_data is correct we only assign desc->chip_data
> if the generic layers are above it. However we need a lock to ensure that
> two paths don't race in that comparison and that assignment. There is
> no lock in x86_init_chip_data. Which unfortunately means as it stands
> this patchset is buggy.
Yes, unfortunately I think you are right. The callback idea fixes this.
I'll respin with that.
Ian.
We have free_one_irq_desc used in the numa irq migration code.
We have dynamic_irq_cleanup.
I don't expect it would be fundamentally hard to put all of the pieces
together.
> Going further could we split the alloc and lookup functions into
> separate operations instead of combining them in irq_to_desc_alloc_node?
> We already have irq_to_desc for the lookup portion so this would largely
> involve changes to the semantics of irq_to_desc_alloc_node, perhaps
> returning ERR_PTR(-EBUSY) if the node was already allocated.
>
> Having a variant which found a free IRQ rather than operating on a
> specific requested IRQ could also be useful for create_irq_nr as well as
> find_unbound_irq on the Xen side. I'm not convinced irq_alloc_virt on
> powerpc isn't implementing broadly the same concept as well, although it
> seems to work very differently from the other two.
>
>> Today
>> we figure out if an irq has been assigned by looking at irq_cfg->vector,
>> and if it is non-zero the irq has been assigned.
>
> Which is tricky to move into generic code hence my suggestions of
> explicitly freeing the irq_desc and tracking the allocation status in
> the generic code.
Agreed. Getting an allocation assist into the generic code sounds
nlike something for the next round of patches after your current one.
At least for msi irqs, hi irqs and other irqs that are totally software
constructs this makes a lot of sense.
We really need to get to the point on x86 where sparse irq has no penalties
and stop making it an option. Otherwise it gets a bit tricky to rely
on sparse irq for an allocation helper.
I need to go look up my old patchset. I did something different with
create_irq(), that I think was cleaner and it may be worth resurrecting.
At the very least I generalized the msi case and the ht irq case into
using the same pointer in struct irq_desc.
>> The logic in x86_init_chip_data is correct we only assign desc->chip_data
>> if the generic layers are above it. However we need a lock to ensure that
>> two paths don't race in that comparison and that assignment. There is
>> no lock in x86_init_chip_data. Which unfortunately means as it stands
>> this patchset is buggy.
>
> Yes, unfortunately I think you are right. The callback idea fixes this.
> I'll respin with that.
Thanks.
Eric
arch_init_chip_data cannot be moved into struct irq_chip because
irq_desc->chip is not known at the time the irq_desc is setup. Instead
rename arch_init_chip_data to arch_init_irq_desc for PowerPC, the
only other user, whose usage better matches the new name.
To replace the x86 arch_init_chip_data functionality
irq_to_desc_alloc_node now takes a pointer to a function to allocate
the chip data. This is necessary to ensure the allocation happens
under the correct locking at the core level. On PowerPC and SH
architectures (the other users of irq_to_desc_alloc_node) pass in NULL
which retains existing chip_data behaviour.
I've retained the chip_data behaviour for uv_irq although it isn't
clear to me if these interrupt types support migration or how closely
related to the APIC modes they really are. If it weren't for this the
x86_{init,copy,free}_chip_data functions could be static to
io_apic.c.
I've tested by booting on an 64 bit x86 system with sparse IRQ enabled
and 32 bit without, but it's not clear to me what actions I need to
take to actually exercise some of these code paths.
Signed-off-by: Ian Campbell <ian.ca...@citrix.com>
Acked-by: Michael Ellerman <mic...@ellerman.id.au> [PowerPC rename portion]
Cc: Thomas Gleixner <tg...@linutronix.de>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Eric W. Biederman <ebie...@xmission.com>
Cc: Yinghai Lu <yin...@kernel.org>
Cc: Jeremy Fitzhardinge <jer...@goop.org>
Cc: Benjamin Herrenschmidt <be...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: x...@kernel.org
Cc: linuxp...@ozlabs.org
Cc: linux-...@vger.kernel.org
Cc: Rusty Russell <ru...@rustcorp.com.au>
Cc: lgu...@ozlabs.org
Cc: Paul Mundt <let...@linux-sh.org>
Cc: linu...@vger.kernel.org
---
arch/powerpc/kernel/irq.c | 4 +-
arch/sh/kernel/cpu/irq/ipr.c | 2 +-
arch/x86/include/asm/hw_irq.h | 11 ++++++-
arch/x86/kernel/apic/io_apic.c | 64 ++++++++++++++++++++++++++++++++++-----
arch/x86/kernel/uv_irq.c | 5 +++
arch/x86/lguest/boot.c | 2 +-
drivers/sh/intc.c | 7 ++--
drivers/xen/events.c | 2 +-
include/linux/interrupt.h | 2 +-
include/linux/irq.h | 16 +++++++---
kernel/irq/handle.c | 15 ++++++---
kernel/irq/numa_migrate.c | 12 ++++++-
kernel/softirq.c | 3 +-
13 files changed, 112 insertions(+), 33 deletions(-)
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 64f6f20..dc5a8c1 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -670,7 +670,7 @@ static int irq_setup_virq(struct irq_host *host, unsigned int virq,
{
struct irq_desc *desc;
- desc = irq_to_desc_alloc_node(virq, 0);
+ desc = irq_to_desc_alloc_node(virq, 0, NULL);
if (!desc) {
pr_debug("irq: -> allocating desc failed\n");
goto error;
@@ -1088,7 +1088,7 @@ int arch_early_irq_init(void)
return 0;
}
-int arch_init_chip_data(struct irq_desc *desc, int node)
+int arch_init_irq_desc(struct irq_desc *desc, int node)
{
desc->status |= IRQ_NOREQUEST;
return 0;
diff --git a/arch/sh/kernel/cpu/irq/ipr.c b/arch/sh/kernel/cpu/irq/ipr.c
index 9282d96..cf31454 100644
--- a/arch/sh/kernel/cpu/irq/ipr.c
+++ b/arch/sh/kernel/cpu/irq/ipr.c
@@ -67,7 +67,7 @@ void register_ipr_controller(struct ipr_desc *desc)
BUG_ON(p->ipr_idx >= desc->nr_offsets);
BUG_ON(!desc->ipr_offsets[p->ipr_idx]);
- irq_desc = irq_to_desc_alloc_node(p->irq, numa_node_id());
+ irq_desc = irq_to_desc_alloc_node(p->irq, numa_node_id(), NULL);
if (unlikely(!irq_desc)) {
printk(KERN_INFO "can not get irq_desc for %d\n",
p->irq);
index e4e0ddc..035b6d2 100644
#endif
struct io_apic {
@@ -1515,7 +1520,7 @@ static void __init setup_IO_APIC_irqs(void)
apic->multi_timer_check(apic_id, irq))
continue;
- desc = irq_to_desc_alloc_node(irq, node);
+ desc = irq_to_desc_alloc_node(irq, node, x86_init_chip_data);
if (!desc) {
printk(KERN_INFO "can not get irq_desc for %d\n", irq);
continue;
@@ -1565,7 +1570,7 @@ void setup_IO_APIC_irq_extra(u32 gsi)
if (desc)
return;
#endif
- desc = irq_to_desc_alloc_node(irq, node);
+ desc = irq_to_desc_alloc_node(irq, node, x86_init_chip_data);
if (!desc) {
printk(KERN_INFO "can not get irq_desc for %d\n", irq);
return;
@@ -2739,6 +2744,11 @@ static struct irq_chip ioapic_chip __read_mostly = {
.set_affinity = set_ioapic_affinity_irq,
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
+#endif
};
static struct irq_chip ir_ioapic_chip __read_mostly = {
@@ -2754,6 +2764,11 @@ static struct irq_chip ir_ioapic_chip __read_mostly = {
#endif
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
+#endif
};
static inline void init_IO_APIC_traps(void)
@@ -3256,7 +3271,8 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
raw_spin_lock_irqsave(&vector_lock, flags);
for (new = irq_want; new < nr_irqs; new++) {
- desc_new = irq_to_desc_alloc_node(new, node);
+ desc_new = irq_to_desc_alloc_node(new, node,
+ x86_init_chip_data);
if (!desc_new) {
printk(KERN_INFO "can not get irq_desc for %d\n", new);
continue;
@@ -3466,6 +3482,11 @@ static struct irq_chip msi_chip = {
.set_affinity = set_msi_irq_affinity,
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
+#endif
};
static struct irq_chip msi_ir_chip = {
@@ -3479,6 +3500,11 @@ static struct irq_chip msi_ir_chip = {
#endif
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
+#endif
};
/*
@@ -3638,6 +3664,11 @@ static struct irq_chip dmar_msi_type = {
.set_affinity = dmar_msi_set_affinity,
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
+#endif
};
int arch_setup_dmar_msi(unsigned int irq)
@@ -3695,6 +3726,11 @@ static struct irq_chip ir_hpet_msi_type = {
#endif
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
+#endif
};
static struct irq_chip hpet_msi_type = {
@@ -3706,6 +3742,11 @@ static struct irq_chip hpet_msi_type = {
.set_affinity = hpet_msi_set_affinity,
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
+#endif
};
int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
@@ -3792,6 +3833,11 @@ static struct irq_chip ht_irq_chip = {
.set_affinity = set_ht_irq_affinity,
#endif
.retrigger = ioapic_retrigger_irq,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
+#endif
};
int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
@@ -3914,7 +3960,7 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq,
else
node = cpu_to_node(boot_cpu_id);
- desc = irq_to_desc_alloc_node(irq, node);
+ desc = irq_to_desc_alloc_node(irq, node, x86_init_chip_data);
if (!desc) {
printk(KERN_INFO "can not get irq_desc %d\n", irq);
return 0;
@@ -4312,7 +4358,7 @@ void __init pre_init_apic_IRQ0(void)
#ifndef CONFIG_SMP
phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
#endif
- desc = irq_to_desc_alloc_node(0, 0);
+ desc = irq_to_desc_alloc_node(0, 0, x86_init_chip_data);
setup_local_APIC();
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
index ece73d8..df2c6d6 100644
--- a/arch/x86/kernel/uv_irq.c
+++ b/arch/x86/kernel/uv_irq.c
@@ -55,6 +55,11 @@ struct irq_chip uv_irq_chip = {
.eoi = uv_ack_apic,
.end = uv_noop,
.set_affinity = uv_set_irq_affinity,
+
+#ifdef CONFIG_SPARSE_IRQ
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
+#endif
};
/*
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 7e59dc1..659241a 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -841,7 +841,7 @@ static void __init lguest_init_IRQ(void)
*/
void lguest_setup_irq(unsigned int irq)
{
- irq_to_desc_alloc_node(irq, 0);
+ irq_to_desc_alloc_node(irq, 0, NULL);
set_irq_chip_and_handler_name(irq, &lguest_irq_controller,
handle_level_irq, "level");
}
diff --git a/drivers/sh/intc.c b/drivers/sh/intc.c
index 3a5a17d..99f89dd 100644
--- a/drivers/sh/intc.c
+++ b/drivers/sh/intc.c
@@ -870,7 +870,7 @@ void __init register_intc_controller(struct intc_desc *desc)
if (!vect->enum_id)
continue;
- irq_desc = irq_to_desc_alloc_node(irq, numa_node_id());
+ irq_desc = irq_to_desc_alloc_node(irq, numa_node_id(), NULL);
if (unlikely(!irq_desc)) {
pr_info("can't get irq_desc for %d\n", irq);
continue;
@@ -890,7 +890,8 @@ void __init register_intc_controller(struct intc_desc *desc)
* IRQ support, each vector still needs to have
* its own backing irq_desc.
*/
- irq_desc = irq_to_desc_alloc_node(irq2, numa_node_id());
+ irq_desc = irq_to_desc_alloc_node(irq2, numa_node_id(),
+ NULL);
if (unlikely(!irq_desc)) {
pr_info("can't get irq_desc for %d\n", irq2);
continue;
@@ -1012,7 +1013,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
__set_bit(new, intc_irq_map);
}
- desc = irq_to_desc_alloc_node(new, node);
+ desc = irq_to_desc_alloc_node(new, node, NULL);
if (unlikely(!desc)) {
pr_info("can't get irq_desc for %d\n", new);
goto out_unlock;
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 2f84137..0d4f0d3 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -341,7 +341,7 @@ static int find_unbound_irq(void)
if (irq == nr_irqs)
panic("No available IRQ to bind to: increase nr_irqs!\n");
- desc = irq_to_desc_alloc_node(irq, 0);
+ desc = irq_to_desc_alloc_node(irq, 0, NULL);
if (WARN_ON(desc == NULL))
return -1;
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 75f3f00..cc4cd22 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -611,6 +611,6 @@ struct irq_desc;
extern int early_irq_init(void);
extern int arch_probe_nr_irqs(void);
extern int arch_early_irq_init(void);
-extern int arch_init_chip_data(struct irq_desc *desc, int node);
+extern int arch_init_irq_desc(struct irq_desc *desc, int node);
#endif
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 707ab12..e262ca0 100644
@@ -225,7 +229,9 @@ static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
}
#endif
-extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node);
+typedef int (*init_chip_data_fn)(struct irq_desc *, int node);
+extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node,
+ init_chip_data_fn fn);
/*
* Pick up the arch-dependent methods:
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 76d5a67..eb060e1 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -100,7 +100,8 @@ void __ref init_kstat_irqs(struct irq_desc *desc, int node, int nr)
}
}
-static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
+static void init_one_irq_desc(int irq, struct irq_desc *desc, int node,
+ init_chip_data_fn init_chip_data)
{
memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
@@ -120,7 +121,9 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
BUG_ON(1);
}
init_desc_masks(desc);
- arch_init_chip_data(desc, node);
+ arch_init_irq_desc(desc, node);
+ if (init_chip_data)
+ init_chip_data(desc, node);
}
/*
@@ -198,7 +201,8 @@ int __init early_irq_init(void)
return arch_early_irq_init();
}
-struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
+struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node,
+ init_chip_data_fn init_chip_data)
{
struct irq_desc *desc;
unsigned long flags;
@@ -227,7 +231,7 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
printk(KERN_ERR "can not alloc irq_desc\n");
BUG_ON(1);
}
- init_one_irq_desc(irq, desc, node);
+ init_one_irq_desc(irq, desc, node, init_chip_data);
set_irq_desc(irq, desc);
@@ -277,7 +281,8 @@ struct irq_desc *irq_to_desc(unsigned int irq)
return (irq < NR_IRQS) ? irq_desc + irq : NULL;
}
-struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node)
+struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node,
+ init_chip_data_fn init_chip_data)
{
return irq_to_desc(irq);
}
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 963559d..9ea09c9 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -47,7 +47,8 @@ static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids);
init_copy_desc_masks(old_desc, desc);
- arch_init_copy_chip_data(old_desc, desc, node);
+ if (desc->chip->copy_chip_data)
+ desc->chip->copy_chip_data(old_desc, desc, node);
return true;
}
@@ -55,7 +56,8 @@ static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
{
free_kstat_irqs(old_desc, desc);
free_desc_masks(old_desc, desc);
- arch_free_chip_data(old_desc, desc);
+ if (desc->chip->free_chip_data)
+ desc->chip->free_chip_data(old_desc, desc);
}
static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
@@ -107,9 +109,15 @@ out_unlock:
struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
{
+
/* those static or target node is -1, do not move them */
if (desc->irq < NR_IRQS_LEGACY || node == -1)
return desc;
+ /* IRQ chip does not support movement */
+ if (desc->chip_data &&
+ (desc->chip->copy_chip_data == NULL ||
+ desc->chip->free_chip_data == NULL))
+ return desc;
if (desc->node != node)
desc = __real_move_irq_desc(desc, node);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 7c1a67e..3f4b80e 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -895,8 +895,7 @@ int __init __weak arch_early_irq_init(void)
{
return 0;
}
-
-int __weak arch_init_chip_data(struct irq_desc *desc, int node)
+int __weak arch_init_irq_desc(struct irq_desc *desc, int node)
{
return 0;
}
--
1.5.6.5
Since v2: pass x86_init_chip_data as a argument to
irq_to_desc_alloc_node instead of calling in the arch code in order to
get correct locking wrt the core code. Small impact on the SH arch code
which is the only other user outside x86 and powerpc.
Since v1: dropped the renaming portion of the series since it
was basically wrong, the functions I'd implicated as ioapic specific are
not at all.
Ian.
The following changes since commit 1ebbdcc83e75697c0d75eb091df172b7d93c84c1:
Ingo Molnar (1):
Merge branch 'perf/urgent'
are available in the git repository at:
git://xenbits.xensource.com/people/ianc/linux-2.6.git for-x86/irq
Ian Campbell (2):
irq: move some interrupt arch_* functions into struct irq_chip.
x86: irq_desc->chip_data is always correct whether or not SPARSE_IRQ is enabled.
arch/powerpc/kernel/irq.c | 4 +-
arch/sh/kernel/cpu/irq/ipr.c | 2 +-
arch/x86/include/asm/hw_irq.h | 11 ++++++-
arch/x86/kernel/apic/io_apic.c | 67 ++++++++++++++++++++++++++++++++--------
arch/x86/kernel/uv_irq.c | 5 +++
arch/x86/lguest/boot.c | 2 +-
drivers/sh/intc.c | 7 ++--
drivers/xen/events.c | 2 +-
include/linux/interrupt.h | 2 +-
include/linux/irq.h | 16 ++++++---
kernel/irq/handle.c | 15 ++++++---
kernel/irq/numa_migrate.c | 12 ++++++-
kernel/softirq.c | 3 +-
13 files changed, 111 insertions(+), 37 deletions(-)
Signed-off-by: Ian Campbell <ian.ca...@citrix.com>
Cc: Thomas Gleixner <tg...@linutronix.de>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Eric W. Biederman <ebie...@xmission.com>
Cc: Yinghai Lu <yin...@kernel.org>
Cc: Jeremy Fitzhardinge <jer...@goop.org>
Cc: x...@kernel.org
Cc: linux-...@vger.kernel.org
---
arch/x86/kernel/apic/io_apic.c | 9 ++-------
1 files changed, 2 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 035b6d2..a7c45c5 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -178,7 +178,6 @@ int __init arch_early_irq_init(void)
return 0;
}
-#ifdef CONFIG_SPARSE_IRQ
struct irq_cfg *irq_cfg(unsigned int irq)
{
struct irq_cfg *cfg = NULL;
@@ -191,6 +190,8 @@ struct irq_cfg *irq_cfg(unsigned int irq)
return cfg;
}
+#ifdef CONFIG_SPARSE_IRQ
+
static struct irq_cfg *get_one_free_irq_cfg(int node)
{
struct irq_cfg *cfg;
@@ -331,16 +332,10 @@ void x86_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
/* end for move_irq_desc */
#else
-struct irq_cfg *irq_cfg(unsigned int irq)
-{
- return irq < nr_irqs ? irq_cfgx + irq : NULL;
-}
-
int x86_init_chip_data(struct irq_desc *desc, int node)
{
return 0;
}
-
#endif
struct io_apic {
--
1.5.6.5
--
Subject: [PATCH -v4] irq: move some interrupt arch_* functions into struct irq_chip.
From: Ian Campbell <ian.ca...@citrix.com>
Move arch_init_copy_chip_data and arch_free_chip_data into function
pointers in struct irq_chip since they operate on irq_desc->chip_data.
arch_init_chip_data cannot be moved into struct irq_chip because
irq_desc->chip is not known at the time the irq_desc is setup. Instead
rename arch_init_chip_data to arch_init_irq_desc for PowerPC, the
only other user, whose usage better matches the new name.
To replace the x86 arch_init_chip_data functionality
irq_to_desc_alloc_node now takes a pointer to a function to allocate
the chip data. This is necessary to ensure the allocation happens
under the correct locking at the core level. On PowerPC and SH
architectures (the other users of irq_to_desc_alloc_node) pass in NULL
which retains existing chip_data behaviour.
I've retained the chip_data behaviour for uv_irq although it isn't
clear to me if these interrupt types support migration or how closely
related to the APIC modes they really are. If it weren't for this the
x86_{init,copy,free}_chip_data functions could be static to
io_apic.c.
I've tested by booting on an 64 bit x86 system with sparse IRQ enabled
and 32 bit without, but it's not clear to me what actions I need to
take to actually exercise some of these code paths.
-v4: yinghai add irq_to_desc_alloc_node_x...
so could leave default path not changed...
Signed-off-by: Ian Campbell <ian.ca...@citrix.com>
Acked-by: Michael Ellerman <mic...@ellerman.id.au> [PowerPC rename portion]
Cc: Thomas Gleixner <tg...@linutronix.de>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Eric W. Biederman <ebie...@xmission.com>
Cc: Yinghai Lu <yin...@kernel.org>
Cc: Jeremy Fitzhardinge <jer...@goop.org>
Cc: Benjamin Herrenschmidt <be...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: x...@kernel.org
Cc: linuxp...@ozlabs.org
Cc: linux-...@vger.kernel.org
Cc: Rusty Russell <ru...@rustcorp.com.au>
Cc: lgu...@ozlabs.org
Cc: Paul Mundt <let...@linux-sh.org>
Cc: linu...@vger.kernel.org
---
arch/powerpc/kernel/irq.c | 2 -
arch/x86/include/asm/hw_irq.h | 7 +++++
arch/x86/kernel/apic/io_apic.c | 49 +++++++++++++++++++++++++++++++++++++----
arch/x86/kernel/uv_irq.c | 3 ++
drivers/xen/events.c | 7 +++++
include/linux/interrupt.h | 1
include/linux/irq.h | 20 ++++++++++++----
kernel/irq/chip.c | 7 +++++
kernel/irq/handle.c | 13 ++++++----
kernel/irq/numa_migrate.c | 12 ++++++++--
kernel/softirq.c | 5 ----
11 files changed, 101 insertions(+), 25 deletions(-)
Index: linux-2.6/arch/powerpc/kernel/irq.c
===================================================================
--- linux-2.6.orig/arch/powerpc/kernel/irq.c
+++ linux-2.6/arch/powerpc/kernel/irq.c
@@ -1088,7 +1088,7 @@ int arch_early_irq_init(void)
return 0;
}
-int arch_init_chip_data(struct irq_desc *desc, int node)
+int arch_init_irq_desc(struct irq_desc *desc, int node, init_chip_data_fn fn)
{
desc->status |= IRQ_NOREQUEST;
return 0;
Index: linux-2.6/arch/x86/include/asm/hw_irq.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/hw_irq.h
+++ linux-2.6/arch/x86/include/asm/hw_irq.h
@@ -20,9 +20,9 @@
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/smp.h>
+#include <linux/irq.h>
#include <asm/atomic.h>
-#include <asm/irq.h>
#include <asm/sections.h>
/* Interrupt handlers registered during init_IRQ */
@@ -61,6 +61,11 @@ extern void init_VISWS_APIC_irqs(void);
extern void setup_IO_APIC(void);
extern void disable_IO_APIC(void);
+extern void x86_copy_chip_data(struct irq_desc *old_desc,
+ struct irq_desc *desc, int node);
+extern void x86_free_chip_data(struct irq_desc *old_desc,
+ struct irq_desc *desc);
+
struct io_apic_irq_attr {
int ioapic;
int ioapic_pin;
Index: linux-2.6/arch/x86/kernel/apic/io_apic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/apic/io_apic.c
+++ linux-2.6/arch/x86/kernel/apic/io_apic.c
@@ -211,7 +211,7 @@ static struct irq_cfg *get_one_free_irq_
return cfg;
}
-int arch_init_chip_data(struct irq_desc *desc, int node)
+static int x86_init_chip_data(struct irq_desc *desc, int node)
{
struct irq_cfg *cfg;
@@ -287,8 +287,8 @@ static void free_irq_2_pin(struct irq_cf
old_cfg->irq_2_pin = NULL;
}
-void arch_init_copy_chip_data(struct irq_desc *old_desc,
- struct irq_desc *desc, int node)
+void x86_copy_chip_data(struct irq_desc *old_desc,
+ struct irq_desc *desc, int node)
{
struct irq_cfg *cfg;
struct irq_cfg *old_cfg;
@@ -312,7 +312,7 @@ static void free_irq_cfg(struct irq_cfg
kfree(old_cfg);
}
-void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
+void x86_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
{
struct irq_cfg *old_cfg, *cfg;
@@ -329,6 +329,14 @@ void arch_free_chip_data(struct irq_desc
}
}
/* end for move_irq_desc */
+int arch_init_irq_desc(struct irq_desc *desc, int node,
+ init_chip_data_fn init_chip_data)
+{
+ if (!init_chip_data)
+ return x86_init_chip_data(desc, node);
+
+ return init_chip_data(desc, node);
+}
#else
struct irq_cfg *irq_cfg(unsigned int irq)
@@ -336,6 +344,15 @@ struct irq_cfg *irq_cfg(unsigned int irq
return irq < nr_irqs ? irq_cfgx + irq : NULL;
}
+void x86_copy_chip_data(struct irq_desc *old_desc,
+ struct irq_desc *desc, int node)
+{
+}
+
+void x86_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
+{
+}
+
#endif
struct io_apic {
@@ -2747,6 +2764,9 @@ static struct irq_chip ioapic_chip __rea
.set_affinity = set_ioapic_affinity_irq,
#endif
.retrigger = ioapic_retrigger_irq,
+
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
};
static struct irq_chip ir_ioapic_chip __read_mostly = {
@@ -2762,6 +2782,9 @@ static struct irq_chip ir_ioapic_chip __
#endif
#endif
.retrigger = ioapic_retrigger_irq,
+
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
};
static inline void init_IO_APIC_traps(void)
@@ -3474,6 +3497,9 @@ static struct irq_chip msi_chip = {
.set_affinity = set_msi_irq_affinity,
#endif
.retrigger = ioapic_retrigger_irq,
+
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
};
static struct irq_chip msi_ir_chip = {
@@ -3487,6 +3513,9 @@ static struct irq_chip msi_ir_chip = {
#endif
#endif
.retrigger = ioapic_retrigger_irq,
+
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
};
/*
@@ -3646,6 +3675,9 @@ static struct irq_chip dmar_msi_type = {
.set_affinity = dmar_msi_set_affinity,
#endif
.retrigger = ioapic_retrigger_irq,
+
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
};
int arch_setup_dmar_msi(unsigned int irq)
@@ -3703,6 +3735,9 @@ static struct irq_chip ir_hpet_msi_type
#endif
#endif
.retrigger = ioapic_retrigger_irq,
+
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
};
static struct irq_chip hpet_msi_type = {
@@ -3714,6 +3749,9 @@ static struct irq_chip hpet_msi_type = {
.set_affinity = hpet_msi_set_affinity,
#endif
.retrigger = ioapic_retrigger_irq,
+
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
};
int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
@@ -3800,6 +3838,9 @@ static struct irq_chip ht_irq_chip = {
.set_affinity = set_ht_irq_affinity,
#endif
.retrigger = ioapic_retrigger_irq,
+
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
};
int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
Index: linux-2.6/arch/x86/kernel/uv_irq.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/uv_irq.c
+++ linux-2.6/arch/x86/kernel/uv_irq.c
@@ -55,6 +55,9 @@ struct irq_chip uv_irq_chip = {
.eoi = uv_ack_apic,
.end = uv_noop,
.set_affinity = uv_set_irq_affinity,
+
+ .copy_chip_data = x86_copy_chip_data,
+ .free_chip_data = x86_free_chip_data,
};
/*
Index: linux-2.6/drivers/xen/events.c
===================================================================
--- linux-2.6.orig/drivers/xen/events.c
+++ linux-2.6/drivers/xen/events.c
@@ -329,6 +329,11 @@ static void unmask_evtchn(int port)
put_cpu();
}
+static int xen_init_chip_data(struct irq_desc *desc, int node)
+{
+ return 0;
+}
+
static int find_unbound_irq(void)
{
int irq;
@@ -341,7 +346,7 @@ static int find_unbound_irq(void)
if (irq == nr_irqs)
panic("No available IRQ to bind to: increase nr_irqs!\n");
- desc = irq_to_desc_alloc_node(irq, 0);
+ desc = irq_to_desc_alloc_node_x(irq, 0, xen_init_chip_data);
if (WARN_ON(desc == NULL))
return -1;
Index: linux-2.6/include/linux/interrupt.h
===================================================================
--- linux-2.6.orig/include/linux/interrupt.h
+++ linux-2.6/include/linux/interrupt.h
@@ -611,6 +611,5 @@ struct irq_desc;
extern int early_irq_init(void);
extern int arch_probe_nr_irqs(void);
extern int arch_early_irq_init(void);
-extern int arch_init_chip_data(struct irq_desc *desc, int node);
#endif
Index: linux-2.6/include/linux/irq.h
===================================================================
--- linux-2.6.orig/include/linux/irq.h
+++ linux-2.6/include/linux/irq.h
@@ -131,6 +131,12 @@ struct irq_chip {
void (*bus_lock)(unsigned int irq);
void (*bus_sync_unlock)(unsigned int irq);
+ /* for move_irq_desc */
+ void (*copy_chip_data)(struct irq_desc *old_desc,
+ struct irq_desc *desc, int node);
+ void (*free_chip_data)(struct irq_desc *old_desc,
+ struct irq_desc *desc);
+
/* Currently used only by UML, might disappear one day.*/
#ifdef CONFIG_IRQ_RELEASE_METHOD
void (*release)(unsigned int irq, void *dev_id);
@@ -208,10 +214,6 @@ struct irq_desc {
const char *name;
} ____cacheline_internodealigned_in_smp;
-extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
- struct irq_desc *desc, int node);
-extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc);
-
#ifndef CONFIG_SPARSE_IRQ
extern struct irq_desc irq_desc[NR_IRQS];
#endif
@@ -225,7 +227,15 @@ static inline struct irq_desc *move_irq_
}
#endif
-extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node);
+typedef int (*init_chip_data_fn)(struct irq_desc *, int node);
+int arch_init_irq_desc(struct irq_desc *desc, int node, init_chip_data_fn fn);
+struct irq_desc *irq_to_desc_alloc_node_x(unsigned int irq, int node,
+ init_chip_data_fn fn);
+static inline struct irq_desc *irq_to_desc_alloc_node(unsigned int irq,
+ int node)
+{
+ return irq_to_desc_alloc_node_x(irq, node, NULL);
+}
/*
* Pick up the arch-dependent methods:
Index: linux-2.6/kernel/irq/handle.c
===================================================================
--- linux-2.6.orig/kernel/irq/handle.c
+++ linux-2.6/kernel/irq/handle.c
@@ -100,7 +100,8 @@ void __ref init_kstat_irqs(struct irq_de
}
}
-static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
+static void init_one_irq_desc(int irq, struct irq_desc *desc, int node,
+ init_chip_data_fn init_chip_data)
{
memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
@@ -120,7 +121,7 @@ static void init_one_irq_desc(int irq, s
BUG_ON(1);
}
init_desc_masks(desc);
- arch_init_chip_data(desc, node);
+ arch_init_irq_desc(desc, node, init_chip_data);
}
/*
@@ -198,7 +199,8 @@ int __init early_irq_init(void)
return arch_early_irq_init();
}
-struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
+struct irq_desc * __ref irq_to_desc_alloc_node_x(unsigned int irq, int node,
+ init_chip_data_fn init_chip_data)
{
struct irq_desc *desc;
unsigned long flags;
@@ -227,7 +229,7 @@ struct irq_desc * __ref irq_to_desc_allo
printk(KERN_ERR "can not alloc irq_desc\n");
BUG_ON(1);
}
- init_one_irq_desc(irq, desc, node);
+ init_one_irq_desc(irq, desc, node, init_chip_data);
set_irq_desc(irq, desc);
@@ -277,7 +279,8 @@ struct irq_desc *irq_to_desc(unsigned in
return (irq < NR_IRQS) ? irq_desc + irq : NULL;
}
-struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node)
+struct irq_desc *irq_to_desc_alloc_node_x(unsigned int irq, int node,
+ init_chip_data_fn init_chip_data)
{
return irq_to_desc(irq);
}
Index: linux-2.6/kernel/irq/numa_migrate.c
===================================================================
--- linux-2.6.orig/kernel/irq/numa_migrate.c
+++ linux-2.6/kernel/irq/numa_migrate.c
@@ -47,7 +47,8 @@ static bool init_copy_one_irq_desc(int i
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids);
init_copy_desc_masks(old_desc, desc);
- arch_init_copy_chip_data(old_desc, desc, node);
+ if (desc->chip->copy_chip_data)
+ desc->chip->copy_chip_data(old_desc, desc, node);
return true;
}
@@ -55,7 +56,8 @@ static void free_one_irq_desc(struct irq
{
free_kstat_irqs(old_desc, desc);
free_desc_masks(old_desc, desc);
- arch_free_chip_data(old_desc, desc);
+ if (desc->chip->free_chip_data)
+ desc->chip->free_chip_data(old_desc, desc);
}
static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
@@ -107,9 +109,15 @@ out_unlock:
struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
{
+
/* those static or target node is -1, do not move them */
if (desc->irq < NR_IRQS_LEGACY || node == -1)
return desc;
+ /* IRQ chip does not support movement */
+ if (desc->chip_data &&
+ (desc->chip->copy_chip_data == NULL ||
+ desc->chip->free_chip_data == NULL))
+ return desc;
if (desc->node != node)
desc = __real_move_irq_desc(desc, node);
Index: linux-2.6/kernel/softirq.c
===================================================================
--- linux-2.6.orig/kernel/softirq.c
+++ linux-2.6/kernel/softirq.c
@@ -895,8 +895,3 @@ int __init __weak arch_early_irq_init(vo
{
return 0;
}
-
-int __weak arch_init_chip_data(struct irq_desc *desc, int node)
-{
- return 0;
-}
Index: linux-2.6/kernel/irq/chip.c
===================================================================
--- linux-2.6.orig/kernel/irq/chip.c
+++ linux-2.6/kernel/irq/chip.c
@@ -758,3 +758,10 @@ void __init set_irq_probe(unsigned int i
desc->status &= ~IRQ_NOPROBE;
raw_spin_unlock_irqrestore(&desc->lock, flags);
}
+
+int __weak arch_init_irq_desc(struct irq_desc *desc, int node,
+ init_chip_data_fn init_chip_data)
+{
+ return 0;
+}
+