Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

[PATCH 2/5] genirq: Allow the affinity of a percpu interrupt to be set/retrieved

9 views
Skip to first unread message

Marc Zyngier

unread,
Apr 11, 2016, 5:00:07 AM4/11/16
to
In order to prepare the genirq layer for the concept of partitionned
percpu interrupts, let's allow an affinity to be associated with
such an interrupt. We introduce:

- irq_set_percpu_devid_partition: flag an interrupt as a percpu-devid
interrupt, and associate it with an affinity
- irq_get_percpu_devid_partition: allow the affinity of that interrupt
to be retrieved.

This will allow a driver to discover which CPUs the per-cpu interrupt
can actually fire on.

Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
include/linux/irq.h | 4 ++++
include/linux/irqdesc.h | 1 +
kernel/irq/irqdesc.c | 26 +++++++++++++++++++++++++-
3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index c4de623..4d758a7 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -530,6 +530,10 @@ static inline void irq_set_chip_and_handler(unsigned int irq, struct irq_chip *c
}

extern int irq_set_percpu_devid(unsigned int irq);
+extern int irq_set_percpu_devid_partition(unsigned int irq,
+ const struct cpumask *affinity);
+extern int irq_get_percpu_devid_partition(unsigned int irq,
+ struct cpumask *affinity);

extern void
__irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index dcca77c..b51beeb 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -66,6 +66,7 @@ struct irq_desc {
int threads_handled_last;
raw_spinlock_t lock;
struct cpumask *percpu_enabled;
+ const struct cpumask *percpu_affinity;
#ifdef CONFIG_SMP
const struct cpumask *affinity_hint;
struct irq_affinity_notify *affinity_notify;
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 0ccd028..8731e1c 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -595,7 +595,8 @@ void __irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags, bool bus)
chip_bus_sync_unlock(desc);
}

-int irq_set_percpu_devid(unsigned int irq)
+int irq_set_percpu_devid_partition(unsigned int irq,
+ const struct cpumask *affinity)
{
struct irq_desc *desc = irq_to_desc(irq);

@@ -610,10 +611,33 @@ int irq_set_percpu_devid(unsigned int irq)
if (!desc->percpu_enabled)
return -ENOMEM;

+ if (affinity)
+ desc->percpu_affinity = affinity;
+ else
+ desc->percpu_affinity = cpu_possible_mask;
+
irq_set_percpu_devid_flags(irq);
return 0;
}

+int irq_set_percpu_devid(unsigned int irq)
+{
+ return irq_set_percpu_devid_partition(irq, NULL);
+}
+
+int irq_get_percpu_devid_partition(unsigned int irq, struct cpumask *affinity)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ if (!desc || !desc->percpu_enabled)
+ return -EINVAL;
+
+ if (affinity)
+ cpumask_copy(affinity, desc->percpu_affinity);
+
+ return 0;
+}
+
void kstat_incr_irq_this_cpu(unsigned int irq)
{
kstat_incr_irqs_this_cpu(irq_to_desc(irq));
--
2.1.4

Marc Zyngier

unread,
Apr 11, 2016, 5:00:07 AM4/11/16
to
When iterating over the irq domain list, we try to match a domain
either by calling a match() function or by comparing a number
of fields passed as parameters.

Both approaches are a bit restrictive:
- match() is DT specific and only takes a device node
- the fallback case only deals with the fwnode_handle

It would be useful if we had a per-domain function that would
actually perform the matching check on the whole of the
irq_fwspec structure. This would allow for a domain to triage
matching attempts that need to extend beyond the fwnode.

Let's introduce irq_find_matching_fwspec(), which takes a full
blown irq_fwspec structure, and call into a select() function
implemented by the irqdomain. irq_find_matching_fwnode() is
made a wrapper around irq_find_matching_fwspec in order to
preserve compatibility.

Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
include/linux/irqdomain.h | 15 ++++++++++++++-
kernel/irq/irqdomain.c | 19 ++++++++++---------
2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 2aed043..997373b 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -96,6 +96,8 @@ enum irq_domain_bus_token {
struct irq_domain_ops {
int (*match)(struct irq_domain *d, struct device_node *node,
enum irq_domain_bus_token bus_token);
+ int (*select)(struct irq_domain *d, struct irq_fwspec *fwspec,
+ enum irq_domain_bus_token bus_token);
int (*map)(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw);
void (*unmap)(struct irq_domain *d, unsigned int virq);
int (*xlate)(struct irq_domain *d, struct device_node *node,
@@ -211,7 +213,7 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node,
irq_hw_number_t first_hwirq,
const struct irq_domain_ops *ops,
void *host_data);
-extern struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode,
+extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
enum irq_domain_bus_token bus_token);
extern void irq_set_default_host(struct irq_domain *host);
extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs,
@@ -227,6 +229,17 @@ static inline bool is_fwnode_irqchip(struct fwnode_handle *fwnode)
return fwnode && fwnode->type == FWNODE_IRQCHIP;
}

+static inline
+struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode,
+ enum irq_domain_bus_token bus_token)
+{
+ struct irq_fwspec fwspec = {
+ .fwnode = fwnode,
+ };
+
+ return irq_find_matching_fwspec(&fwspec, bus_token);
+}
+
static inline struct irq_domain *irq_find_matching_host(struct device_node *node,
enum irq_domain_bus_token bus_token)
{
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 3a519a0..503c5b9 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -243,14 +243,15 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node,
EXPORT_SYMBOL_GPL(irq_domain_add_legacy);

/**
- * irq_find_matching_fwnode() - Locates a domain for a given fwnode
- * @fwnode: FW descriptor of the interrupt controller
+ * irq_find_matching_fwspec() - Locates a domain for a given fwspec
+ * @fwspec: FW specifier for an interrupt
* @bus_token: domain-specific data
*/
-struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode,
+struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
enum irq_domain_bus_token bus_token)
{
struct irq_domain *h, *found = NULL;
+ struct fwnode_handle *fwnode = fwspec->fwnode;
int rc;

/* We might want to match the legacy controller last since
@@ -264,7 +265,9 @@ struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode,
*/
mutex_lock(&irq_domain_mutex);
list_for_each_entry(h, &irq_domain_list, link) {
- if (h->ops->match)
+ if (h->ops->select && fwspec->param_count)
+ rc = h->ops->select(h, fwspec, bus_token);
+ else if (h->ops->match)
rc = h->ops->match(h, to_of_node(fwnode), bus_token);
else
rc = ((fwnode != NULL) && (h->fwnode == fwnode) &&
@@ -279,7 +282,7 @@ struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode,
mutex_unlock(&irq_domain_mutex);
return found;
}
-EXPORT_SYMBOL_GPL(irq_find_matching_fwnode);
+EXPORT_SYMBOL_GPL(irq_find_matching_fwspec);

/**
* irq_set_default_host() - Set a "default" irq domain
@@ -574,11 +577,9 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
int virq;

if (fwspec->fwnode) {
- domain = irq_find_matching_fwnode(fwspec->fwnode,
- DOMAIN_BUS_WIRED);
+ domain = irq_find_matching_fwspec(fwspec, DOMAIN_BUS_WIRED);
if (!domain)
- domain = irq_find_matching_fwnode(fwspec->fwnode,
- DOMAIN_BUS_ANY);
+ domain = irq_find_matching_fwspec(fwspec, DOMAIN_BUS_ANY);
} else {
domain = irq_default_domain;
}
--
2.1.4

Marc Zyngier

unread,
Apr 11, 2016, 5:00:11 AM4/11/16
to
We've unfortunately started seeing a situation where percpu interrupts
are partitioned in the system: one arbitrary set of CPUs has an
interrupt connected to a type of device, while another disjoint set of
CPUs has the same interrupt connected to another type of device.

This makes it impossible to have a device driver requesting this
interrupt using the current percpu-interrupt abstraction, as the same
interrupt number is now potentially claimed by at least two drivers,
and we forbid interrupt sharing on per-cpu interrupt.

A potential solution to this has been proposed by Will Deacon,
expanding the handling in the core code:

http://lists.infradead.org/pipermail/linux-arm-kernel/2015-November/388800.html

followed by a counter-proposal from Thomas Gleixner, which Will tried
to implement, but ran into issues where the probing code was running
in preemptible context, making the percpu-ness of interrupts difficult
to guarantee.

Another approach to this is to turn things upside down. Let's assume
that our system describes all the possible partitions for a given
interrupt, and give each of them a unique identifier. It is then
possible to create a namespace where the affinity identifier itself is
a form of interrupt number. At this point, it becomes easy to
implement a set of partitions as a cascaded irqchip, each affinity
identifier being the secondary HW irq, as outlined in the following
example:

Aff-0: { cpu0 cpu3 }
Aff-1: { cpu1 cpu2 }
Aff-2: { cpu4 cpu5 cpu6 cpu7 }

Let's assume that HW interrupt 1 is partitioned over these 3
affinities. When HW interrupt 1 fires on a given CPU, all it takes is
to find out which affinity this CPU belongs to, which gives us a new
HW interrupt number. Bingo. Of course, this only works as long as you
don't have overlapping affinities (but if you do your system is broken
anyway).

This allows us to keep a number of nice properties:

- Each partition results in a separate percpu-interrupt (with a
restricted affinity), which keeps drivers happy. This alone
garantees that we do not have to change the programming model for
per-cpu interrupts.

- Because the underlying interrupt is still per-cpu, the overhead of
the indirection can be kept pretty minimal.

- The core code can ignore most of that crap.

For that purpose, we implement a small library that deals with some of
the boilerplate code, relying on platform-specific drivers to provide
a description of the affinity sets and a set of callbacks. This also
relies on a small change in the irqdomain layer, and now offers a way
for the affinity of a percpu interrupt to be retrieved by a driver.

As an example, the GICv3 driver has been adapted to use this new
feature. Patches on top of v4.6-r3, tested on an arm64 FVP model.

Marc Zyngier (5):
irqdomain: Allow domain matching on irq_fwspec
genirq: Allow the affinity of a percpu interrupt to be set/retrieved
irqchip: Add per-cpu interrupt partitioning library
irqchip/gic-v3: Add support for partitioned PPIs
DT: arm,gic-v3: Documment PPI partition support

.../bindings/interrupt-controller/arm,gic-v3.txt | 34 ++-
drivers/irqchip/Kconfig | 4 +
drivers/irqchip/Makefile | 1 +
drivers/irqchip/irq-gic-v3.c | 176 +++++++++++++-
drivers/irqchip/irq-partition-percpu.c | 256 +++++++++++++++++++++
include/linux/irq.h | 4 +
include/linux/irqchip/irq-partition-percpu.h | 59 +++++
include/linux/irqdesc.h | 1 +
include/linux/irqdomain.h | 15 +-
kernel/irq/irqdesc.c | 26 ++-
kernel/irq/irqdomain.c | 19 +-
11 files changed, 580 insertions(+), 15 deletions(-)
create mode 100644 drivers/irqchip/irq-partition-percpu.c
create mode 100644 include/linux/irqchip/irq-partition-percpu.h

--
2.1.4

Marc Zyngier

unread,
Apr 28, 2016, 10:50:07 AM4/28/16
to
Any comment on this? The Rockchip dudes have confirmed that this solves
their problems (big-little system with PMUs using the same PPI).

I've also posted a proof of concept patch for the ARM PMU over there:

https://lkml.org/lkml/2016/4/25/227

Thanks,

M.
--
Jazz is not dead. It just smells funny...

Thomas Gleixner

unread,
Apr 28, 2016, 1:30:07 PM4/28/16
to
On Thu, 28 Apr 2016, Marc Zyngier wrote:
> Any comment on this? The Rockchip dudes have confirmed that this solves
> their problems (big-little system with PMUs using the same PPI).

It's in my backlog ....

tip-bot for Marc Zyngier

unread,
May 2, 2016, 8:40:07 AM5/2/16
to
Commit-ID: 651e8b54abdeeaa36f5f54ffa05c18707a3cc1d0
Gitweb: http://git.kernel.org/tip/651e8b54abdeeaa36f5f54ffa05c18707a3cc1d0
Author: Marc Zyngier <marc.z...@arm.com>
AuthorDate: Mon, 11 Apr 2016 09:57:51 +0100
Committer: Thomas Gleixner <tg...@linutronix.de>
CommitDate: Mon, 2 May 2016 13:42:50 +0200

irqdomain: Allow domain matching on irq_fwspec

When iterating over the irq domain list, we try to match a domain
either by calling a match() function or by comparing a number
of fields passed as parameters.

Both approaches are a bit restrictive:
- match() is DT specific and only takes a device node
- the fallback case only deals with the fwnode_handle

It would be useful if we had a per-domain function that would
actually perform the matching check on the whole of the
irq_fwspec structure. This would allow for a domain to triage
matching attempts that need to extend beyond the fwnode.

Let's introduce irq_find_matching_fwspec(), which takes a full
blown irq_fwspec structure, and call into a select() function
implemented by the irqdomain. irq_find_matching_fwnode() is
made a wrapper around irq_find_matching_fwspec in order to
preserve compatibility.

Signed-off-by: Marc Zyngier <marc.z...@arm.com>
Cc: Mark Rutland <mark.r...@arm.com>
Cc: devic...@vger.kernel.org
Cc: Jason Cooper <ja...@lakedaemon.net>
Cc: Will Deacon <will....@arm.com>
Cc: Rob Herring <rob...@kernel.org>
Link: http://lkml.kernel.org/r/1460365075-7316-2-git-...@arm.com
Signed-off-by: Thomas Gleixner <tg...@linutronix.de>

---
include/linux/irqdomain.h | 15 ++++++++++++++-
kernel/irq/irqdomain.c | 19 ++++++++++---------
2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 736abd7..f1f36e0 100644

tip-bot for Marc Zyngier

unread,
May 2, 2016, 8:40:08 AM5/2/16
to
Commit-ID: 222df54fd8b7641dcc81476f157806bb3144ee1d
Gitweb: http://git.kernel.org/tip/222df54fd8b7641dcc81476f157806bb3144ee1d
Author: Marc Zyngier <marc.z...@arm.com>
AuthorDate: Mon, 11 Apr 2016 09:57:52 +0100
Committer: Thomas Gleixner <tg...@linutronix.de>
CommitDate: Mon, 2 May 2016 13:42:51 +0200

genirq: Allow the affinity of a percpu interrupt to be set/retrieved

In order to prepare the genirq layer for the concept of partitionned
percpu interrupts, let's allow an affinity to be associated with
such an interrupt. We introduce:

- irq_set_percpu_devid_partition: flag an interrupt as a percpu-devid
interrupt, and associate it with an affinity
- irq_get_percpu_devid_partition: allow the affinity of that interrupt
to be retrieved.

This will allow a driver to discover which CPUs the per-cpu interrupt
can actually fire on.

Signed-off-by: Marc Zyngier <marc.z...@arm.com>
Cc: Mark Rutland <mark.r...@arm.com>
Cc: devic...@vger.kernel.org
Cc: Jason Cooper <ja...@lakedaemon.net>
Cc: Will Deacon <will....@arm.com>
Cc: Rob Herring <rob...@kernel.org>
Link: http://lkml.kernel.org/r/1460365075-7316-3-git-...@arm.com
Signed-off-by: Thomas Gleixner <tg...@linutronix.de>

Geert Uytterhoeven

unread,
May 19, 2016, 7:10:06 AM5/19/16
to
Hi Marc,

On Mon, Apr 11, 2016 at 10:57 AM, Marc Zyngier <marc.z...@arm.com> wrote:
> In order to prepare the genirq layer for the concept of partitionned
> percpu interrupts, let's allow an affinity to be associated with
> such an interrupt. We introduce:
>
> - irq_set_percpu_devid_partition: flag an interrupt as a percpu-devid
> interrupt, and associate it with an affinity
> - irq_get_percpu_devid_partition: allow the affinity of that interrupt
> to be retrieved.
>
> This will allow a driver to discover which CPUs the per-cpu interrupt
> can actually fire on.
>
> Signed-off-by: Marc Zyngier <marc.z...@arm.com>

> --- a/include/linux/irqdesc.h
> +++ b/include/linux/irqdesc.h
> @@ -66,6 +66,7 @@ struct irq_desc {
> int threads_handled_last;
> raw_spinlock_t lock;
> struct cpumask *percpu_enabled;
> + const struct cpumask *percpu_affinity;

Adding this field showed up on my bloat-o-meter radar...

Does it make sense to move it (and percpu_enabled) inside the "#ifdef
CONFIG_SMP" below, and rework the code to not need it on UP?

> #ifdef CONFIG_SMP
> const struct cpumask *affinity_hint;
> struct irq_affinity_notify *affinity_notify;

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds

Marc Zyngier

unread,
May 19, 2016, 9:20:09 AM5/19/16
to
Hi Geert,

On 19/05/16 12:08, Geert Uytterhoeven wrote:
> Hi Marc,
>
> On Mon, Apr 11, 2016 at 10:57 AM, Marc Zyngier <marc.z...@arm.com> wrote:
>> In order to prepare the genirq layer for the concept of partitionned
>> percpu interrupts, let's allow an affinity to be associated with
>> such an interrupt. We introduce:
>>
>> - irq_set_percpu_devid_partition: flag an interrupt as a percpu-devid
>> interrupt, and associate it with an affinity
>> - irq_get_percpu_devid_partition: allow the affinity of that interrupt
>> to be retrieved.
>>
>> This will allow a driver to discover which CPUs the per-cpu interrupt
>> can actually fire on.
>>
>> Signed-off-by: Marc Zyngier <marc.z...@arm.com>
>
>> --- a/include/linux/irqdesc.h
>> +++ b/include/linux/irqdesc.h
>> @@ -66,6 +66,7 @@ struct irq_desc {
>> int threads_handled_last;
>> raw_spinlock_t lock;
>> struct cpumask *percpu_enabled;
>> + const struct cpumask *percpu_affinity;
>
> Adding this field showed up on my bloat-o-meter radar...

By how much?

> Does it make sense to move it (and percpu_enabled) inside the "#ifdef
> CONFIG_SMP" below, and rework the code to not need it on UP?
>
>> #ifdef CONFIG_SMP
>> const struct cpumask *affinity_hint;
>> struct irq_affinity_notify *affinity_notify;

I wonder if we couldn't actually unify affinity_hint and
percpu_affinity. I'll have a look.

Geert Uytterhoeven

unread,
May 19, 2016, 9:30:08 AM5/19/16
to
Hi Marc,

On Thu, May 19, 2016 at 3:13 PM, Marc Zyngier <marc.z...@arm.com> wrote:
> On 19/05/16 12:08, Geert Uytterhoeven wrote:
>> On Mon, Apr 11, 2016 at 10:57 AM, Marc Zyngier <marc.z...@arm.com> wrote:
>>> In order to prepare the genirq layer for the concept of partitionned
>>> percpu interrupts, let's allow an affinity to be associated with
>>> such an interrupt. We introduce:
>>>
>>> - irq_set_percpu_devid_partition: flag an interrupt as a percpu-devid
>>> interrupt, and associate it with an affinity
>>> - irq_get_percpu_devid_partition: allow the affinity of that interrupt
>>> to be retrieved.
>>>
>>> This will allow a driver to discover which CPUs the per-cpu interrupt
>>> can actually fire on.
>>>
>>> Signed-off-by: Marc Zyngier <marc.z...@arm.com>
>>
>>> --- a/include/linux/irqdesc.h
>>> +++ b/include/linux/irqdesc.h
>>> @@ -66,6 +66,7 @@ struct irq_desc {
>>> int threads_handled_last;
>>> raw_spinlock_t lock;
>>> struct cpumask *percpu_enabled;
>>> + const struct cpumask *percpu_affinity;
>>
>> Adding this field showed up on my bloat-o-meter radar...
>
> By how much?

By NR_IRQS * 4 bytes, which ranges from 32 to 1024 bytes on m68k,
depending on the platform.

>> Does it make sense to move it (and percpu_enabled) inside the "#ifdef
>> CONFIG_SMP" below, and rework the code to not need it on UP?
>>
>>> #ifdef CONFIG_SMP
>>> const struct cpumask *affinity_hint;
>>> struct irq_affinity_notify *affinity_notify;
>
> I wonder if we couldn't actually unify affinity_hint and
> percpu_affinity. I'll have a look.

Thanks!
0 new messages