Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

[PATCH v3 47/59] KVM: arm/arm64: GICv4: Propagate property updates to VLPIs

173 views
Skip to first unread message

Marc Zyngier

unread,
Jul 31, 2017, 1:30:09 PM7/31/17
to
Upon updating a property, we propagate it all the way to the physical
ITS, and ask for an INV command to be executed there.

Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
virt/kvm/arm/vgic/vgic-its.c | 3 +++
1 file changed, 3 insertions(+)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 65cc77fde609..9e46081e5f15 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -297,6 +297,9 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
spin_unlock(&irq->irq_lock);
}

+ if (irq->hw)
+ return its_prop_update_vlpi(irq->host_irq, prop, true);
+
return 0;
}

--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 1:30:09 PM7/31/17
to
Let's use the irq bypass mechanism introduced for platform device
interrupts to intercept the virtual PCIe endpoint configuration
and establish our LPI->VLPI mapping.

Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
include/kvm/arm_vgic.h | 8 ++++
virt/kvm/arm/arm.c | 27 ++++++++----
virt/kvm/arm/vgic/vgic-v4.c | 103 ++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 130 insertions(+), 8 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 359eeffe9857..050f78d4fb42 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -367,4 +367,12 @@ int kvm_vgic_set_forwarding(struct kvm *kvm, unsigned int host_irq,
void kvm_vgic_unset_forwarding(struct kvm *kvm, unsigned int host_irq,
unsigned int vintid);

+struct kvm_kernel_irq_routing_entry;
+
+int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq,
+ struct kvm_kernel_irq_routing_entry *irq_entry);
+
+int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
+ struct kvm_kernel_irq_routing_entry *irq_entry);
+
#endif /* __KVM_ARM_VGIC_H */
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index ebab6c29e3be..6803ea27c47d 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -1457,11 +1457,16 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
struct kvm_kernel_irqfd *irqfd =
container_of(cons, struct kvm_kernel_irqfd, consumer);

- if (prod->type != IRQ_BYPASS_VFIO_PLATFORM)
+ switch (prod->type) {
+ case IRQ_BYPASS_VFIO_PLATFORM:
+ return kvm_vgic_set_forwarding(irqfd->kvm, prod->irq,
+ irqfd->gsi + VGIC_NR_PRIVATE_IRQS);
+ case IRQ_BYPASS_VFIO_PCI_MSI:
+ return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq,
+ &irqfd->irq_entry);
+ default:
return 0;
-
- return kvm_vgic_set_forwarding(irqfd->kvm, prod->irq,
- irqfd->gsi + VGIC_NR_PRIVATE_IRQS);
+ }
}
void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
struct irq_bypass_producer *prod)
@@ -1469,11 +1474,17 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
struct kvm_kernel_irqfd *irqfd =
container_of(cons, struct kvm_kernel_irqfd, consumer);

- if (prod->type != IRQ_BYPASS_VFIO_PLATFORM)
- return;
+ switch (prod->type) {
+ case IRQ_BYPASS_VFIO_PLATFORM:
+ kvm_vgic_unset_forwarding(irqfd->kvm, prod->irq,
+ irqfd->gsi + VGIC_NR_PRIVATE_IRQS);
+ break;

- kvm_vgic_unset_forwarding(irqfd->kvm, prod->irq,
- irqfd->gsi + VGIC_NR_PRIVATE_IRQS);
+ case IRQ_BYPASS_VFIO_PCI_MSI:
+ kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq,
+ &irqfd->irq_entry);
+ break;
+ }
}

void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
index 207e1fda0dcd..338c86c5159f 100644
--- a/virt/kvm/arm/vgic/vgic-v4.c
+++ b/virt/kvm/arm/vgic/vgic-v4.c
@@ -72,3 +72,106 @@ void vgic_v4_teardown(struct kvm *kvm)
its_vm->nr_vpes = 0;
its_vm->vpes = NULL;
}
+
+static struct vgic_its *vgic_get_its(struct kvm *kvm,
+ struct kvm_kernel_irq_routing_entry *irq_entry)
+{
+ struct kvm_msi msi = (struct kvm_msi) {
+ .address_lo = irq_entry->msi.address_lo,
+ .address_hi = irq_entry->msi.address_hi,
+ .data = irq_entry->msi.data,
+ .flags = irq_entry->msi.flags,
+ .devid = irq_entry->msi.devid,
+ };
+
+ /*
+ * Get a reference on the LPI. If NULL, this is not a valid
+ * translation for any of our vITSs.
+ */
+ return vgic_msi_to_its(kvm, &msi);
+}
+
+int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
+ struct kvm_kernel_irq_routing_entry *irq_entry)
+{
+ struct vgic_its *its;
+ struct vgic_irq *irq;
+ struct its_vlpi_map map;
+ int ret;
+
+ if (!vgic_is_v4_capable(kvm))
+ return 0;
+
+ /*
+ * Get the ITS, and escape early on error (not a valid
+ * doorbell for any of our vITSs).
+ */
+ its = vgic_get_its(kvm, irq_entry);
+ if (IS_ERR(its))
+ return 0;
+
+ mutex_lock(&its->its_lock);
+
+ /* Perform then actual DevID/EventID -> LPI translation. */
+ ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
+ irq_entry->msi.data, &irq);
+ if (ret)
+ goto out;
+
+ /*
+ * Emit the mapping request. If it fails, the ITS probably
+ * isn't v4 compatible, so let's silently bail out. Holding
+ * the ITS lock should ensure that nothing can modify the
+ * target vcpu.
+ */
+ map = (struct its_vlpi_map) {
+ .vm = &kvm->arch.vgic.its_vm,
+ .vintid = irq->intid,
+ .db_enabled = true,
+ .vpe_idx = irq->target_vcpu->vcpu_id,
+ };
+
+ if (its_map_vlpi(virq, &map))
+ goto out;
+
+ irq->hw = true;
+ irq->host_irq = virq;
+
+out:
+ mutex_unlock(&its->its_lock);
+ return 0;
+}
+
+int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq,
+ struct kvm_kernel_irq_routing_entry *irq_entry)
+{
+ struct vgic_its *its;
+ struct vgic_irq *irq;
+ int ret;
+
+ if (!vgic_is_v4_capable(kvm))
+ return 0;
+
+ /*
+ * Get the ITS, and escape early on error (not a valid
+ * doorbell for any of our vITSs).
+ */
+ its = vgic_get_its(kvm, irq_entry);
+ if (IS_ERR(its))
+ return 0;
+
+ mutex_lock(&its->its_lock);
+
+ ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
+ irq_entry->msi.data, &irq);
+ if (ret)
+ goto out;
+
+ WARN_ON(!(irq->hw && irq->host_irq == virq));
+ irq->hw = false;
+ ret = its_unmap_vlpi(virq);
+
+out:
+ mutex_unlock(&its->its_lock);
+ return ret;
+}
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 1:30:09 PM7/31/17
to
When a guest issues a INVALL command targetting a collection, it must
be translated into a VINVALL for the VPE that has this collection.

This patch implements a hook that offers this functionallity to the
hypervisor.

Reviewed-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/irq-gic-v3-its.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index d94a51489f6c..16cdd1f60ebf 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -2226,6 +2226,10 @@ static int its_vpe_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)
its_vpe_deschedule(vpe);
return 0;

+ case INVALL_VPE:
+ its_send_vinvall(vpe);
+ return 0;
+
default:
return -EINVAL;
}
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 1:30:09 PM7/31/17
to
Add the required interfaces to schedule a VPE and perform a
VINVALL command.

Reviewed-by: Thomas Gleixner <tg...@linutronix.de>
Reviewed-by: Eric Auger <eric....@redhat.com>
Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/irq-gic-v4.c | 25 +++++++++++++++++++++++++
include/linux/irqchip/arm-gic-v4.h | 2 ++
2 files changed, 27 insertions(+)

diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c
index f7fe6181ec8d..86b46fb532ab 100644
--- a/drivers/irqchip/irq-gic-v4.c
+++ b/drivers/irqchip/irq-gic-v4.c
@@ -71,3 +71,28 @@ void its_free_vcpu_irqs(struct its_vm *vm)
irq_domain_remove(vm->domain);
irq_domain_free_fwnode(vm->fwnode);
}
+
+static int its_send_vpe_cmd(struct its_vpe *vpe, struct its_cmd_info *info)
+{
+ return irq_set_vcpu_affinity(vpe->irq, info);
+}
+
+int its_schedule_vpe(struct its_vpe *vpe, bool on)
+{
+ struct its_cmd_info info;
+
+ WARN_ON(preemptible());
+
+ info.cmd_type = on ? SCHEDULE_VPE : DESCHEDULE_VPE;
+
+ return its_send_vpe_cmd(vpe, &info);
+}
+
+int its_invall_vpe(struct its_vpe *vpe)
+{
+ struct its_cmd_info info = {
+ .cmd_type = INVALL_VPE,
+ };
+
+ return its_send_vpe_cmd(vpe, &info);
+}
diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h
index b31fb8aeba03..faaafd12d8c7 100644
--- a/include/linux/irqchip/arm-gic-v4.h
+++ b/include/linux/irqchip/arm-gic-v4.h
@@ -91,5 +91,7 @@ struct its_cmd_info {

int its_alloc_vcpu_irqs(struct its_vm *vm);
void its_free_vcpu_irqs(struct its_vm *vm);
+int its_schedule_vpe(struct its_vpe *vpe, bool on);
+int its_invall_vpe(struct its_vpe *vpe);

#endif
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 1:30:14 PM7/31/17
to
Add a bunch of GICv4-specific data structures that will get used in
subsequent patches.

Reviewed-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
include/linux/irqchip/arm-gic-v4.h | 92 ++++++++++++++++++++++++++++++++++++++
1 file changed, 92 insertions(+)
create mode 100644 include/linux/irqchip/arm-gic-v4.h

diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h
new file mode 100644
index 000000000000..5ba0fb6b094d
--- /dev/null
+++ b/include/linux/irqchip/arm-gic-v4.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2016,2017 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.z...@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __LINUX_IRQCHIP_ARM_GIC_V4_H
+#define __LINUX_IRQCHIP_ARM_GIC_V4_H
+
+struct its_vpe;
+
+/* Embedded in kvm.arch */
+struct its_vm {
+ struct fwnode_handle *fwnode;
+ struct irq_domain *domain;
+ struct page *vprop_page;
+ struct its_vpe **vpes;
+ int nr_vpes;
+ irq_hw_number_t db_lpi_base;
+ unsigned long *db_bitmap;
+ int nr_db_lpis;
+};
+
+/* Embedded in kvm_vcpu.arch */
+struct its_vpe {
+ struct page *vpt_page;
+ struct its_vm *its_vm;
+ /* Doorbell interrupt */
+ int irq;
+ irq_hw_number_t vpe_db_lpi;
+ /*
+ * This collection ID is used to indirect the target
+ * redistributor for this VPE. The ID itself isn't involved in
+ * programming of the ITS.
+ */
+ u16 col_idx;
+ /* Unique (system-wide) VPE identifier */
+ u16 vpe_id;
+ /* Implementation Defined Area Invalid */
+ bool idai;
+ /* Pending VLPIs on schedule out? */
+ bool pending_last;
+};
+
+/*
+ * struct its_vlpi_map: structure describing the mapping of a
+ * VLPI. Only to be interpreted in the context of a physical interrupt
+ * it complements. To be used as the vcpu_info passed to
+ * irq_set_vcpu_affinity().
+ *
+ * @vm: Pointer to the GICv4 notion of a VM
+ * @vintid: Virtual LPI number
+ * @db_enabled: Is the VPE doorbell to be generated?
+ * @vpe_idx: Index (0-based) of the VPE in this VM. Not the vpe_id!
+ */
+struct its_vlpi_map {
+ struct its_vm *vm;
+ u32 vintid;
+ bool db_enabled;
+ u16 vpe_idx;
+};
+
+enum its_vcpu_info_cmd_type {
+ MAP_VLPI,
+ GET_VLPI,
+ PROP_UPDATE_VLPI,
+ PROP_UPDATE_AND_INV_VLPI,
+ SCHEDULE_VPE,
+ DESCHEDULE_VPE,
+ INVALL_VPE,
+};
+
+struct its_cmd_info {
+ enum its_vcpu_info_cmd_type cmd_type;
+ union {
+ struct its_vlpi_map *map;
+ u8 config;
+ };
+};
+
+#endif
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 1:30:15 PM7/31/17
to
Add the basic GICv4 VPE (vcpu in GICv4 parlance) infrastructure
(irqchip, irq domain) that is going to be populated in the following
patches.

Reviewed-by: Eric Auger <eric....@redhat.com>
Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/irq-gic-v3-its.c | 33 ++++++++++++++++++++++++++++++++-
1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index b618c259be8b..1fa345d53088 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -2026,6 +2026,13 @@ static const struct irq_domain_ops its_domain_ops = {
.deactivate = its_irq_domain_deactivate,
};

+static struct irq_chip its_vpe_irq_chip = {
+ .name = "GICv4-vpe",
+};
+
+static const struct irq_domain_ops its_vpe_domain_ops = {
+};
+
static int its_force_quiescent(void __iomem *base)
{
u32 count = 1000000; /* 1s */
@@ -2142,6 +2149,11 @@ static int its_init_domain(struct fwnode_handle *handle, struct its_node *its)
return 0;
}

+static int its_init_vpe_domain(void)
+{
+ return 0;
+}
+
static int __init its_compute_its_list_map(struct resource *res,
void __iomem *its_base)
{
@@ -2484,6 +2496,9 @@ int __init its_init(struct fwnode_handle *handle, struct rdists *rdists,
struct irq_domain *parent_domain)
{
struct device_node *of_node;
+ struct its_node *its;
+ bool has_v4 = false;
+ int err;

its_parent = parent_domain;
of_node = to_of_node(handle);
@@ -2498,5 +2513,21 @@ int __init its_init(struct fwnode_handle *handle, struct rdists *rdists,
}

gic_rdists = rdists;
- return its_alloc_lpi_tables();
+ err = its_alloc_lpi_tables();
+ if (err)
+ return err;
+
+ its_lpi_init(rdists->id_bits);
+
+ list_for_each_entry(its, &its_nodes, entry)
+ has_v4 |= its->is_v4;
+
+ if (has_v4 & rdists->has_vlpis) {
+ if (its_init_vpe_domain()) {
+ rdists->has_vlpis = false;
+ pr_err("ITS: Disabling GICv4 support\n");
+ }
+ }
+

Marc Zyngier

unread,
Jul 31, 2017, 1:30:16 PM7/31/17
to
This (monster of a) series implements full support for GICv4, bringing
direct injection of MSIs to KVM on arm and arm64, assuming you have
the right hardware (which is quite unlikely).

To get an idea of the design, I'd recommend you start with patches #33
and #57, which try to shed some light on the approach that I've
taken. And before that, please digest some of the GICv3/GICv4
architecture documentation[1] (less than 800 pages!). Once you feel
reasonably insane, you'll be in the right mood to read the code.

The structure of the series is fairly simple. The initial 35 patches
add some generic support for GICv4, while the rest of the code plugs
KVM into it. This series relies on Eric Auger's irq-bypass series[2],
which is a prerequisite for this work, as well as a KVM fix by
Shanker. The last two patches add a quirk for the Huawei D05 box on
which I tested this code, and which is added here for completeness and
for people to test.

The stack has been *very lightly* tested on an arm64 model, with a PCI
virtio block device passed from the host to a guest (using kvmtool and
Jean-Philippe Brucker's excellent VFIO support patches[3]), as well as
the Huawei D05 box I mentionend above using an Intel I350 Ethernet
card and passing a VF into the guest. I expect things to be (not so)
subtly broken, so go forward and test if you can, though I'm mostly
interested in people reviewing the code at the moment.

I've pushed out a branch based on 4.13-rc3 containing the dependencies:

git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git kvm-arm64/gicv4-kvm

* From v2:

- Lots of fixes all over the map (the doorbell code was amazingly
wrong, both at the GICv4 level and in KVM)
- KVM GICv4 enablement is now gated by a command-line option and
defaults to off
- Now properly deals with ordering of vITS creation
- Some debugging features
- More documentation so that I can forget what this is all about
- Huawei D05 quirks

* From v1:
- The bulk of the 30-something initial patches have seen countless
bugs being fixed, and some key data structures have been subtly
tweaked (or killed altogether). They are still quite similar to
what I had in v1 though.
- The whole KVM code is brand new and as I said above, only lightly
tested.
- Collected a bunch a R-bs from Thomas and Eric (many thanks, guys).

[1] https://static.docs.arm.com/ihi0069/c/IHI0069C_gic_architecture_specification.pfd
[2] http://www.spinics.net/lists/kvm/msg151463.html
[3] http://www.spinics.net/lists/kvm/msg151823.html

Marc Zyngier (59):
genirq: Let irq_set_vcpu_affinity() iterate over hierarchy
irqchip/gic-v3: Add redistributor iterator
irqchip/gic-v3: Add VLPI/DirectLPI discovery
irqchip/gic-v3-its: Move LPI definitions around
irqchip/gic-v3-its: Add probing for VLPI properties
irqchip/gic-v3-its: Macro-ize its_send_single_command
irqchip/gic-v3-its: Implement irq_set_irqchip_state for pending state
irqchip/gic-v3-its: Split out property table allocation
irqchip/gic-v3-its: Allow use of indirect VCPU tables
irqchip/gic-v3-its: Split out pending table allocation
irqchip/gic-v3-its: Rework LPI freeing
irqchip/gic-v3-its: Generalize device table allocation
irqchip/gic-v3-its: Generalize LPI configuration
irqchip/gic-v4: Add management structure definitions
irqchip/gic-v3-its: Add GICv4 ITS command definitions
irqchip/gic-v3-its: Add VLPI configuration hook
irqchip/gic-v3-its: Add VLPI map/unmap operations
irqchip/gic-v3-its: Add VLPI configuration handling
irqchip/gic-v3-its: Add VPE domain infrastructure
irqchip/gic-v3-its: Add VPE irq domain allocation/teardown
irqchip/gic-v3-its: Add VPE irq domain [de]activation
irqchip/gic-v3-its: Add VPENDBASER/VPROPBASER accessors
irqchip/gic-v3-its: Add VPE scheduling
irqchip/gic-v3-its: Add VPE invalidation hook
irqchip/gic-v3-its: Add VPE affinity changes
irqchip/gic-v3-its: Add VPE interrupt masking
irqchip/gic-v3-its: Support VPE doorbell invalidation even when
!DirectLPI
irqchip/gic-v3-its: Allow doorbell interrupts to be injected/cleared
irqchip/gic-v3-its: Set implementation defined bit to enable VLPIs
irqchip/gic-v4: Add per-VM VPE domain creation
irqchip/gic-v4: Add VPE command interface
irqchip/gic-v4: Add VLPI configuration interface
irqchip/gic-v4: Add some basic documentation
irqchip/gic-v4: Enable low-level GICv4 operations
irqchip/gic-v3: Advertise GICv4 support to KVM
KVM: arm: Select ARM_GIC_V3 and ARM_GIC_V3_ITS
KVM: arm/arm64: vgic: Move kvm_vgic_destroy call around
KVM: arm/arm64: vITS: Add MSI translation helpers
KVM: arm/arm64: GICv4: Add property field and per-VM predicate
KVM: arm/arm64: GICv4: Add init/teardown of the per-VM vPE irq domain
KVM: arm/arm64: GICv4: Wire mapping/unmapping of VLPIs in VFIO irq
bypass
KVM: arm/arm64: GICv4: Handle INT command applied to a VLPI
KVM: arm/arm64: GICv4: Unmap VLPI when freeing an LPI
KVM: arm/arm64: GICv4: Handle MOVI applied to a VLPI
KVM: arm/arm64: GICv4: Handle CLEAR applied to a VLPI
KVM: arm/arm64: GICv4: Handle MOVALL applied to a vPE
KVM: arm/arm64: GICv4: Propagate property updates to VLPIs
KVM: arm/arm64: GICv4: Handle INVALL applied to a vPE
KVM: arm/arm64: GICv4: Propagate VLPI properties at map time
KVM: arm/arm64: GICv4: Use pending_last as a scheduling hint
KVM: arm/arm64: GICv4: Add doorbell interrupt handling
KVM: arm/arm64: GICv4: Use the doorbell interrupt as an unblocking
source
KVM: arm/arm64: GICv4: Hook vPE scheduling into vgic flush/sync
KVM: arm/arm64: GICv4: Enable virtual cpuif if VLPIs can be delivered
KVM: arm/arm64: GICv4: Enable VLPI support
KVM: arm/arm64: GICv4: Prevent heterogenous systems from using GICv4
KVM: arm/arm64: GICv4: Theory of operations
irqchip/gic-v3-its: Pass its_node pointer to each command bulder
irqchip/gic-v3-its: Workaround Huawei D05 redistributor addressing

Documentation/admin-guide/kernel-parameters.txt | 4 +
arch/arm/include/asm/arch_gicv3.h | 33 +
arch/arm/kvm/Kconfig | 2 +
arch/arm/kvm/Makefile | 1 +
arch/arm64/include/asm/arch_gicv3.h | 6 +
arch/arm64/kvm/Makefile | 1 +
drivers/irqchip/Makefile | 2 +-
drivers/irqchip/irq-gic-v3-its.c | 1421 +++++++++++++++++++++--
drivers/irqchip/irq-gic-v3.c | 95 +-
drivers/irqchip/irq-gic-v4.c | 224 ++++
include/kvm/arm_vgic.h | 22 +
include/linux/irqchip/arm-gic-common.h | 2 +
include/linux/irqchip/arm-gic-v3.h | 86 ++
include/linux/irqchip/arm-gic-v4.h | 103 ++
kernel/irq/manage.c | 14 +-
virt/kvm/arm/arm.c | 33 +-
virt/kvm/arm/hyp/vgic-v3-sr.c | 9 +-
virt/kvm/arm/vgic/vgic-init.c | 9 +
virt/kvm/arm/vgic/vgic-its.c | 173 ++-
virt/kvm/arm/vgic/vgic-mmio-v3.c | 5 +
virt/kvm/arm/vgic/vgic-v3.c | 34 +
virt/kvm/arm/vgic/vgic-v4.c | 335 ++++++
virt/kvm/arm/vgic/vgic.c | 7 +
virt/kvm/arm/vgic/vgic.h | 11 +
24 files changed, 2419 insertions(+), 213 deletions(-)
create mode 100644 drivers/irqchip/irq-gic-v4.c
create mode 100644 include/linux/irqchip/arm-gic-v4.h
create mode 100644 virt/kvm/arm/vgic/vgic-v4.c

--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 1:30:17 PM7/31/17
to
A long time ago, GITS_CTLR[1] used to be called GITC_CTLR.EnableVLPI.
It has been subsequently deprecated and is now an "Implementation
Defined" bit that may ot may not be set for GICv4. Brilliant.

And the current crop of the FastModel requires that bit for VLPIs
to be enabled. Oh well... Let's set it and find out what breaks.

Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/irq-gic-v3-its.c | 7 +++++--
include/linux/irqchip/arm-gic-v3.h | 1 +
2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index a177af0b1657..a5b1317d89eb 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -2610,7 +2610,7 @@ static int its_force_quiescent(void __iomem *base)
return 0;

/* Disable the generation of all interrupts to this ITS */
- val &= ~GITS_CTLR_ENABLE;
+ val &= ~(GITS_CTLR_ENABLE | GITS_CTLR_ImDe);
writel_relaxed(val, base + GITS_CTLR);

/* Poll GITS_CTLR and wait until ITS becomes quiescent */
@@ -2879,7 +2879,10 @@ static int __init its_probe_one(struct resource *res,

gits_write_cwriter(0, its->base + GITS_CWRITER);
ctlr = readl_relaxed(its->base + GITS_CTLR);
- writel_relaxed(ctlr | GITS_CTLR_ENABLE, its->base + GITS_CTLR);
+ ctlr |= GITS_CTLR_ENABLE;
+ if (its->is_v4)
+ ctlr |= GITS_CTLR_ImDe;
+ writel_relaxed(ctlr, its->base + GITS_CTLR);

err = its_init_domain(handle, its);
if (err)
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 6bc142cfa616..1ea576c8126f 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -298,6 +298,7 @@
#define GITS_TRANSLATER 0x10040

#define GITS_CTLR_ENABLE (1U << 0)
+#define GITS_CTLR_ImDe (1U << 1)
#define GITS_CTLR_ITS_NUMBER_SHIFT 4
#define GITS_CTLR_ITS_NUMBER (0xFU << GITS_CTLR_ITS_NUMBER_SHIFT)
#define GITS_CTLR_QUIESCENT (1U << 31)
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 1:40:08 PM7/31/17
to
The redistributor needs to be told which vPE is about to be run,
and tells us whether there is any pending VLPI on exit.

Let's add the scheduling calls to the vgic flush/sync functions,
allowing the VLPIs to be delivered to the guest.

Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
virt/kvm/arm/vgic/vgic-v4.c | 24 ++++++++++++++++++++++++
virt/kvm/arm/vgic/vgic.c | 4 ++++
virt/kvm/arm/vgic/vgic.h | 1 +
3 files changed, 29 insertions(+)

diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
index 50721c4e3da5..0a8deefbcf1c 100644
--- a/virt/kvm/arm/vgic/vgic-v4.c
+++ b/virt/kvm/arm/vgic/vgic-v4.c
@@ -119,6 +119,30 @@ void vgic_v4_teardown(struct kvm *kvm)
its_vm->vpes = NULL;
}

+int vgic_v4_schedule(struct kvm_vcpu *vcpu, bool on)
+{
+ int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
+
+ if (!vgic_is_v4_capable(vcpu->kvm) || !irq)
+ return 0;
+
+ /*
+ * Before making the VPE resident, make sure the redistributor
+ * expects us here.
+ */
+ if (on) {
+ int err;
+
+ err = irq_set_affinity(irq, cpumask_of(smp_processor_id()));
+ if (err) {
+ kvm_err("failed irq_set_affinity IRQ%d (%d)\n", irq, err);
+ return err;
+ }
+ }
+
+ return its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, on);
+}
+
static struct vgic_its *vgic_get_its(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *irq_entry)
{
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index dfac894f6f03..9ab52108989d 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -721,6 +721,8 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;

+ WARN_ON(vgic_v4_schedule(vcpu, false));
+
/* An empty ap_list_head implies used_lrs == 0 */
if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
return;
@@ -733,6 +735,8 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
/* Flush our emulation state into the GIC hardware before entering the guest. */
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
{
+ WARN_ON(vgic_v4_schedule(vcpu, true));
+
/*
* If there are no virtual interrupts active or pending for this
* VCPU, then there is no work to do and we can bail out without
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 1210bf4681dc..693b654acf4d 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -234,5 +234,6 @@ int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
bool vgic_is_v4_capable(struct kvm *kvm);
int vgic_v4_init(struct kvm *kvm);
void vgic_v4_teardown(struct kvm *kvm);
+int vgic_v4_schedule(struct kvm_vcpu *vcpu, bool on);

#endif
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 1:40:08 PM7/31/17
to
The doorbell interrupt is only useful if the vcpu is blocked on WFI.
In all other cases, recieving a doorbell interrupt is just a waste
of cycles.

So let's only enable the doorbell if a vcpu is getting blocked,
and disable it when it is unblocked. This is very similar to
what we're doing for the background timer.

Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
include/kvm/arm_vgic.h | 3 +++
virt/kvm/arm/arm.c | 2 ++
virt/kvm/arm/vgic/vgic-v4.c | 32 +++++++++++++++++++++++++++++++-
3 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 050f78d4fb42..7b1cc7ff5b42 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -375,4 +375,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq,
int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
struct kvm_kernel_irq_routing_entry *irq_entry);

+void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu);
+void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu);
+
#endif /* __KVM_ARM_VGIC_H */
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 6803ea27c47d..5dbc8dc8fbf4 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -316,11 +316,13 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
kvm_timer_schedule(vcpu);
+ kvm_vgic_v4_enable_doorbell(vcpu);
}

void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
kvm_timer_unschedule(vcpu);
+ kvm_vgic_v4_disable_doorbell(vcpu);
}

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
index 6af3cde6d7d4..50721c4e3da5 100644
--- a/virt/kvm/arm/vgic/vgic-v4.c
+++ b/virt/kvm/arm/vgic/vgic-v4.c
@@ -16,6 +16,7 @@
*/

#include <linux/interrupt.h>
+#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/kvm_host.h>

@@ -73,6 +74,14 @@ int vgic_v4_init(struct kvm *kvm)
kvm_for_each_vcpu(i, vcpu, kvm) {
int irq = dist->its_vm.vpes[i]->irq;

+ /*
+ * Don't automatically enable the doorbell, as we're
+ * flipping it back and forth when the vcpu gets
+ * blocked. Also disable the lazy disabling, as the
+ * doorbell could kick us out of the guest too
+ * early...
+ */
+ irq_set_status_flags(irq, IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY);
ret = request_irq(irq, vgic_v4_doorbell_handler,
0, "vcpu", vcpu);
if (ret) {
@@ -98,7 +107,10 @@ void vgic_v4_teardown(struct kvm *kvm)

for (i = 0; i < its_vm->nr_vpes; i++) {
struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, i);
- free_irq(its_vm->vpes[i]->irq, vcpu);
+ int irq = its_vm->vpes[i]->irq;
+
+ irq_clear_status_flags(irq, IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY);
+ free_irq(irq, vcpu);
}

its_free_vcpu_irqs(its_vm);
@@ -211,3 +223,21 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq,
mutex_unlock(&its->its_lock);
return ret;
}
+
+void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu)
+{
+ if (vgic_is_v4_capable(vcpu->kvm)) {
+ int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
+ if (irq)
+ enable_irq(irq);
+ }
+}
+
+void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu)
+{
+ if (vgic_is_v4_capable(vcpu->kvm)) {
+ int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
+ if (irq)
+ disable_irq(irq);
+ }
+}
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 1:40:08 PM7/31/17
to
When a vPE is not running, a VLPI being made pending results in a
doorbell interrupt being delivered. Let's handle this interrupt
and update the pending_last flag that indicates that VLPIs are
pending. The corresponding vcpu is also kicked into action.

Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
virt/kvm/arm/vgic/vgic-v4.c | 34 ++++++++++++++++++++++++++++++++++
1 file changed, 34 insertions(+)

diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
index 534d3051a078..6af3cde6d7d4 100644
--- a/virt/kvm/arm/vgic/vgic-v4.c
+++ b/virt/kvm/arm/vgic/vgic-v4.c
@@ -21,6 +21,19 @@

#include "vgic.h"

+static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
+{
+ struct kvm_vcpu *vcpu = info;
+
+ if (!kvm_vgic_vcpu_pending_irq(vcpu)) {
+ vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
+ kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
+ kvm_vcpu_kick(vcpu);
+ }
+
+ return IRQ_HANDLED;
+}
+
int vgic_v4_init(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
@@ -57,16 +70,37 @@ int vgic_v4_init(struct kvm *kvm)
return ret;
}

+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ int irq = dist->its_vm.vpes[i]->irq;
+
+ ret = request_irq(irq, vgic_v4_doorbell_handler,
+ 0, "vcpu", vcpu);
+ if (ret) {
+ kvm_err("failed to allocate vcpu IRQ%d\n", irq);
+ dist->its_vm.nr_vpes = i;
+ break;
+ }
+ }
+
+ if (ret)
+ vgic_v4_teardown(kvm);
+
return ret;
}

void vgic_v4_teardown(struct kvm *kvm)
{
struct its_vm *its_vm = &kvm->arch.vgic.its_vm;
+ int i;

if (!its_vm->vpes)
return;

+ for (i = 0; i < its_vm->nr_vpes; i++) {
+ struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, i);
+ free_irq(its_vm->vpes[i]->irq, vcpu);
+ }
+
its_free_vcpu_irqs(its_vm);
kfree(its_vm->vpes);
its_vm->nr_vpes = 0;
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 1:40:08 PM7/31/17
to
If the guest issues an INT command targetting a VLPI, let's
call into the irq_set_irqchip_state() helper to make it pending
on the physical side.

This works just as well if userspace decides to inject an interrupt
using the normal userspace API...

Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
virt/kvm/arm/vgic/vgic-its.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index a2217f53bbe5..40aeadef33fe 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -573,6 +573,10 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
if (err)
return err;

+ if (irq->hw)
+ return irq_set_irqchip_state(irq->host_irq,
+ IRQCHIP_STATE_PENDING, true);
+
spin_lock(&irq->irq_lock);
irq->pending_latch = true;
vgic_queue_irq_unlock(kvm, irq);
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 1:40:09 PM7/31/17
to
Yet another braindump so I can free some cells...

Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
virt/kvm/arm/vgic/vgic-v4.c | 68 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 68 insertions(+)

diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
index 0a8deefbcf1c..0c002d2be620 100644
--- a/virt/kvm/arm/vgic/vgic-v4.c
+++ b/virt/kvm/arm/vgic/vgic-v4.c
@@ -22,6 +22,74 @@

#include "vgic.h"

+/*
+ * How KVM uses GICv4 (insert rude comments here):
+ *
+ * The vgic-v4 layer acts as a bridge between several entities:
+ * - The GICv4 ITS representation offered by the ITS driver
+ * - VFIO, which is in charge of the PCI endpoint
+ * - The virtual ITS, which is the only thing the guest sees
+ *
+ * The configuration of VLPIs is triggered by a callback from VFIO,
+ * instructing KVM that a PCI device has been configured to deliver
+ * MSIs to a vITS.
+ *
+ * kvm_vgic_v4_set_forwarding() is thus called with the routing entry,
+ * and this is used to find the corresponding vITS data structures
+ * (ITS instance, device, event and irq) using a process that is
+ * extremely similar to the injection of an MSI.
+ *
+ * At this stage, we can link the guest's view of an LPI (uniquely
+ * identified by the routing entry) and the host irq, using the GICv4
+ * driver mapping operation. Should the mapping succeed, we've then
+ * successfully upgraded the guest's LPI to a VLPI. We can then start
+ * with updating GICv4's view of the property table and generating an
+ * INValidation in order to kickstart the delivery of this VLPI to the
+ * guest directly, without software intervention. Well, almost.
+ *
+ * When the PCI endpoint is deconfigured, this operation is reversed
+ * with VFIO calling kvm_vgic_v4_unset_forwarding().
+ *
+ * Once the VLPI has been mapped, it needs to follow any change the
+ * guest performs on its LPI through the vITS. For that, a number of
+ * command handlers have hooks to communicate these changes to the HW:
+ * - Any invalidation triggers a call to its_prop_update_vlpi()
+ * - The INT command results in a irq_set_irqchip_state(), which
+ * generates an INT on the corresponding VLPI.
+ * - The CLEAR command results in a irq_set_irqchip_state(), which
+ * generates an CLEAR on the corresponding VLPI.
+ * - DISCARD translates into an unmap, similar to a call to
+ * kvm_vgic_v4_unset_forwarding().
+ * - MOVI is translated by an update of the existing mapping, changing
+ * the target vcpu, resulting in a VMOVI being generated.
+ * - MOVALL is translated by a string of mapping updates (similar to
+ * the handling of MOVI). MOVALL is horrible.
+ *
+ * Note that a DISCARD/MAPTI sequence emitted from the guest without
+ * reprogramming the PCI endpoint after MAPTI does not result in a
+ * VLPI being mapped, as there is no callback from VFIO (the guest
+ * will get the interrupt via the normal SW injection). Fixing this is
+ * not trivial, and requires some horrible messing with the VFIO
+ * internals. Not fun. Don't do that.
+ *
+ * Then there is the scheduling. Each time a vcpu is about to run on a
+ * physical CPU, KVM must tell the corresponding redistributor about
+ * it. And if we've migrated our vcpu from one CPU to another, we must
+ * tell the ITS (so that the messages reach the right redistributor).
+ * This is done in two steps: first issue a irq_set_affinity() on the
+ * irq corresponding to the vcpu, then call its_schedule_vpe(). You
+ * must be in a non-preemptible context. On exit, another call to
+ * its_schedule_vpe() tells the redistributor that we're done with the
+ * vcpu.
+ *
+ * Finally, the doorbell handling: Each vcpu is allocated an interrupt
+ * which will fire each time a VLPI is made pending whilst the vcpu is
+ * not running. Each time the vcpu gets blocked, the doorbell
+ * interrupt gets enabled. When the vcpu is unblocked (for whatever
+ * reason), the doorbell interrupt is disabled. This behaviour is
+ * pretty similar to that of the backgroud timer.
+ */
+
static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
{
struct kvm_vcpu *vcpu = info;
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 1:40:09 PM7/31/17
to
The current implementation of MOVALL doesn't allow us to call
into the core ITS code as we hold a number of spinlocks.

Let's try a method used in other parts of the code, were we copy
the intids of the candicate interrupts, and then do whatever
we need to do with them outside of the critical section.

This allows us to move the interrupts one by one, at the expense
of a bit of CPU time. Who cares? MOVALL is such a stupid command
anyway...

Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
virt/kvm/arm/vgic/vgic-its.c | 27 ++++++++++++++++++++-------
1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 2c065c970ba0..65cc77fde609 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1147,11 +1147,12 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
u64 *its_cmd)
{
- struct vgic_dist *dist = &kvm->arch.vgic;
u32 target1_addr = its_cmd_get_target_addr(its_cmd);
u32 target2_addr = its_cmd_mask_field(its_cmd, 3, 16, 32);
struct kvm_vcpu *vcpu1, *vcpu2;
struct vgic_irq *irq;
+ u32 *intids;
+ int irq_count, i;

if (target1_addr >= atomic_read(&kvm->online_vcpus) ||
target2_addr >= atomic_read(&kvm->online_vcpus))
@@ -1163,19 +1164,31 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
vcpu1 = kvm_get_vcpu(kvm, target1_addr);
vcpu2 = kvm_get_vcpu(kvm, target2_addr);

- spin_lock(&dist->lpi_list_lock);
+ irq_count = vgic_copy_lpi_list(vcpu1, &intids);
+ if (irq_count < 0)
+ return irq_count;

- list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
- spin_lock(&irq->irq_lock);
+ for (i = 0; i < irq_count; i++) {
+ irq = vgic_get_irq(kvm, NULL, intids[i]);
+ if (!irq)
+ continue;

if (irq->target_vcpu == vcpu1)
irq->target_vcpu = vcpu2;

- spin_unlock(&irq->irq_lock);
- }
+ if (irq->hw) {
+ struct its_vlpi_map map;

- spin_unlock(&dist->lpi_list_lock);
+ if (!its_get_vlpi(irq->host_irq, &map)) {
+ map.vpe_idx = vcpu2->vcpu_id;
+ its_map_vlpi(irq->host_irq, &map);
+ }
+ }

+ vgic_put_irq(kvm, irq);
+ }
+
+ kfree(intids);
return 0;
}

--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 1:40:10 PM7/31/17
to
When the guest issues a MOVI, we need to tell the physical ITS
that we're now targetting a new vcpu. This is done by extracting
the current mapping, updating the target, and reapplying the
mapping. The core ITS code should do the right thing.

Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
virt/kvm/arm/vgic/vgic-its.c | 13 +++++++++++++
1 file changed, 13 insertions(+)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 79bac93d3e7d..aaad577ce328 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -706,6 +706,19 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
ite->irq->target_vcpu = vcpu;
spin_unlock(&ite->irq->irq_lock);

+ if (ite->irq->hw) {
+ struct its_vlpi_map map;
+ int ret;
+
+ ret = its_get_vlpi(ite->irq->host_irq, &map);
+ if (ret)
+ return ret;
+
+ map.vpe_idx = vcpu->vcpu_id;
+
+ return its_map_vlpi(ite->irq->host_irq, &map);

Marc Zyngier

unread,
Jul 31, 2017, 1:40:13 PM7/31/17
to
The GICv4 architecture doesn't prevent CPUs implementing GICv4 to
cohabit with CPUs limited to GICv3 in the same system.

This is mad (the sheduler would have to be made aware of the v4
capability), and we're certainly not going to support this any
time soon. So let's check that all online CPUs are GICv4 capable,
and disable the functionnality if not.

Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
include/linux/irqchip/arm-gic-v3.h | 2 ++
virt/kvm/arm/vgic/vgic-v3.c | 22 +++++++++++++++++++++-
2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 1ea576c8126f..dfa4a51643d6 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -532,6 +532,8 @@
#define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT)
#define ICH_VTR_A3V_SHIFT 21
#define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT)
+#define ICH_VTR_nV4_SHIFT 20
+#define ICH_VTR_nV4_MASK (1 << ICH_VTR_nV4_SHIFT)

#define ICC_IAR1_EL1_SPURIOUS 0x3ff

diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 405733678c2f..252268e83ade 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -466,6 +466,18 @@ static int __init early_gicv4_enable(char *buf)
}
early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);

+static void vgic_check_v4_cpuif(void *param)
+{
+ u32 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
+ bool *v4 = param, this_cpu_is_v4;
+
+ this_cpu_is_v4 = !(ich_vtr_el2 & ICH_VTR_nV4_MASK);
+ if (!this_cpu_is_v4)
+ kvm_info("CPU%d is not GICv4 capable\n", smp_processor_id());
+
+ *v4 &= this_cpu_is_v4;
+}
+
/**
* vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
* @node: pointer to the DT node
@@ -485,8 +497,16 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
kvm_vgic_global_state.can_emulate_gicv2 = false;
kvm_vgic_global_state.ich_vtr_el2 = ich_vtr_el2;

- /* GICv4 support? */
+ /*
+ * GICv4 support? We need to check on all CPUs in case of some
+ * extremely creative form of big-little brain damage...
+ */
if (info->has_v4) {
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ smp_call_function_single(cpu, vgic_check_v4_cpuif,
+ &gicv4_enable, 1);
kvm_vgic_global_state.has_gicv4 = gicv4_enable;
kvm_info("GICv4 support %sabled\n",
gicv4_enable ? "en" : "dis");
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 1:40:13 PM7/31/17
to
When a vPE exits, the pending_last flag is set when there are
pending VLPIs stored in the pending table. Similarily, we set
this flag when a doorbell interrupt fires, as it indicates the
same condition.

Let's update kvm_vgic_vcpu_pending_irq() to account for that
flag as well, making a vcpu runnable when set.

Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
virt/kvm/arm/vgic/vgic.c | 3 +++
1 file changed, 3 insertions(+)

diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index a96e566905d2..dfac894f6f03 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -781,6 +781,9 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
if (!vcpu->kvm->arch.vgic.enabled)
return false;

+ if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last)
+ return true;
+
spin_lock(&vgic_cpu->ap_list_lock);

list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 1:40:13 PM7/31/17
to
Since when updating the properties one LPI at a time, there is no
need to perform an INV each time we read one. Instead, we rely
on the final VINVALL that gets sent to the ITS to do the work.

Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
virt/kvm/arm/vgic/vgic-its.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 9e46081e5f15..b395df1bf47c 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -38,7 +38,7 @@ static int vgic_its_save_tables_v0(struct vgic_its *its);
static int vgic_its_restore_tables_v0(struct vgic_its *its);
static int vgic_its_commit_v0(struct vgic_its *its);
static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
- struct kvm_vcpu *filter_vcpu);
+ struct kvm_vcpu *filter_vcpu, bool needs_inv);

/*
* Creates a new (reference to a) struct vgic_irq for a given LPI.
@@ -106,7 +106,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
* However we only have those structs for mapped IRQs, so we read in
* the respective config data from memory here upon mapping the LPI.
*/
- ret = update_lpi_config(kvm, irq, NULL);
+ ret = update_lpi_config(kvm, irq, NULL, false);
if (ret)
return ERR_PTR(ret);

@@ -274,7 +274,7 @@ static struct its_collection *find_collection(struct vgic_its *its, int coll_id)
* VCPU. Unconditionally applies if filter_vcpu is NULL.
*/
static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
- struct kvm_vcpu *filter_vcpu)
+ struct kvm_vcpu *filter_vcpu, bool needs_inv)
{
u64 propbase = GICR_PROPBASER_ADDRESS(kvm->arch.vgic.propbaser);
u8 prop;
@@ -298,7 +298,7 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
}

if (irq->hw)
- return its_prop_update_vlpi(irq->host_irq, prop, true);
+ return its_prop_update_vlpi(irq->host_irq, prop, needs_inv);

return 0;
}
@@ -1095,7 +1095,7 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its,
if (!ite)
return E_ITS_INV_UNMAPPED_INTERRUPT;

- return update_lpi_config(kvm, ite->irq, NULL);
+ return update_lpi_config(kvm, ite->irq, NULL, true);
}

/*
@@ -1130,12 +1130,15 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
irq = vgic_get_irq(kvm, NULL, intids[i]);
if (!irq)
continue;
- update_lpi_config(kvm, irq, vcpu);
+ update_lpi_config(kvm, irq, vcpu, false);
vgic_put_irq(kvm, irq);
}

kfree(intids);

+ if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm)
+ its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe);

Marc Zyngier

unread,
Jul 31, 2017, 2:00:08 PM7/31/17
to
We're are going to need to change a bit more than just the enable
bit in the LPI property table in the future. So let's change the
LPI configuration funtion to take a set of bits to be cleared,
and a set of bits to be set.

This way, we'll be able to use it when a guest updates an LPI
property (priority, for example).

Reviewed-by: Eric Auger <eric....@redhat.com>
Reviewed-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/irq-gic-v3-its.c | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 1c2d6bdf85cb..7ef448a963c1 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -678,17 +678,18 @@ static inline u32 its_get_event_id(struct irq_data *d)
return d->hwirq - its_dev->event_map.lpi_base;
}

-static void lpi_set_config(struct irq_data *d, bool enable)
+static void lpi_update_config(struct irq_data *d, u8 clr, u8 set)
{
struct its_device *its_dev = irq_data_get_irq_chip_data(d);
irq_hw_number_t hwirq = d->hwirq;
- u32 id = its_get_event_id(d);
- u8 *cfg = page_address(gic_rdists->prop_page) + hwirq - 8192;
+ struct page *prop_page;
+ u8 *cfg;

- if (enable)
- *cfg |= LPI_PROP_ENABLED;
- else
- *cfg &= ~LPI_PROP_ENABLED;
+ prop_page = gic_rdists->prop_page;
+
+ cfg = page_address(prop_page) + hwirq - 8192;
+ *cfg &= ~clr;
+ *cfg |= set;

/*
* Make the above write visible to the redistributors.
@@ -699,17 +700,17 @@ static void lpi_set_config(struct irq_data *d, bool enable)
gic_flush_dcache_to_poc(cfg, sizeof(*cfg));
else
dsb(ishst);
- its_send_inv(its_dev, id);
+ its_send_inv(its_dev, its_get_event_id(d));
}

static void its_mask_irq(struct irq_data *d)
{
- lpi_set_config(d, false);
+ lpi_update_config(d, LPI_PROP_ENABLED, 0);
}

static void its_unmask_irq(struct irq_data *d)
{
- lpi_set_config(d, true);
+ lpi_update_config(d, 0, LPI_PROP_ENABLED);
}

static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 2:00:08 PM7/31/17
to
When masking/unmasking a doorbell interrupt, it is necessary
to issue an invalidation to the corresponding redistributor.
We use the DirectLPI feature by writting directly to the corresponding
redistributor.

Reviewed-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
arch/arm/include/asm/arch_gicv3.h | 5 +++++
arch/arm64/include/asm/arch_gicv3.h | 1 +
drivers/irqchip/irq-gic-v3-its.c | 31 +++++++++++++++++++++++++++++++
3 files changed, 37 insertions(+)

diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h
index 8d45e88feac9..550fb0bcccf8 100644
--- a/arch/arm/include/asm/arch_gicv3.h
+++ b/arch/arm/include/asm/arch_gicv3.h
@@ -276,6 +276,11 @@ static inline u64 __gic_readq_nonatomic(const volatile void __iomem *addr)
#define gicr_write_pendbaser(v, c) __gic_writeq_nonatomic(v, c)

/*
+ * GICR_xLPIR - only the lower bits are significant
+ */
+#define gic_write_lpir(v, c) writel_relaxed(lower_32_bits(v), c)
+
+/*
* GITS_TYPER is an ID register and doesn't need atomicity.
*/
#define gits_read_typer(c) __gic_readq_nonatomic(c)
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 0d2a53457c30..c2de3ff374a0 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -116,6 +116,7 @@ static inline void gic_write_bpr1(u32 val)

#define gic_read_typer(c) readq_relaxed(c)
#define gic_write_irouter(v, c) writeq_relaxed(v, c)
+#define gic_write_lpir(v, c) writeq_relaxed(v, c)

#define gic_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l))

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 0a9aedaf6da2..d871d105be9d 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -2330,8 +2330,39 @@ static int its_vpe_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)
}
}

+static void its_vpe_send_inv(struct irq_data *d)
+{
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
+ void __iomem *rdbase;
+
+ rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
+ gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_INVLPIR);
+}
+
+static void its_vpe_mask_irq(struct irq_data *d)
+{
+ /*
+ * We need to unmask the LPI, which is described by the parent
+ * irq_data. Instead of calling into the parent (which won't
+ * exactly do the right thing, let's simply use the
+ * parent_data pointer. Yes, I'm naughty.
+ */
+ lpi_write_config(d->parent_data, LPI_PROP_ENABLED, 0);
+ its_vpe_send_inv(d);
+}
+
+static void its_vpe_unmask_irq(struct irq_data *d)
+{
+ /* Same hack as above... */
+ lpi_write_config(d->parent_data, 0, LPI_PROP_ENABLED);
+ its_vpe_send_inv(d);
+}
+
static struct irq_chip its_vpe_irq_chip = {
.name = "GICv4-vpe",
+ .irq_mask = its_vpe_mask_irq,
+ .irq_unmask = its_vpe_unmask_irq,
+ .irq_eoi = irq_chip_eoi_parent,
.irq_set_affinity = its_vpe_set_affinity,
.irq_set_vcpu_affinity = its_vpe_set_vcpu_affinity,
};
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 2:00:08 PM7/31/17
to
Add the skeleton irq_set_vcpu_affinity method that will be used
to configure VLPIs.

Reviewed-by: Eric Auger <eric....@redhat.com>
Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/irq-gic-v3-its.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index bc396ab24037..9ef4f161690a 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -36,6 +36,7 @@

#include <linux/irqchip.h>
#include <linux/irqchip/arm-gic-v3.h>
+#include <linux/irqchip/arm-gic-v4.h>

#include <asm/cputype.h>
#include <asm/exception.h>
@@ -780,6 +781,28 @@ static int its_irq_set_irqchip_state(struct irq_data *d,
return 0;
}

+static int its_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)
+{
+ struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+ struct its_cmd_info *info = vcpu_info;
+
+ /* Need a v4 ITS */
+ if (!its_dev->its->is_v4 || !info)
+ return -EINVAL;
+
+ switch (info->cmd_type) {
+ case MAP_VLPI:
+
+ case GET_VLPI:
+
+ case PROP_UPDATE_VLPI:
+ case PROP_UPDATE_AND_INV_VLPI:
+
+ default:
+ return -EINVAL;
+ }
+}
+
static struct irq_chip its_irq_chip = {
.name = "ITS",
.irq_mask = its_mask_irq,
@@ -788,6 +811,7 @@ static struct irq_chip its_irq_chip = {
.irq_set_affinity = its_set_affinity,
.irq_compose_msi_msg = its_irq_compose_msi_msg,
.irq_set_irqchip_state = its_irq_set_irqchip_state,
+ .irq_set_vcpu_affinity = its_irq_set_vcpu_affinity,
};

/*
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 2:00:08 PM7/31/17
to
While the doorbell interrupts are usually driven by the HW itself,
having a way to trigger them independently has proved to be a
really useful debug feature. As it is actually very little code,
let's add it to the VPE irqchip operations.

Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/irq-gic-v3-its.c | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 210840e0ec92..a177af0b1657 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -2404,12 +2404,41 @@ static void its_vpe_unmask_irq(struct irq_data *d)
its_vpe_send_inv(d);
}

+/* This is really a debug feature */
+static int its_vpe_set_irqchip_state(struct irq_data *d,
+ enum irqchip_irq_state which,
+ bool state)
+{
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
+
+ if (which != IRQCHIP_STATE_PENDING)
+ return -EINVAL;
+
+ if (gic_rdists->has_direct_lpi) {
+ void __iomem *rdbase;
+
+ rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
+ if (state)
+ gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_SETLPIR);
+ else
+ gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_CLRLPIR);
+ } else {
+ if (state)
+ its_vpe_send_cmd(vpe, its_send_int);
+ else
+ its_vpe_send_cmd(vpe, its_send_clear);
+ }
+
+ return 0;
+}
+
static struct irq_chip its_vpe_irq_chip = {
.name = "GICv4-vpe",
.irq_mask = its_vpe_mask_irq,
.irq_unmask = its_vpe_unmask_irq,
.irq_eoi = irq_chip_eoi_parent,
.irq_set_affinity = its_vpe_set_affinity,
+ .irq_set_irqchip_state = its_vpe_set_irqchip_state,

Marc Zyngier

unread,
Jul 31, 2017, 2:00:08 PM7/31/17
to
Get the show on the road...

Reviewed-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/Makefile | 2 +-
drivers/irqchip/irq-gic-v3-its.c | 3 ++-
drivers/irqchip/irq-gic-v4.c | 13 +++++++++++++
include/linux/irqchip/arm-gic-v4.h | 2 ++
4 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index e88d856cc09c..7dd05ad6bcfb 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -28,7 +28,7 @@ obj-$(CONFIG_ARM_GIC_PM) += irq-gic-pm.o
obj-$(CONFIG_ARCH_REALVIEW) += irq-gic-realview.o
obj-$(CONFIG_ARM_GIC_V2M) += irq-gic-v2m.o
obj-$(CONFIG_ARM_GIC_V3) += irq-gic-v3.o irq-gic-common.o
-obj-$(CONFIG_ARM_GIC_V3_ITS) += irq-gic-v3-its.o irq-gic-v3-its-pci-msi.o irq-gic-v3-its-platform-msi.o
+obj-$(CONFIG_ARM_GIC_V3_ITS) += irq-gic-v3-its.o irq-gic-v3-its-pci-msi.o irq-gic-v3-its-platform-msi.o irq-gic-v4.o
obj-$(CONFIG_PARTITION_PERCPU) += irq-partition-percpu.o
obj-$(CONFIG_HISILICON_IRQ_MBIGEN) += irq-mbigen.o
obj-$(CONFIG_ARM_NVIC) += irq-nvic.o
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index a5b1317d89eb..9f32cd42238f 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -3109,7 +3109,8 @@ int __init its_init(struct fwnode_handle *handle, struct rdists *rdists,
has_v4 |= its->is_v4;

if (has_v4 & rdists->has_vlpis) {
- if (its_init_vpe_domain()) {
+ if (its_init_vpe_domain() ||
+ its_init_v4(parent_domain, &its_vpe_domain_ops)) {
rdists->has_vlpis = false;
pr_err("ITS: Disabling GICv4 support\n");
}
diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c
index d10d73477d01..400d9bbb0e56 100644
--- a/drivers/irqchip/irq-gic-v4.c
+++ b/drivers/irqchip/irq-gic-v4.c
@@ -209,3 +209,16 @@ int its_prop_update_vlpi(int irq, u8 config, bool inv)

return irq_set_vcpu_affinity(irq, &info);
}
+
+int its_init_v4(struct irq_domain *domain, const struct irq_domain_ops *ops)
+{
+ if (domain) {
+ pr_info("ITS: Enabling GICv4 support\n");
+ gic_domain = domain;
+ vpe_domain_ops = ops;
+ return 0;
+ }
+
+ pr_err("ITS: No GICv4 VPE domain allocated\n");
+ return -ENODEV;
+}
diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h
index f495d4d1f7d7..3cf72af2c796 100644
--- a/include/linux/irqchip/arm-gic-v4.h
+++ b/include/linux/irqchip/arm-gic-v4.h
@@ -98,4 +98,6 @@ int its_get_vlpi(int irq, struct its_vlpi_map *map);
int its_unmap_vlpi(int irq);
int its_prop_update_vlpi(int irq, u8 config, bool inv);

+int its_init_v4(struct irq_domain *domain, const struct irq_domain_ops *ops);
+
#endif
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 2:00:09 PM7/31/17
to
When a VPE is scheduled to run, the corresponding redistributor must
be told so, by setting VPROPBASER to the VM's property table, and
VPENDBASER to the vcpu's pending table.

When scheduled out, we preserve the IDAI and PendingLast bits. The
latter is specially important, as it tells the hypervisor that
there are pending interrupts for this vcpu.

Reviewed-by: Eric Auger <eric....@redhat.com>
Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/irq-gic-v3-its.c | 85 ++++++++++++++++++++++++++++++++++++++
include/linux/irqchip/arm-gic-v3.h | 58 ++++++++++++++++++++++++++
2 files changed, 143 insertions(+)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 5ca1e571c1b4..d94a51489f6c 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -152,6 +152,7 @@ static DEFINE_IDA(its_vpeid_ida);

#define gic_data_rdist() (raw_cpu_ptr(gic_rdists->rdist))
#define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base)
+#define gic_data_rdist_vlpi_base() (gic_data_rdist_rd_base() + SZ_128K)

static struct its_collection *dev_event_to_col(struct its_device *its_dev,
u32 event)
@@ -2147,8 +2148,92 @@ static const struct irq_domain_ops its_domain_ops = {
.deactivate = its_irq_domain_deactivate,
};

+static void its_vpe_schedule(struct its_vpe *vpe)
+{
+ void * __iomem vlpi_base = gic_data_rdist_vlpi_base();
+ u64 val;
+
+ /* Schedule the VPE */
+ val = virt_to_phys(page_address(vpe->its_vm->vprop_page)) &
+ GENMASK_ULL(51, 12);
+ val |= (LPI_NRBITS - 1) & GICR_VPROPBASER_IDBITS_MASK;
+ val |= GICR_VPROPBASER_RaWb;
+ val |= GICR_VPROPBASER_InnerShareable;
+ gits_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER);
+
+ val = virt_to_phys(page_address(vpe->vpt_page)) &
+ GENMASK_ULL(51, 16);
+ val |= GICR_VPENDBASER_RaWaWb;
+ val |= GICR_VPENDBASER_NonShareable;
+ /*
+ * There is no good way of finding out if the pending table is
+ * empty as we can race against the doorbell interrupt very
+ * easily. So in the end, vpe->pending_last is only an
+ * indication that the vcpu has something pending, not one
+ * that the pending table is empty. A good implementation
+ * would be able to read its coarse map pretty quickly anyway,
+ * making this a tolerable issue.
+ */
+ val |= GICR_VPENDBASER_PendingLast;
+ val |= vpe->idai ? GICR_VPENDBASER_IDAI : 0;
+ val |= GICR_VPENDBASER_Valid;
+ gits_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
+}
+
+static void its_vpe_deschedule(struct its_vpe *vpe)
+{
+ void * __iomem vlpi_base = gic_data_rdist_vlpi_base();
+ u32 count = 1000000; /* 1s! */
+ bool clean;
+ u64 val;
+
+ /* We're being scheduled out */
+ val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER);
+ val &= ~GICR_VPENDBASER_Valid;
+ gits_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
+
+ do {
+ val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER);
+ clean = !(val & GICR_VPENDBASER_Dirty);
+ if (!clean) {
+ count--;
+ cpu_relax();
+ udelay(1);
+ }
+ } while (!clean && count);
+
+ if (unlikely(!clean && !count)) {
+ pr_err_ratelimited("ITS virtual pending table not cleaning\n");
+ vpe->idai = false;
+ vpe->pending_last = true;
+ } else {
+ vpe->idai = !!(val & GICR_VPENDBASER_IDAI);
+ vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast);
+ }
+}
+
+static int its_vpe_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)
+{
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
+ struct its_cmd_info *info = vcpu_info;
+
+ switch (info->cmd_type) {
+ case SCHEDULE_VPE:
+ its_vpe_schedule(vpe);
+ return 0;
+
+ case DESCHEDULE_VPE:
+ its_vpe_deschedule(vpe);
+ return 0;
+
+ default:
+ return -EINVAL;
+ }
+}
+
static struct irq_chip its_vpe_irq_chip = {
.name = "GICv4-vpe",
+ .irq_set_vcpu_affinity = its_vpe_set_vcpu_affinity,
};

static int its_vpe_id_alloc(void)
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 17ba0d732f12..6bc142cfa616 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -212,6 +212,64 @@
#define LPI_PROP_GROUP1 (1 << 1)
#define LPI_PROP_ENABLED (1 << 0)

+/*
+ * Re-Distributor registers, offsets from VLPI_base
+ */
+#define GICR_VPROPBASER 0x0070
+
+#define GICR_VPROPBASER_IDBITS_MASK 0x1f
+
+#define GICR_VPROPBASER_SHAREABILITY_SHIFT (10)
+#define GICR_VPROPBASER_INNER_CACHEABILITY_SHIFT (7)
+#define GICR_VPROPBASER_OUTER_CACHEABILITY_SHIFT (56)
+
+#define GICR_VPROPBASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GICR_VPROPBASER, SHAREABILITY_MASK)
+#define GICR_VPROPBASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, MASK)
+#define GICR_VPROPBASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_VPROPBASER, OUTER, MASK)
+#define GICR_VPROPBASER_CACHEABILITY_MASK \
+ GICR_VPROPBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_VPROPBASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GICR_VPROPBASER, InnerShareable)
+
+#define GICR_VPROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nCnB)
+#define GICR_VPROPBASER_nC GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nC)
+#define GICR_VPROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt)
+#define GICR_VPROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt)
+#define GICR_VPROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWt)
+#define GICR_VPROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWb)
+#define GICR_VPROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWt)
+#define GICR_VPROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWb)
+
+#define GICR_VPENDBASER 0x0078
+
+#define GICR_VPENDBASER_SHAREABILITY_SHIFT (10)
+#define GICR_VPENDBASER_INNER_CACHEABILITY_SHIFT (7)
+#define GICR_VPENDBASER_OUTER_CACHEABILITY_SHIFT (56)
+#define GICR_VPENDBASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GICR_VPENDBASER, SHAREABILITY_MASK)
+#define GICR_VPENDBASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, MASK)
+#define GICR_VPENDBASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_VPENDBASER, OUTER, MASK)
+#define GICR_VPENDBASER_CACHEABILITY_MASK \
+ GICR_VPENDBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_VPENDBASER_NonShareable \
+ GIC_BASER_SHAREABILITY(GICR_VPENDBASER, NonShareable)
+
+#define GICR_VPENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nCnB)
+#define GICR_VPENDBASER_nC GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nC)
+#define GICR_VPENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt)
+#define GICR_VPENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt)
+#define GICR_VPENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWt)
+#define GICR_VPENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWb)
+#define GICR_VPENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWt)
+#define GICR_VPENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWb)
+
#define GICR_VPENDBASER_Dirty (1ULL << 60)
#define GICR_VPENDBASER_PendingLast (1ULL << 61)
#define GICR_VPENDBASER_IDAI (1ULL << 62)
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 2:00:09 PM7/31/17
to
When we're about to run a vcpu, it is crucial that the redistributor
associated with the physical CPU is being told about the new residency.

This is abstracted by hijacking the irq_set_affinity method for the
doorbell interrupt associated with the VPE. It is expected that the
hypervisor will call this method before scheduling the VPE.

Reviewed-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/irq-gic-v3-its.c | 96 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 96 insertions(+)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 16cdd1f60ebf..0a9aedaf6da2 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -148,6 +148,9 @@ static struct irq_domain *its_parent;
#define ITS_LIST_MAX 16

static unsigned long its_list_map;
+static u16 vmovp_seq_num;
+static DEFINE_RAW_SPINLOCK(vmovp_lock);
+
static DEFINE_IDA(its_vpeid_ida);

#define gic_data_rdist() (raw_cpu_ptr(gic_rdists->rdist))
@@ -238,6 +241,13 @@ struct its_cmd_desc {
u32 event_id;
bool db_enabled;
} its_vmovi_cmd;
+
+ struct {
+ struct its_vpe *vpe;
+ struct its_collection *col;
+ u16 seq_num;
+ u16 its_list;
+ } its_vmovp_cmd;
};
};

@@ -329,6 +339,16 @@ static void its_encode_db_valid(struct its_cmd_block *cmd, bool db_valid)
its_mask_encode(&cmd->raw_cmd[2], db_valid, 0, 0);
}

+static void its_encode_seq_num(struct its_cmd_block *cmd, u16 seq_num)
+{
+ its_mask_encode(&cmd->raw_cmd[0], seq_num, 47, 32);
+}
+
+static void its_encode_its_list(struct its_cmd_block *cmd, u16 its_list)
+{
+ its_mask_encode(&cmd->raw_cmd[1], its_list, 15, 0);
+}
+
static void its_encode_vpt_addr(struct its_cmd_block *cmd, u64 vpt_pa)
{
its_mask_encode(&cmd->raw_cmd[3], vpt_pa >> 16, 50, 16);
@@ -571,6 +591,20 @@ static struct its_vpe *its_build_vmovi_cmd(struct its_cmd_block *cmd,
return desc->its_vmovi_cmd.vpe;
}

+static struct its_vpe *its_build_vmovp_cmd(struct its_cmd_block *cmd,
+ struct its_cmd_desc *desc)
+{
+ its_encode_cmd(cmd, GITS_CMD_VMOVP);
+ its_encode_seq_num(cmd, desc->its_vmovp_cmd.seq_num);
+ its_encode_its_list(cmd, desc->its_vmovp_cmd.its_list);
+ its_encode_vpeid(cmd, desc->its_vmovp_cmd.vpe->vpe_id);
+ its_encode_target(cmd, desc->its_vmovp_cmd.col->target_address);
+
+ its_fixup_cmd(cmd);
+
+ return desc->its_vmovp_cmd.vpe;
+}
+
static u64 its_cmd_ptr_to_offset(struct its_node *its,
struct its_cmd_block *ptr)
{
@@ -865,6 +899,48 @@ static void its_send_vmapp(struct its_vpe *vpe, bool valid)
}
}

+static void its_send_vmovp(struct its_vpe *vpe)
+{
+ struct its_cmd_desc desc;
+ struct its_node *its;
+ unsigned long flags;
+ int col_id = vpe->col_idx;
+
+ desc.its_vmovp_cmd.vpe = vpe;
+ desc.its_vmovp_cmd.its_list = (u16)its_list_map;
+
+ if (!its_list_map) {
+ its = list_first_entry(&its_nodes, struct its_node, entry);
+ desc.its_vmovp_cmd.seq_num = 0;
+ desc.its_vmovp_cmd.col = &its->collections[col_id];
+ its_send_single_vcommand(its, its_build_vmovp_cmd, &desc);
+ return;
+ }
+
+ /*
+ * Yet another marvel of the architecture. If using the
+ * its_list "feature", we need to make sure that all ITSs
+ * receive all VMOVP commands in the same order. The only way
+ * to guarantee this is to make vmovp a serialization point.
+ *
+ * Wall <-- Head.
+ */
+ raw_spin_lock_irqsave(&vmovp_lock, flags);
+
+ desc.its_vmovp_cmd.seq_num = vmovp_seq_num++;
+
+ /* Emit VMOVPs */
+ list_for_each_entry(its, &its_nodes, entry) {
+ if (!its->is_v4)
+ continue;
+
+ desc.its_vmovp_cmd.col = &its->collections[col_id];
+ its_send_single_vcommand(its, its_build_vmovp_cmd, &desc);
+ }
+
+ raw_spin_unlock_irqrestore(&vmovp_lock, flags);
+}
+
static void its_send_vinvall(struct its_vpe *vpe)
{
struct its_cmd_desc desc;
@@ -2148,6 +2224,25 @@ static const struct irq_domain_ops its_domain_ops = {
.deactivate = its_irq_domain_deactivate,
};

+static int its_vpe_set_affinity(struct irq_data *d,
+ const struct cpumask *mask_val,
+ bool force)
+{
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
+ int cpu = cpumask_first(mask_val);
+
+ /*
+ * Changing affinity is mega expensive, so let's be as lazy as
+ * we can and only do it if we really have to.
+ */
+ if (vpe->col_idx != cpu) {
+ vpe->col_idx = cpu;
+ its_send_vmovp(vpe);
+ }
+
+ return IRQ_SET_MASK_OK_DONE;
+}
+
static void its_vpe_schedule(struct its_vpe *vpe)
{
void * __iomem vlpi_base = gic_data_rdist_vlpi_base();
@@ -2237,6 +2332,7 @@ static int its_vpe_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)

static struct irq_chip its_vpe_irq_chip = {
.name = "GICv4-vpe",
+ .irq_set_affinity = its_vpe_set_affinity,

Marc Zyngier

unread,
Jul 31, 2017, 2:00:10 PM7/31/17
to
In order to let a VLPI being injected into a guest, the VLPI must
be mapped using the VMAPTI command. When moved to a different vcpu,
it must be moved with the VMOVI command.

These commands are issued via the irq_set_vcpu_affinity method,
making sure we unmap the corresponding host LPI first.

The reverse is also done when the VLPI is unmapped from the guest.

Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/irq-gic-v3-its.c | 250 ++++++++++++++++++++++++++++++++++++++-
1 file changed, 247 insertions(+), 3 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 9ef4f161690a..ef31c3e58c01 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -115,11 +115,17 @@ struct event_lpi_map {
u16 *col_map;
irq_hw_number_t lpi_base;
int nr_lpis;
+ struct mutex vlpi_lock;
+ struct its_vm *vm;
+ struct its_vlpi_map *vlpi_maps;
+ int nr_vlpis;
};

/*
- * The ITS view of a device - belongs to an ITS, a collection, owns an
- * interrupt translation table, and a list of interrupts.
+ * The ITS view of a device - belongs to an ITS, owns an interrupt
+ * translation table, and a list of interrupts. If it some of its
+ * LPIs are injected into a guest (GICv4), the event_map.vm field
+ * indicates which one.
*/
struct its_device {
struct list_head entry;
@@ -205,6 +211,21 @@ struct its_cmd_desc {
struct {
struct its_collection *col;
} its_invall_cmd;
+
+ struct {
+ struct its_vpe *vpe;
+ struct its_device *dev;
+ u32 virt_id;
+ u32 event_id;
+ bool db_enabled;
+ } its_vmapti_cmd;
+
+ struct {
+ struct its_vpe *vpe;
+ struct its_device *dev;
+ u32 event_id;
+ bool db_enabled;
+ } its_vmovi_cmd;
};
};

@@ -221,6 +242,9 @@ struct its_cmd_block {
typedef struct its_collection *(*its_cmd_builder_t)(struct its_cmd_block *,
struct its_cmd_desc *);

+typedef struct its_vpe *(*its_cmd_vbuilder_t)(struct its_cmd_block *,
+ struct its_cmd_desc *);
+
static void its_mask_encode(u64 *raw_cmd, u64 val, int h, int l)
{
u64 mask = GENMASK_ULL(h, l);
@@ -273,6 +297,26 @@ static void its_encode_collection(struct its_cmd_block *cmd, u16 col)
its_mask_encode(&cmd->raw_cmd[2], col, 15, 0);
}

+static void its_encode_vpeid(struct its_cmd_block *cmd, u16 vpeid)
+{
+ its_mask_encode(&cmd->raw_cmd[1], vpeid, 47, 32);
+}
+
+static void its_encode_virt_id(struct its_cmd_block *cmd, u32 virt_id)
+{
+ its_mask_encode(&cmd->raw_cmd[2], virt_id, 31, 0);
+}
+
+static void its_encode_db_phys_id(struct its_cmd_block *cmd, u32 db_phys_id)
+{
+ its_mask_encode(&cmd->raw_cmd[2], db_phys_id, 63, 32);
+}
+
+static void its_encode_db_valid(struct its_cmd_block *cmd, bool db_valid)
+{
+ its_mask_encode(&cmd->raw_cmd[2], db_valid, 0, 0);
+}
+
static inline void its_fixup_cmd(struct its_cmd_block *cmd)
{
/* Let's fixup BE commands */
@@ -431,6 +475,50 @@ static struct its_collection *its_build_invall_cmd(struct its_cmd_block *cmd,
return NULL;
}

+static struct its_vpe *its_build_vmapti_cmd(struct its_cmd_block *cmd,
+ struct its_cmd_desc *desc)
+{
+ u32 db;
+
+ if (desc->its_vmapti_cmd.db_enabled)
+ db = desc->its_vmapti_cmd.vpe->vpe_db_lpi;
+ else
+ db = 1023;
+
+ its_encode_cmd(cmd, GITS_CMD_VMAPTI);
+ its_encode_devid(cmd, desc->its_vmapti_cmd.dev->device_id);
+ its_encode_vpeid(cmd, desc->its_vmapti_cmd.vpe->vpe_id);
+ its_encode_event_id(cmd, desc->its_vmapti_cmd.event_id);
+ its_encode_db_phys_id(cmd, db);
+ its_encode_virt_id(cmd, desc->its_vmapti_cmd.virt_id);
+
+ its_fixup_cmd(cmd);
+
+ return desc->its_vmapti_cmd.vpe;
+}
+
+static struct its_vpe *its_build_vmovi_cmd(struct its_cmd_block *cmd,
+ struct its_cmd_desc *desc)
+{
+ u32 db;
+
+ if (desc->its_vmovi_cmd.db_enabled)
+ db = desc->its_vmovi_cmd.vpe->vpe_db_lpi;
+ else
+ db = 1023;
+
+ its_encode_cmd(cmd, GITS_CMD_VMOVI);
+ its_encode_devid(cmd, desc->its_vmovi_cmd.dev->device_id);
+ its_encode_vpeid(cmd, desc->its_vmovi_cmd.vpe->vpe_id);
+ its_encode_event_id(cmd, desc->its_vmovi_cmd.event_id);
+ its_encode_db_phys_id(cmd, db);
+ its_encode_db_valid(cmd, true);
+
+ its_fixup_cmd(cmd);
+
+ return desc->its_vmovi_cmd.vpe;
+}
+
static u64 its_cmd_ptr_to_offset(struct its_node *its,
struct its_cmd_block *ptr)
{
@@ -576,6 +664,18 @@ static void its_build_sync_cmd(struct its_cmd_block *sync_cmd,
static BUILD_SINGLE_CMD_FUNC(its_send_single_command, its_cmd_builder_t,
struct its_collection, its_build_sync_cmd)

+static void its_build_vsync_cmd(struct its_cmd_block *sync_cmd,
+ struct its_vpe *sync_vpe)
+{
+ its_encode_cmd(sync_cmd, GITS_CMD_VSYNC);
+ its_encode_vpeid(sync_cmd, sync_vpe->vpe_id);
+
+ its_fixup_cmd(sync_cmd);
+}
+
+static BUILD_SINGLE_CMD_FUNC(its_send_single_vcommand, its_cmd_vbuilder_t,
+ struct its_vpe, its_build_vsync_cmd)
+
static void its_send_int(struct its_device *dev, u32 event_id)
{
struct its_cmd_desc desc;
@@ -669,6 +769,33 @@ static void its_send_invall(struct its_node *its, struct its_collection *col)
its_send_single_command(its, its_build_invall_cmd, &desc);
}

+static void its_send_vmapti(struct its_device *dev, u32 id)
+{
+ struct its_vlpi_map *map = &dev->event_map.vlpi_maps[id];
+ struct its_cmd_desc desc;
+
+ desc.its_vmapti_cmd.vpe = map->vm->vpes[map->vpe_idx];
+ desc.its_vmapti_cmd.dev = dev;
+ desc.its_vmapti_cmd.virt_id = map->vintid;
+ desc.its_vmapti_cmd.event_id = id;
+ desc.its_vmapti_cmd.db_enabled = map->db_enabled;
+
+ its_send_single_vcommand(dev->its, its_build_vmapti_cmd, &desc);
+}
+
+static void its_send_vmovi(struct its_device *dev, u32 id)
+{
+ struct its_vlpi_map *map = &dev->event_map.vlpi_maps[id];
+ struct its_cmd_desc desc;
+
+ desc.its_vmovi_cmd.vpe = map->vm->vpes[map->vpe_idx];
+ desc.its_vmovi_cmd.dev = dev;
+ desc.its_vmovi_cmd.event_id = id;
+ desc.its_vmovi_cmd.db_enabled = map->db_enabled;
+
+ its_send_single_vcommand(dev->its, its_build_vmovi_cmd, &desc);
+}
+
/*
* irqchip functions - assumes MSI, mostly.
*/
@@ -781,19 +908,135 @@ static int its_irq_set_irqchip_state(struct irq_data *d,
return 0;
}

+static int its_vlpi_map(struct irq_data *d, struct its_cmd_info *info)
+{
+ struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+ u32 event = its_get_event_id(d);
+ int ret = 0;
+
+ if (!info->map)
+ return -EINVAL;
+
+ mutex_lock(&its_dev->event_map.vlpi_lock);
+
+ if (!its_dev->event_map.vm) {
+ struct its_vlpi_map *maps;
+
+ maps = kzalloc(sizeof(*maps) * its_dev->event_map.nr_lpis,
+ GFP_KERNEL);
+ if (!maps) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ its_dev->event_map.vm = info->map->vm;
+ its_dev->event_map.vlpi_maps = maps;
+ } else if (its_dev->event_map.vm != info->map->vm) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Get our private copy of the mapping information */
+ its_dev->event_map.vlpi_maps[event] = *info->map;
+
+ if (irqd_is_forwarded_to_vcpu(d)) {
+ /* Already mapped, move it around */
+ its_send_vmovi(its_dev, event);
+ } else {
+ /* Drop the physical mapping */
+ its_send_discard(its_dev, event);
+
+ /* and install the virtual one */
+ its_send_vmapti(its_dev, event);
+ irqd_set_forwarded_to_vcpu(d);
+
+ /* Increment the number of VLPIs */
+ its_dev->event_map.nr_vlpis++;
+ }
+
+out:
+ mutex_unlock(&its_dev->event_map.vlpi_lock);
+ return ret;
+}
+
+static int its_vlpi_get(struct irq_data *d, struct its_cmd_info *info)
+{
+ struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+ u32 event = its_get_event_id(d);
+ int ret = 0;
+
+ mutex_lock(&its_dev->event_map.vlpi_lock);
+
+ if (!its_dev->event_map.vm ||
+ !its_dev->event_map.vlpi_maps[event].vm) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Copy our mapping information to the incoming request */
+ *info->map = its_dev->event_map.vlpi_maps[event];
+
+out:
+ mutex_unlock(&its_dev->event_map.vlpi_lock);
+ return ret;
+}
+
+static int its_vlpi_unmap(struct irq_data *d)
+{
+ struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+ u32 event = its_get_event_id(d);
+ int ret = 0;
+
+ mutex_lock(&its_dev->event_map.vlpi_lock);
+
+ if (!its_dev->event_map.vm || !irqd_is_forwarded_to_vcpu(d)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Drop the virtual mapping */
+ its_send_discard(its_dev, event);
+
+ /* and restore the physical one */
+ irqd_clr_forwarded_to_vcpu(d);
+ its_send_mapti(its_dev, d->hwirq, event);
+ lpi_update_config(d, 0xff, (LPI_PROP_DEFAULT_PRIO |
+ LPI_PROP_ENABLED |
+ LPI_PROP_GROUP1));
+
+ /*
+ * Drop the refcount and make the device available again if
+ * this was the last VLPI.
+ */
+ if (!--its_dev->event_map.nr_vlpis) {
+ its_dev->event_map.vm = NULL;
+ kfree(its_dev->event_map.vlpi_maps);
+ }
+
+out:
+ mutex_unlock(&its_dev->event_map.vlpi_lock);
+ return ret;
+}
+
static int its_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)
{
struct its_device *its_dev = irq_data_get_irq_chip_data(d);
struct its_cmd_info *info = vcpu_info;

/* Need a v4 ITS */
- if (!its_dev->its->is_v4 || !info)
+ if (!its_dev->its->is_v4)
return -EINVAL;

+ /* Unmap request? */
+ if (!info)
+ return its_vlpi_unmap(d);
+
switch (info->cmd_type) {
case MAP_VLPI:
+ return its_vlpi_map(d, info);

case GET_VLPI:
+ return its_vlpi_get(d, info);

case PROP_UPDATE_VLPI:
case PROP_UPDATE_AND_INV_VLPI:
@@ -1512,6 +1755,7 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
dev->event_map.col_map = col_map;
dev->event_map.lpi_base = lpi_base;
dev->event_map.nr_lpis = nr_lpis;
+ mutex_init(&dev->event_map.vlpi_lock);
dev->device_id = dev_id;
INIT_LIST_HEAD(&dev->entry);

--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 2:00:10 PM7/31/17
to
When we don't have the DirectLPI feature, we must work around the
architecture shortcomings to be able to perform the required
invalidation.

For this, we create a fake device whose sole purpose is to
provide a way to issue a map/inv/unmap sequence (and the corresponding
sync operations). That's 6 commands and a full serialization point
to be able to do this.

You just have to hope the hypervisor won't do that too often...

Reviewed-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/irq-gic-v3-its.c | 73 ++++++++++++++++++++++++++++++++++++++--
1 file changed, 70 insertions(+), 3 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index d871d105be9d..210840e0ec92 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -136,6 +136,9 @@ struct its_device {
u32 device_id;
};

+static struct its_device *vpe_proxy_dev;
+static DEFINE_RAW_SPINLOCK(vpe_proxy_dev_lock);
+
static LIST_HEAD(its_nodes);
static DEFINE_SPINLOCK(its_lock);
static struct rdists *gic_rdists;
@@ -2077,6 +2080,14 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev,
msi_info = msi_get_domain_info(domain);
its = msi_info->data;

+ if (its->is_v4 && !gic_rdists->has_direct_lpi &&
+ dev_id == vpe_proxy_dev->device_id) {
+ /* Bad luck. Get yourself a better implementation */
+ WARN_ONCE(1, "DevId %x clashes with GICv4 VPE proxy device\n",
+ dev_id);
+ return -EINVAL;
+ }
+
its_dev = its_find_device(its, dev_id);
if (its_dev) {
/*
@@ -2330,13 +2341,48 @@ static int its_vpe_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)
}
}

+static void its_vpe_send_cmd(struct its_vpe *vpe,
+ void (*cmd)(struct its_device *, u32))
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&vpe_proxy_dev_lock, flags);
+
+ vpe_proxy_dev->event_map.col_map[0] = vpe->col_idx;
+ its_send_mapti(vpe_proxy_dev, vpe->vpe_db_lpi, 0);
+ cmd(vpe_proxy_dev, 0);
+ its_send_discard(vpe_proxy_dev, 0);
+
+ raw_spin_unlock_irqrestore(&vpe_proxy_dev_lock, flags);
+}
+
static void its_vpe_send_inv(struct irq_data *d)
{
struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
- void __iomem *rdbase;

- rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
- gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_INVLPIR);
+ if (gic_rdists->has_direct_lpi) {
+ void __iomem *rdbase;
+
+ rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
+ gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_INVLPIR);
+ } else {
+ /*
+ * This is insane.
+ *
+ * If a GICv4 doesn't implement Direct LPIs (which is
+ * extremely likely), the only way to perform an
+ * invalidate is to use a fake device to issue a
+ * MAP/INV/UNMAP sequence. Since each of these
+ * commands has a sync operation, this is really
+ * fast. Not.
+ *
+ * We always use event 0, and thus serialize all VPE
+ * invalidations in the system.
+ *
+ * Broken by design(tm).
+ */
+ its_vpe_send_cmd(vpe, its_send_inv);
+ }
}

static void its_vpe_mask_irq(struct irq_data *d)
@@ -2638,6 +2684,27 @@ static int its_init_domain(struct fwnode_handle *handle, struct its_node *its)

static int its_init_vpe_domain(void)
{
+ struct its_node *its;
+ u32 devid;
+
+ if (gic_rdists->has_direct_lpi) {
+ pr_info("ITS: Using DirectLPI for VPE invalidation\n");
+ return 0;
+ }
+
+ /* Any ITS will do, even if not v4 */
+ its = list_first_entry(&its_nodes, struct its_node, entry);
+
+ /* Use the last possible DevID */
+ devid = GENMASK(its->device_ids - 1, 0);
+ vpe_proxy_dev = its_create_device(its, devid, 1);
+ if (!vpe_proxy_dev) {
+ pr_err("ITS: Can't allocate GICv4 proxy device\n");
+ return -ENODEV;
+ }
+
+ pr_info("ITS: Allocated DevID %x as GICv4 proxy device\n", devid);

Marc Zyngier

unread,
Jul 31, 2017, 2:00:10 PM7/31/17
to
The whole MSI injection process is fairly monolithic. An MSI write
gets turned into an injected LPI in one swift go. But this is actually
a more fine-grained process:

- First, a virtual ITS gets selected using the doorbell address
- Then the DevID/EventID pair gets translated into an LPI
- Finally the LPI is injected

Since the GICv4 code needs the first two steps in order to match
an IRQ routing entry to an LPI, let's expose them as helpers,
and refactor the existing code to use them

Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
virt/kvm/arm/vgic/vgic-its.c | 93 +++++++++++++++++++++++++-------------------
virt/kvm/arm/vgic/vgic.h | 4 ++
2 files changed, 57 insertions(+), 40 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index aa6b68db80b4..fb7be8673bff 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -504,15 +504,8 @@ static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm,
return 0;
}

-/*
- * Find the target VCPU and the LPI number for a given devid/eventid pair
- * and make this IRQ pending, possibly injecting it.
- * Must be called with the its_lock mutex held.
- * Returns 0 on success, a positive error value for any ITS mapping
- * related errors and negative error values for generic errors.
- */
-static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
- u32 devid, u32 eventid)
+int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
+ u32 devid, u32 eventid, struct vgic_irq **irq)
{
struct kvm_vcpu *vcpu;
struct its_ite *ite;
@@ -531,26 +524,60 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
if (!vcpu->arch.vgic_cpu.lpis_enabled)
return -EBUSY;

- spin_lock(&ite->irq->irq_lock);
- ite->irq->pending_latch = true;
- vgic_queue_irq_unlock(kvm, ite->irq);
-
+ *irq = ite->irq;
return 0;
}

-static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev)
+struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi)
{
+ u64 address;
+ struct kvm_io_device *kvm_io_dev;
struct vgic_io_device *iodev;

- if (dev->ops != &kvm_io_gic_ops)
- return NULL;
+ if (!vgic_has_its(kvm))
+ return ERR_PTR(-ENODEV);

- iodev = container_of(dev, struct vgic_io_device, dev);
+ if (!(msi->flags & KVM_MSI_VALID_DEVID))
+ return ERR_PTR(-EINVAL);
+
+ address = (u64)msi->address_hi << 32 | msi->address_lo;
+
+ kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address);
+ if (!kvm_io_dev)
+ return ERR_PTR(-EINVAL);

+ if (kvm_io_dev->ops != &kvm_io_gic_ops)
+ return ERR_PTR(-EINVAL);
+
+ iodev = container_of(kvm_io_dev, struct vgic_io_device, dev);
if (iodev->iodev_type != IODEV_ITS)
- return NULL;
+ return ERR_PTR(-EINVAL);

- return iodev;
+ return iodev->its;
+}
+
+/*
+ * Find the target VCPU and the LPI number for a given devid/eventid pair
+ * and make this IRQ pending, possibly injecting it.
+ * Must be called with the its_lock mutex held.
+ * Returns 0 on success, a positive error value for any ITS mapping
+ * related errors and negative error values for generic errors.
+ */
+static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
+ u32 devid, u32 eventid)
+{
+ struct vgic_irq *irq = NULL;
+ int err;
+
+ err = vgic_its_resolve_lpi(kvm, its, devid, eventid, &irq);
+ if (err)
+ return err;
+
+ spin_lock(&irq->irq_lock);
+ irq->pending_latch = true;
+ vgic_queue_irq_unlock(kvm, irq);
+
+ return 0;
}

/*
@@ -561,30 +588,16 @@ static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev)
*/
int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
{
- u64 address;
- struct kvm_io_device *kvm_io_dev;
- struct vgic_io_device *iodev;
+ struct vgic_its *its;
int ret;

- if (!vgic_has_its(kvm))
- return -ENODEV;
-
- if (!(msi->flags & KVM_MSI_VALID_DEVID))
- return -EINVAL;
-
- address = (u64)msi->address_hi << 32 | msi->address_lo;
-
- kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address);
- if (!kvm_io_dev)
- return -EINVAL;
+ its = vgic_msi_to_its(kvm, msi);
+ if (IS_ERR(its))
+ return PTR_ERR(its);

- iodev = vgic_get_its_iodev(kvm_io_dev);
- if (!iodev)
- return -EINVAL;
-
- mutex_lock(&iodev->its->its_lock);
- ret = vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data);
- mutex_unlock(&iodev->its->its_lock);
+ mutex_lock(&its->its_lock);
+ ret = vgic_its_trigger_msi(kvm, its, msi->devid, msi->data);
+ mutex_unlock(&its->its_lock);

if (ret < 0)
return ret;
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index da254ae29aec..3002b72d938b 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -225,4 +225,8 @@ int vgic_debug_destroy(struct kvm *kvm);
bool lock_all_vcpus(struct kvm *kvm);
void unlock_all_vcpus(struct kvm *kvm);

+int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
+ u32 devid, u32 eventid, struct vgic_irq **irq);
+struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi);
+
#endif
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 2:00:13 PM7/31/17
to
Add the required interfaces to map, unmap and update a VLPI.

Reviewed-by: Eric Auger <eric....@redhat.com>
Reviewed-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/irq-gic-v4.c | 42 ++++++++++++++++++++++++++++++++++++++
include/linux/irqchip/arm-gic-v4.h | 4 ++++
2 files changed, 46 insertions(+)

diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c
index 86b46fb532ab..6fe2e09172ed 100644
--- a/drivers/irqchip/irq-gic-v4.c
+++ b/drivers/irqchip/irq-gic-v4.c
@@ -96,3 +96,45 @@ int its_invall_vpe(struct its_vpe *vpe)

return its_send_vpe_cmd(vpe, &info);
}
+
+int its_map_vlpi(int irq, struct its_vlpi_map *map)
+{
+ struct its_cmd_info info = {
+ .cmd_type = MAP_VLPI,
+ .map = map,
+ };
+
+ /*
+ * The host will never see that interrupt firing again, so it
+ * is vital that we don't do any lazy masking.
+ */
+ irq_set_status_flags(irq, IRQ_DISABLE_UNLAZY);
+
+ return irq_set_vcpu_affinity(irq, &info);
+}
+
+int its_get_vlpi(int irq, struct its_vlpi_map *map)
+{
+ struct its_cmd_info info = {
+ .cmd_type = GET_VLPI,
+ .map = map,
+ };
+
+ return irq_set_vcpu_affinity(irq, &info);
+}
+
+int its_unmap_vlpi(int irq)
+{
+ irq_clear_status_flags(irq, IRQ_DISABLE_UNLAZY);
+ return irq_set_vcpu_affinity(irq, NULL);
+}
+
+int its_prop_update_vlpi(int irq, u8 config, bool inv)
+{
+ struct its_cmd_info info = {
+ .cmd_type = inv ? PROP_UPDATE_AND_INV_VLPI : PROP_UPDATE_VLPI,
+ .config = config,
+ };
+
+ return irq_set_vcpu_affinity(irq, &info);
+}
diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h
index faaafd12d8c7..f495d4d1f7d7 100644
--- a/include/linux/irqchip/arm-gic-v4.h
+++ b/include/linux/irqchip/arm-gic-v4.h
@@ -93,5 +93,9 @@ int its_alloc_vcpu_irqs(struct its_vm *vm);
void its_free_vcpu_irqs(struct its_vm *vm);
int its_schedule_vpe(struct its_vpe *vpe, bool on);
int its_invall_vpe(struct its_vpe *vpe);
+int its_map_vlpi(int irq, struct its_vlpi_map *map);
+int its_get_vlpi(int irq, struct its_vlpi_map *map);
+int its_unmap_vlpi(int irq);
+int its_prop_update_vlpi(int irq, u8 config, bool inv);

#endif
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 2:00:14 PM7/31/17
to
Rework LPI deallocation so that it can be reused by the v4 support
code.

Reviewed-by: Eric Auger <eric....@redhat.com>
Reviewed-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/irq-gic-v3-its.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index ebdb1603b95d..1298a9bca56a 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -875,16 +875,15 @@ static unsigned long *its_lpi_alloc_chunks(int nr_irqs, int *base, int *nr_ids)
return bitmap;
}

-static void its_lpi_free(struct event_lpi_map *map)
+static void its_lpi_free_chunks(unsigned long *bitmap, int base, int nr_ids)
{
- int base = map->lpi_base;
- int nr_ids = map->nr_lpis;
int lpi;

spin_lock(&lpi_lock);

for (lpi = base; lpi < (base + nr_ids); lpi += IRQS_PER_CHUNK) {
int chunk = its_lpi_to_chunk(lpi);
+
BUG_ON(chunk > lpi_chunks);
if (test_bit(chunk, lpi_bitmap)) {
clear_bit(chunk, lpi_bitmap);
@@ -895,8 +894,7 @@ static void its_lpi_free(struct event_lpi_map *map)

spin_unlock(&lpi_lock);

- kfree(map->lpi_map);
- kfree(map->col_map);
+ kfree(bitmap);
}

static struct page *its_allocate_prop_table(gfp_t gfp_flags)
@@ -1668,7 +1666,10 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq,
/* If all interrupts have been freed, start mopping the floor */
if (bitmap_empty(its_dev->event_map.lpi_map,
its_dev->event_map.nr_lpis)) {
- its_lpi_free(&its_dev->event_map);
+ its_lpi_free_chunks(its_dev->event_map.lpi_map,
+ its_dev->event_map.lpi_base,
+ its_dev->event_map.nr_lpis);
+ kfree(its_dev->event_map.col_map);

/* Unmap device/itt */
its_send_mapd(its_dev, 0);
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 2:00:14 PM7/31/17
to
Most ITS commands do operate on a collection object, and require
a SYNC command to be performed on that collection in order to
guarantee the execution of the first command.

With GICv4 ITS, another set of commands perform similar operations
on a VPE object, and a VSYNC operations must be executed to guarantee
their execution.

Given the similarities (post a command, perform a synchronization
operation on a sync object), it makes sense to reuse the same
mechanism for both class of commands.

Let's start with turning its_send_single_command into a huge macro
that performs the bulk of the work, and a set of helpers that
make this macro usable for the GICv3 ITS commands.

Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/irq-gic-v3-its.c | 84 ++++++++++++++++++++++------------------
1 file changed, 47 insertions(+), 37 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index e7a7dc9ad585..0f345c0fa700 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -488,43 +488,53 @@ static void its_wait_for_range_completion(struct its_node *its,
}
}

-static void its_send_single_command(struct its_node *its,
- its_cmd_builder_t builder,
- struct its_cmd_desc *desc)
-{
- struct its_cmd_block *cmd, *sync_cmd, *next_cmd;
- struct its_collection *sync_col;
- unsigned long flags;
-
- raw_spin_lock_irqsave(&its->lock, flags);
-
- cmd = its_allocate_entry(its);
- if (!cmd) { /* We're soooooo screewed... */
- pr_err_ratelimited("ITS can't allocate, dropping command\n");
- raw_spin_unlock_irqrestore(&its->lock, flags);
- return;
- }
- sync_col = builder(cmd, desc);
- its_flush_cmd(its, cmd);
-
- if (sync_col) {
- sync_cmd = its_allocate_entry(its);
- if (!sync_cmd) {
- pr_err_ratelimited("ITS can't SYNC, skipping\n");
- goto post;
- }
- its_encode_cmd(sync_cmd, GITS_CMD_SYNC);
- its_encode_target(sync_cmd, sync_col->target_address);
- its_fixup_cmd(sync_cmd);
- its_flush_cmd(its, sync_cmd);
- }
-
-post:
- next_cmd = its_post_commands(its);
- raw_spin_unlock_irqrestore(&its->lock, flags);
-
- its_wait_for_range_completion(its, cmd, next_cmd);
-}
+/* Warning, macro hell follows */
+#define BUILD_SINGLE_CMD_FUNC(name, buildtype, synctype, buildfn) \
+void name(struct its_node *its, \
+ buildtype builder, \
+ struct its_cmd_desc *desc) \
+{ \
+ struct its_cmd_block *cmd, *sync_cmd, *next_cmd; \
+ synctype *sync_obj; \
+ unsigned long flags; \
+ \
+ raw_spin_lock_irqsave(&its->lock, flags); \
+ \
+ cmd = its_allocate_entry(its); \
+ if (!cmd) { /* We're soooooo screewed... */ \
+ raw_spin_unlock_irqrestore(&its->lock, flags); \
+ return; \
+ } \
+ sync_obj = builder(cmd, desc); \
+ its_flush_cmd(its, cmd); \
+ \
+ if (sync_obj) { \
+ sync_cmd = its_allocate_entry(its); \
+ if (!sync_cmd) \
+ goto post; \
+ \
+ buildfn(sync_cmd, sync_obj); \
+ its_flush_cmd(its, sync_cmd); \
+ } \
+ \
+post: \
+ next_cmd = its_post_commands(its); \
+ raw_spin_unlock_irqrestore(&its->lock, flags); \
+ \
+ its_wait_for_range_completion(its, cmd, next_cmd); \
+}
+
+static void its_build_sync_cmd(struct its_cmd_block *sync_cmd,
+ struct its_collection *sync_col)
+{
+ its_encode_cmd(sync_cmd, GITS_CMD_SYNC);
+ its_encode_target(sync_cmd, sync_col->target_address);
+
+ its_fixup_cmd(sync_cmd);
+}
+
+static BUILD_SINGLE_CMD_FUNC(its_send_single_command, its_cmd_builder_t,
+ struct its_collection, its_build_sync_cmd)

static void its_send_inv(struct its_device *dev, u32 event_id)
{
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 2:00:16 PM7/31/17
to
When creating a VM, it is very convenient to have an irq domain
containing all the doorbell interrupts associated with that VM
(each interrupt representing a VPE).

Reviewed-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/irq-gic-v4.c | 73 ++++++++++++++++++++++++++++++++++++++
include/linux/irqchip/arm-gic-v4.h | 3 ++
2 files changed, 76 insertions(+)
create mode 100644 drivers/irqchip/irq-gic-v4.c

diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c
new file mode 100644
index 000000000000..f7fe6181ec8d
--- /dev/null
+++ b/drivers/irqchip/irq-gic-v4.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2016,2017 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.z...@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/msi.h>
+
+#include <linux/irqchip/arm-gic-v4.h>
+
+static struct irq_domain *gic_domain;
+static const struct irq_domain_ops *vpe_domain_ops;
+
+int its_alloc_vcpu_irqs(struct its_vm *vm)
+{
+ int vpe_base_irq, i;
+
+ vm->fwnode = irq_domain_alloc_named_id_fwnode("GICv4-vpe",
+ task_pid_nr(current));
+ if (!vm->fwnode)
+ goto err;
+
+ vm->domain = irq_domain_create_hierarchy(gic_domain, 0, vm->nr_vpes,
+ vm->fwnode, vpe_domain_ops,
+ vm);
+ if (!vm->domain)
+ goto err;
+
+ for (i = 0; i < vm->nr_vpes; i++) {
+ vm->vpes[i]->its_vm = vm;
+ vm->vpes[i]->idai = true;
+ }
+
+ vpe_base_irq = __irq_domain_alloc_irqs(vm->domain, -1, vm->nr_vpes,
+ NUMA_NO_NODE, vm,
+ false, NULL);
+ if (vpe_base_irq <= 0)
+ goto err;
+
+ for (i = 0; i < vm->nr_vpes; i++)
+ vm->vpes[i]->irq = vpe_base_irq + i;
+
+ return 0;
+
+err:
+ if (vm->domain)
+ irq_domain_remove(vm->domain);
+ if (vm->fwnode)
+ irq_domain_free_fwnode(vm->fwnode);
+
+ return -ENOMEM;
+}
+
+void its_free_vcpu_irqs(struct its_vm *vm)
+{
+ irq_domain_free_irqs(vm->vpes[0]->irq, vm->nr_vpes);
+ irq_domain_remove(vm->domain);
+ irq_domain_free_fwnode(vm->fwnode);
+}
diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h
index 5ba0fb6b094d..b31fb8aeba03 100644
--- a/include/linux/irqchip/arm-gic-v4.h
+++ b/include/linux/irqchip/arm-gic-v4.h
@@ -89,4 +89,7 @@ struct its_cmd_info {
};
};

+int its_alloc_vcpu_irqs(struct its_vm *vm);
+void its_free_vcpu_irqs(struct its_vm *vm);
+
#endif
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 2:00:16 PM7/31/17
to
The VCPU tables can be quite sparse as well, and it makes sense
to use indirect tables as well if possible.

Reviewed-by: Thomas Gleixner <tg...@linutronix.de>
Reviewed-by: Eric Auger <eric....@redhat.com>
Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/irq-gic-v3-its.c | 25 +++++++++++++++++--------
1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 039626d21a73..9a317934455c 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1071,10 +1071,13 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser,
return 0;
}

-static bool its_parse_baser_device(struct its_node *its, struct its_baser *baser,
- u32 psz, u32 *order)
+static bool its_parse_indirect_baser(struct its_node *its,
+ struct its_baser *baser,
+ u32 psz, u32 *order)
{
- u64 esz = GITS_BASER_ENTRY_SIZE(its_read_baser(its, baser));
+ u64 tmp = its_read_baser(its, baser);
+ u64 type = GITS_BASER_TYPE(tmp);
+ u64 esz = GITS_BASER_ENTRY_SIZE(tmp);
u64 val = GITS_BASER_InnerShareable | GITS_BASER_RaWaWb;
u32 ids = its->device_ids;
u32 new_order = *order;
@@ -1113,8 +1116,9 @@ static bool its_parse_baser_device(struct its_node *its, struct its_baser *baser
if (new_order >= MAX_ORDER) {
new_order = MAX_ORDER - 1;
ids = ilog2(PAGE_ORDER_TO_SIZE(new_order) / (int)esz);
- pr_warn("ITS@%pa: Device Table too large, reduce ids %u->%u\n",
- &its->phys_base, its->device_ids, ids);
+ pr_warn("ITS@%pa: %s Table too large, reduce ids %u->%u\n",
+ &its->phys_base, its_base_type_string[type],
+ its->device_ids, ids);
}

*order = new_order;
@@ -1162,11 +1166,16 @@ static int its_alloc_tables(struct its_node *its)
u32 order = get_order(psz);
bool indirect = false;

- if (type == GITS_BASER_TYPE_NONE)
+ switch (type) {
+ case GITS_BASER_TYPE_NONE:
continue;

- if (type == GITS_BASER_TYPE_DEVICE)
- indirect = its_parse_baser_device(its, baser, psz, &order);
+ case GITS_BASER_TYPE_DEVICE:
+ case GITS_BASER_TYPE_VCPU:
+ indirect = its_parse_indirect_baser(its, baser,
+ psz, &order);
+ break;
+ }

err = its_setup_baser(its, baser, cache, shr, psz, order, indirect);
if (err < 0) {
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 2:00:18 PM7/31/17
to
The GICv4 support introduces a hard dependency between the KVM
core and the ITS infrastructure. arm64 already selects it at
the architecture level, but 32bit doesn't. In order to avoid
littering the kernel with #ifdefs, let's just select the whole
of the GICv3 suport code.

You know you want it.

Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
arch/arm/kvm/Kconfig | 2 ++
1 file changed, 2 insertions(+)

diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 4e2b192a030a..52b50af04e3b 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -23,6 +23,8 @@ config KVM
select PREEMPT_NOTIFIERS
select ANON_INODES
select ARM_GIC
+ select ARM_GIC_V3
+ select ARM_GIC_V3_ITS
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
select KVM_MMIO
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 2:00:18 PM7/31/17
to
When a VLPI is reconfigured (enabled, disabled, change in priority),
the full configuration byte must be written, and the caches invalidated.

Also, when using the irq_mask/irq_unmask methods, it is necessary
to disable the doorbell for that particular interrupt (by mapping it
to 1023) on top of clearing the Enable bit.

Reviewed-by: Thomas Gleixner <tg...@linutronix.de>
Reviewed-by: Eric Auger <eric....@redhat.com>
Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/irq-gic-v3-its.c | 75 +++++++++++++++++++++++++++++++++++++---
1 file changed, 70 insertions(+), 5 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index ef31c3e58c01..b618c259be8b 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -806,18 +806,26 @@ static inline u32 its_get_event_id(struct irq_data *d)
return d->hwirq - its_dev->event_map.lpi_base;
}

-static void lpi_update_config(struct irq_data *d, u8 clr, u8 set)
+static void lpi_write_config(struct irq_data *d, u8 clr, u8 set)
{
- struct its_device *its_dev = irq_data_get_irq_chip_data(d);
- irq_hw_number_t hwirq = d->hwirq;
+ irq_hw_number_t hwirq;
struct page *prop_page;
u8 *cfg;

- prop_page = gic_rdists->prop_page;
+ if (irqd_is_forwarded_to_vcpu(d)) {
+ struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+ u32 event = its_get_event_id(d);
+
+ prop_page = its_dev->event_map.vm->vprop_page;
+ hwirq = its_dev->event_map.vlpi_maps[event].vintid;
+ } else {
+ prop_page = gic_rdists->prop_page;
+ hwirq = d->hwirq;
+ }

cfg = page_address(prop_page) + hwirq - 8192;
*cfg &= ~clr;
- *cfg |= set;
+ *cfg |= set | LPI_PROP_GROUP1;

/*
* Make the above write visible to the redistributors.
@@ -828,16 +836,52 @@ static void lpi_update_config(struct irq_data *d, u8 clr, u8 set)
gic_flush_dcache_to_poc(cfg, sizeof(*cfg));
else
dsb(ishst);
+}
+
+static void lpi_update_config(struct irq_data *d, u8 clr, u8 set)
+{
+ struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+
+ lpi_write_config(d, clr, set);
its_send_inv(its_dev, its_get_event_id(d));
}

+static void its_vlpi_set_doorbell(struct irq_data *d, bool enable)
+{
+ struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+ u32 event = its_get_event_id(d);
+
+ if (its_dev->event_map.vlpi_maps[event].db_enabled == enable)
+ return;
+
+ its_dev->event_map.vlpi_maps[event].db_enabled = enable;
+
+ /*
+ * More fun with the architecture:
+ *
+ * Ideally, we'd issue a VMAPTI to set the doorbell to its LPI
+ * value or to 1023, depending on the enable bit. But that
+ * would be issueing a mapping for an /existing/ DevID+EventID
+ * pair, which is UNPREDICTABLE. Instead, let's issue a VMOVI
+ * to the /same/ vPE, using this opportunity to adjust the
+ * doorbell. Mouahahahaha. We loves it, Precious.
+ */
+ its_send_vmovi(its_dev, event);
+}
+
static void its_mask_irq(struct irq_data *d)
{
+ if (irqd_is_forwarded_to_vcpu(d))
+ its_vlpi_set_doorbell(d, false);
+
lpi_update_config(d, LPI_PROP_ENABLED, 0);
}

static void its_unmask_irq(struct irq_data *d)
{
+ if (irqd_is_forwarded_to_vcpu(d))
+ its_vlpi_set_doorbell(d, true);
+
lpi_update_config(d, 0, LPI_PROP_ENABLED);
}

@@ -850,6 +894,10 @@ static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
struct its_collection *target_col;
u32 id = its_get_event_id(d);

+ /* A forwarded interrupt should use irq_set_vcpu_affinity */
+ if (irqd_is_forwarded_to_vcpu(d))
+ return -EINVAL;
+
/* lpi cannot be routed to a redistributor that is on a foreign node */
if (its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) {
if (its_dev->its->numa_node >= 0) {
@@ -1018,6 +1066,22 @@ static int its_vlpi_unmap(struct irq_data *d)
return ret;
}

+static int its_vlpi_prop_update(struct irq_data *d, struct its_cmd_info *info)
+{
+ struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+
+ if (!its_dev->event_map.vm || !irqd_is_forwarded_to_vcpu(d))
+ return -EINVAL;
+
+ if (info->cmd_type == PROP_UPDATE_AND_INV_VLPI)
+ lpi_update_config(d, 0xff, info->config);
+ else
+ lpi_write_config(d, 0xff, info->config);
+ its_vlpi_set_doorbell(d, !!(info->config & LPI_PROP_ENABLED));
+
+ return 0;
+}
+
static int its_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)
{
struct its_device *its_dev = irq_data_get_irq_chip_data(d);
@@ -1040,6 +1104,7 @@ static int its_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)

case PROP_UPDATE_VLPI:
case PROP_UPDATE_AND_INV_VLPI:
+ return its_vlpi_prop_update(d, info);

default:
return -EINVAL;
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 2:00:18 PM7/31/17
to
When creating a VM, the low level GICv4 code is responsible for:
- allocating each VPE a unique VPEID
- allocating a doorbell interrupt for each VPE
- allocating the pending tables for each VPE
- allocating the property table for the VM

This of course has to be reversed when the VM is brought down.

All of this is wired into the irq domain alloc/free methods.

Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/irq-gic-v3-its.c | 169 +++++++++++++++++++++++++++++++++++++++
1 file changed, 169 insertions(+)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 1fa345d53088..0f9a487448b9 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -148,6 +148,7 @@ static struct irq_domain *its_parent;
#define ITS_LIST_MAX 16

static unsigned long its_list_map;
+static DEFINE_IDA(its_vpeid_ida);

#define gic_data_rdist() (raw_cpu_ptr(gic_rdists->rdist))
#define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base)
@@ -1249,6 +1250,11 @@ static struct page *its_allocate_prop_table(gfp_t gfp_flags)
return prop_page;
}

+static void its_free_prop_table(struct page *prop_page)
+{
+ free_pages((unsigned long)page_address(prop_page),
+ get_order(LPI_PROPBASE_SZ));
+}

static int __init its_alloc_lpi_tables(void)
{
@@ -1551,6 +1557,12 @@ static struct page *its_allocate_pending_table(gfp_t gfp_flags)
return pend_page;
}

+static void its_free_pending_table(struct page *pt)
+{
+ free_pages((unsigned long)page_address(pt),
+ get_order(max_t(u32, LPI_PENDBASE_SZ, SZ_64K)));
+}
+
static void its_cpu_init_lpis(void)
{
void __iomem *rbase = gic_data_rdist_rd_base();
@@ -1773,6 +1785,34 @@ static bool its_alloc_device_table(struct its_node *its, u32 dev_id)
return its_alloc_table_entry(baser, dev_id);
}

+static bool its_alloc_vpe_table(u32 vpe_id)
+{
+ struct its_node *its;
+
+ /*
+ * Make sure the L2 tables are allocated on *all* v4 ITSs. We
+ * could try and only do it on ITSs corresponding to devices
+ * that have interrupts targeted at this VPE, but the
+ * complexity becomes crazy (and you have tons of memory
+ * anyway, right?).
+ */
+ list_for_each_entry(its, &its_nodes, entry) {
+ struct its_baser *baser;
+
+ if (!its->is_v4)
+ continue;
+
+ baser = its_get_baser(its, GITS_BASER_TYPE_VCPU);
+ if (!baser)
+ return false;
+
+ if (!its_alloc_table_entry(baser, vpe_id))
+ return false;
+ }
+
+ return true;
+}
+
static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
int nvecs)
{
@@ -2030,7 +2070,136 @@ static struct irq_chip its_vpe_irq_chip = {
.name = "GICv4-vpe",
};

+static int its_vpe_id_alloc(void)
+{
+ return ida_simple_get(&its_vpeid_ida, 0, 1 << 16, GFP_KERNEL);
+}
+
+static void its_vpe_id_free(u16 id)
+{
+ ida_simple_remove(&its_vpeid_ida, id);
+}
+
+static int its_vpe_init(struct its_vpe *vpe)
+{
+ struct page *vpt_page;
+ int vpe_id;
+
+ /* Allocate vpe_id */
+ vpe_id = its_vpe_id_alloc();
+ if (vpe_id < 0)
+ return vpe_id;
+
+ /* Allocate VPT */
+ vpt_page = its_allocate_pending_table(GFP_KERNEL);
+ if (!vpt_page) {
+ its_vpe_id_free(vpe_id);
+ return -ENOMEM;
+ }
+
+ if (!its_alloc_vpe_table(vpe_id)) {
+ its_vpe_id_free(vpe_id);
+ its_free_pending_table(vpe->vpt_page);
+ return -ENOMEM;
+ }
+
+ vpe->vpe_id = vpe_id;
+ vpe->vpt_page = vpt_page;
+
+ return 0;
+}
+
+static void its_vpe_teardown(struct its_vpe *vpe)
+{
+ its_vpe_id_free(vpe->vpe_id);
+ its_free_pending_table(vpe->vpt_page);
+}
+
+static void its_vpe_irq_domain_free(struct irq_domain *domain,
+ unsigned int virq,
+ unsigned int nr_irqs)
+{
+ struct its_vm *vm = domain->host_data;
+ int i;
+
+ irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+
+ for (i = 0; i < nr_irqs; i++) {
+ struct irq_data *data = irq_domain_get_irq_data(domain,
+ virq + i);
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(data);
+
+ BUG_ON(vm != vpe->its_vm);
+
+ clear_bit(data->hwirq, vm->db_bitmap);
+ its_vpe_teardown(vpe);
+ irq_domain_reset_irq_data(data);
+ }
+
+ if (bitmap_empty(vm->db_bitmap, vm->nr_db_lpis)) {
+ its_lpi_free_chunks(vm->db_bitmap, vm->db_lpi_base, vm->nr_db_lpis);
+ its_free_prop_table(vm->vprop_page);
+ }
+}
+
+static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *args)
+{
+ struct its_vm *vm = args;
+ unsigned long *bitmap;
+ struct page *vprop_page;
+ int base, nr_ids, i, err = 0;
+
+ BUG_ON(!vm);
+
+ bitmap = its_lpi_alloc_chunks(nr_irqs, &base, &nr_ids);
+ if (!bitmap)
+ return -ENOMEM;
+
+ if (nr_ids < nr_irqs) {
+ its_lpi_free_chunks(bitmap, base, nr_ids);
+ return -ENOMEM;
+ }
+
+ vprop_page = its_allocate_prop_table(GFP_KERNEL);
+ if (!vprop_page) {
+ its_lpi_free_chunks(bitmap, base, nr_ids);
+ return -ENOMEM;
+ }
+
+ vm->db_bitmap = bitmap;
+ vm->db_lpi_base = base;
+ vm->nr_db_lpis = nr_ids;
+ vm->vprop_page = vprop_page;
+
+ for (i = 0; i < nr_irqs; i++) {
+ vm->vpes[i]->vpe_db_lpi = base + i;
+ err = its_vpe_init(vm->vpes[i]);
+ if (err)
+ break;
+ err = its_irq_gic_domain_alloc(domain, virq + i,
+ vm->vpes[i]->vpe_db_lpi);
+ if (err)
+ break;
+ irq_domain_set_hwirq_and_chip(domain, virq + i, i,
+ &its_vpe_irq_chip, vm->vpes[i]);
+ set_bit(i, bitmap);
+ }
+
+ if (err) {
+ if (i > 0)
+ its_vpe_irq_domain_free(domain, virq, i - 1);
+
+ its_lpi_free_chunks(bitmap, base, nr_ids);
+ its_free_prop_table(vprop_page);
+ }
+
+ return err;
+}
+
static const struct irq_domain_ops its_vpe_domain_ops = {
+ .alloc = its_vpe_irq_domain_alloc,
+ .free = its_vpe_irq_domain_free,
};

static int its_force_quiescent(void __iomem *base)
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 2:00:18 PM7/31/17
to
Add the new GICv4 ITS command definitions, most of them, being
defined in terms of their physical counterparts.

Reviewed-by: Eric Auger <eric....@redhat.com>
Reviewed-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/irq-gic-v3-its.c | 2 +-
include/linux/irqchip/arm-gic-v3.h | 12 ++++++++++++
2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 7ef448a963c1..bc396ab24037 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved.
+ * Copyright (C) 2013-2017 ARM Limited, All Rights Reserved.
* Author: Marc Zyngier <marc.z...@arm.com>
*
* This program is free software; you can redistribute it and/or modify
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index af8c55105fc2..7c6fd8f3e36c 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -348,6 +348,18 @@
#define GITS_CMD_SYNC 0x05

/*
+ * GICv4 ITS specific commands
+ */
+#define GITS_CMD_GICv4(x) ((x) | 0x20)
+#define GITS_CMD_VINVALL GITS_CMD_GICv4(GITS_CMD_INVALL)
+#define GITS_CMD_VMAPP GITS_CMD_GICv4(GITS_CMD_MAPC)
+#define GITS_CMD_VMAPTI GITS_CMD_GICv4(GITS_CMD_MAPTI)
+#define GITS_CMD_VMOVI GITS_CMD_GICv4(GITS_CMD_MOVI)
+#define GITS_CMD_VSYNC GITS_CMD_GICv4(GITS_CMD_SYNC)
+/* VMOVP is the odd one, as it doesn't have a physical counterpart */
+#define GITS_CMD_VMOVP GITS_CMD_GICv4(2)
+
+/*
* ITS error numbers
*/
#define E_ITS_MOVI_UNMAPPED_INTERRUPT 0x010107
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 2:00:23 PM7/31/17
to
V{PEND,PROP}BASER being 64bit registers, they need some ad-hoc
accessors on 32bit, specially given that VPENDBASER contains
a Valid bit, making the access a bit convoluted.

Reviewed-by: Thomas Gleixner <tg...@linutronix.de>
Reviewed-by: Eric Auger <eric....@redhat.com>
Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
arch/arm/include/asm/arch_gicv3.h | 28 ++++++++++++++++++++++++++++
arch/arm64/include/asm/arch_gicv3.h | 5 +++++
include/linux/irqchip/arm-gic-v3.h | 5 +++++
3 files changed, 38 insertions(+)

diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h
index 27475904e096..8d45e88feac9 100644
--- a/arch/arm/include/asm/arch_gicv3.h
+++ b/arch/arm/include/asm/arch_gicv3.h
@@ -291,5 +291,33 @@ static inline u64 __gic_readq_nonatomic(const volatile void __iomem *addr)
*/
#define gits_write_cwriter(v, c) __gic_writeq_nonatomic(v, c)

+/*
+ * GITS_VPROPBASER - hi and lo bits may be accessed independently.
+ */
+#define gits_write_vpropbaser(v, c) __gic_writeq_nonatomic(v, c)
+
+/*
+ * GITS_VPENDBASER - the Valid bit must be cleared before changing
+ * anything else.
+ */
+static inline void gits_write_vpendbaser(u64 val, void * __iomem addr)
+{
+ u32 tmp;
+
+ tmp = readl_relaxed(addr + 4);
+ if (tmp & (GICR_VPENDBASER_Valid >> 32)) {
+ tmp &= ~(GICR_VPENDBASER_Valid >> 32);
+ writel_relaxed(tmp, addr + 4);
+ }
+
+ /*
+ * Use the fact that __gic_writeq_nonatomic writes the second
+ * half of the 64bit quantity after the first.
+ */
+ __gic_writeq_nonatomic(val, addr);
+}
+
+#define gits_read_vpendbaser(c) __gic_readq_nonatomic(c)
+
#endif /* !__ASSEMBLY__ */
#endif /* !__ASM_ARCH_GICV3_H */
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 8cef47fa2218..0d2a53457c30 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -133,5 +133,10 @@ static inline void gic_write_bpr1(u32 val)
#define gicr_write_pendbaser(v, c) writeq_relaxed(v, c)
#define gicr_read_pendbaser(c) readq_relaxed(c)

+#define gits_write_vpropbaser(v, c) writeq_relaxed(v, c)
+
+#define gits_write_vpendbaser(v, c) writeq_relaxed(v, c)
+#define gits_read_vpendbaser(c) readq_relaxed(c)
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_ARCH_GICV3_H */
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 7c6fd8f3e36c..17ba0d732f12 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -212,6 +212,11 @@
#define LPI_PROP_GROUP1 (1 << 1)
#define LPI_PROP_ENABLED (1 << 0)

+#define GICR_VPENDBASER_Dirty (1ULL << 60)
+#define GICR_VPENDBASER_PendingLast (1ULL << 61)
+#define GICR_VPENDBASER_IDAI (1ULL << 62)
+#define GICR_VPENDBASER_Valid (1ULL << 63)
+
/*
* ITS registers, offsets from ITS_base
*/
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 2:00:24 PM7/31/17
to
Move the LPI property table allocation into its own function, as
this is going to be required for those associated with VMs in
the future.

Reviewed-by: Eric Auger <eric....@redhat.com>
Reviewed-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/irq-gic-v3-its.c | 31 +++++++++++++++++++++----------
1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 3e4ac8c13e21..039626d21a73 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -899,13 +899,32 @@ static void its_lpi_free(struct event_lpi_map *map)
kfree(map->col_map);
}

+static struct page *its_allocate_prop_table(gfp_t gfp_flags)
+{
+ struct page *prop_page;
+
+ prop_page = alloc_pages(gfp_flags, get_order(LPI_PROPBASE_SZ));
+ if (!prop_page)
+ return NULL;
+
+ /* Priority 0xa0, Group-1, disabled */
+ memset(page_address(prop_page),
+ LPI_PROP_DEFAULT_PRIO | LPI_PROP_GROUP1,
+ LPI_PROPBASE_SZ);
+
+ /* Make sure the GIC will observe the written configuration */
+ gic_flush_dcache_to_poc(page_address(prop_page), LPI_PROPBASE_SZ);
+
+ return prop_page;
+}
+
+
static int __init its_alloc_lpi_tables(void)
{
phys_addr_t paddr;

lpi_id_bits = min_t(u32, gic_rdists->id_bits, ITS_MAX_LPI_NRBITS);
- gic_rdists->prop_page = alloc_pages(GFP_NOWAIT,
- get_order(LPI_PROPBASE_SZ));
+ gic_rdists->prop_page = its_allocate_prop_table(GFP_NOWAIT);
if (!gic_rdists->prop_page) {
pr_err("Failed to allocate PROPBASE\n");
return -ENOMEM;
@@ -914,14 +933,6 @@ static int __init its_alloc_lpi_tables(void)
paddr = page_to_phys(gic_rdists->prop_page);
pr_info("GIC: using LPI property table @%pa\n", &paddr);

- /* Priority 0xa0, Group-1, disabled */
- memset(page_address(gic_rdists->prop_page),
- LPI_PROP_DEFAULT_PRIO | LPI_PROP_GROUP1,
- LPI_PROPBASE_SZ);
-
- /* Make sure the GIC will observe the written configuration */
- gic_flush_dcache_to_poc(page_address(gic_rdists->prop_page), LPI_PROPBASE_SZ);
-
return its_lpi_init(lpi_id_bits);
}

--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 2:10:08 PM7/31/17
to
When assigning an interrupt to a vcpu, it is not unlikely that
the level of the hierarchy implementing irq_set_vcpu_affinity
is not the top level (think a generic MSI domain on top of a
virtualization aware interrupt controller).

In such a case, let's iterate over the hierarchy until we find
an irqchip implementing it.

Reviewed-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
kernel/irq/manage.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 1d1a5b945ab4..573dc52b0806 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -400,8 +400,18 @@ int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info)
return -EINVAL;

data = irq_desc_get_irq_data(desc);
- chip = irq_data_get_irq_chip(data);
- if (chip && chip->irq_set_vcpu_affinity)
+ do {
+ chip = irq_data_get_irq_chip(data);
+ if (chip && chip->irq_set_vcpu_affinity)
+ break;
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+ data = data->parent_data;
+#else
+ data = NULL;
+#endif
+ } while (data);
+
+ if (data)
ret = chip->irq_set_vcpu_affinity(data, vcpu_info);
irq_put_desc_unlock(desc, flags);

--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 2:10:08 PM7/31/17
to
Add helper functions that probe for VLPI and DirectLPI properties.

Reviewed-by: Eric Auger <eric....@redhat.com>
Reviewed-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/irq-gic-v3.c | 24 +++++++++++++++++++++++-
include/linux/irqchip/arm-gic-v3.h | 3 +++
2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 640f621c7f75..17f0ad41864f 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved.
+ * Copyright (C) 2013-2017 ARM Limited, All Rights Reserved.
* Author: Marc Zyngier <marc.z...@arm.com>
*
* This program is free software; you can redistribute it and/or modify
@@ -504,6 +504,24 @@ static int gic_populate_rdist(void)
return -ENODEV;
}

+static int __gic_update_vlpi_properties(struct redist_region *region,
+ void __iomem *ptr)
+{
+ u64 typer = gic_read_typer(ptr + GICR_TYPER);
+ gic_data.rdists.has_vlpis &= !!(typer & GICR_TYPER_VLPIS);
+ gic_data.rdists.has_direct_lpi &= !!(typer & GICR_TYPER_DirectLPIS);
+
+ return 1;
+}
+
+static void gic_update_vlpi_properties(void)
+{
+ gic_iterate_rdists(__gic_update_vlpi_properties);
+ pr_info("%sVLPI support, %sdirect LPI support\n",
+ !gic_data.rdists.has_vlpis ? "no " : "",
+ !gic_data.rdists.has_direct_lpi ? "no " : "");
+}
+
static void gic_cpu_sys_reg_init(void)
{
/*
@@ -968,6 +986,8 @@ static int __init gic_init_bases(void __iomem *dist_base,
gic_data.domain = irq_domain_create_tree(handle, &gic_irq_domain_ops,
&gic_data);
gic_data.rdists.rdist = alloc_percpu(typeof(*gic_data.rdists.rdist));
+ gic_data.rdists.has_vlpis = true;
+ gic_data.rdists.has_direct_lpi = true;

if (WARN_ON(!gic_data.domain) || WARN_ON(!gic_data.rdists.rdist)) {
err = -ENOMEM;
@@ -976,6 +996,8 @@ static int __init gic_init_bases(void __iomem *dist_base,

set_handle_irq(gic_handle_irq);

+ gic_update_vlpi_properties();
+
if (IS_ENABLED(CONFIG_ARM_GIC_V3_ITS) && gic_dist_supports_lpis())
its_init(handle, &gic_data.rdists, gic_data.domain);

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 6a1f87ff94e2..20a553423ac7 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -204,6 +204,7 @@

#define GICR_TYPER_PLPIS (1U << 0)
#define GICR_TYPER_VLPIS (1U << 1)
+#define GICR_TYPER_DirectLPIS (1U << 3)
#define GICR_TYPER_LAST (1U << 4)

#define GIC_V3_REDIST_SIZE 0x20000
@@ -487,6 +488,8 @@ struct rdists {
struct page *prop_page;
int id_bits;
u64 flags;
+ bool has_vlpis;
+ bool has_direct_lpi;
};

struct irq_domain;
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 2:10:09 PM7/31/17
to
Allow the pending state of an LPI to be set or cleared via
irq_set_irqchip_state.

Reviewed-by: Thomas Gleixner <tg...@linutronix.de>
Reviewed-by: Eric Auger <eric....@redhat.com>
Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/irq-gic-v3-its.c | 78 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 78 insertions(+)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 0f345c0fa700..3e4ac8c13e21 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -167,6 +167,11 @@ struct its_cmd_desc {
struct {
struct its_device *dev;
u32 event_id;
+ } its_clear_cmd;
+
+ struct {
+ struct its_device *dev;
+ u32 event_id;
} its_int_cmd;

struct {
@@ -380,6 +385,40 @@ static struct its_collection *its_build_inv_cmd(struct its_cmd_block *cmd,
return col;
}

+static struct its_collection *its_build_int_cmd(struct its_cmd_block *cmd,
+ struct its_cmd_desc *desc)
+{
+ struct its_collection *col;
+
+ col = dev_event_to_col(desc->its_int_cmd.dev,
+ desc->its_int_cmd.event_id);
+
+ its_encode_cmd(cmd, GITS_CMD_INT);
+ its_encode_devid(cmd, desc->its_int_cmd.dev->device_id);
+ its_encode_event_id(cmd, desc->its_int_cmd.event_id);
+
+ its_fixup_cmd(cmd);
+
+ return col;
+}
+
+static struct its_collection *its_build_clear_cmd(struct its_cmd_block *cmd,
+ struct its_cmd_desc *desc)
+{
+ struct its_collection *col;
+
+ col = dev_event_to_col(desc->its_clear_cmd.dev,
+ desc->its_clear_cmd.event_id);
+
+ its_encode_cmd(cmd, GITS_CMD_CLEAR);
+ its_encode_devid(cmd, desc->its_clear_cmd.dev->device_id);
+ its_encode_event_id(cmd, desc->its_clear_cmd.event_id);
+
+ its_fixup_cmd(cmd);
+
+ return col;
+}
+
static struct its_collection *its_build_invall_cmd(struct its_cmd_block *cmd,
struct its_cmd_desc *desc)
{
@@ -536,6 +575,26 @@ static void its_build_sync_cmd(struct its_cmd_block *sync_cmd,
static BUILD_SINGLE_CMD_FUNC(its_send_single_command, its_cmd_builder_t,
struct its_collection, its_build_sync_cmd)

+static void its_send_int(struct its_device *dev, u32 event_id)
+{
+ struct its_cmd_desc desc;
+
+ desc.its_int_cmd.dev = dev;
+ desc.its_int_cmd.event_id = event_id;
+
+ its_send_single_command(dev->its, its_build_int_cmd, &desc);
+}
+
+static void its_send_clear(struct its_device *dev, u32 event_id)
+{
+ struct its_cmd_desc desc;
+
+ desc.its_clear_cmd.dev = dev;
+ desc.its_clear_cmd.event_id = event_id;
+
+ its_send_single_command(dev->its, its_build_clear_cmd, &desc);
+}
+
static void its_send_inv(struct its_device *dev, u32 event_id)
{
struct its_cmd_desc desc;
@@ -702,6 +761,24 @@ static void its_irq_compose_msi_msg(struct irq_data *d, struct msi_msg *msg)
iommu_dma_map_msi_msg(d->irq, msg);
}

+static int its_irq_set_irqchip_state(struct irq_data *d,
+ enum irqchip_irq_state which,
+ bool state)
+{
+ struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+ u32 event = its_get_event_id(d);
+
+ if (which != IRQCHIP_STATE_PENDING)
+ return -EINVAL;
+
+ if (state)
+ its_send_int(its_dev, event);
+ else
+ its_send_clear(its_dev, event);
+
+ return 0;
+}
+
static struct irq_chip its_irq_chip = {
.name = "ITS",
.irq_mask = its_mask_irq,
@@ -709,6 +786,7 @@ static struct irq_chip its_irq_chip = {
.irq_eoi = irq_chip_eoi_parent,
.irq_set_affinity = its_set_affinity,
.irq_compose_msi_msg = its_irq_compose_msi_msg,
+ .irq_set_irqchip_state = its_irq_set_irqchip_state,
};

/*
--
2.11.0

Marc Zyngier

unread,
Jul 31, 2017, 2:10:09 PM7/31/17
to
In order to discover the VLPI properties, we need to iterate over
the redistributor regions. As we already have code that does this,
let's factor it out and make it slightly more generic.

Reviewed-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Marc Zyngier <marc.z...@arm.com>
---
drivers/irqchip/irq-gic-v3.c | 69 ++++++++++++++++++++++++++++++--------------
1 file changed, 47 insertions(+), 22 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index dbffb7ab6203..640f621c7f75 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -421,24 +421,14 @@ static void __init gic_dist_init(void)
gic_write_irouter(affinity, base + GICD_IROUTER + i * 8);
}

-static int gic_populate_rdist(void)
+static int gic_iterate_rdists(int (*fn)(struct redist_region *, void __iomem *))
{
- unsigned long mpidr = cpu_logical_map(smp_processor_id());
- u64 typer;
- u32 aff;
+ int ret = -ENODEV;
int i;

- /*
- * Convert affinity to a 32bit value that can be matched to
- * GICR_TYPER bits [63:32].
- */
- aff = (MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24 |
- MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 |
- MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 |
- MPIDR_AFFINITY_LEVEL(mpidr, 0));
-
for (i = 0; i < gic_data.nr_redist_regions; i++) {
void __iomem *ptr = gic_data.redist_regions[i].redist_base;
+ u64 typer;
u32 reg;

reg = readl_relaxed(ptr + GICR_PIDR2) & GIC_PIDR2_ARCH_MASK;
@@ -450,15 +440,9 @@ static int gic_populate_rdist(void)

do {
typer = gic_read_typer(ptr + GICR_TYPER);
- if ((typer >> 32) == aff) {
- u64 offset = ptr - gic_data.redist_regions[i].redist_base;
- gic_data_rdist_rd_base() = ptr;
- gic_data_rdist()->phys_base = gic_data.redist_regions[i].phys_base + offset;
- pr_info("CPU%d: found redistributor %lx region %d:%pa\n",
- smp_processor_id(), mpidr, i,
- &gic_data_rdist()->phys_base);
+ ret = fn(gic_data.redist_regions + i, ptr);
+ if (!ret)
return 0;
- }

if (gic_data.redist_regions[i].single_redist)
break;
@@ -473,9 +457,50 @@ static int gic_populate_rdist(void)
} while (!(typer & GICR_TYPER_LAST));
}

+ return ret ? -ENODEV : 0;
+}
+
+static int __gic_populate_rdist(struct redist_region *region, void __iomem *ptr)
+{
+ unsigned long mpidr = cpu_logical_map(smp_processor_id());
+ u64 typer;
+ u32 aff;
+
+ /*
+ * Convert affinity to a 32bit value that can be matched to
+ * GICR_TYPER bits [63:32].
+ */
+ aff = (MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24 |
+ MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 |
+ MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 |
+ MPIDR_AFFINITY_LEVEL(mpidr, 0));
+
+ typer = gic_read_typer(ptr + GICR_TYPER);
+ if ((typer >> 32) == aff) {
+ u64 offset = ptr - region->redist_base;
+ gic_data_rdist_rd_base() = ptr;
+ gic_data_rdist()->phys_base = region->phys_base + offset;
+
+ pr_info("CPU%d: found redistributor %lx region %d:%pa\n",
+ smp_processor_id(), mpidr,
+ (int)(region - gic_data.redist_regions),
+ &gic_data_rdist()->phys_base);
+ return 0;
+ }
+
+ /* Try next one */
+ return 1;
+}
+
+static int gic_populate_rdist(void)
+{
+ if (gic_iterate_rdists(__gic_populate_rdist) == 0)
+ return 0;
+
/* We couldn't even deal with ourselves... */
WARN(true, "CPU%d: mpidr %lx has no re-distributor!\n",
- smp_processor_id(), mpidr);
+ smp_processor_id(),
+ (unsigned long)cpu_logical_map(smp_processor_id()));
return -ENODEV;
}

--
2.11.0

Marc Zyngier

unread,
Aug 4, 2017, 3:50:05 AM8/4/17
to
On 31/07/17 18:26, Marc Zyngier wrote:
> When a vPE is not running, a VLPI being made pending results in a
> doorbell interrupt being delivered. Let's handle this interrupt
> and update the pending_last flag that indicates that VLPIs are
> pending. The corresponding vcpu is also kicked into action.
>
> Signed-off-by: Marc Zyngier <marc.z...@arm.com>
> ---
> virt/kvm/arm/vgic/vgic-v4.c | 34 ++++++++++++++++++++++++++++++++++
> 1 file changed, 34 insertions(+)
>
> diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
> index 534d3051a078..6af3cde6d7d4 100644
> --- a/virt/kvm/arm/vgic/vgic-v4.c
> +++ b/virt/kvm/arm/vgic/vgic-v4.c
> @@ -21,6 +21,19 @@
>
> #include "vgic.h"
>
> +static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
> +{
> + struct kvm_vcpu *vcpu = info;
> +
> + if (!kvm_vgic_vcpu_pending_irq(vcpu)) {
> + vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
> + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
> + kvm_vcpu_kick(vcpu);
> + }

This code is so obviously broken that I completely overlooked it.

If we have take a doorbell interrupt, then it means nothing was
otherwise pending (because we'd have been kicked out of the blocking
state, and will have masked the doorbell). So checking for pending
interrupts is pointless.

Furthermore, calling kvm_vgic_vcpu_pending_irq() takes the ap_list
lock. If we take a doorbell interrupt while injecting a virtual
interrupt (from userspace, for example) on the same CPU, we end-up
in deadlock land. This would be solved by Christoffer's latest
crop of timer patches, but there is no point getting there the first
place.

The patchlet below solves it:

diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
index 15feb1151797..48e4d6ebeaa8 100644
--- a/virt/kvm/arm/vgic/vgic-v4.c
+++ b/virt/kvm/arm/vgic/vgic-v4.c
@@ -94,11 +94,9 @@ static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
{
struct kvm_vcpu *vcpu = info;

- if (!kvm_vgic_vcpu_pending_irq(vcpu)) {
- vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
- kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
- kvm_vcpu_kick(vcpu);
- }
+ vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
+ kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
+ kvm_vcpu_kick(vcpu);

return IRQ_HANDLED;
}

and I've queued it for the next round.

Thanks,

M.
--
Jazz is not dead. It just smells funny...

Christoffer Dall

unread,
Aug 26, 2017, 3:50:08 PM8/26/17
to
On Mon, Jul 31, 2017 at 06:26:14PM +0100, Marc Zyngier wrote:
> The GICv4 support introduces a hard dependency between the KVM
> core and the ITS infrastructure. arm64 already selects it at
> the architecture level, but 32bit doesn't. In order to avoid
> littering the kernel with #ifdefs, let's just select the whole
> of the GICv3 suport code.
>
> You know you want it.

Acked-by: Christoffer Dall <cd...@linaro.org>

Christoffer Dall

unread,
Aug 26, 2017, 3:50:08 PM8/26/17
to
Reviewed-by: Christoffer Dall <cd...@linaro.org>

Christoffer Dall

unread,
Aug 26, 2017, 3:50:08 PM8/26/17
to
On Mon, Jul 31, 2017 at 06:26:19PM +0100, Marc Zyngier wrote:
> Let's use the irq bypass mechanism introduced for platform device
> interrupts to intercept the virtual PCIe endpoint configuration
> and establish our LPI->VLPI mapping.
>
> Signed-off-by: Marc Zyngier <marc.z...@arm.com>
> ---
> include/kvm/arm_vgic.h | 8 ++++
> virt/kvm/arm/arm.c | 27 ++++++++----
> virt/kvm/arm/vgic/vgic-v4.c | 103 ++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 130 insertions(+), 8 deletions(-)
>
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index 359eeffe9857..050f78d4fb42 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -367,4 +367,12 @@ int kvm_vgic_set_forwarding(struct kvm *kvm, unsigned int host_irq,
> void kvm_vgic_unset_forwarding(struct kvm *kvm, unsigned int host_irq,
> unsigned int vintid);
>
> +struct kvm_kernel_irq_routing_entry;
> +
> +int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq,
> + struct kvm_kernel_irq_routing_entry *irq_entry);
> +
> +int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
> + struct kvm_kernel_irq_routing_entry *irq_entry);
> +
> #endif /* __KVM_ARM_VGIC_H */
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index ebab6c29e3be..6803ea27c47d 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -1457,11 +1457,16 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
> struct kvm_kernel_irqfd *irqfd =
> container_of(cons, struct kvm_kernel_irqfd, consumer);
>
> - if (prod->type != IRQ_BYPASS_VFIO_PLATFORM)
> + switch (prod->type) {
> + case IRQ_BYPASS_VFIO_PLATFORM:
> + return kvm_vgic_set_forwarding(irqfd->kvm, prod->irq,
> + irqfd->gsi + VGIC_NR_PRIVATE_IRQS);
> + case IRQ_BYPASS_VFIO_PCI_MSI:
> + return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq,
> + &irqfd->irq_entry);
> + default:
> return 0;
> -
> - return kvm_vgic_set_forwarding(irqfd->kvm, prod->irq,
> - irqfd->gsi + VGIC_NR_PRIVATE_IRQS);
> + }
> }
> void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
> struct irq_bypass_producer *prod)
> @@ -1469,11 +1474,17 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
> struct kvm_kernel_irqfd *irqfd =
> container_of(cons, struct kvm_kernel_irqfd, consumer);
>
> - if (prod->type != IRQ_BYPASS_VFIO_PLATFORM)
> - return;
> + switch (prod->type) {
> + case IRQ_BYPASS_VFIO_PLATFORM:
> + kvm_vgic_unset_forwarding(irqfd->kvm, prod->irq,
> + irqfd->gsi + VGIC_NR_PRIVATE_IRQS);
> + break;
>
> - kvm_vgic_unset_forwarding(irqfd->kvm, prod->irq,
> - irqfd->gsi + VGIC_NR_PRIVATE_IRQS);
> + case IRQ_BYPASS_VFIO_PCI_MSI:
> + kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq,
> + &irqfd->irq_entry);
> + break;
> + }
> }
>
> void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
> diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
> index 207e1fda0dcd..338c86c5159f 100644
> --- a/virt/kvm/arm/vgic/vgic-v4.c
> +++ b/virt/kvm/arm/vgic/vgic-v4.c
> @@ -72,3 +72,106 @@ void vgic_v4_teardown(struct kvm *kvm)
> its_vm->nr_vpes = 0;
> its_vm->vpes = NULL;
> }
> +
> +static struct vgic_its *vgic_get_its(struct kvm *kvm,
> + struct kvm_kernel_irq_routing_entry *irq_entry)
> +{
> + struct kvm_msi msi = (struct kvm_msi) {
> + .address_lo = irq_entry->msi.address_lo,
> + .address_hi = irq_entry->msi.address_hi,
> + .data = irq_entry->msi.data,
> + .flags = irq_entry->msi.flags,
> + .devid = irq_entry->msi.devid,
> + };
> +
> + /*
> + * Get a reference on the LPI. If NULL, this is not a valid
> + * translation for any of our vITSs.
> + */
> + return vgic_msi_to_its(kvm, &msi);
> +}
> +
> +int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
> + struct kvm_kernel_irq_routing_entry *irq_entry)
> +{
> + struct vgic_its *its;
> + struct vgic_irq *irq;
> + struct its_vlpi_map map;
> + int ret;
> +
> + if (!vgic_is_v4_capable(kvm))
> + return 0;
> +
> + /*
> + * Get the ITS, and escape early on error (not a valid
> + * doorbell for any of our vITSs).
> + */
> + its = vgic_get_its(kvm, irq_entry);
> + if (IS_ERR(its))
> + return 0;
> +
> + mutex_lock(&its->its_lock);
> +
> + /* Perform then actual DevID/EventID -> LPI translation. */
> + ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
> + irq_entry->msi.data, &irq);
> + if (ret)
> + goto out;
> +
> + /*
> + * Emit the mapping request. If it fails, the ITS probably
> + * isn't v4 compatible, so let's silently bail out. Holding
> + * the ITS lock should ensure that nothing can modify the
> + * target vcpu.
> + */
> + map = (struct its_vlpi_map) {
> + .vm = &kvm->arch.vgic.its_vm,
> + .vintid = irq->intid,
> + .db_enabled = true,
> + .vpe_idx = irq->target_vcpu->vcpu_id,
> + };
> +
> + if (its_map_vlpi(virq, &map))
> + goto out;

This seems to be able to return things like -ENOMEM, whould we really
not report this back to the caller in any way?

> +
> + irq->hw = true;
> + irq->host_irq = virq;
> +
> +out:
> + mutex_unlock(&its->its_lock);
> + return 0;
> +}
> +
> +int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq,
> + struct kvm_kernel_irq_routing_entry *irq_entry)
> +{
> + struct vgic_its *its;
> + struct vgic_irq *irq;
> + int ret;
> +
> + if (!vgic_is_v4_capable(kvm))
> + return 0;
> +
> + /*
> + * Get the ITS, and escape early on error (not a valid
> + * doorbell for any of our vITSs).
> + */
> + its = vgic_get_its(kvm, irq_entry);
> + if (IS_ERR(its))
> + return 0;
> +
> + mutex_lock(&its->its_lock);
> +
> + ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
> + irq_entry->msi.data, &irq);
> + if (ret)
> + goto out;
> +
> + WARN_ON(!(irq->hw && irq->host_irq == virq));
> + irq->hw = false;
> + ret = its_unmap_vlpi(virq);
> +
> +out:
> + mutex_unlock(&its->its_lock);
> + return ret;
> +}
> --
> 2.11.0
>
Otherwise looks good to me.

Thanks,
-Christoffer

Christoffer Dall

unread,
Aug 28, 2017, 2:20:06 PM8/28/17
to
On Mon, Jul 31, 2017 at 06:26:24PM +0100, Marc Zyngier wrote:
> The current implementation of MOVALL doesn't allow us to call
> into the core ITS code as we hold a number of spinlocks.
>
> Let's try a method used in other parts of the code, were we copy
> the intids of the candicate interrupts, and then do whatever
> we need to do with them outside of the critical section.
>
> This allows us to move the interrupts one by one, at the expense
> of a bit of CPU time. Who cares? MOVALL is such a stupid command
> anyway...
>
> Signed-off-by: Marc Zyngier <marc.z...@arm.com>
> ---
> virt/kvm/arm/vgic/vgic-its.c | 27 ++++++++++++++++++++-------
> 1 file changed, 20 insertions(+), 7 deletions(-)
>
> diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
> index 2c065c970ba0..65cc77fde609 100644
> --- a/virt/kvm/arm/vgic/vgic-its.c
> +++ b/virt/kvm/arm/vgic/vgic-its.c
> @@ -1147,11 +1147,12 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
> static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
> u64 *its_cmd)
> {
> - struct vgic_dist *dist = &kvm->arch.vgic;
> u32 target1_addr = its_cmd_get_target_addr(its_cmd);
> u32 target2_addr = its_cmd_mask_field(its_cmd, 3, 16, 32);
> struct kvm_vcpu *vcpu1, *vcpu2;
> struct vgic_irq *irq;
> + u32 *intids;
> + int irq_count, i;
>
> if (target1_addr >= atomic_read(&kvm->online_vcpus) ||
> target2_addr >= atomic_read(&kvm->online_vcpus))
> @@ -1163,19 +1164,31 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
> vcpu1 = kvm_get_vcpu(kvm, target1_addr);
> vcpu2 = kvm_get_vcpu(kvm, target2_addr);
>
> - spin_lock(&dist->lpi_list_lock);
> + irq_count = vgic_copy_lpi_list(vcpu1, &intids);
> + if (irq_count < 0)
> + return irq_count;
>
> - list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
> - spin_lock(&irq->irq_lock);
> + for (i = 0; i < irq_count; i++) {
> + irq = vgic_get_irq(kvm, NULL, intids[i]);
> + if (!irq)
> + continue;

Getting irq == NULL means that we've removed this LPI since
vgic_copy_lpi_list, right? Can this really happen while we hold the its
mutex?

Also, we don't check this in its_sync_lpi_pending_table which would
indicate that we either have a bug there or are being overly careful
here (or should change the continue to BUG).


>
> if (irq->target_vcpu == vcpu1)
> irq->target_vcpu = vcpu2;
>
> - spin_unlock(&irq->irq_lock);

Is it safe to modify target_vcpu without holding the irq_lock?

> - }
> + if (irq->hw) {
> + struct its_vlpi_map map;
>
> - spin_unlock(&dist->lpi_list_lock);
> + if (!its_get_vlpi(irq->host_irq, &map)) {
> + map.vpe_idx = vcpu2->vcpu_id;
> + its_map_vlpi(irq->host_irq, &map);
> + }
> + }
>
> + vgic_put_irq(kvm, irq);
> + }
> +
> + kfree(intids);
> return 0;
> }
>
> --
> 2.11.0
>

Thanks,
-Christoffer

Christoffer Dall

unread,
Aug 28, 2017, 2:20:09 PM8/28/17
to
On Mon, Jul 31, 2017 at 06:26:28PM +0100, Marc Zyngier wrote:
> When a vPE exits, the pending_last flag is set when there are
> pending VLPIs stored in the pending table. Similarily, we set
> this flag when a doorbell interrupt fires, as it indicates the
> same condition.
>
> Let's update kvm_vgic_vcpu_pending_irq() to account for that
> flag as well, making a vcpu runnable when set.
>
> Signed-off-by: Marc Zyngier <marc.z...@arm.com>
> ---
> virt/kvm/arm/vgic/vgic.c | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
> index a96e566905d2..dfac894f6f03 100644
> --- a/virt/kvm/arm/vgic/vgic.c
> +++ b/virt/kvm/arm/vgic/vgic.c
> @@ -781,6 +781,9 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
> if (!vcpu->kvm->arch.vgic.enabled)
> return false;
>
> + if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last)
> + return true;
> +
> spin_lock(&vgic_cpu->ap_list_lock);
>
> list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
> --
> 2.11.0
>

Acked-by: Christoffer Dall <cd...@linaro.org>

Christoffer Dall

unread,
Aug 28, 2017, 2:20:09 PM8/28/17
to
On Mon, Jul 31, 2017 at 06:26:30PM +0100, Marc Zyngier wrote:
> The doorbell interrupt is only useful if the vcpu is blocked on WFI.
> In all other cases, recieving a doorbell interrupt is just a waste
> of cycles.
>
> So let's only enable the doorbell if a vcpu is getting blocked,
> and disable it when it is unblocked. This is very similar to
> what we're doing for the background timer.
>
> Signed-off-by: Marc Zyngier <marc.z...@arm.com>
> ---
> include/kvm/arm_vgic.h | 3 +++
> virt/kvm/arm/arm.c | 2 ++
> virt/kvm/arm/vgic/vgic-v4.c | 32 +++++++++++++++++++++++++++++++-
> 3 files changed, 36 insertions(+), 1 deletion(-)
>
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index 050f78d4fb42..7b1cc7ff5b42 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -375,4 +375,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq,
> int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
> struct kvm_kernel_irq_routing_entry *irq_entry);
>
> +void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu);
> +void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu);
> +
> #endif /* __KVM_ARM_VGIC_H */
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index 6803ea27c47d..5dbc8dc8fbf4 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -316,11 +316,13 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
> void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
> {
> kvm_timer_schedule(vcpu);
> + kvm_vgic_v4_enable_doorbell(vcpu);
> }
>
> void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
> {
> kvm_timer_unschedule(vcpu);
> + kvm_vgic_v4_disable_doorbell(vcpu);
> }
>
> int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
> diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
> index 6af3cde6d7d4..50721c4e3da5 100644
> --- a/virt/kvm/arm/vgic/vgic-v4.c
> +++ b/virt/kvm/arm/vgic/vgic-v4.c
> @@ -16,6 +16,7 @@
> */
>
> #include <linux/interrupt.h>
> +#include <linux/irq.h>
> #include <linux/irqdomain.h>
> #include <linux/kvm_host.h>
>
> @@ -73,6 +74,14 @@ int vgic_v4_init(struct kvm *kvm)
> kvm_for_each_vcpu(i, vcpu, kvm) {
> int irq = dist->its_vm.vpes[i]->irq;
>
> + /*
> + * Don't automatically enable the doorbell, as we're
> + * flipping it back and forth when the vcpu gets
> + * blocked. Also disable the lazy disabling, as the
> + * doorbell could kick us out of the guest too
> + * early...
> + */
> + irq_set_status_flags(irq, IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY);
> ret = request_irq(irq, vgic_v4_doorbell_handler,
> 0, "vcpu", vcpu);
> if (ret) {
> @@ -98,7 +107,10 @@ void vgic_v4_teardown(struct kvm *kvm)
>
> for (i = 0; i < its_vm->nr_vpes; i++) {
> struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, i);
> - free_irq(its_vm->vpes[i]->irq, vcpu);
> + int irq = its_vm->vpes[i]->irq;
> +
> + irq_clear_status_flags(irq, IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY);
> + free_irq(irq, vcpu);
> }
>
> its_free_vcpu_irqs(its_vm);
> @@ -211,3 +223,21 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq,
> mutex_unlock(&its->its_lock);
> return ret;
> }
> +
> +void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu)
> +{
> + if (vgic_is_v4_capable(vcpu->kvm)) {
> + int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
> + if (irq)
> + enable_irq(irq);
> + }
> +}
> +
> +void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu)
> +{
> + if (vgic_is_v4_capable(vcpu->kvm)) {
> + int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
> + if (irq)
> + disable_irq(irq);
> + }
> +}

Christoffer Dall

unread,
Aug 28, 2017, 2:20:09 PM8/28/17
to
On Mon, Jul 31, 2017 at 06:26:29PM +0100, Marc Zyngier wrote:
> When a vPE is not running, a VLPI being made pending results in a
> doorbell interrupt being delivered. Let's handle this interrupt
> and update the pending_last flag that indicates that VLPIs are
> pending. The corresponding vcpu is also kicked into action.
>
> Signed-off-by: Marc Zyngier <marc.z...@arm.com>
> ---
> virt/kvm/arm/vgic/vgic-v4.c | 34 ++++++++++++++++++++++++++++++++++
> 1 file changed, 34 insertions(+)
>
> diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
> index 534d3051a078..6af3cde6d7d4 100644
> --- a/virt/kvm/arm/vgic/vgic-v4.c
> +++ b/virt/kvm/arm/vgic/vgic-v4.c
> @@ -21,6 +21,19 @@
>
> #include "vgic.h"
>
> +static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
> +{
> + struct kvm_vcpu *vcpu = info;
> +
> + if (!kvm_vgic_vcpu_pending_irq(vcpu)) {
> + vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
> + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
> + kvm_vcpu_kick(vcpu);
> + }

Can this ever fire while vgic_v4_init() is running and before te rest of
the system has been properly initialized with some entertaining results
to follow? (I'm not sure if spurious doorbell non-resident vPE
interrupts is a thing or not).

> +
> + return IRQ_HANDLED;
> +}
> +
> int vgic_v4_init(struct kvm *kvm)
> {
> struct vgic_dist *dist = &kvm->arch.vgic;
> @@ -57,16 +70,37 @@ int vgic_v4_init(struct kvm *kvm)
> return ret;
> }
>
> + kvm_for_each_vcpu(i, vcpu, kvm) {
> + int irq = dist->its_vm.vpes[i]->irq;
> +
> + ret = request_irq(irq, vgic_v4_doorbell_handler,
> + 0, "vcpu", vcpu);
> + if (ret) {
> + kvm_err("failed to allocate vcpu IRQ%d\n", irq);
> + dist->its_vm.nr_vpes = i;

That's a neat trick for the error handling. Might deserve a tiny
comment.

> + break;
> + }
> + }
> +
> + if (ret)
> + vgic_v4_teardown(kvm);
> +
> return ret;
> }
>
> void vgic_v4_teardown(struct kvm *kvm)
> {
> struct its_vm *its_vm = &kvm->arch.vgic.its_vm;
> + int i;
>
> if (!its_vm->vpes)
> return;
>
> + for (i = 0; i < its_vm->nr_vpes; i++) {
> + struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, i);
> + free_irq(its_vm->vpes[i]->irq, vcpu);
> + }
> +
> its_free_vcpu_irqs(its_vm);
> kfree(its_vm->vpes);
> its_vm->nr_vpes = 0;
> --
> 2.11.0
>

Thanks,
-Christoffer

Christoffer Dall

unread,
Aug 28, 2017, 2:20:09 PM8/28/17
to
On Mon, Jul 31, 2017 at 06:26:35PM +0100, Marc Zyngier wrote:
> Yet another braindump so I can free some cells...
>
> Signed-off-by: Marc Zyngier <marc.z...@arm.com>
> ---
> virt/kvm/arm/vgic/vgic-v4.c | 68 +++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 68 insertions(+)
>
> diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
> index 0a8deefbcf1c..0c002d2be620 100644
> --- a/virt/kvm/arm/vgic/vgic-v4.c
> +++ b/virt/kvm/arm/vgic/vgic-v4.c
> @@ -22,6 +22,74 @@
>
> #include "vgic.h"
>
> +/*
> + * How KVM uses GICv4 (insert rude comments here):
> + *
> + * The vgic-v4 layer acts as a bridge between several entities:
> + * - The GICv4 ITS representation offered by the ITS driver
> + * - VFIO, which is in charge of the PCI endpoint
> + * - The virtual ITS, which is the only thing the guest sees
> + *
> + * The configuration of VLPIs is triggered by a callback from VFIO,
> + * instructing KVM that a PCI device has been configured to deliver
> + * MSIs to a vITS.
> + *
> + * kvm_vgic_v4_set_forwarding() is thus called with the routing entry,
> + * and this is used to find the corresponding vITS data structures
> + * (ITS instance, device, event and irq) using a process that is
> + * extremely similar to the injection of an MSI.
> + *
> + * At this stage, we can link the guest's view of an LPI (uniquely
> + * identified by the routing entry) and the host irq, using the GICv4
> + * driver mapping operation. Should the mapping succeed, we've then
> + * successfully upgraded the guest's LPI to a VLPI. We can then start
> + * with updating GICv4's view of the property table and generating an
> + * INValidation in order to kickstart the delivery of this VLPI to the
> + * guest directly, without software intervention. Well, almost.
> + *
> + * When the PCI endpoint is deconfigured, this operation is reversed
> + * with VFIO calling kvm_vgic_v4_unset_forwarding().
> + *
> + * Once the VLPI has been mapped, it needs to follow any change the
> + * guest performs on its LPI through the vITS. For that, a number of
> + * command handlers have hooks to communicate these changes to the HW:
> + * - Any invalidation triggers a call to its_prop_update_vlpi()
> + * - The INT command results in a irq_set_irqchip_state(), which
> + * generates an INT on the corresponding VLPI.
> + * - The CLEAR command results in a irq_set_irqchip_state(), which
> + * generates an CLEAR on the corresponding VLPI.
> + * - DISCARD translates into an unmap, similar to a call to
> + * kvm_vgic_v4_unset_forwarding().

So is VFIO notified of this or does it still think the IRQ is
forwarded? Or does it not care, and it's state maintained by the irq
subsystem?

> + * - MOVI is translated by an update of the existing mapping, changing
> + * the target vcpu, resulting in a VMOVI being generated.
> + * - MOVALL is translated by a string of mapping updates (similar to
> + * the handling of MOVI). MOVALL is horrible.
> + *
> + * Note that a DISCARD/MAPTI sequence emitted from the guest without
> + * reprogramming the PCI endpoint after MAPTI does not result in a
> + * VLPI being mapped, as there is no callback from VFIO (the guest
> + * will get the interrupt via the normal SW injection). Fixing this is
> + * not trivial, and requires some horrible messing with the VFIO
> + * internals. Not fun. Don't do that.

Is there not a quick way to check with VFIO or the irq subsystem if this
interrupt can be forwarded and attempt that when handling the MAPTI in
the vTIS, or does this break in horrible ways?

> + *
> + * Then there is the scheduling. Each time a vcpu is about to run on a
> + * physical CPU, KVM must tell the corresponding redistributor about
> + * it. And if we've migrated our vcpu from one CPU to another, we must
> + * tell the ITS (so that the messages reach the right redistributor).
> + * This is done in two steps: first issue a irq_set_affinity() on the
> + * irq corresponding to the vcpu, then call its_schedule_vpe(). You
> + * must be in a non-preemptible context. On exit, another call to
> + * its_schedule_vpe() tells the redistributor that we're done with the
> + * vcpu.
> + *
> + * Finally, the doorbell handling: Each vcpu is allocated an interrupt
> + * which will fire each time a VLPI is made pending whilst the vcpu is
> + * not running. Each time the vcpu gets blocked, the doorbell
> + * interrupt gets enabled. When the vcpu is unblocked (for whatever
> + * reason), the doorbell interrupt is disabled. This behaviour is
> + * pretty similar to that of the backgroud timer.

*background

However, I think you can probably drop the last sentence in interest of
clarity.

> + */
> +
> static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
> {
> struct kvm_vcpu *vcpu = info;
> --
> 2.11.0
>

Thanks for an otherwise excellent (and of course entertaining) writeup!
-Christoffer

Christoffer Dall

unread,
Aug 28, 2017, 2:20:09 PM8/28/17
to
On Mon, Jul 31, 2017 at 06:26:31PM +0100, Marc Zyngier wrote:
> The redistributor needs to be told which vPE is about to be run,
> and tells us whether there is any pending VLPI on exit.
>
> Let's add the scheduling calls to the vgic flush/sync functions,
> allowing the VLPIs to be delivered to the guest.
>
> Signed-off-by: Marc Zyngier <marc.z...@arm.com>
> ---
> virt/kvm/arm/vgic/vgic-v4.c | 24 ++++++++++++++++++++++++
> virt/kvm/arm/vgic/vgic.c | 4 ++++
> virt/kvm/arm/vgic/vgic.h | 1 +
> 3 files changed, 29 insertions(+)
>
> diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
> index 50721c4e3da5..0a8deefbcf1c 100644
> --- a/virt/kvm/arm/vgic/vgic-v4.c
> +++ b/virt/kvm/arm/vgic/vgic-v4.c
> @@ -119,6 +119,30 @@ void vgic_v4_teardown(struct kvm *kvm)
> its_vm->vpes = NULL;
> }
>
> +int vgic_v4_schedule(struct kvm_vcpu *vcpu, bool on)
> +{
> + int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
> +
> + if (!vgic_is_v4_capable(vcpu->kvm) || !irq)
> + return 0;

why do we need to check the its_vpe.irq here? This check is certainly
not untuitive, as I don't understand what happened on a v4 capable
system that somehow failed. Is it because a specific VM is configured
to not use VLPIs, or?

> +
> + /*
> + * Before making the VPE resident, make sure the redistributor
> + * expects us here.
> + */
> + if (on) {
> + int err;
> +
> + err = irq_set_affinity(irq, cpumask_of(smp_processor_id()));

This is pretty unintuitive, and coming here without having read your
documentation may make people completely puzzled. Could we provide a
pointer to the documentation that explains how the vpe irq hooks this
all together?

> + if (err) {
> + kvm_err("failed irq_set_affinity IRQ%d (%d)\n", irq, err);
> + return err;
> + }
> + }
> +
> + return its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, on);
> +}
> +

I'd prefer this function be split into two and follow the vgic notation
of having a flush and a sync function.

> static struct vgic_its *vgic_get_its(struct kvm *kvm,
> struct kvm_kernel_irq_routing_entry *irq_entry)
> {
> diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
> index dfac894f6f03..9ab52108989d 100644
> --- a/virt/kvm/arm/vgic/vgic.c
> +++ b/virt/kvm/arm/vgic/vgic.c
> @@ -721,6 +721,8 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
> {
> struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
>
> + WARN_ON(vgic_v4_schedule(vcpu, false));
> +

This is in the critical path, so would it be worth considering a static
key to cater for non-GICv4 systems here?

> /* An empty ap_list_head implies used_lrs == 0 */
> if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
> return;
> @@ -733,6 +735,8 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
> /* Flush our emulation state into the GIC hardware before entering the guest. */
> void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
> {
> + WARN_ON(vgic_v4_schedule(vcpu, true));
> +
> /*
> * If there are no virtual interrupts active or pending for this
> * VCPU, then there is no work to do and we can bail out without
> diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
> index 1210bf4681dc..693b654acf4d 100644
> --- a/virt/kvm/arm/vgic/vgic.h
> +++ b/virt/kvm/arm/vgic/vgic.h
> @@ -234,5 +234,6 @@ int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
> bool vgic_is_v4_capable(struct kvm *kvm);
> int vgic_v4_init(struct kvm *kvm);
> void vgic_v4_teardown(struct kvm *kvm);
> +int vgic_v4_schedule(struct kvm_vcpu *vcpu, bool on);
>
> #endif
> --
> 2.11.0
>
Functionally, this looks correct.

Thanks,
-Christoffer

Christoffer Dall

unread,
Aug 28, 2017, 2:20:09 PM8/28/17
to
I don't think you need the request and kick, because if you're getting
this doorbell, doesn't that also mean that the VCPU is not running in
the guest and you simply need to make sure the VCPU thread gets
scheduled again, so you could call kvm_vcpu_wake_up() instead.

Unless the request is there to ensure proper memory barriers around
setting pending_last?

In that case, is the read barrier taken care of by prepare_to_swait in
kvm_vcpu_block()?

Drew?


Thanks,
-Christoffer

Christoffer Dall

unread,
Aug 28, 2017, 2:20:11 PM8/28/17
to
On Mon, Jul 31, 2017 at 06:26:22PM +0100, Marc Zyngier wrote:
> When the guest issues a MOVI, we need to tell the physical ITS
> that we're now targetting a new vcpu. This is done by extracting
> the current mapping, updating the target, and reapplying the
> mapping. The core ITS code should do the right thing.
>
> Signed-off-by: Marc Zyngier <marc.z...@arm.com>
> ---
> virt/kvm/arm/vgic/vgic-its.c | 13 +++++++++++++
> 1 file changed, 13 insertions(+)
>
> diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
> index 79bac93d3e7d..aaad577ce328 100644
> --- a/virt/kvm/arm/vgic/vgic-its.c
> +++ b/virt/kvm/arm/vgic/vgic-its.c
> @@ -706,6 +706,19 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
> ite->irq->target_vcpu = vcpu;
> spin_unlock(&ite->irq->irq_lock);
>
> + if (ite->irq->hw) {
> + struct its_vlpi_map map;
> + int ret;
> +
> + ret = its_get_vlpi(ite->irq->host_irq, &map);
> + if (ret)
> + return ret;
> +
> + map.vpe_idx = vcpu->vcpu_id;
> +
> + return its_map_vlpi(ite->irq->host_irq, &map);

Since you're not holding the irq_lock across these two calls, would it
be possible that the forwarding was removed through some other call path
here, and could you end up passing an invalid host_irq to its_map_vlpi?

Thanks,
-Christoffer

> + }
> +
> return 0;
> }
>
> --
> 2.11.0
>

Christoffer Dall

unread,
Aug 28, 2017, 2:20:15 PM8/28/17
to
On Mon, Jul 31, 2017 at 06:26:26PM +0100, Marc Zyngier wrote:
> Since when updating the properties one LPI at a time, there is no
> need to perform an INV each time we read one. Instead, we rely
> on the final VINVALL that gets sent to the ITS to do the work.
>
> Signed-off-by: Marc Zyngier <marc.z...@arm.com>

Acked-by: Christoffer Dall <cd...@linaro.org>

> ---
> virt/kvm/arm/vgic/vgic-its.c | 15 +++++++++------
> 1 file changed, 9 insertions(+), 6 deletions(-)
>
> diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
> index 9e46081e5f15..b395df1bf47c 100644
> --- a/virt/kvm/arm/vgic/vgic-its.c
> +++ b/virt/kvm/arm/vgic/vgic-its.c
> @@ -38,7 +38,7 @@ static int vgic_its_save_tables_v0(struct vgic_its *its);
> static int vgic_its_restore_tables_v0(struct vgic_its *its);
> static int vgic_its_commit_v0(struct vgic_its *its);
> static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
> - struct kvm_vcpu *filter_vcpu);
> + struct kvm_vcpu *filter_vcpu, bool needs_inv);
>
> /*
> * Creates a new (reference to a) struct vgic_irq for a given LPI.
> @@ -106,7 +106,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
> * However we only have those structs for mapped IRQs, so we read in
> * the respective config data from memory here upon mapping the LPI.
> */
> - ret = update_lpi_config(kvm, irq, NULL);
> + ret = update_lpi_config(kvm, irq, NULL, false);
> if (ret)
> return ERR_PTR(ret);
>
> @@ -274,7 +274,7 @@ static struct its_collection *find_collection(struct vgic_its *its, int coll_id)
> * VCPU. Unconditionally applies if filter_vcpu is NULL.
> */
> static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
> - struct kvm_vcpu *filter_vcpu)
> + struct kvm_vcpu *filter_vcpu, bool needs_inv)
> {
> u64 propbase = GICR_PROPBASER_ADDRESS(kvm->arch.vgic.propbaser);
> u8 prop;
> @@ -298,7 +298,7 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
> }
>
> if (irq->hw)
> - return its_prop_update_vlpi(irq->host_irq, prop, true);
> + return its_prop_update_vlpi(irq->host_irq, prop, needs_inv);
>
> return 0;
> }
> @@ -1095,7 +1095,7 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its,
> if (!ite)
> return E_ITS_INV_UNMAPPED_INTERRUPT;
>
> - return update_lpi_config(kvm, ite->irq, NULL);
> + return update_lpi_config(kvm, ite->irq, NULL, true);
> }
>
> /*
> @@ -1130,12 +1130,15 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
> irq = vgic_get_irq(kvm, NULL, intids[i]);
> if (!irq)
> continue;
> - update_lpi_config(kvm, irq, vcpu);
> + update_lpi_config(kvm, irq, vcpu, false);
> vgic_put_irq(kvm, irq);
> }
>
> kfree(intids);
>
> + if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm)
> + its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe);

Christoffer Dall

unread,
Aug 28, 2017, 2:30:08 PM8/28/17
to
On Mon, Jul 31, 2017 at 06:26:25PM +0100, Marc Zyngier wrote:
> Upon updating a property, we propagate it all the way to the physical
> ITS, and ask for an INV command to be executed there.
>
> Signed-off-by: Marc Zyngier <marc.z...@arm.com>

Acked-by: Christoffer Dall <cd...@linaro.org>

> ---
> virt/kvm/arm/vgic/vgic-its.c | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
> index 65cc77fde609..9e46081e5f15 100644
> --- a/virt/kvm/arm/vgic/vgic-its.c
> +++ b/virt/kvm/arm/vgic/vgic-its.c
> @@ -297,6 +297,9 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
> spin_unlock(&irq->irq_lock);
> }
>
> + if (irq->hw)
> + return its_prop_update_vlpi(irq->host_irq, prop, true);

Christoffer Dall

unread,
Aug 28, 2017, 2:30:09 PM8/28/17
to
On Mon, Jul 31, 2017 at 06:26:20PM +0100, Marc Zyngier wrote:
> If the guest issues an INT command targetting a VLPI, let's
> call into the irq_set_irqchip_state() helper to make it pending
> on the physical side.
>
> This works just as well if userspace decides to inject an interrupt
> using the normal userspace API...
>
> Signed-off-by: Marc Zyngier <marc.z...@arm.com>

Acked-by: Christoffer Dall <cd...@linaro.org>

> ---
> virt/kvm/arm/vgic/vgic-its.c | 4 ++++
> 1 file changed, 4 insertions(+)
>
> diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
> index a2217f53bbe5..40aeadef33fe 100644
> --- a/virt/kvm/arm/vgic/vgic-its.c
> +++ b/virt/kvm/arm/vgic/vgic-its.c
> @@ -573,6 +573,10 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
> if (err)
> return err;
>
> + if (irq->hw)
> + return irq_set_irqchip_state(irq->host_irq,
> + IRQCHIP_STATE_PENDING, true);
> +
> spin_lock(&irq->irq_lock);
> irq->pending_latch = true;
> vgic_queue_irq_unlock(kvm, irq);
> --
> 2.11.0
>

Christoffer Dall

unread,
Aug 28, 2017, 2:40:09 PM8/28/17
to
On Mon, Jul 31, 2017 at 06:26:34PM +0100, Marc Zyngier wrote:
> The GICv4 architecture doesn't prevent CPUs implementing GICv4 to
> cohabit with CPUs limited to GICv3 in the same system.
>
> This is mad (the sheduler would have to be made aware of the v4

*scheduler

> capability), and we're certainly not going to support this any
> time soon. So let's check that all online CPUs are GICv4 capable,
> and disable the functionnality if not.

*functionality

>
> Signed-off-by: Marc Zyngier <marc.z...@arm.com>
> ---
> include/linux/irqchip/arm-gic-v3.h | 2 ++
> virt/kvm/arm/vgic/vgic-v3.c | 22 +++++++++++++++++++++-
> 2 files changed, 23 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
> index 1ea576c8126f..dfa4a51643d6 100644
> --- a/include/linux/irqchip/arm-gic-v3.h
> +++ b/include/linux/irqchip/arm-gic-v3.h
> @@ -532,6 +532,8 @@
> #define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT)
> #define ICH_VTR_A3V_SHIFT 21
> #define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT)
> +#define ICH_VTR_nV4_SHIFT 20
> +#define ICH_VTR_nV4_MASK (1 << ICH_VTR_nV4_SHIFT)
>
> #define ICC_IAR1_EL1_SPURIOUS 0x3ff
>
> diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
> index 405733678c2f..252268e83ade 100644
> --- a/virt/kvm/arm/vgic/vgic-v3.c
> +++ b/virt/kvm/arm/vgic/vgic-v3.c
> @@ -466,6 +466,18 @@ static int __init early_gicv4_enable(char *buf)
> }
> early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
>
> +static void vgic_check_v4_cpuif(void *param)
> +{
> + u32 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
> + bool *v4 = param, this_cpu_is_v4;
> +
> + this_cpu_is_v4 = !(ich_vtr_el2 & ICH_VTR_nV4_MASK);
> + if (!this_cpu_is_v4)
> + kvm_info("CPU%d is not GICv4 capable\n", smp_processor_id());
> +
> + *v4 &= this_cpu_is_v4;

nit: You could make this function look slightly less scary by declaring
this_cpu_is_v4 on a separate line and not use a bitwise operator on a
boolean type. Actually, having a function called 'check' with a side
effect is not the nicest thing either, so why not just return what this
particular CPU does and do the comparison in the calling loop.

#bikeshedding


Thanks,
-Christoffer



> +}
> +
> /**
> * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
> * @node: pointer to the DT node
> @@ -485,8 +497,16 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
> kvm_vgic_global_state.can_emulate_gicv2 = false;
> kvm_vgic_global_state.ich_vtr_el2 = ich_vtr_el2;
>
> - /* GICv4 support? */
> + /*
> + * GICv4 support? We need to check on all CPUs in case of some
> + * extremely creative form of big-little brain damage...
> + */
> if (info->has_v4) {
> + int cpu;
> +
> + for_each_online_cpu(cpu)
> + smp_call_function_single(cpu, vgic_check_v4_cpuif,
> + &gicv4_enable, 1);
> kvm_vgic_global_state.has_gicv4 = gicv4_enable;
> kvm_info("GICv4 support %sabled\n",
> gicv4_enable ? "en" : "dis");
> --
> 2.11.0
>

Marc Zyngier

unread,
Aug 30, 2017, 5:50:09 AM8/30/17
to
That's a good question.

If we return -ENOMEM, we'll probably end-up returning an error to
userspace (as a result of the VFIO ioctl), which will in turn probably
terminate the guest (I'm guessing, I haven't actually looked at what
userspace does).

If we don't return the error, then we have a chance to keep the guest
running by sticking to software injection.

I'm not sure what is preferable...

Marc Zyngier

unread,
Aug 30, 2017, 6:00:09 AM8/30/17
to
Hmm. I think that's a debug leftover from my early attempt at making
things work with QEMU, which initializes things in the opposite order
as kvmtool. It should be removed (or replaced by a fat WARN_ON).

>> +
>> + /*
>> + * Before making the VPE resident, make sure the redistributor
>> + * expects us here.
>> + */
>> + if (on) {
>> + int err;
>> +
>> + err = irq_set_affinity(irq, cpumask_of(smp_processor_id()));
>
> This is pretty unintuitive, and coming here without having read your
> documentation may make people completely puzzled. Could we provide a
> pointer to the documentation that explains how the vpe irq hooks this
> all together?

Sure, will do.

>
>> + if (err) {
>> + kvm_err("failed irq_set_affinity IRQ%d (%d)\n", irq, err);
>> + return err;
>> + }
>> + }
>> +
>> + return its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, on);
>> +}
>> +
>
> I'd prefer this function be split into two and follow the vgic notation
> of having a flush and a sync function.

Yes, makes sense.

>> static struct vgic_its *vgic_get_its(struct kvm *kvm,
>> struct kvm_kernel_irq_routing_entry *irq_entry)
>> {
>> diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
>> index dfac894f6f03..9ab52108989d 100644
>> --- a/virt/kvm/arm/vgic/vgic.c
>> +++ b/virt/kvm/arm/vgic/vgic.c
>> @@ -721,6 +721,8 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
>> {
>> struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
>>
>> + WARN_ON(vgic_v4_schedule(vcpu, false));
>> +
>
> This is in the critical path, so would it be worth considering a static
> key to cater for non-GICv4 systems here?

Hey, for once I wasn't trying to optimize early! ;-) This would be
useful indeed, as I expect GICv4 systems to be the absolute minority for
the foreseeable future.

Auger Eric

unread,
Aug 30, 2017, 6:30:09 AM8/30/17
to
Hi Marc,
I have not read the whole stuff yet but userspace is not aware of this
negotiation. Everything happens under the hood in kernel, see
virt/lib/irqbypass.c __connect(): if add_producer() fails
prod->del_consumer() is called and we should return to the not optimized
injection.

Thanks

Eric

Marc Zyngier

unread,
Aug 30, 2017, 6:30:09 AM8/30/17
to
On 26/08/17 20:48, Christoffer Dall wrote:
This is just wrong. We cannot assume that the vcpu_id has anything to do
with the vpe_idx. It happens to be the same thing now, but the two things
should be clearly disconnected.

I suggest the following (untested):

diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
index cf5d6e2de6b8..0146e004401a 100644
--- a/virt/kvm/arm/vgic/vgic-v4.c
+++ b/virt/kvm/arm/vgic/vgic-v4.c
@@ -251,13 +251,27 @@ static void dump_routing(int virq, struct kvm_kernel_irq_routing_entry *irq_entr

}

+static int vgic_v4_vcpu_to_index(struct its_vm *its_vm, struct kvm_vcpu *vcpu)
+{
+ int i;
+
+ for (i = 0; i < its_vm->nr_vpes; i++) {
+ struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
+
+ if (its_vm->vpes[i] == vpe)
+ return i;
+ }
+
+ return -ENODEV;
+}
+
int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
struct kvm_kernel_irq_routing_entry *irq_entry)
{
struct vgic_its *its;
struct vgic_irq *irq;
struct its_vlpi_map map;
- int ret;
+ int ret, idx;

dump_routing(virq, irq_entry, true);

@@ -280,6 +294,11 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
if (ret)
goto out;

+ idx = vgic_v4_vcpu_to_index(&kvm->arch.vgic.its_vm,
+ irq->target_vcpu);
+ if (idx < 0)
+ goto out;
+
/*
* Emit the mapping request. If it fails, the ITS probably
* isn't v4 compatible, so let's silently bail out. Holding
@@ -290,7 +309,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
.vm = &kvm->arch.vgic.its_vm,
.vintid = irq->intid,
.db_enabled = true,
- .vpe_idx = irq->target_vcpu->vcpu_id,
+ .vpe_idx = idx,
};

if (its_map_vlpi(virq, &map))

Another option would be to just pass the pointer to the vpe instead,
and let the irq code to figure out the index, which can easily be
derived the doorbells (vpe->vpe_db_lpi - vpe->its_vm->db_lpi_base).

I'll work something out for the next version.

Andrew Jones

unread,
Aug 30, 2017, 6:40:10 AM8/30/17
to
While we do that in kvm_timer_inject_irq_work(), which is scheduled from
the same function that vgic_v4_doorbell_handler() would be enabled from
(kvm_vcpu_block->kvm_arch_vcpu_blocking), just a wake up isn't sufficient
in this case.

>
> Unless the request is there to ensure proper memory barriers around
> setting pending_last?

Right, unlike pending timers, we need the barriers in this handler.
Pending timers are safe because their pending test compares state set by
the VCPU thread itself to state acquired by the VCPU thread reading a host
sysreg itself. IOW, handlers making "set vcpu timer pending requests"
don't need to do anything but wake up the VCPU. Handlers that set some
sort of irq pending state, like vgic_v4_doorbell_handler(), do need to
worry about the visibility of that state though.

>
> In that case, is the read barrier taken care of by prepare_to_swait in
> kvm_vcpu_block()?

Thanks for the bug report :-)

There's a barrier, but it's not properly paired. Currently we have

VCPU handler
---- -------
for (;;) {
WRITE_ONCE(task->state, INTERRUPTIBLE); pending=true;
smp_mb(); smp_wmb(); // kvm_make_request()
if (pending) { WRITE_ONCE(vcpu->state, NORMAL);
... stop waiting ...
}
schedule();
}

Proper barrier use with swait/swake should instead look like this

VCPU handler
---- -------
for (;;) {
WRITE_ONCE(task->state, INTERRUPTIBLE);
smp_mb();
if (READ_ONCE(task->state) == NORMAL) { pending=true;
smp_rmb(); smp_wmb();
if (pending) WRITE_ONCE(vcpu->state, NORMAL);
... stop waiting ...
else
continue;
}
schedule();
}

But checking task state adds complexity and would only cover a small
window anyway (the window between prepare_to_swait() and
kvm_vcpu_check_block()). We need to cover a larger window, for this
particular case it's from kvm_arch_vcpu_blocking() to
kvm_vcpu_check_block(). Better use of VCPU requests is the answer:

VCPU handler
---- -------
kvm_arch_vcpu_blocking();
for (;;) {
prepare_to_swait();
if (test_bit(IRQ_PENDING)) { pending=true;
smp_rmb(); smp_wmb();
if (pending) set_bit(IRQ_PENDING);
... stop waiting ...
else
continue;
}
schedule();
}

The handler side is covered by kvm_make_request() and the vcpu kick, so
this patch looks good. However we need a patch to kvm_arch_vcpu_runnable()
to fix the VCPU side.

int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
- return ((!!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v))
- && !v->arch.power_off && !v->arch.pause);
+ if (v->arch.power_off || v->arch.pause)
+ return false;
+
+ if (kvm_test_request(KVM_REQ_IRQ_PENDING, vcpu)) {
+ smp_rmb();
+ return true;
+ }
}

We can keep the kvm_vgic_vcpu_pending_irq() check, after the IRQ_PENDING
check, if there are cases where IRQ_PENDING wouldn't be set, but
kvm_vgic_vcpu_pending_irq() would return true. Either way we also get a
bit of an optimization with this fix.

I'll think/test some more, and then send a patch.

Thanks,
drew

Marc Zyngier

unread,
Aug 30, 2017, 6:50:07 AM8/30/17
to
Hi Eric,
Ah, fair enough. I guess del_consumer() does nothing on PCI?

Marc Zyngier

unread,
Aug 30, 2017, 7:40:09 AM8/30/17
to
VFIO shouldn't care. The whole forward/bypass looks pretty stateless,
and VFIO will happily inject the interrupt if it gets remapped, as its
own interrupt handlers are still live.

>> + * - MOVI is translated by an update of the existing mapping, changing
>> + * the target vcpu, resulting in a VMOVI being generated.
>> + * - MOVALL is translated by a string of mapping updates (similar to
>> + * the handling of MOVI). MOVALL is horrible.
>> + *
>> + * Note that a DISCARD/MAPTI sequence emitted from the guest without
>> + * reprogramming the PCI endpoint after MAPTI does not result in a
>> + * VLPI being mapped, as there is no callback from VFIO (the guest
>> + * will get the interrupt via the normal SW injection). Fixing this is
>> + * not trivial, and requires some horrible messing with the VFIO
>> + * internals. Not fun. Don't do that.
>
> Is there not a quick way to check with VFIO or the irq subsystem if this
> interrupt can be forwarded and attempt that when handling the MAPTI in
> the vTIS, or does this break in horrible ways?

The problem we have here is that we need to map a purely virtual
interrupt to a Linux IRQ. VFIO does that job by using the offset of the
guest write into the MSI-X table and finding which MSI descriptor is
associated with this entry, giving us the corresponding interrupt.

We could keep track of the previous mappings we've been given, use that
as a hint for the new mapping, and be able to revert it should the guest
update the MSI on the endpoint. It feels pretty involved for something
that is pretty theoretical right now, but I'm happy to try it...

>> + *
>> + * Then there is the scheduling. Each time a vcpu is about to run on a
>> + * physical CPU, KVM must tell the corresponding redistributor about
>> + * it. And if we've migrated our vcpu from one CPU to another, we must
>> + * tell the ITS (so that the messages reach the right redistributor).
>> + * This is done in two steps: first issue a irq_set_affinity() on the
>> + * irq corresponding to the vcpu, then call its_schedule_vpe(). You
>> + * must be in a non-preemptible context. On exit, another call to
>> + * its_schedule_vpe() tells the redistributor that we're done with the
>> + * vcpu.
>> + *
>> + * Finally, the doorbell handling: Each vcpu is allocated an interrupt
>> + * which will fire each time a VLPI is made pending whilst the vcpu is
>> + * not running. Each time the vcpu gets blocked, the doorbell
>> + * interrupt gets enabled. When the vcpu is unblocked (for whatever
>> + * reason), the doorbell interrupt is disabled. This behaviour is
>> + * pretty similar to that of the backgroud timer.
>
> *background
>
> However, I think you can probably drop the last sentence in interest of
> clarity.

Fair enough.

>
>> + */
>> +
>> static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
>> {
>> struct kvm_vcpu *vcpu = info;
>> --
>> 2.11.0
>>
>
> Thanks for an otherwise excellent (and of course entertaining) writeup!
> -Christoffer
>

Thanks!

Christoffer Dall

unread,
Aug 30, 2017, 7:50:09 AM8/30/17
to
Stupid question: Can we change the struct its_vlpi_map to contain a
vpe pointer or in stead of or in addition to the index?

Alternatively, can you look it up using
vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq() and another 'ioctl' in
irq_set_vcpu_affinity() somehow instead of looping around and testing?
Ah, yeah, so I guess that is an option.

It should be pretty quick to loop over even hundreds of vcpus, but that
kind of thing just always seems wrong, somehow.

Thanks,
-Christoffer

Christoffer Dall

unread,
Aug 30, 2017, 8:00:09 AM8/30/17
to
I understand the problem now, and I think we should leave it alone until
someone comes along and shows us a performance problem with some guest
and driver that does this.


Thanks,
-Christoffer

Christoffer Dall

unread,
Aug 30, 2017, 8:00:09 AM8/30/17
to
Right. For the record, I don't mind getting this functional first, and
adding the static key later, as you prefer.

Thanks,
-Christoffer

Christoffer Dall

unread,
Aug 30, 2017, 8:00:09 AM8/30/17
to
Why don't we need to ensure reads of these flags are observable when
waking up, if the thread waking us up had se the variables prior to
issuing the wake up call?

> +
> + if (kvm_test_request(KVM_REQ_IRQ_PENDING, vcpu)) {
> + smp_rmb();

This looks wrong. What does this barrier ensure exactly? Some kind of
read following whoever called this function?

> + return true;
> + }
> }
>
> We can keep the kvm_vgic_vcpu_pending_irq() check, after the IRQ_PENDING
> check, if there are cases where IRQ_PENDING wouldn't be set, but
> kvm_vgic_vcpu_pending_irq() would return true. Either way we also get a
> bit of an optimization with this fix.
>
> I'll think/test some more, and then send a patch.
>

I'll review it more thoroughly then.

Thanks,
-Christoffer

Andrew Jones

unread,
Aug 30, 2017, 8:30:09 AM8/30/17
to
They're "special". I analyzed the cases for them and I *think* they're
all safe. I can do another round, or perhaps better would be to replace
them in some way with VCPU requests to make the analysis consistent.

>
> > +
> > + if (kvm_test_request(KVM_REQ_IRQ_PENDING, vcpu)) {
> > + smp_rmb();
>
> This looks wrong. What does this barrier ensure exactly? Some kind of
> read following whoever called this function?

Yes, it's similar to how kvm_check_request() is written. Actually, I
should probably just copy that more completely, i.e.

if (kvm_test_request(KVM_REQ_IRQ_PENDING, vcpu)) {
/*
* Ensure the rest of the request is visible to the caller.
* Pairs with the smp_wmb in kvm_make_request.
*/
smp_mb__after_atomic();
return true;
}

>
> > + return true;
> > + }
> > }
> >
> > We can keep the kvm_vgic_vcpu_pending_irq() check, after the IRQ_PENDING
> > check, if there are cases where IRQ_PENDING wouldn't be set, but
> > kvm_vgic_vcpu_pending_irq() would return true. Either way we also get a
> > bit of an optimization with this fix.
> >
> > I'll think/test some more, and then send a patch.
> >
>
> I'll review it more thoroughly then.

Thanks,
drew

Marc Zyngier

unread,
Aug 30, 2017, 9:00:10 AM8/30/17
to
This is obviously the right solution, because the *index* of the VPE
doesn't really matter for a map/unmap (it only matters for doorbell
operations, and that's a very different code path).

I came up with the following (untested, again), which is much more
appealing:

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index b47097a3e4b4..0607541fcafc 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -900,7 +900,7 @@ static void its_send_vmapti(struct its_device *dev, u32 id)
struct its_vlpi_map *map = &dev->event_map.vlpi_maps[id];
struct its_cmd_desc desc;

- desc.its_vmapti_cmd.vpe = map->vm->vpes[map->vpe_idx];
+ desc.its_vmapti_cmd.vpe = map->vpe;
desc.its_vmapti_cmd.dev = dev;
desc.its_vmapti_cmd.virt_id = map->vintid;
desc.its_vmapti_cmd.event_id = id;
@@ -914,7 +914,7 @@ static void its_send_vmovi(struct its_device *dev, u32 id)
struct its_vlpi_map *map = &dev->event_map.vlpi_maps[id];
struct its_cmd_desc desc;

- desc.its_vmovi_cmd.vpe = map->vm->vpes[map->vpe_idx];
+ desc.its_vmovi_cmd.vpe = map->vpe;
desc.its_vmovi_cmd.dev = dev;
desc.its_vmovi_cmd.event_id = id;
desc.its_vmovi_cmd.db_enabled = map->db_enabled;
diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h
index 52661b838821..58a4d89aa82c 100644
--- a/include/linux/irqchip/arm-gic-v4.h
+++ b/include/linux/irqchip/arm-gic-v4.h
@@ -62,15 +62,15 @@ struct its_vpe {
* irq_set_vcpu_affinity().
*
* @vm: Pointer to the GICv4 notion of a VM
+ * @vpe: Pointer to the GICv4 notion of a virtual CPU (VPE)
* @vintid: Virtual LPI number
* @db_enabled: Is the VPE doorbell to be generated?
- * @vpe_idx: Index (0-based) of the VPE in this VM. Not the vpe_id!
*/
struct its_vlpi_map {
struct its_vm *vm;
+ struct its_vpe *vpe;
u32 vintid;
bool db_enabled;
- u16 vpe_idx;
};

enum its_vcpu_info_cmd_type {
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index d790d0c74b8b..6ba3d73e0f70 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -715,7 +715,7 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
if (ret)
return ret;

- map.vpe_idx = vcpu->vcpu_id;
+ map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;

return its_map_vlpi(ite->irq->host_irq, &map);
}
@@ -1184,7 +1184,7 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
struct its_vlpi_map map;

if (!its_get_vlpi(irq->host_irq, &map)) {
- map.vpe_idx = vcpu2->vcpu_id;
+ map.vpe = &vcpu2->arch.vgic_cpu.vgic_v3.its_vpe;
its_map_vlpi(irq->host_irq, &map);
}
}
diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
index cf5d6e2de6b8..6ece88322013 100644
--- a/virt/kvm/arm/vgic/vgic-v4.c
+++ b/virt/kvm/arm/vgic/vgic-v4.c
@@ -288,9 +288,9 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
*/
map = (struct its_vlpi_map) {
.vm = &kvm->arch.vgic.its_vm,
+ .vpe = &irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe,
.vintid = irq->intid,
.db_enabled = true,
- .vpe_idx = irq->target_vcpu->vcpu_id,
};

if (its_map_vlpi(virq, &map))

Maybe I'll introduce a vcpu_to_vpe() helper, but it already looks much
better to me...

Auger Eric

unread,
Aug 30, 2017, 9:00:11 AM8/30/17
to
Hi Marc,
Correct. For vfio-platform, it is switching back to automasked handler
but if you haven't implemented anything specific on vfio side in this
series, it is not even implemented.

Thanks

Eric
>
> Thanks,
>
> M.
>

Marc Zyngier

unread,
Aug 30, 2017, 10:10:10 AM8/30/17
to
On 28/08/17 19:18, Christoffer Dall wrote:
I believe we should be OK here, as we hold the ITS mutex during any
command processing, and both the forward/unforward paths take that same
mutex.

On a slightly different note, it looks like the MOVI code could benefit
from using vgic_its_resolve_lpi(), which has been introduce earlier in
this series.

Marc Zyngier

unread,
Aug 30, 2017, 10:50:10 AM8/30/17
to
On 28/08/17 19:18, Christoffer Dall wrote:
> On Mon, Jul 31, 2017 at 06:26:24PM +0100, Marc Zyngier wrote:
>> The current implementation of MOVALL doesn't allow us to call
>> into the core ITS code as we hold a number of spinlocks.
>>
>> Let's try a method used in other parts of the code, were we copy
>> the intids of the candicate interrupts, and then do whatever
>> we need to do with them outside of the critical section.
>>
>> This allows us to move the interrupts one by one, at the expense
>> of a bit of CPU time. Who cares? MOVALL is such a stupid command
>> anyway...
>>
>> Signed-off-by: Marc Zyngier <marc.z...@arm.com>
>> ---
>> virt/kvm/arm/vgic/vgic-its.c | 27 ++++++++++++++++++++-------
>> 1 file changed, 20 insertions(+), 7 deletions(-)
>>
>> diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
>> index 2c065c970ba0..65cc77fde609 100644
>> --- a/virt/kvm/arm/vgic/vgic-its.c
>> +++ b/virt/kvm/arm/vgic/vgic-its.c
>> @@ -1147,11 +1147,12 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
>> static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
>> u64 *its_cmd)
>> {
>> - struct vgic_dist *dist = &kvm->arch.vgic;
>> u32 target1_addr = its_cmd_get_target_addr(its_cmd);
>> u32 target2_addr = its_cmd_mask_field(its_cmd, 3, 16, 32);
>> struct kvm_vcpu *vcpu1, *vcpu2;
>> struct vgic_irq *irq;
>> + u32 *intids;
>> + int irq_count, i;
>>
>> if (target1_addr >= atomic_read(&kvm->online_vcpus) ||
>> target2_addr >= atomic_read(&kvm->online_vcpus))
>> @@ -1163,19 +1164,31 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
>> vcpu1 = kvm_get_vcpu(kvm, target1_addr);
>> vcpu2 = kvm_get_vcpu(kvm, target2_addr);
>>
>> - spin_lock(&dist->lpi_list_lock);
>> + irq_count = vgic_copy_lpi_list(vcpu1, &intids);
>> + if (irq_count < 0)
>> + return irq_count;
>>
>> - list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
>> - spin_lock(&irq->irq_lock);
>> + for (i = 0; i < irq_count; i++) {
>> + irq = vgic_get_irq(kvm, NULL, intids[i]);
>> + if (!irq)
>> + continue;
>
> Getting irq == NULL means that we've removed this LPI since
> vgic_copy_lpi_list, right? Can this really happen while we hold the its
> mutex?

A disappearing LPI can only be the result of a DISCARD, which cannot
happen, as we indeed hold the ITS lock.

> Also, we don't check this in its_sync_lpi_pending_table which would
> indicate that we either have a bug there or are being overly careful
> here (or should change the continue to BUG).

Let's aim for consistency. I'll drop this test.

>
>
>>
>> if (irq->target_vcpu == vcpu1)
>> irq->target_vcpu = vcpu2;
>>
>> - spin_unlock(&irq->irq_lock);
>
> Is it safe to modify target_vcpu without holding the irq_lock?

Unintentional regression. I'll fix that. But I wonder if there is an
actual point in testing testing the target_vcpu here. Since we hold the
ITS lock, we're damn sure that the affinity can't be changed, right?

Marc Zyngier

unread,
Aug 30, 2017, 11:40:11 AM8/30/17
to
On 28/08/17 19:18, Christoffer Dall wrote:
It could if you only had this patch. The following patch makes sure that
the interrupt does not get enabled at request time, meaning it will only
get enabled when the vcpu will eventually block.

And yes, spurious doorbells are a real thing. And they suck.

>
>> +
>> + return IRQ_HANDLED;
>> +}
>> +
>> int vgic_v4_init(struct kvm *kvm)
>> {
>> struct vgic_dist *dist = &kvm->arch.vgic;
>> @@ -57,16 +70,37 @@ int vgic_v4_init(struct kvm *kvm)
>> return ret;
>> }
>>
>> + kvm_for_each_vcpu(i, vcpu, kvm) {
>> + int irq = dist->its_vm.vpes[i]->irq;
>> +
>> + ret = request_irq(irq, vgic_v4_doorbell_handler,
>> + 0, "vcpu", vcpu);
>> + if (ret) {
>> + kvm_err("failed to allocate vcpu IRQ%d\n", irq);
>> + dist->its_vm.nr_vpes = i;
>
> That's a neat trick for the error handling. Might deserve a tiny
> comment.

Ah, yes, will do.

Marc Zyngier

unread,
Aug 30, 2017, 12:10:11 PM8/30/17
to
Fair enough.

> #bikeshedding

How about this on top:

diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 01eaf4ee2f63..e471750dc0a1 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -469,18 +469,16 @@ early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
static void vgic_check_v4_cpuif(void *param)
{
u32 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
- bool *v4 = param, this_cpu_is_v4;
+ bool *v4 = param;

- this_cpu_is_v4 = !(ich_vtr_el2 & ICH_VTR_nV4_MASK);
- if (!this_cpu_is_v4)
+ *v4 = !(ich_vtr_el2 & ICH_VTR_nV4_MASK);
+ if (!*v4)
kvm_info("CPU%d is not GICv4 capable\n", smp_processor_id());
-
- *v4 &= this_cpu_is_v4;
}

/**
* vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
- * @node: pointer to the DT node
+ * @info: pointer to the firmware-agnostic GIC information
*
* Returns 0 if a GICv3 has been found, returns an error code otherwise
*/
@@ -504,9 +502,14 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
if (info->has_v4) {
int cpu;

- for_each_online_cpu(cpu)
+ for_each_online_cpu(cpu) {
+ bool enable;
+
smp_call_function_single(cpu, vgic_check_v4_cpuif,
- &gicv4_enable, 1);
+ &enable, 1);
+ gicv4_enable = gicv4_enable && enable;
+ }
+
kvm_vgic_global_state.has_gicv4 = gicv4_enable;
kvm_info("GICv4 support %sabled\n",
gicv4_enable ? "en" : "dis");

Christoffer Dall

unread,
Aug 30, 2017, 4:00:10 PM8/30/17
to
Yes, indeed. Looks good to me as well.

The only thing that makes me slightly nervous is the use of target_vcpu,
but I think we rely on it never being NULL for LPIs elsewhere in the
code, so we should be fine.

Thanks,
-Christoffer

Christoffer Dall

unread,
Aug 30, 2017, 4:10:09 PM8/30/17
to
On Wed, Aug 30, 2017 at 03:08:01PM +0100, Marc Zyngier wrote:
ok, yes, as long as the only other modifirs are the forward/unforward
paths and they hold the mutex, we should be fine.

>
> On a slightly different note, it looks like the MOVI code could benefit
> from using vgic_its_resolve_lpi(), which has been introduce earlier in
> this series.

Only problem is you'd need to get a handle to the ite as well to change
the collection pointer. I suppose you could add another pointer
pointer.

-Christoffer

Christoffer Dall

unread,
Aug 30, 2017, 4:20:16 PM8/30/17
to
Ah, yes, because you filtered the list on the source VCPU already you
should be able to let go of this check.

Thanks,
-Christoffer

Christoffer Dall

unread,
Aug 30, 2017, 5:00:09 PM8/30/17
to
Ah, my abilities to forward read on a patch series are quite poor.

Thanks,
-Christoffer

Christoffer Dall

unread,
Aug 30, 2017, 5:10:09 PM8/30/17
to
Looks good to me, thanks!
-Christoffer

Marc Zyngier

unread,
Aug 31, 2017, 4:20:08 AM8/31/17
to
Not quite. It indicates that the patch split is a bit wrong, and that

irq_set_status_flags(irq, IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY);

should really be in this patch and not the following one.

I'll fix that as I rebase the whole thing.

Marc Zyngier

unread,
Aug 31, 2017, 6:30:09 AM8/31/17
to
Hmmm. Maybe not. When mapping an LPI, you can assign it to a collection
that is not yet mapped to a redistributor, hence no target_vcpu.

But in this case, vgic_its_resolve_lpi() fails, and we just don't enter
this code path. Annoyingly, this also shows that I do not handle MAPC at
all in this code, which is pretty embarrassing (I rely on MAPC being
done before MAPI/MAPTI).

I'll address that in the next version.

Marc Zyngier

unread,
Aug 31, 2017, 8:20:08 AM8/31/17
to
On 28/08/17 19:18, Christoffer Dall wrote:
> On Fri, Aug 04, 2017 at 08:44:04AM +0100, Marc Zyngier wrote:
>> On 31/07/17 18:26, Marc Zyngier wrote:
>>> When a vPE is not running, a VLPI being made pending results in a
>>> doorbell interrupt being delivered. Let's handle this interrupt
>>> and update the pending_last flag that indicates that VLPIs are
>>> pending. The corresponding vcpu is also kicked into action.
>>>
>>> Signed-off-by: Marc Zyngier <marc.z...@arm.com>
>>> ---
>>> virt/kvm/arm/vgic/vgic-v4.c | 34 ++++++++++++++++++++++++++++++++++
>>> 1 file changed, 34 insertions(+)
>>>
>>> diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
>>> index 534d3051a078..6af3cde6d7d4 100644
>>> --- a/virt/kvm/arm/vgic/vgic-v4.c
>>> +++ b/virt/kvm/arm/vgic/vgic-v4.c
>>> @@ -21,6 +21,19 @@
>>>
>>> #include "vgic.h"
>>>
>>> +static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
>>> +{
>>> + struct kvm_vcpu *vcpu = info;
>>> +
>>> + if (!kvm_vgic_vcpu_pending_irq(vcpu)) {
>>> + vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
>>> + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
>>> + kvm_vcpu_kick(vcpu);
>>> + }
>>
>> This code is so obviously broken that I completely overlooked it.
>>
>> If we have take a doorbell interrupt, then it means nothing was
>> otherwise pending (because we'd have been kicked out of the blocking
>> state, and will have masked the doorbell). So checking for pending
>> interrupts is pointless.
>>
>> Furthermore, calling kvm_vgic_vcpu_pending_irq() takes the ap_list
>> lock. If we take a doorbell interrupt while injecting a virtual
>> interrupt (from userspace, for example) on the same CPU, we end-up
>> in deadlock land. This would be solved by Christoffer's latest
>> crop of timer patches, but there is no point getting there the first
>> place.
>>
>> The patchlet below solves it:
>>
>> diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
>> index 15feb1151797..48e4d6ebeaa8 100644
>> --- a/virt/kvm/arm/vgic/vgic-v4.c
>> +++ b/virt/kvm/arm/vgic/vgic-v4.c
>> @@ -94,11 +94,9 @@ static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
>> {
>> struct kvm_vcpu *vcpu = info;
>>
>> - if (!kvm_vgic_vcpu_pending_irq(vcpu)) {
>> - vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
>> - kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
>> - kvm_vcpu_kick(vcpu);
>> - }
>> + vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
>> + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
>> + kvm_vcpu_kick(vcpu);
>
> I don't think you need the request and kick, because if you're getting
> this doorbell, doesn't that also mean that the VCPU is not running in
> the guest and you simply need to make sure the VCPU thread gets
> scheduled again, so you could call kvm_vcpu_wake_up() instead.
>
> Unless the request is there to ensure proper memory barriers around
> setting pending_last?

We definitely need some form of barrier here (the write needs to be
observed on the CPU we're waking up on), and I wanted to model this in a
similar way to our injection path (where we do have the make_request +
kick pattern).

After all, we really have an interrupt pending that deserves attention.
Is there any reason why we'd want to model it differently from the rest
of the injection path?

Christoffer Dall

unread,
Aug 31, 2017, 8:40:08 AM8/31/17
to
Oh right, the local vcpu varible in ...handle_mapi is initialized to
NULL, bummer I missed that.

>
> But in this case, vgic_its_resolve_lpi() fails, and we just don't enter
> this code path. Annoyingly, this also shows that I do not handle MAPC at
> all in this code, which is pretty embarrassing (I rely on MAPC being
> done before MAPI/MAPTI).
>
> I'll address that in the next version.
>

Looking forward to it ;)

-Christoffer

Christoffer Dall

unread,
Aug 31, 2017, 8:50:08 AM8/31/17
to
The difference is that you'll never need this if the VCPU is running,
this is only when the VCPU is on a waitqueue and sleeping, and therefore
you don't actually need a kick, but you just need to wake up the VCPU
thread.

But kvm_vcpu_kick() also wakes up, it's just a question of using the
most appropriate hammer, making it clear what exactly is going on.

But, as Drew pointed out, we use the request framework to ensure proper
barriers (when he fixes it), after waking up a VCPU, so in this cae it's
to make sure the VCPU thread observed pending_last==true after being
woken up.

Interestingly, we don't need that for the timer, since we don't change
random in-memory values, but we read the physical counter which is the
thing that changes when waking up the thread.

So, sorry for a lot of noise, your code is fine, and Drew fixes KVM, I
think.

Thanks,
-Christoffer

Shannon Zhao

unread,
Sep 6, 2017, 5:10:08 AM9/6/17
to


On 2017/8/1 1:26, Marc Zyngier wrote:
> When a vPE is not running, a VLPI being made pending results in a
> doorbell interrupt being delivered. Let's handle this interrupt
> and update the pending_last flag that indicates that VLPIs are
> pending. The corresponding vcpu is also kicked into action.
>
> Signed-off-by: Marc Zyngier <marc.z...@arm.com>
> ---
> virt/kvm/arm/vgic/vgic-v4.c | 34 ++++++++++++++++++++++++++++++++++
> 1 file changed, 34 insertions(+)
>
> diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
> index 534d3051a078..6af3cde6d7d4 100644
> --- a/virt/kvm/arm/vgic/vgic-v4.c
> +++ b/virt/kvm/arm/vgic/vgic-v4.c
> @@ -21,6 +21,19 @@
>
> #include "vgic.h"
>
> +static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
> +{
> + struct kvm_vcpu *vcpu = info;
> +
> + if (!kvm_vgic_vcpu_pending_irq(vcpu)) {
> + vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
> + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
> + kvm_vcpu_kick(vcpu);
> + }
> +
> + return IRQ_HANDLED;
> +}
> +
> int vgic_v4_init(struct kvm *kvm)
> {
> struct vgic_dist *dist = &kvm->arch.vgic;
> @@ -57,16 +70,37 @@ int vgic_v4_init(struct kvm *kvm)
> return ret;
> }
>
> + kvm_for_each_vcpu(i, vcpu, kvm) {
> + int irq = dist->its_vm.vpes[i]->irq;
> +
> + ret = request_irq(irq, vgic_v4_doorbell_handler,
> + 0, "vcpu", vcpu);
> + if (ret) {
> + kvm_err("failed to allocate vcpu IRQ%d\n", irq);
> + dist->its_vm.nr_vpes = i;
This overwirtes the nr_vpes while it uses kvm->online_vcpus in
its_alloc_vcpu_irqs to alloc irqs and if this fails it uses the
overwirten nr_vpes other than kvm->online_vcpus in its_free_vcpu_irqs to
free the irqs. So there will be memory leak on error path.

> + break;
> + }
> + }
> +
> + if (ret)
> + vgic_v4_teardown(kvm);
> +
> return ret;
> }
>
> void vgic_v4_teardown(struct kvm *kvm)
> {
> struct its_vm *its_vm = &kvm->arch.vgic.its_vm;
> + int i;
>
> if (!its_vm->vpes)
> return;
>
> + for (i = 0; i < its_vm->nr_vpes; i++) {
> + struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, i);
> + free_irq(its_vm->vpes[i]->irq, vcpu);
> + }
> +
> its_free_vcpu_irqs(its_vm);
> kfree(its_vm->vpes);
> its_vm->nr_vpes = 0;
>

Thanks,
--
Shannon
0 new messages