[PATCH V2 2/2] LoongArch: Add AVEC irqchip support

0 views
Skip to first unread message

Tianyang Zhang

unread,
Jul 4, 2024, 10:03:41 PMJul 4
to chenh...@kernel.org, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, zhangt...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
Introduce the advanced extended interrupt controller (AVEC). This feature
gives each core 256 independent interrupt vectors, and allows MSI
interrupts to be routed independently to any vector on any CPU.

Co-developed-by: Jianmin Lv <lvji...@loongson.cn>
Signed-off-by: Jianmin Lv <lvji...@loongson.cn>
Co-developed-by: Liupu Wang <wang...@loongson.cn>
Signed-off-by: Liupu Wang <wang...@loongson.cn>
Co-developed-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Tianyang Zhang <zhangt...@loongson.cn>
---
arch/loongarch/Kconfig | 1 +
arch/loongarch/include/asm/cpu-features.h | 1 +
arch/loongarch/include/asm/cpu.h | 2 +
arch/loongarch/include/asm/hw_irq.h | 10 +
arch/loongarch/include/asm/irq.h | 12 +-
arch/loongarch/include/asm/loongarch.h | 20 +-
arch/loongarch/include/asm/smp.h | 2 +
arch/loongarch/kernel/cpu-probe.c | 3 +-
arch/loongarch/kernel/smp.c | 5 +
drivers/irqchip/Makefile | 2 +-
drivers/irqchip/irq-loongarch-avec.c | 440 ++++++++++++++++++++++
drivers/irqchip/irq-loongarch-cpu.c | 4 +-
drivers/irqchip/irq-loongson-eiointc.c | 3 +
drivers/irqchip/irq-loongson-pch-msi.c | 43 ++-
14 files changed, 538 insertions(+), 10 deletions(-)
create mode 100644 drivers/irqchip/irq-loongarch-avec.c

Changes log:
V0->V1:
1.Modified some formats and declarations
2.Removed kmalloc/kfree when adding affinity related data to pending_list,
and used moving tag to replace the original behavior
3.Adjusted the process that enables AVEC interrupts, now it is at the end of all processes
4.Removed CPUHP related callbacks, now irq_matrix_online/irq_matrix_offline is completed in start_secondary/loongson_cpu_disable
5.Adjusted compatibility issues for CONFIG_ACPI
6.About question:
> irr = csr_read64(LOONGARCH_CSR_IRR0 + vector / 64);
> should be good enough, no?
csr_read64 was built-in as __csrrd_d, it doesn't seem to support variables as parameters
>>>>
drivers/irqchip/irq-loongarch-avec.c: In function ‘complete_irq_moving’:
./arch/loongarch/include/asm/loongarch.h:164:25: error: invalid argument to built-in function
164 | #define csr_read64(reg) __csrrd_d(reg)
| ^~~~~~~~~
drivers/irqchip/irq-loongarch-avec.c:170:23: note: in expansion of macro ‘csr_read64’
170 | irr = csr_read64(LOONGARCH_CSR_IRR_BASE + vector / VECTORS_PER_REG);
| ^~~~~~~~~~
>>>>
So we have temporarily retained the previous implementation.

V1->V2:
Fixed up coding style. Made on/offline functions void
Added compatibility when CONFIG_SMP is turned off

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index e38139c576ee..a66e49b5a68c 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -83,6 +83,7 @@ config LOONGARCH
select GENERIC_ENTRY
select GENERIC_GETTIMEOFDAY
select GENERIC_IOREMAP if !ARCH_IOREMAP
+ select GENERIC_IRQ_MATRIX_ALLOCATOR
select GENERIC_IRQ_MULTI_HANDLER
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_SHOW
diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h
index 2eafe6a6aca8..16a716f88a5c 100644
--- a/arch/loongarch/include/asm/cpu-features.h
+++ b/arch/loongarch/include/asm/cpu-features.h
@@ -65,5 +65,6 @@
#define cpu_has_guestid cpu_opt(LOONGARCH_CPU_GUESTID)
#define cpu_has_hypervisor cpu_opt(LOONGARCH_CPU_HYPERVISOR)
#define cpu_has_ptw cpu_opt(LOONGARCH_CPU_PTW)
+#define cpu_has_avecint cpu_opt(LOONGARCH_CPU_AVECINT)

#endif /* __ASM_CPU_FEATURES_H */
diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h
index 48b9f7168bcc..843f9c4ec980 100644
--- a/arch/loongarch/include/asm/cpu.h
+++ b/arch/loongarch/include/asm/cpu.h
@@ -99,6 +99,7 @@ enum cpu_type_enum {
#define CPU_FEATURE_GUESTID 24 /* CPU has GuestID feature */
#define CPU_FEATURE_HYPERVISOR 25 /* CPU has hypervisor (running in VM) */
#define CPU_FEATURE_PTW 26 /* CPU has hardware page table walker */
+#define CPU_FEATURE_AVECINT 27 /* CPU has avec interrupt */

#define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG)
#define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM)
@@ -127,5 +128,6 @@ enum cpu_type_enum {
#define LOONGARCH_CPU_GUESTID BIT_ULL(CPU_FEATURE_GUESTID)
#define LOONGARCH_CPU_HYPERVISOR BIT_ULL(CPU_FEATURE_HYPERVISOR)
#define LOONGARCH_CPU_PTW BIT_ULL(CPU_FEATURE_PTW)
+#define LOONGARCH_CPU_AVECINT BIT_ULL(CPU_FEATURE_AVECINT)

#endif /* _ASM_CPU_H */
diff --git a/arch/loongarch/include/asm/hw_irq.h b/arch/loongarch/include/asm/hw_irq.h
index af4f4e8fbd85..772692e765c0 100644
--- a/arch/loongarch/include/asm/hw_irq.h
+++ b/arch/loongarch/include/asm/hw_irq.h
@@ -9,6 +9,16 @@

extern atomic_t irq_err_count;

+/*
+ * 256 vectors Map:
+ *
+ * 0 - 15: mapping legacy IPs, e.g. IP0-12.
+ * 16 - 255: mapping a vector for external IRQ.
+ *
+ */
+#define NR_VECTORS 256
+#define IRQ_MATRIX_BITS NR_VECTORS
+#define NR_LEGACY_VECTORS 16
/*
* interrupt-retrigger: NOP for now. This may not be appropriate for all
* machines, we'll see ...
diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
index 480418bc5071..cf3b635a9b86 100644
--- a/arch/loongarch/include/asm/irq.h
+++ b/arch/loongarch/include/asm/irq.h
@@ -65,7 +65,7 @@ extern struct acpi_vector_group msi_group[MAX_IO_PICS];
#define LOONGSON_LPC_LAST_IRQ (LOONGSON_LPC_IRQ_BASE + 15)

#define LOONGSON_CPU_IRQ_BASE 16
-#define LOONGSON_CPU_LAST_IRQ (LOONGSON_CPU_IRQ_BASE + 14)
+#define LOONGSON_CPU_LAST_IRQ (LOONGSON_CPU_IRQ_BASE + 15)

#define LOONGSON_PCH_IRQ_BASE 64
#define LOONGSON_PCH_ACPI_IRQ (LOONGSON_PCH_IRQ_BASE + 47)
@@ -101,6 +101,16 @@ int pch_msi_acpi_init(struct irq_domain *parent,
struct acpi_madt_msi_pic *acpi_pchmsi);
int pch_pic_acpi_init(struct irq_domain *parent,
struct acpi_madt_bio_pic *acpi_pchpic);
+
+#ifdef CONFIG_ACPI
+int __init pch_msi_acpi_init_v2(struct irq_domain *parent,
+ struct acpi_madt_msi_pic *pch_msi_entry);
+int __init loongarch_avec_acpi_init(struct irq_domain *parent);
+void complete_irq_moving(void);
+void loongarch_avec_offline_cpu(unsigned int cpu);
+void loongarch_avec_online_cpu(unsigned int cpu);
+#endif
+
int find_pch_pic(u32 gsi);
struct fwnode_handle *get_pch_msi_handle(int pci_segment);

diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index eb09adda54b7..16a910359977 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -72,7 +72,6 @@
#define CPUCFG1_RPLV BIT(23)
#define CPUCFG1_HUGEPG BIT(24)
#define CPUCFG1_CRC32 BIT(25)
-#define CPUCFG1_MSGINT BIT(26)

#define LOONGARCH_CPUCFG2 0x2
#define CPUCFG2_FP BIT(0)
@@ -252,8 +251,8 @@
#define CSR_ESTAT_EXC_WIDTH 6
#define CSR_ESTAT_EXC (_ULCAST_(0x3f) << CSR_ESTAT_EXC_SHIFT)
#define CSR_ESTAT_IS_SHIFT 0
-#define CSR_ESTAT_IS_WIDTH 14
-#define CSR_ESTAT_IS (_ULCAST_(0x3fff) << CSR_ESTAT_IS_SHIFT)
+#define CSR_ESTAT_IS_WIDTH 15
+#define CSR_ESTAT_IS (_ULCAST_(0x7fff) << CSR_ESTAT_IS_SHIFT)

#define LOONGARCH_CSR_ERA 0x6 /* ERA */

@@ -999,10 +998,18 @@
#define CSR_FWPC_SKIP_SHIFT 16
#define CSR_FWPC_SKIP (_ULCAST_(1) << CSR_FWPC_SKIP_SHIFT)

+#define LOONGARCH_CSR_IRR0 0xa0
+#define LOONGARCH_CSR_IRR1 0xa1
+#define LOONGARCH_CSR_IRR2 0xa2
+#define LOONGARCH_CSR_IRR3 0xa3
+#define LOONGARCH_CSR_IRR_BASE LOONGARCH_CSR_IRR0
+
+#define LOONGARCH_CSR_ILR 0xa4
+
/*
* CSR_ECFG IM
*/
-#define ECFG0_IM 0x00001fff
+#define ECFG0_IM 0x00005fff
#define ECFGB_SIP0 0
#define ECFGF_SIP0 (_ULCAST_(1) << ECFGB_SIP0)
#define ECFGB_SIP1 1
@@ -1045,6 +1052,7 @@
#define IOCSRF_EIODECODE BIT_ULL(9)
#define IOCSRF_FLATMODE BIT_ULL(10)
#define IOCSRF_VM BIT_ULL(11)
+#define IOCSRF_AVEC BIT_ULL(15)

#define LOONGARCH_IOCSR_VENDOR 0x10

@@ -1055,6 +1063,7 @@
#define LOONGARCH_IOCSR_MISC_FUNC 0x420
#define IOCSR_MISC_FUNC_TIMER_RESET BIT_ULL(21)
#define IOCSR_MISC_FUNC_EXT_IOI_EN BIT_ULL(48)
+#define IOCSR_MISC_FUNC_AVEC_EN BIT_ULL(51)

#define LOONGARCH_IOCSR_CPUTEMP 0x428

@@ -1375,9 +1384,10 @@ __BUILD_CSR_OP(tlbidx)
#define INT_TI 11 /* Timer */
#define INT_IPI 12
#define INT_NMI 13
+#define INT_AVEC 14

/* ExcCodes corresponding to interrupts */
-#define EXCCODE_INT_NUM (INT_NMI + 1)
+#define EXCCODE_INT_NUM (INT_AVEC + 1)
#define EXCCODE_INT_START 64
#define EXCCODE_INT_END (EXCCODE_INT_START + EXCCODE_INT_NUM - 1)

diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index 278700cfee88..2399004596a3 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -69,9 +69,11 @@ extern int __cpu_logical_map[NR_CPUS];
#define ACTION_BOOT_CPU 0
#define ACTION_RESCHEDULE 1
#define ACTION_CALL_FUNCTION 2
+#define ACTION_CLEAR_VECT 3
#define SMP_BOOT_CPU BIT(ACTION_BOOT_CPU)
#define SMP_RESCHEDULE BIT(ACTION_RESCHEDULE)
#define SMP_CALL_FUNCTION BIT(ACTION_CALL_FUNCTION)
+#define SMP_CLEAR_VECT BIT(ACTION_CLEAR_VECT)

struct secondary_data {
unsigned long stack;
diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c
index 55320813ee08..3b2e72e8f9bd 100644
--- a/arch/loongarch/kernel/cpu-probe.c
+++ b/arch/loongarch/kernel/cpu-probe.c
@@ -106,7 +106,6 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
elf_hwcap |= HWCAP_LOONGARCH_CRC32;
}

-
config = read_cpucfg(LOONGARCH_CPUCFG2);
if (config & CPUCFG2_LAM) {
c->options |= LOONGARCH_CPU_LAM;
@@ -176,6 +175,8 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
c->options |= LOONGARCH_CPU_EIODECODE;
if (config & IOCSRF_VM)
c->options |= LOONGARCH_CPU_HYPERVISOR;
+ if (config & IOCSRF_AVEC)
+ c->options |= LOONGARCH_CPU_AVECINT;

config = csr_read32(LOONGARCH_CSR_ASID);
config = (config & CSR_ASID_BIT) >> CSR_ASID_BIT_SHIFT;
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 0dfe2388ef41..6dfedef306f3 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -234,6 +234,9 @@ static irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
per_cpu(irq_stat, cpu).ipi_irqs[IPI_CALL_FUNCTION]++;
}

+ if (action & SMP_CLEAR_VECT)
+ complete_irq_moving();
+
return IRQ_HANDLED;
}

@@ -388,6 +391,7 @@ int loongson_cpu_disable(void)
irq_migrate_all_off_this_cpu();
clear_csr_ecfg(ECFG0_IM);
local_irq_restore(flags);
+ loongarch_avec_offline_cpu(cpu);
local_flush_tlb_all();

return 0;
@@ -566,6 +570,7 @@ asmlinkage void start_secondary(void)
* early is dangerous.
*/
WARN_ON_ONCE(!irqs_disabled());
+ loongarch_avec_online_cpu(cpu);
loongson_smp_finish();

cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 2df72b7b165b..de091a9f7c88 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -110,7 +110,7 @@ obj-$(CONFIG_LS1X_IRQ) += irq-ls1x.o
obj-$(CONFIG_TI_SCI_INTR_IRQCHIP) += irq-ti-sci-intr.o
obj-$(CONFIG_TI_SCI_INTA_IRQCHIP) += irq-ti-sci-inta.o
obj-$(CONFIG_TI_PRUSS_INTC) += irq-pruss-intc.o
-obj-$(CONFIG_IRQ_LOONGARCH_CPU) += irq-loongarch-cpu.o
+obj-$(CONFIG_IRQ_LOONGARCH_CPU) += irq-loongarch-cpu.o irq-loongarch-avec.o
obj-$(CONFIG_LOONGSON_LIOINTC) += irq-loongson-liointc.o
obj-$(CONFIG_LOONGSON_EIOINTC) += irq-loongson-eiointc.o
obj-$(CONFIG_LOONGSON_HTPIC) += irq-loongson-htpic.o
diff --git a/drivers/irqchip/irq-loongarch-avec.c b/drivers/irqchip/irq-loongarch-avec.c
new file mode 100644
index 000000000000..81dbccbea5a2
--- /dev/null
+++ b/drivers/irqchip/irq-loongarch-avec.c
@@ -0,0 +1,440 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2024 Loongson Technologies, Inc.
+ */
+
+#include <linux/cpuhotplug.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/radix-tree.h>
+#include <linux/spinlock.h>
+
+#include <asm/loongarch.h>
+#include <asm/setup.h>
+
+#define VECTORS_PER_REG 64
+#define ILR_INVALID_MASK 0x80000000UL
+#define ILR_VECTOR_MASK 0xffUL
+#define AVEC_MSG_OFFSET 0x100000
+
+static phys_addr_t msi_base_v2;
+static DEFINE_PER_CPU(struct irq_desc * [NR_VECTORS], irq_map);
+
+#ifdef CONFIG_SMP
+/*
+ * Per-CPU list head. Entries are loongarch_avec_data objects queued by
+ * loongarch_avec_sync() on their previous CPU and removed again by
+ * complete_irq_moving() or clear_free_vector().
+ */
+struct pending_list {
+ struct list_head head;
+};
+
+static DEFINE_PER_CPU(struct pending_list, pending_list);
+#endif
+
+/*
+ * Global state of the AVEC interrupt controller driver.
+ *
+ * fwnode:        named firmware node handle the IRQ domain is created on
+ * domain:        the AVEC IRQ domain
+ * vector_matrix: irq_matrix used to allocate per-CPU vectors
+ * lock:          raw spinlock taken around vector allocation, freeing and
+ *                migration bookkeeping
+ */
+struct loongarch_avec_chip {
+ struct fwnode_handle *fwnode;
+ struct irq_domain *domain;
+ struct irq_matrix *vector_matrix;
+ raw_spinlock_t lock;
+};
+
+static struct loongarch_avec_chip loongarch_avec;
+
+/*
+ * Per-interrupt chip data.
+ *
+ * entry:    link into the pending_list of prev_cpu while a move is in flight
+ * cpu/vec:  CPU and vector the interrupt is currently assigned to
+ * prev_cpu/prev_vec: CPU and vector used before the last affinity change;
+ *           equal to cpu/vec when no move is pending
+ * moving:   set while the entry sits on a pending list
+ * managed:  snapshot of irqd_affinity_is_managed() taken at allocation time
+ */
+struct loongarch_avec_data {
+ struct list_head entry;
+ unsigned int cpu;
+ unsigned int vec;
+ unsigned int prev_cpu;
+ unsigned int prev_vec;
+ unsigned int moving : 1,
+ managed : 1;
+};
+
+static struct cpumask intersect_mask;
+
+/*
+ * Allocate a vector from the AVEC matrix on one of the CPUs in @dest.
+ *
+ * @irqd is currently unused. The CPU that was picked is stored through @cpu.
+ * Returns whatever irq_matrix_alloc() returns; callers treat a negative
+ * value as an error and any other value as the allocated vector.
+ */
+static int assign_irq_vector(struct irq_data *irqd, const struct cpumask *dest,
+			     unsigned int *cpu)
+{
+	struct irq_matrix *matrix = loongarch_avec.vector_matrix;
+
+	return irq_matrix_alloc(matrix, dest, false, cpu);
+}
+
+/* irq_ack callback: intentionally empty, no acknowledge action is taken here. */
+static inline void loongarch_avec_ack_irq(struct irq_data *d)
+{
+}
+
+/* irq_unmask callback: intentionally empty, nothing is unmasked at this level. */
+static inline void loongarch_avec_unmask_irq(struct irq_data *d)
+{
+}
+
+/* irq_mask callback: intentionally empty, nothing is masked at this level. */
+static inline void loongarch_avec_mask_irq(struct irq_data *d)
+{
+}
+
+#ifdef CONFIG_SMP
+/* Reset the pending-move list of @cpu to an empty list. */
+static inline void pending_list_init(int cpu)
+{
+	INIT_LIST_HEAD(&per_cpu_ptr(&pending_list, cpu)->head);
+}
+
+/*
+ * Queue @adata on the pending list of its previous CPU and notify that CPU
+ * with an ACTION_CLEAR_VECT IPI so it can release the old vector.
+ *
+ * Nothing is done when the previous CPU is not online.
+ */
+static void loongarch_avec_sync(struct loongarch_avec_data *adata)
+{
+	unsigned int old_cpu = adata->prev_cpu;
+
+	if (!cpu_online(old_cpu))
+		return;
+
+	list_add_tail(&adata->entry, &per_cpu_ptr(&pending_list, old_cpu)->head);
+	adata->moving = true;
+	mp_ops.send_ipi_single(old_cpu, ACTION_CLEAR_VECT);
+}
+
+/*
+ * irq_set_affinity callback: move the interrupt to an online CPU in @dest.
+ *
+ * @data:  interrupt being retargeted
+ * @dest:  requested affinity mask
+ * @force: unused
+ *
+ * Returns 0 when the current CPU already satisfies @dest, -EBUSY while a
+ * previous move is still pending, a negative error from the vector
+ * allocator, or IRQ_SET_MASK_OK after a new CPU/vector pair was assigned.
+ *
+ * Every exit path drops loongarch_avec.lock; the -EBUSY path previously
+ * returned with the lock held and interrupts disabled.
+ */
+static int loongarch_avec_set_affinity(struct irq_data *data, const struct cpumask *dest,
+				       bool force)
+{
+	struct loongarch_avec_data *adata;
+	unsigned int cpu, vector;
+	unsigned long flags;
+	int ret;
+
+	raw_spin_lock_irqsave(&loongarch_avec.lock, flags);
+	adata = irq_data_get_irq_chip_data(data);
+
+	/* The current target is online and inside the requested mask. */
+	if (adata->vec && cpu_online(adata->cpu) && cpumask_test_cpu(adata->cpu, dest)) {
+		ret = 0;
+		goto out_unlock;
+	}
+
+	/* The old vector of an earlier move has not been released yet. */
+	if (adata->moving) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
+	/* intersect_mask is a shared scratch mask, protected by the lock. */
+	cpumask_and(&intersect_mask, dest, cpu_online_mask);
+
+	ret = assign_irq_vector(data, &intersect_mask, &cpu);
+	if (ret < 0)
+		goto out_unlock;
+
+	vector = ret;
+	adata->cpu = cpu;
+	adata->vec = vector;
+	per_cpu_ptr(irq_map, adata->cpu)[adata->vec] = irq_data_to_desc(data);
+	/* Ask the previous CPU to release the old vector. */
+	loongarch_avec_sync(adata);
+
+	raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
+	irq_data_update_effective_affinity(data, cpumask_of(cpu));
+
+	return IRQ_SET_MASK_OK;
+
+out_unlock:
+	raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
+	return ret;
+}
+
+/*
+ * Release the old vectors of interrupts that were moved away from this CPU.
+ *
+ * Called from the IPI handler when ACTION_CLEAR_VECT is received. For every
+ * entry on this CPU's pending list the IRR register covering the old vector
+ * is read: if the vector is still set there, the IPI is re-sent and the
+ * entry is left queued; otherwise the vector is returned to the matrix and
+ * the entry is taken off the list.
+ *
+ * Each switch case now ends in a break. Without them every case fell
+ * through to the last one, so IRR3 was always the register tested.
+ */
+void complete_irq_moving(void)
+{
+	struct pending_list *plist = this_cpu_ptr(&pending_list);
+	struct loongarch_avec_data *adata, *tmp;
+	int cpu, vector, bias;
+	u64 irr;
+
+	raw_spin_lock(&loongarch_avec.lock);
+
+	list_for_each_entry_safe(adata, tmp, &plist->head, entry) {
+		cpu = adata->prev_cpu;
+		vector = adata->prev_vec;
+		bias = vector / VECTORS_PER_REG;
+		/* csr_read64() needs a constant CSR number, hence the switch. */
+		switch (bias) {
+		case 0:
+			irr = csr_read64(LOONGARCH_CSR_IRR0);
+			break;
+		case 1:
+			irr = csr_read64(LOONGARCH_CSR_IRR1);
+			break;
+		case 2:
+			irr = csr_read64(LOONGARCH_CSR_IRR2);
+			break;
+		case 3:
+			irr = csr_read64(LOONGARCH_CSR_IRR3);
+			break;
+		default:
+			/* No IRR register covers this vector: leave the entry alone. */
+			continue;
+		}
+
+		/* Old vector still set in IRR: retry on the next IPI. */
+		if (irr & (1UL << (vector % VECTORS_PER_REG))) {
+			mp_ops.send_ipi_single(cpu, ACTION_CLEAR_VECT);
+			continue;
+		}
+		list_del(&adata->entry);
+		irq_matrix_free(loongarch_avec.vector_matrix, cpu, vector, adata->managed);
+		this_cpu_write(irq_map[vector], NULL);
+		adata->prev_cpu = adata->cpu;
+		adata->prev_vec = adata->vec;
+		adata->moving = 0;
+	}
+	raw_spin_unlock(&loongarch_avec.lock);
+}
+
+/*
+ * Take a CPU out of the vector matrix when it goes offline.
+ *
+ * Does nothing when the matrix was never set up. If @cpu still has entries
+ * on its pending list, a warning is printed and the matrix is left
+ * unchanged.
+ */
+void loongarch_avec_offline_cpu(unsigned int cpu)
+{
+	struct pending_list *plist;
+	unsigned long flags;
+
+	if (loongarch_avec.vector_matrix == NULL)
+		return;
+
+	plist = per_cpu_ptr(&pending_list, cpu);
+
+	raw_spin_lock_irqsave(&loongarch_avec.lock, flags);
+	if (!list_empty(&plist->head))
+		pr_warn("cpu %d advanced extioi is busy\n", cpu);
+	else
+		irq_matrix_offline(loongarch_avec.vector_matrix);
+	raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
+}
+
+/*
+ * Bring a CPU into the vector matrix and reset its pending-move list.
+ *
+ * Does nothing when the matrix was never set up.
+ */
+void loongarch_avec_online_cpu(unsigned int cpu)
+{
+	struct irq_matrix *matrix = loongarch_avec.vector_matrix;
+	unsigned long flags;
+
+	if (matrix == NULL)
+		return;
+
+	raw_spin_lock_irqsave(&loongarch_avec.lock, flags);
+	irq_matrix_online(matrix);
+	pending_list_init(cpu);
+	raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
+}
+
+#else
+#define loongarch_avec_set_affinity NULL
+#endif
+
+/*
+ * irq_compose_msi_msg callback: build the MSI message for an interrupt.
+ *
+ * The low address word carries the vector in bits 4-11 and the value
+ * returned by cpu_logical_map() shifted to bit 12; the high address word is
+ * the constant 0xfd and the data word is zero.
+ *
+ * NOTE(review): the 0xffff mask is applied to the argument of
+ * cpu_logical_map(), not to its result - confirm this is intended.
+ */
+static void loongarch_avec_compose_msg(struct irq_data *d,
+				       struct msi_msg *msg)
+{
+	struct loongarch_avec_data *adata = irq_data_get_irq_chip_data(d);
+
+	msg->data = 0x0;
+	msg->address_hi = 0xfd;
+	msg->address_lo = ((cpu_logical_map(adata->cpu & 0xffff)) << 12) |
+			  ((adata->vec & 0xff) << 4);
+}
+
+/*
+ * irq_chip for AVEC interrupts. The ack/mask/unmask callbacks are empty
+ * functions; irq_set_affinity is NULL when CONFIG_SMP is disabled.
+ */
+static struct irq_chip loongarch_avec_controller = {
+ .name = "AVECINTC",
+ .irq_ack = loongarch_avec_ack_irq,
+ .irq_mask = loongarch_avec_mask_irq,
+ .irq_unmask = loongarch_avec_unmask_irq,
+ .irq_set_affinity = loongarch_avec_set_affinity,
+ .irq_compose_msi_msg = loongarch_avec_compose_msg,
+};
+
+/*
+ * Chained handler for the parent INT_AVEC interrupt.
+ *
+ * Reads the pending vector from CSR ILR, looks up the descriptor mapped to
+ * that vector on this CPU and handles it. A vector without a mapped
+ * descriptor is reported with a warning.
+ *
+ * chained_irq_exit() is now reached on every path; the invalid-ILR case
+ * previously returned right after chained_irq_enter().
+ */
+static void loongarch_avec_dispatch(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	unsigned long vector;
+	struct irq_desc *d;
+
+	chained_irq_enter(chip, desc);
+
+	vector = csr_read64(LOONGARCH_CSR_ILR);
+	/* ILR flags its content as invalid: nothing to dispatch. */
+	if (vector & ILR_INVALID_MASK)
+		goto out;
+
+	vector &= ILR_VECTOR_MASK;
+
+	d = this_cpu_read(irq_map[vector]);
+	if (d) {
+		generic_handle_irq_desc(d);
+	} else {
+		pr_warn("IRQ ERROR:Unexpected irq occur on cpu %d[vector %ld]\n",
+			smp_processor_id(), vector);
+	}
+
+out:
+	chained_irq_exit(chip, desc);
+}
+
+/*
+ * irq_domain_ops.alloc: set up @nr_irqs interrupts starting at @virq.
+ *
+ * For each interrupt a loongarch_avec_data is allocated, a vector is taken
+ * from the matrix on some online CPU, and the descriptor is recorded in that
+ * CPU's irq_map slot. @arg is unused.
+ *
+ * Returns 0 on success, -ENOMEM when the chip data cannot be allocated, or
+ * the negative error returned by the vector allocator.
+ *
+ * Changes against the previous version:
+ *  - the result of assign_irq_vector() is kept in a signed int so that the
+ *    "< 0" error check can actually trigger;
+ *  - kzalloc(GFP_KERNEL) is done before taking the raw spinlock, as it may
+ *    sleep;
+ *  - the freshly allocated chip data is freed when no vector is available.
+ *
+ * NOTE(review): interrupts set up by earlier loop iterations are not unwound
+ * here when a later iteration fails - confirm who cleans those up.
+ */
+static int loongarch_avec_alloc(struct irq_domain *domain, unsigned int virq,
+				unsigned int nr_irqs, void *arg)
+{
+	struct loongarch_avec_data *adata;
+	struct irq_data *irqd;
+	unsigned int cpu, i;
+	unsigned long flags;
+	int ret;
+
+	for (i = 0; i < nr_irqs; i++) {
+		irqd = irq_domain_get_irq_data(domain, virq + i);
+
+		adata = kzalloc(sizeof(*adata), GFP_KERNEL);
+		if (!adata)
+			return -ENOMEM;
+
+		raw_spin_lock_irqsave(&loongarch_avec.lock, flags);
+
+		ret = assign_irq_vector(irqd, cpu_online_mask, &cpu);
+		if (ret < 0) {
+			raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
+			kfree(adata);
+			return ret;
+		}
+
+		/* No move is pending yet: previous and current target match. */
+		adata->prev_cpu = adata->cpu = cpu;
+		adata->prev_vec = adata->vec = ret;
+		adata->managed = irqd_affinity_is_managed(irqd);
+		irq_domain_set_info(domain, virq + i, virq + i, &loongarch_avec_controller,
+				    adata, handle_edge_irq, NULL, NULL);
+		adata->moving = 0;
+		irqd_set_single_target(irqd);
+		irqd_set_affinity_on_activate(irqd);
+
+		per_cpu_ptr(irq_map, adata->cpu)[adata->vec] = irq_data_to_desc(irqd);
+
+		raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
+	}
+
+	return 0;
+}
+
+/*
+ * Give back the vector(s) held by an interrupt and clear its irq_map slots.
+ *
+ * The current cpu/vec pair is always released. With CONFIG_SMP, if a move is
+ * still pending, the previous pair is released too and the entry is removed
+ * from the pending list it is queued on.
+ */
+static void clear_free_vector(struct irq_data *irqd)
+{
+	struct loongarch_avec_data *adata = irq_data_get_irq_chip_data(irqd);
+	struct irq_matrix *matrix = loongarch_avec.vector_matrix;
+	bool managed = irqd_affinity_is_managed(irqd);
+
+	per_cpu(irq_map, adata->cpu)[adata->vec] = NULL;
+	irq_matrix_free(matrix, adata->cpu, adata->vec, managed);
+	adata->vec = 0;
+	adata->cpu = 0;
+
+#ifdef CONFIG_SMP
+	if (adata->moving) {
+		per_cpu(irq_map, adata->prev_cpu)[adata->prev_vec] = NULL;
+		irq_matrix_free(matrix, adata->prev_cpu, adata->prev_vec,
+				adata->managed);
+		adata->prev_cpu = 0;
+		adata->prev_vec = 0;
+		adata->moving = 0;
+		list_del_init(&adata->entry);
+	}
+#endif
+}
+
+/*
+ * irq_domain_ops.free: tear down @nr_irqs interrupts starting at @virq.
+ *
+ * For each interrupt that has irq_data, its vectors are released, the
+ * irq_data is reset and the chip data allocated by loongarch_avec_alloc()
+ * is freed. Previously the chip data was never freed: the pointer was
+ * dropped by irq_domain_reset_irq_data() and the memory leaked.
+ *
+ * The lock is taken per interrupt so that kfree() runs outside of it.
+ */
+static void loongarch_avec_free(struct irq_domain *domain, unsigned int virq,
+				unsigned int nr_irqs)
+{
+	struct loongarch_avec_data *adata;
+	struct irq_data *d;
+	unsigned long flags;
+	unsigned int i;
+
+	for (i = 0; i < nr_irqs; i++) {
+		d = irq_domain_get_irq_data(domain, virq + i);
+		if (!d)
+			continue;
+
+		/* Fetch the pointer before the reset below clears it. */
+		adata = irq_data_get_irq_chip_data(d);
+
+		raw_spin_lock_irqsave(&loongarch_avec.lock, flags);
+		if (adata)
+			clear_free_vector(d);
+		irq_domain_reset_irq_data(d);
+		raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
+
+		kfree(adata);
+	}
+}
+
+/* Domain callbacks of the AVEC IRQ domain: only alloc and free are provided. */
+static const struct irq_domain_ops loongarch_avec_domain_ops = {
+ .alloc = loongarch_avec_alloc,
+ .free = loongarch_avec_free,
+};
+
+/*
+ * Create the vector matrix, mark the first NR_LEGACY_VECTORS vectors as
+ * system vectors and bring the calling CPU online in the matrix.
+ *
+ * Returns 0 on success or -ENOMEM when the matrix cannot be allocated.
+ *
+ * NOTE(review): the allocation end passed to irq_alloc_matrix() is
+ * NR_VECTORS - 1; confirm whether the last vector is meant to be excluded.
+ */
+static int __init irq_matrix_init(void)
+{
+	struct irq_matrix *matrix;
+	int vec;
+
+	matrix = irq_alloc_matrix(NR_VECTORS, 0, NR_VECTORS - 1);
+	loongarch_avec.vector_matrix = matrix;
+	if (matrix == NULL)
+		return -ENOMEM;
+
+	for (vec = 0; vec < NR_LEGACY_VECTORS; vec++)
+		irq_matrix_assign_system(matrix, vec, false);
+
+	irq_matrix_online(matrix);
+
+	return 0;
+}
+
+/*
+ * Create the AVEC IRQ domain, hook the chained handler on the parent's
+ * INT_AVEC interrupt, set up the vector matrix and finally enable AVEC in
+ * the MISC_FUNC IOCSR.
+ *
+ * @parent: domain in which INT_AVEC is mapped
+ *
+ * Returns 0 on success, -ENOMEM or -EINVAL on failure.
+ *
+ * Error handling change: when the matrix setup fails, the chained handler
+ * installed just before is removed again, so it is not left pointing at a
+ * driver whose domain is torn down. The former kfree() of the matrix is
+ * dropped: irq_matrix_init() only fails when the matrix pointer is NULL.
+ */
+static int __init loongarch_avec_init(struct irq_domain *parent)
+{
+	int ret = 0, parent_irq;
+	unsigned long tmp;
+
+	raw_spin_lock_init(&loongarch_avec.lock);
+
+	loongarch_avec.fwnode = irq_domain_alloc_named_fwnode("CORE_AVEC");
+	if (!loongarch_avec.fwnode) {
+		pr_err("Unable to allocate domain handle\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	loongarch_avec.domain = irq_domain_create_tree(loongarch_avec.fwnode,
+						       &loongarch_avec_domain_ops, NULL);
+	if (!loongarch_avec.domain) {
+		pr_err("core-vec: cannot create IRQ domain\n");
+		ret = -ENOMEM;
+		goto out_free_handle;
+	}
+
+	parent_irq = irq_create_mapping(parent, INT_AVEC);
+	if (!parent_irq) {
+		pr_err("Failed to mapping hwirq\n");
+		ret = -EINVAL;
+		goto out_remove_domain;
+	}
+	irq_set_chained_handler_and_data(parent_irq, loongarch_avec_dispatch, NULL);
+
+	ret = irq_matrix_init();
+	if (ret) {
+		pr_err("Failed to init irq matrix\n");
+		goto out_remove_handler;
+	}
+#ifdef CONFIG_SMP
+	pending_list_init(0);
+#endif
+	/* Enable AVEC last, once everything needed to handle it is in place. */
+	tmp = iocsr_read64(LOONGARCH_IOCSR_MISC_FUNC);
+	tmp |= IOCSR_MISC_FUNC_AVEC_EN;
+	iocsr_write64(tmp, LOONGARCH_IOCSR_MISC_FUNC);
+
+	return ret;
+
+out_remove_handler:
+	irq_set_chained_handler_and_data(parent_irq, NULL, NULL);
+out_remove_domain:
+	irq_domain_remove(loongarch_avec.domain);
+out_free_handle:
+	irq_domain_free_fwnode(loongarch_avec.fwnode);
+out:
+	return ret;
+}
+
+/*
+ * MADT callback for MSI PIC entries: record the message base address
+ * (entry address minus AVEC_MSG_OFFSET) and create the v2 PCH MSI domain on
+ * top of the AVEC domain. @end is unused.
+ *
+ * Returns the result of pch_msi_acpi_init_v2().
+ */
+static int __init pch_msi_parse_madt(union acpi_subtable_headers *header,
+				     const unsigned long end)
+{
+	struct acpi_madt_msi_pic *entry;
+
+	entry = (struct acpi_madt_msi_pic *)header;
+	msi_base_v2 = entry->msg_address - AVEC_MSG_OFFSET;
+
+	return pch_msi_acpi_init_v2(loongarch_avec.domain, entry);
+}
+
+/* Walk the MADT MSI PIC entries and hand each one to pch_msi_parse_madt(). */
+static inline int __init acpi_cascade_irqdomain_init(void)
+{
+	int ret;
+
+	ret = acpi_table_parse_madt(ACPI_MADT_TYPE_MSI_PIC, pch_msi_parse_madt, 1);
+
+	return ret;
+}
+
+/*
+ * ACPI entry point: initialise the AVEC domain under @parent, then cascade
+ * the MSI domain found in the MADT on top of it.
+ *
+ * Returns 0 on success or the error code of the step that failed.
+ */
+int __init loongarch_avec_acpi_init(struct irq_domain *parent)
+{
+	int err;
+
+	err = loongarch_avec_init(parent);
+	if (err) {
+		pr_err("Failed to init irq domain\n");
+		return err;
+	}
+
+	err = acpi_cascade_irqdomain_init();
+	if (err)
+		pr_err("Failed to cascade IRQ domain\n");
+
+	return err;
+}
diff --git a/drivers/irqchip/irq-loongarch-cpu.c b/drivers/irqchip/irq-loongarch-cpu.c
index 9d8f2c406043..1ecac59925c6 100644
--- a/drivers/irqchip/irq-loongarch-cpu.c
+++ b/drivers/irqchip/irq-loongarch-cpu.c
@@ -138,7 +138,9 @@ static int __init acpi_cascade_irqdomain_init(void)
if (r < 0)
return r;

- return 0;
+ if (cpu_has_avecint)
+ r = loongarch_avec_acpi_init(irq_domain);
+ return r;
}

static int __init cpuintc_acpi_init(union acpi_subtable_headers *header,
diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c
index c7ddebf312ad..1f9a30488137 100644
--- a/drivers/irqchip/irq-loongson-eiointc.c
+++ b/drivers/irqchip/irq-loongson-eiointc.c
@@ -359,6 +359,9 @@ static int __init acpi_cascade_irqdomain_init(void)
if (r < 0)
return r;

+ if (cpu_has_avecint)
+ return 0;
+
r = acpi_table_parse_madt(ACPI_MADT_TYPE_MSI_PIC, pch_msi_parse_madt, 1);
if (r < 0)
return r;
diff --git a/drivers/irqchip/irq-loongson-pch-msi.c b/drivers/irqchip/irq-loongson-pch-msi.c
index dd4d699170f4..1926857f9a41 100644
--- a/drivers/irqchip/irq-loongson-pch-msi.c
+++ b/drivers/irqchip/irq-loongson-pch-msi.c
@@ -16,7 +16,6 @@
#include <linux/slab.h>

static int nr_pics;
-
struct pch_msi_data {
struct mutex msi_map_lock;
phys_addr_t doorbell;
@@ -100,6 +99,17 @@ static struct irq_chip middle_irq_chip = {
.irq_compose_msi_msg = pch_msi_compose_msi_msg,
};

+/* irq_chip for the v2 MSI domain: only irq_ack is set, forwarded to the parent chip. */
+static struct irq_chip pch_msi_irq_chip_v2 = {
+ .name = "MSI",
+ .irq_ack = irq_chip_ack_parent,
+};
+
+/*
+ * MSI domain description for the v2 domain: default domain and chip ops,
+ * multi-MSI and MSI-X flags, using pch_msi_irq_chip_v2.
+ */
+static struct msi_domain_info pch_msi_domain_info_v2 = {
+ .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX,
+ .chip = &pch_msi_irq_chip_v2,
+};
+
static int pch_msi_parent_domain_alloc(struct irq_domain *domain,
unsigned int virq, int hwirq)
{
@@ -268,6 +278,9 @@ struct fwnode_handle *get_pch_msi_handle(int pci_segment)
{
int i;

+ if (cpu_has_avecint)
+ return pch_msi_handle[0];
+
for (i = 0; i < MAX_IO_PICS; i++) {
if (msi_group[i].pci_segment == pci_segment)
return pch_msi_handle[i];
@@ -289,4 +302,32 @@ int __init pch_msi_acpi_init(struct irq_domain *parent,

return ret;
}
+
+/*
+ * Create the v2 PCI MSI domain on top of @parent.
+ *
+ * Only one such domain is created: if pch_msi_handle[0] is already set the
+ * function returns 0 immediately. @msi_entry is unused.
+ *
+ * Returns 0 on success, -ENOMEM when the fwnode or the domain cannot be
+ * created.
+ *
+ * Error handling changes: the pointless kfree() of a NULL handle is gone,
+ * and on domain creation failure the fwnode is released with
+ * irq_domain_free_fwnode() and the slot is cleared, so that
+ * get_pch_msi_handle() cannot hand out a freed pointer.
+ */
+int __init pch_msi_acpi_init_v2(struct irq_domain *parent,
+				struct acpi_madt_msi_pic *msi_entry)
+{
+	struct irq_domain *msi_domain;
+
+	if (pch_msi_handle[0])
+		return 0;
+
+	pch_msi_handle[0] = irq_domain_alloc_named_fwnode("msipic-v2");
+	if (!pch_msi_handle[0]) {
+		pr_err("Unable to allocate domain handle\n");
+		return -ENOMEM;
+	}
+
+	msi_domain = pci_msi_create_irq_domain(pch_msi_handle[0],
+					       &pch_msi_domain_info_v2,
+					       parent);
+	if (!msi_domain) {
+		pr_err("Failed to create PCI MSI domain\n");
+		irq_domain_free_fwnode(pch_msi_handle[0]);
+		pch_msi_handle[0] = NULL;
+		return -ENOMEM;
+	}
+
+	pr_info("IRQ domain MSIPIC-V2 init done.\n");
+	return 0;
+}
#endif
--
2.20.1


Huacai Chen

unread,
Jul 5, 2024, 3:08:41 AMJul 5
to Tianyang Zhang, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
Hi, Tianyang,

You can squash two patches into one, as Song does:
https://lore.kernel.org/lkml/20240624084410.1...@loongson.cn/
In handle_cpu_irq() we handle CPUINTC irqs from low to high, which
means that by the time we handle AVECINTC (IP13), IP0-12 have already
been handled, so should we really be mapping IP0-12 to AVECINTC?

> + *
> + */
> +#define NR_VECTORS 256
> +#define IRQ_MATRIX_BITS NR_VECTORS
> +#define NR_LEGACY_VECTORS 16
Should we define them in irq.h and update NR_IRQS there?

> /*
> * interrupt-retrigger: NOP for now. This may not be appropriate for all
> * machines, we'll see ...
> diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
> index 480418bc5071..cf3b635a9b86 100644
> --- a/arch/loongarch/include/asm/irq.h
> +++ b/arch/loongarch/include/asm/irq.h
> @@ -65,7 +65,7 @@ extern struct acpi_vector_group msi_group[MAX_IO_PICS];
> #define LOONGSON_LPC_LAST_IRQ (LOONGSON_LPC_IRQ_BASE + 15)
>
> #define LOONGSON_CPU_IRQ_BASE 16
> -#define LOONGSON_CPU_LAST_IRQ (LOONGSON_CPU_IRQ_BASE + 14)
> +#define LOONGSON_CPU_LAST_IRQ (LOONGSON_CPU_IRQ_BASE + 15)
>
> #define LOONGSON_PCH_IRQ_BASE 64
> #define LOONGSON_PCH_ACPI_IRQ (LOONGSON_PCH_IRQ_BASE + 47)
> @@ -101,6 +101,16 @@ int pch_msi_acpi_init(struct irq_domain *parent,
> struct acpi_madt_msi_pic *acpi_pchmsi);
> int pch_pic_acpi_init(struct irq_domain *parent,
> struct acpi_madt_bio_pic *acpi_pchpic);
> +
> +#ifdef CONFIG_ACPI
No #ifdef needed.

> +int __init pch_msi_acpi_init_v2(struct irq_domain *parent,
> + struct acpi_madt_msi_pic *pch_msi_entry);
No line break needed.
What does ILR stand for?
SMP_CLEAR_VECT trigger complete_irq_moving(), but in
complete_irq_moving() it calls mp_ops.send_ipi_single(cpu,
ACTION_CLEAR_VECT), which will trigger loongson_ipi_interrupt() again.
I'm a bit confused here. Could you please explain the design goal of
SMP_CLEAR_VECT here?
An empty function is preferred here.


Huacai

Tianyang Zhang

unread,
Jul 5, 2024, 10:45:46 PMJul 5
to cor...@lwn.net, al...@kernel.org, siya...@loongson.cn, chenh...@kernel.org, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, zhangt...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
Introduce the advanced extended interrupt controller (AVEC). This feature
gives each core 256 independent interrupt vectors, and allows MSI
interrupts to be routed independently to any vector on any CPU.

Co-developed-by: Jianmin Lv <lvji...@loongson.cn>
Signed-off-by: Jianmin Lv <lvji...@loongson.cn>
Co-developed-by: Liupu Wang <wang...@loongson.cn>
Signed-off-by: Liupu Wang <wang...@loongson.cn>
Co-developed-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Tianyang Zhang <zhangt...@loongson.cn>
---
.../arch/loongarch/irq-chip-model.rst | 33 ++
.../zh_CN/arch/loongarch/irq-chip-model.rst | 37 +-
arch/loongarch/Kconfig | 1 +
arch/loongarch/include/asm/cpu-features.h | 1 +
arch/loongarch/include/asm/cpu.h | 2 +
arch/loongarch/include/asm/hw_irq.h | 10 +
arch/loongarch/include/asm/irq.h | 12 +-
arch/loongarch/include/asm/loongarch.h | 20 +-
arch/loongarch/include/asm/smp.h | 2 +
arch/loongarch/kernel/cpu-probe.c | 3 +-
arch/loongarch/kernel/smp.c | 5 +
drivers/irqchip/Makefile | 2 +-
drivers/irqchip/irq-loongarch-avec.c | 440 ++++++++++++++++++
drivers/irqchip/irq-loongarch-cpu.c | 4 +-
drivers/irqchip/irq-loongson-eiointc.c | 3 +
drivers/irqchip/irq-loongson-pch-msi.c | 43 +-
16 files changed, 605 insertions(+), 13 deletions(-)
create mode 100644 drivers/irqchip/irq-loongarch-avec.c

Changes log:
V0->V1:
1.Modified some formats and declarations
2.Removed kmalloc/kfree when adding affinity related data to pending_list,
and used moving tag to replace the original behavior
3.Adjusted the process that enables AVEC interrupts, now it is at the end of all processes
4.Removed CPUHP related callbacks, now irq_matrix_online/irq_matrix_offline is completed in start_secondary/loongson_cpu_disable
5.Adjusted compatibility issues for CONFIG_ACPI
6.About question:
> irr = csr_read64(LOONGARCH_CSR_IRR0 + vector / 64);
> should be good enough, no?
csr_read64 was built-in as __csrrd_d, it doesn't seem to support variables as parameters
>>>>
drivers/irqchip/irq-loongarch-avec.c: In function ‘complete_irq_moving’:
./arch/loongarch/include/asm/loongarch.h:164:25: error: invalid argument to built-in function
164 | #define csr_read64(reg) __csrrd_d(reg)
| ^~~~~~~~~
drivers/irqchip/irq-loongarch-avec.c:170:23: note: in expansion of macro ‘csr_read64’
170 | irr = csr_read64(LOONGARCH_CSR_IRR_BASE + vector / VECTORS_PER_REG);
| ^~~~~~~~~~
>>>>
So we have temporarily retained the previous implementation.

V1->V2:
Fixed up coding style. Made on/offline functions void
Added compatibility when CONFIG_SMP is turned off

V2->V3:
Squash two patches into one

diff --git a/Documentation/arch/loongarch/irq-chip-model.rst b/Documentation/arch/loongarch/irq-chip-model.rst
index 7988f4192363..a90c78c8e5bb 100644
--- a/Documentation/arch/loongarch/irq-chip-model.rst
+++ b/Documentation/arch/loongarch/irq-chip-model.rst
@@ -85,6 +85,39 @@ to CPUINTC directly::
| Devices |
+---------+

+Advanced Extended IRQ model
+===========================
+
+In this model, IPI (Inter-Processor Interrupt) and CPU Local Timer interrupt go
+to CPUINTC directly, CPU UARTs interrupts go to LIOINTC, PCH-MSI interrupts go to AVEC
+and then go to CPUINTC, while other devices' interrupts go to PCH-PIC/PCH-LPC, are gathered
+by EIOINTC, and then go to CPUINTC directly::
+
+ +-----+ +--------------------------+ +-------+
+ | IPI | --> | CPUINTC | <-- | Timer |
+ +-----+ +--------------------------+ +-------+
+ ^ ^ ^
+ | | |
+ +--------+ +---------+ +---------+ +-------+
+ | AVEC | | EIOINTC | | LIOINTC | <-- | UARTs |
+ +--------+ +---------+ +---------+ +-------+
+ ^ ^
+ | |
+ +---------+ +---------+
+ | PCH-MSI | | PCH-PIC |
+ +---------+ +---------+
+ ^ ^ ^
+ | | |
+ +---------+ +---------+ +---------+
+ | Devices | | PCH-LPC | | Devices |
+ +---------+ +---------+ +---------+
+ ^
+ |
+ +---------+
+ | Devices |
+ +---------+
+
+
ACPI-related definitions
========================

diff --git a/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst b/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst
index f1e9ab18206c..b54567380c90 100644
--- a/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst
+++ b/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst
@@ -9,9 +9,8 @@
LoongArch的IRQ芯片模型(层级关系)
==================================

-目前,基于LoongArch的处理器(如龙芯3A5000)只能与LS7A芯片组配合工作。LoongArch计算机
-中的中断控制器(即IRQ芯片)包括CPUINTC(CPU Core Interrupt Controller)、LIOINTC(
-Legacy I/O Interrupt Controller)、EIOINTC(Extended I/O Interrupt Controller)、
+LoongArch计算机中的中断控制器(即IRQ芯片)包括CPUINTC(CPU Core Interrupt Controller)、
+LIOINTC(Legacy I/O Interrupt Controller)、EIOINTC(Extended I/O Interrupt Controller)、
HTVECINTC(Hyper-Transport Vector Interrupt Controller)、PCH-PIC(LS7A芯片组的主中
断控制器)、PCH-LPC(LS7A芯片组的LPC中断控制器)和PCH-MSI(MSI中断控制器)。

@@ -87,6 +86,38 @@ PCH-LPC/PCH-MSI,然后被EIOINTC统一收集,再直接到达CPUINTC::
| Devices |
+---------+

+高级扩展IRQ模型
+=======================
+
+在这种模型里面,IPI(Inter-Processor Interrupt)和CPU本地时钟中断直接发送到CPUINTC,
+CPU串口(UARTs)中断发送到LIOINTC,PCH-MSI中断发送到AVEC,而后通过AVEC送达CPUINTC,而
+其他所有设备的中断则分别发送到所连接的PCH-PIC/PCH-LPC,然后由EIOINTC统一收集,再直
+接到达CPUINTC::
+
+ +-----+ +--------------------------+ +-------+
+ | IPI | --> | CPUINTC | <-- | Timer |
+ +-----+ +--------------------------+ +-------+
+ ^ ^ ^
+ | | |
+ +--------+ +---------+ +---------+ +-------+
+ | AVEC | | EIOINTC | | LIOINTC | <-- | UARTs |
+ +--------+ +---------+ +---------+ +-------+
+ ^ ^
+ | |
+ +---------+ +-------------+
+ | PCH-MSI | | PCH-PIC |
+ +---------+ +-------------+
+ ^ ^ ^
+ | | |
+ +---------+ +---------+ +---------+
+ | Devices | | PCH-LPC | | Devices |
+ +---------+ +---------+ +---------+
+ ^
+ |
+ +---------+
+ | Devices |
+ +---------+
+
ACPI相关的定义
==============
+ *
+ */
+#define NR_VECTORS 256
+#define IRQ_MATRIX_BITS NR_VECTORS
+#define NR_LEGACY_VECTORS 16
/*
* interrupt-retrigger: NOP for now. This may not be appropriate for all
* machines, we'll see ...
diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
index 480418bc5071..cf3b635a9b86 100644
--- a/arch/loongarch/include/asm/irq.h
+++ b/arch/loongarch/include/asm/irq.h
@@ -65,7 +65,7 @@ extern struct acpi_vector_group msi_group[MAX_IO_PICS];
#define LOONGSON_LPC_LAST_IRQ (LOONGSON_LPC_IRQ_BASE + 15)

#define LOONGSON_CPU_IRQ_BASE 16
-#define LOONGSON_CPU_LAST_IRQ (LOONGSON_CPU_IRQ_BASE + 14)
+#define LOONGSON_CPU_LAST_IRQ (LOONGSON_CPU_IRQ_BASE + 15)

#define LOONGSON_PCH_IRQ_BASE 64
#define LOONGSON_PCH_ACPI_IRQ (LOONGSON_PCH_IRQ_BASE + 47)
@@ -101,6 +101,16 @@ int pch_msi_acpi_init(struct irq_domain *parent,
struct acpi_madt_msi_pic *acpi_pchmsi);
int pch_pic_acpi_init(struct irq_domain *parent,
struct acpi_madt_bio_pic *acpi_pchpic);
+
+#ifdef CONFIG_ACPI
+int __init pch_msi_acpi_init_v2(struct irq_domain *parent,
+ struct acpi_madt_msi_pic *pch_msi_entry);

Tianyang Zhang

unread,
Jul 5, 2024, 10:46:34 PMJul 5
to Huacai Chen, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org

在 2024/7/5 下午2:58, Huacai Chen 写道:
> Hi, Tianyang,
>
> You can squash two patches into one, as Song does:
> https://lore.kernel.org/lkml/20240624084410.1...@loongson.cn/
OK. I have resent the patch with the two patches squashed into one.

Huacai Chen

unread,
Jul 6, 2024, 12:51:09 AMJul 6
to Tianyang Zhang, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
On Sat, Jul 6, 2024 at 10:46 AM Tianyang Zhang
<zhangt...@loongson.cn> wrote:
>
>
> 在 2024/7/5 下午2:58, Huacai Chen 写道:
> > Hi, Tianyang,
> >
> > You can squash two patches into one, as Song does:
> > https://lore.kernel.org/lkml/20240624084410.1...@loongson.cn/
> Ok. I have resend the patch with two-in-one mode
I asked several questions but you only addressed this one.

Huacai

Huacai Chen

unread,
Jul 6, 2024, 6:22:24 AMJul 6
to Tianyang Zhang, cor...@lwn.net, al...@kernel.org, siya...@loongson.cn, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
Hi, Tianyang,

Please answer my questions in V2.

Huacai

Tianyang Zhang

unread,
Jul 7, 2024, 10:17:19 PMJul 7
to Huacai Chen, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org

在 2024/7/6 下午12:50, Huacai Chen 写道:
> On Sat, Jul 6, 2024 at 10:46 AM Tianyang Zhang
> <zhangt...@loongson.cn> wrote:
>>
>> 在 2024/7/5 下午2:58, Huacai Chen 写道:
>>> Hi, Tianyang,
>>>
>>> You can squash two patches into one, as Song does:
>>> https://lore.kernel.org/lkml/20240624084410.1...@loongson.cn/
>> Ok. I have resend the patch with two-in-one mode
> I ask several questions but you only take this one.
>
> Huacai

Hi ,Huacai

First, sorry for my carelessness.
In fact, retaining 0-15 vector is for compatibility with future
extensions, and currently it has not been done in practice.

The AVEC interrupt controller can completely replace the current core-ip
0-12 interrupt. When we implement the

function described above, core-ip 0-12 will no longer have its original
function

Of course, this is a long-term plan
>>>
>>>> + *
>>>> + */
>>>> +#define NR_VECTORS 256
>>>> +#define IRQ_MATRIX_BITS NR_VECTORS
>>>> +#define NR_LEGACY_VECTORS 16
>>> Should we define them in irq.h and update NR_IRQS there?
Yes, you are right, this may indeed cause potential problems
>>>
>>>> /*
>>>> * interrupt-retrigger: NOP for now. This may not be appropriate for all
>>>> * machines, we'll see ...
>>>> diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
>>>> index 480418bc5071..cf3b635a9b86 100644
>>>> --- a/arch/loongarch/include/asm/irq.h
>>>> +++ b/arch/loongarch/include/asm/irq.h
>>>> @@ -65,7 +65,7 @@ extern struct acpi_vector_group msi_group[MAX_IO_PICS];
>>>> #define LOONGSON_LPC_LAST_IRQ (LOONGSON_LPC_IRQ_BASE + 15)
>>>>
>>>> #define LOONGSON_CPU_IRQ_BASE 16
>>>> -#define LOONGSON_CPU_LAST_IRQ (LOONGSON_CPU_IRQ_BASE + 14)
>>>> +#define LOONGSON_CPU_LAST_IRQ (LOONGSON_CPU_IRQ_BASE + 15)
>>>>
>>>> #define LOONGSON_PCH_IRQ_BASE 64
>>>> #define LOONGSON_PCH_ACPI_IRQ (LOONGSON_PCH_IRQ_BASE + 47)
>>>> @@ -101,6 +101,16 @@ int pch_msi_acpi_init(struct irq_domain *parent,
>>>> struct acpi_madt_msi_pic *acpi_pchmsi);
>>>> int pch_pic_acpi_init(struct irq_domain *parent,
>>>> struct acpi_madt_bio_pic *acpi_pchpic);
>>>> +
>>>> +#ifdef CONFIG_ACPI
>>> No #ifdef needed.
OK, I will adjust this in the next patch.
>>>
>>>> +int __init pch_msi_acpi_init_v2(struct irq_domain *parent,
>>>> + struct acpi_madt_msi_pic *pch_msi_entry);
>>> No line break needed.
ok
Interrupt Load Register, ILR
SMP_CLEAR_VECT is used to release the affinity of interrupts on old CPUs

Due to the possibility of certain interrupts occurring during interrupt
affinity modification,

which can result in the original CPU receiving this interrupt vector, it
is necessary to

determine whether the interrupt has been processed through the original CPU.

When the original CPU finds, while handling SMP_CLEAR_VECT, that an
interrupt whose affinity needs to be cleared has not yet been processed
locally, it re-triggers SMP_CLEAR_VECT on itself and ends the current
handling, allowing the avec interrupt that has already been routed to
the current CPU to be processed.

Then it enters the SMP_CLEAR_VECT handling again to re-check.
ok ,I got it

Thanks

Tianyang

Huacai Chen

unread,
Jul 8, 2024, 4:24:51 AMJul 8
to Tianyang Zhang, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
On Mon, Jul 8, 2024 at 10:17 AM Tianyang Zhang
According to the user manual, IRR here should be ISR, and ILR here
should be IRR.
When setting affinity from CPU A to CPU B, trigger SMP_CLEAR_VECT on
A, A calls complete_irq_moving() to see if the irq is processed. If
not, it triggers SMP_CLEAR_VECT again and again, until B has processed
this irq.
Am I right (correct me if not)?

Huacai

Tianyang Zhang

unread,
Jul 8, 2024, 6:08:25 AMJul 8
to Huacai Chen, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
Hi, Huacai
Perhaps my manual version is too old, I will confirm after updating it
Releasing the affinity of an interrupt on the old CPU does not depend on
the state of the new CPU.

It only checks whether the interrupt has been processed on the old CPU.
This prevents software from cleaning up data before interrupts generated
at critical times have been properly processed, which could otherwise
cause kernel crashes.

So I think a description should be as follows:

When setting affinity from CPU A to CPU B, trigger SMP_CLEAR_VECT on
A, A calls complete_irq_moving() to see if the irq has been processed on CPU A.
If not, CPU A self-retriggers SMP_CLEAR_VECT again and again, until A has processed
all this irq.

Tianyang

Huacai Chen

unread,
Jul 8, 2024, 6:21:03 AMJul 8
to Tianyang Zhang, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
But CPU A is running complete_irq_moving() again and again, how does
it process the pending irq?

Huacai

Tianyang Zhang

unread,
Jul 8, 2024, 9:42:18 PMJul 8
to Huacai Chen, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
Hi, Huacai

In drivers/irqchip/irq-loongarch-cpu.c:

static void handle_cpu_irq(struct pt_regs *regs)
{
        int hwirq;
        unsigned int estat = read_csr_estat() & CSR_ESTAT_IS;

        while ((hwirq = ffs(estat))) {
                estat &= ~BIT(hwirq - 1);

--------------------------------------------------

Every time we enter the CPU interrupt, we will save the value of
CSR_ESTAT_IS to estat,

and then process the interrupt indicated by the corresponding bit one by
one.

 In our design, avec is routed to core ip-14, so when ipi exits, the
avec interrupt will

be executed and the corresponding register flag bits will be cleared


Tianyang

                generic_handle_domain_irq(irq_domain, hwirq - 1);

Huacai Chen

unread,
Jul 8, 2024, 10:42:55 PMJul 8
to Tianyang Zhang, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
OK, I got it, now you can fix other issues and then submit V4.

Huacai

Tianyang Zhang

unread,
Jul 8, 2024, 11:36:08 PMJul 8
to Huacai Chen, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
OK, I will complete it as soon as possible.

Thank you for your feedback and guidance

Tianyang

Tianyang Zhang

unread,
Jul 9, 2024, 11:15:14 PMJul 9
to cor...@lwn.net, al...@kernel.org, siya...@loongson.cn, chenh...@kernel.org, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, zhangt...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
Introduce the advanced extended interrupt controllers. This feature will
allow each core to have 256 independent interrupt vectors and MSI
interrupts can be independently routed to any vector on any CPU.

Co-developed-by: Jianmin Lv <lvji...@loongson.cn>
Signed-off-by: Jianmin Lv <lvji...@loongson.cn>
Co-developed-by: Liupu Wang <wang...@loongson.cn>
Signed-off-by: Liupu Wang <wang...@loongson.cn>
Co-developed-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Tianyang Zhang <zhangt...@loongson.cn>
---
.../arch/loongarch/irq-chip-model.rst | 33 ++
.../zh_CN/arch/loongarch/irq-chip-model.rst | 37 +-
arch/loongarch/Kconfig | 1 +
arch/loongarch/include/asm/cpu-features.h | 1 +
arch/loongarch/include/asm/cpu.h | 2 +
arch/loongarch/include/asm/irq.h | 23 +-
arch/loongarch/include/asm/loongarch.h | 19 +-
arch/loongarch/include/asm/smp.h | 2 +
arch/loongarch/kernel/cpu-probe.c | 3 +-
arch/loongarch/kernel/smp.c | 5 +
drivers/irqchip/Makefile | 2 +-
drivers/irqchip/irq-loongarch-avec.c | 433 ++++++++++++++++++
drivers/irqchip/irq-loongarch-cpu.c | 4 +-
drivers/irqchip/irq-loongson-eiointc.c | 3 +
drivers/irqchip/irq-loongson-pch-msi.c | 42 +-
15 files changed, 596 insertions(+), 14 deletions(-)
create mode 100644 drivers/irqchip/irq-loongarch-avec.c

Changes log:
V0->V1:
1.Modified some formats and declarations
2.Removed kmalloc/kfree when adding affinity related data to pending_list,
and used moving tag to replace the original behavior
3.Adjusted the process that enables AVEC interrupts, now it is at the end of all processes
4.Removed CPUHP related callbacks, now irq_matrix_online/irq_matrix_offline is completed in start_secondary/loongson_cpu_disable
5.Adjusted compatibility issues for CONFIG_ACPI
6.About question:
> irr = csr_read64(LOONGARCH_CSR_IRR0 + vector / 64);
> should be good enough, no?
csr_read64 was built-in as __csrrd_d, it doesn't seem to support variables as parameters
>>>>
drivers/irqchip/irq-loongarch-avec.c: In function ‘complete_irq_moving’:
./arch/loongarch/include/asm/loongarch.h:164:25: error: invalid argument to built-in function
164 | #define csr_read64(reg) __csrrd_d(reg)
| ^~~~~~~~~
drivers/irqchip/irq-loongarch-avec.c:170:23: note: in expansion of macro ‘csr_read64’
170 | irr = csr_read64(LOONGARCH_CSR_IRR_BASE + vector / VECTORS_PER_REG);
| ^~~~~~~~~~
>>>>
So we have temporarily retained the previous implementation.

V1->V2:
Fixed up coding style. Made on/offline functions void
Added compatibility when CONFIG_SMP is turned off

V2->V3:
Squash two patches into one

V3->V4:
Update NR_IRQS
Update Register's name
Fixed up coding style
diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
index 480418bc5071..899e6f9a9eaa 100644
--- a/arch/loongarch/include/asm/irq.h
+++ b/arch/loongarch/include/asm/irq.h
@@ -39,11 +39,22 @@ void spurious_interrupt(void);

#define NR_IRQS_LEGACY 16

+/*
+ * 256 vectors Map:
+ *
+ * 0 - 15: mapping legacy IPs, e.g. IP0-12.
+ * 16 - 255: mapping a vector for external IRQ.
+ *
+ */
+#define NR_VECTORS 256
+#define IRQ_MATRIX_BITS NR_VECTORS
+#define NR_LEGACY_VECTORS 16
+
#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
void arch_trigger_cpumask_backtrace(const struct cpumask *mask, int exclude_cpu);

#define MAX_IO_PICS 2
-#define NR_IRQS (64 + (256 * MAX_IO_PICS))
+#define NR_IRQS ((64 + (64 * MAX_IO_PICS)) + NR_VECTORS * NR_CPUS)

struct acpi_vector_group {
int node;
@@ -65,7 +76,7 @@ extern struct acpi_vector_group msi_group[MAX_IO_PICS];
#define LOONGSON_LPC_LAST_IRQ (LOONGSON_LPC_IRQ_BASE + 15)

#define LOONGSON_CPU_IRQ_BASE 16
-#define LOONGSON_CPU_LAST_IRQ (LOONGSON_CPU_IRQ_BASE + 14)
+#define LOONGSON_CPU_LAST_IRQ (LOONGSON_CPU_IRQ_BASE + 15)

#define LOONGSON_PCH_IRQ_BASE 64
#define LOONGSON_PCH_ACPI_IRQ (LOONGSON_PCH_IRQ_BASE + 47)
@@ -101,6 +112,14 @@ int pch_msi_acpi_init(struct irq_domain *parent,
struct acpi_madt_msi_pic *acpi_pchmsi);
int pch_pic_acpi_init(struct irq_domain *parent,
struct acpi_madt_bio_pic *acpi_pchpic);
+
+int __init pch_msi_acpi_init_v2(struct irq_domain *parent,
+ struct acpi_madt_msi_pic *pch_msi_entry);
+int __init loongarch_avec_acpi_init(struct irq_domain *parent);
+void complete_irq_moving(void);
+void loongarch_avec_offline_cpu(unsigned int cpu);
+void loongarch_avec_online_cpu(unsigned int cpu);
+
int find_pch_pic(u32 gsi);
struct fwnode_handle *get_pch_msi_handle(int pci_segment);

diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index eb09adda54b7..805f51eaac06 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -72,7 +72,6 @@
#define CPUCFG1_RPLV BIT(23)
#define CPUCFG1_HUGEPG BIT(24)
#define CPUCFG1_CRC32 BIT(25)
-#define CPUCFG1_MSGINT BIT(26)

#define LOONGARCH_CPUCFG2 0x2
#define CPUCFG2_FP BIT(0)
@@ -252,8 +251,8 @@
#define CSR_ESTAT_EXC_WIDTH 6
#define CSR_ESTAT_EXC (_ULCAST_(0x3f) << CSR_ESTAT_EXC_SHIFT)
#define CSR_ESTAT_IS_SHIFT 0
-#define CSR_ESTAT_IS_WIDTH 14
-#define CSR_ESTAT_IS (_ULCAST_(0x3fff) << CSR_ESTAT_IS_SHIFT)
+#define CSR_ESTAT_IS_WIDTH 15
+#define CSR_ESTAT_IS (_ULCAST_(0x7fff) << CSR_ESTAT_IS_SHIFT)

#define LOONGARCH_CSR_ERA 0x6 /* ERA */

@@ -999,10 +998,17 @@
#define CSR_FWPC_SKIP_SHIFT 16
#define CSR_FWPC_SKIP (_ULCAST_(1) << CSR_FWPC_SKIP_SHIFT)

+#define LOONGARCH_CSR_ISR0 0xa0
+#define LOONGARCH_CSR_ISR1 0xa1
+#define LOONGARCH_CSR_ISR2 0xa2
+#define LOONGARCH_CSR_ISR3 0xa3
+
+#define LOONGARCH_CSR_IRR 0xa4
+
/*
* CSR_ECFG IM
*/
-#define ECFG0_IM 0x00001fff
+#define ECFG0_IM 0x00005fff
#define ECFGB_SIP0 0
#define ECFGF_SIP0 (_ULCAST_(1) << ECFGB_SIP0)
#define ECFGB_SIP1 1
@@ -1045,6 +1051,7 @@
#define IOCSRF_EIODECODE BIT_ULL(9)
#define IOCSRF_FLATMODE BIT_ULL(10)
#define IOCSRF_VM BIT_ULL(11)
+#define IOCSRF_AVEC BIT_ULL(15)

#define LOONGARCH_IOCSR_VENDOR 0x10

@@ -1055,6 +1062,7 @@
#define LOONGARCH_IOCSR_MISC_FUNC 0x420
#define IOCSR_MISC_FUNC_TIMER_RESET BIT_ULL(21)
#define IOCSR_MISC_FUNC_EXT_IOI_EN BIT_ULL(48)
+#define IOCSR_MISC_FUNC_AVEC_EN BIT_ULL(51)

#define LOONGARCH_IOCSR_CPUTEMP 0x428

@@ -1375,9 +1383,10 @@ __BUILD_CSR_OP(tlbidx)
index 000000000000..9a6832986b3d
--- /dev/null
+++ b/drivers/irqchip/irq-loongarch-avec.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2024 Loongson Technologies, Inc.
+ */
+
+#include <linux/cpuhotplug.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/radix-tree.h>
+#include <linux/spinlock.h>
+
+#include <asm/loongarch.h>
+#include <asm/setup.h>
+
+#define VECTORS_PER_REG 64
+#define IRR_INVALID_MASK 0x80000000UL
+#define IRR_VECTOR_MASK 0xffUL
+#define AVEC_MSG_OFFSET 0x100000
+
+static phys_addr_t msi_base_v2;
+static DEFINE_PER_CPU(struct irq_desc * [NR_VECTORS], irq_map);
+
+#ifdef CONFIG_SMP
+struct pending_list {
+ struct list_head head;
+};
+
+static DEFINE_PER_CPU(struct pending_list, pending_list);
+static struct cpumask intersect_mask;
+#endif
+
+struct loongarch_avec_chip {
+ struct fwnode_handle *fwnode;
+ struct irq_domain *domain;
+ struct irq_matrix *vector_matrix;
+ raw_spinlock_t lock;
+};
+
+static struct loongarch_avec_chip loongarch_avec;
+
+struct loongarch_avec_data {
+ struct list_head entry;
+ unsigned int cpu;
+ unsigned int vec;
+ unsigned int prev_cpu;
+ unsigned int prev_vec;
+ unsigned int moving : 1,
+ managed : 1;
+};
+
+ ret = irq_matrix_alloc(loongarch_avec.vector_matrix, &intersect_mask, false, &cpu);
+ irr = csr_read64(LOONGARCH_CSR_ISR0);
+ case 1:
+ irr = csr_read64(LOONGARCH_CSR_ISR1);
+ case 2:
+ irr = csr_read64(LOONGARCH_CSR_ISR2);
+ case 3:
+ irr = csr_read64(LOONGARCH_CSR_ISR3);
+ vector = csr_read64(LOONGARCH_CSR_IRR);
+ if (vector & IRR_INVALID_MASK)
+ return;
+
+ vector &= IRR_VECTOR_MASK;
+ ret = irq_matrix_alloc(loongarch_avec.vector_matrix, cpu_online_mask, false, &cpu);
index dd4d699170f4..8b7aae22e782 100644
@@ -289,4 +302,31 @@ int __init pch_msi_acpi_init(struct irq_domain *parent,

return ret;
}
+
+int __init pch_msi_acpi_init_v2(struct irq_domain *parent, struct acpi_madt_msi_pic *msi_entry)

maobibo

unread,
Jul 9, 2024, 11:38:27 PMJul 9
to Tianyang Zhang, cor...@lwn.net, al...@kernel.org, siya...@loongson.cn, chenh...@kernel.org, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
Tianyang,
It is strange here. What on earth is the base MSI address: the hardcoded
address 0xFD00000000? And what is its relationship to the address parsed
from the MADT table, pchmsi_entry->msg_address?
Variable msi_base_v2 is never used.

Regards
Bibo Mao

Tianyang Zhang

unread,
Jul 9, 2024, 11:49:07 PMJul 9
to maobibo, cor...@lwn.net, al...@kernel.org, siya...@loongson.cn, chenh...@kernel.org, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
Thank you for your correction. This is code I used to verify a legacy
issue on different platforms; it should be restored to its previous state.
I will resend this patch.

maobibo

unread,
Jul 10, 2024, 12:06:42 AMJul 10
to Tianyang Zhang, cor...@lwn.net, al...@kernel.org, siya...@loongson.cn, chenh...@kernel.org, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
OK, there is no need to hurry.
Can we add a bit in IOCSR register indicating the relationship for this
to solve legacy compatible problem?

I do not think it will hold true for all later HW designs to subtract
AVEC_MSG_OFFSET from the MADT table base address; normally it should be
the same as the address in the MADT table.

Regards
Bibo Mao

Huacai Chen

unread,
Jul 10, 2024, 12:08:23 AMJul 10
to Tianyang Zhang, cor...@lwn.net, al...@kernel.org, siya...@loongson.cn, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
Hi, Tianyang,
EIOINTC also has 256 irqs each, so I think here should be:
#define NR_IRQS (64 + NR_VECTORS * (NR_CPUS + MAX_IO_PICS))
Don't remove it, though we don't use it as an indicator.
Maybe pv_ipi_interrupt() also need to handle SMP_CLEAR_VECT.
Use isr instead of irr here.


Huacai

Tianyang Zhang

unread,
Jul 10, 2024, 12:48:42 AMJul 10
to cor...@lwn.net, al...@kernel.org, siya...@loongson.cn, chenh...@kernel.org, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, zhangt...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
index 000000000000..fe3ca3e3ceb2
+ msg->address_hi = 0x0;
+ msg->address_lo = (msi_base_v2 | (avec_data->vec & 0xff) << 4) |

Tianyang Zhang

unread,
Jul 10, 2024, 9:40:37 PMJul 10
to Thomas Gleixner, cor...@lwn.net, al...@kernel.org, siya...@loongson.cn, chenh...@kernel.org, ker...@xen0n.name, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
Hi, Thomas

在 2024/7/11 上午12:30, Thomas Gleixner 写道:
> On Wed, Jul 10 2024 at 12:38, Tianyang Zhang wrote:
>> Introduce the advanced extended interrupt controllers. This feature will
>> allow each core to have 256 independent interrupt vectors and MSI
>> interrupts can be independently routed to any vector on any CPU.
> Why are you resending V4 if there have been review comments on the
> original V4 submission?
>
> Thanks,
>
> tglx

Because the patch I submitted at the time contained hard-coded test code,
I was eager to fix this issue, and I hoped that subsequent reviews would be
based on the corrected version.

I have reviewed the kernel submission documentation and found that the

usage of RESEND is inappropriate. The next time there is a modification

issue, I will update the version and resubmit it


Thanks

Tianyang



Tianyang Zhang

unread,
Jul 11, 2024, 9:34:42 PMJul 11
to Thomas Gleixner, cor...@lwn.net, al...@kernel.org, siya...@loongson.cn, chenh...@kernel.org, ker...@xen0n.name, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org

在 2024/7/11 下午10:49, Thomas Gleixner 写道:
> Please address the review feedback of the original V4 and send a V5 with
> a proper documentation of changes from V4 to V5.
>
> Thanks,
>
> tglx

Ok, I will compile all the collected opinions and submit the V5 version
based on the original V4 patch


Tianyang


Tianyang Zhang

unread,
Jul 14, 2024, 11:35:12 PMJul 14
to cor...@lwn.net, al...@kernel.org, siya...@loongson.cn, chenh...@kernel.org, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, zhangt...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
Introduce the advanced extended interrupt controllers. This feature will
allow each core to have 256 independent interrupt vectors and MSI
interrupts can be independently routed to any vector on any CPU.

Co-developed-by: Jianmin Lv <lvji...@loongson.cn>
Signed-off-by: Jianmin Lv <lvji...@loongson.cn>
Co-developed-by: Liupu Wang <wang...@loongson.cn>
Signed-off-by: Liupu Wang <wang...@loongson.cn>
Co-developed-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Tianyang Zhang <zhangt...@loongson.cn>
---
.../arch/loongarch/irq-chip-model.rst | 33 ++
.../zh_CN/arch/loongarch/irq-chip-model.rst | 37 +-
arch/loongarch/Kconfig | 1 +
arch/loongarch/include/asm/cpu-features.h | 1 +
arch/loongarch/include/asm/cpu.h | 2 +
arch/loongarch/include/asm/hardirq.h | 3 +-
arch/loongarch/include/asm/irq.h | 23 +-
arch/loongarch/include/asm/loongarch.h | 18 +-
arch/loongarch/include/asm/smp.h | 2 +
arch/loongarch/kernel/cpu-probe.c | 3 +-
arch/loongarch/kernel/paravirt.c | 5 +
arch/loongarch/kernel/smp.c | 5 +
drivers/irqchip/Makefile | 2 +-
drivers/irqchip/irq-loongarch-avec.c | 433 ++++++++++++++++++
drivers/irqchip/irq-loongarch-cpu.c | 4 +-
drivers/irqchip/irq-loongson-eiointc.c | 3 +
drivers/irqchip/irq-loongson-pch-msi.c | 42 +-
17 files changed, 603 insertions(+), 14 deletions(-)
V4->V5:
Retain feature CPUCFG1_MSGINT
Fixed up coding style
Delete the test code introduced by V4, and now msi msg address still uses the 32-bit address
diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h
index d41138abcf26..b09891e4a4b2 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -12,11 +12,12 @@
extern void ack_bad_irq(unsigned int irq);
#define ack_bad_irq ack_bad_irq

-#define NR_IPI 2
+#define NR_IPI 3

enum ipi_msg_type {
IPI_RESCHEDULE,
IPI_CALL_FUNCTION,
+ IPI_CLEAR_VECT,
};

typedef struct {
index eb09adda54b7..c21dc52338e3 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -252,8 +252,8 @@
#define CSR_ESTAT_EXC_WIDTH 6
#define CSR_ESTAT_EXC (_ULCAST_(0x3f) << CSR_ESTAT_EXC_SHIFT)
#define CSR_ESTAT_IS_SHIFT 0
-#define CSR_ESTAT_IS_WIDTH 14
-#define CSR_ESTAT_IS (_ULCAST_(0x3fff) << CSR_ESTAT_IS_SHIFT)
+#define CSR_ESTAT_IS_WIDTH 15
+#define CSR_ESTAT_IS (_ULCAST_(0x7fff) << CSR_ESTAT_IS_SHIFT)

#define LOONGARCH_CSR_ERA 0x6 /* ERA */

@@ -999,10 +999,17 @@
#define CSR_FWPC_SKIP_SHIFT 16
#define CSR_FWPC_SKIP (_ULCAST_(1) << CSR_FWPC_SKIP_SHIFT)

+#define LOONGARCH_CSR_ISR0 0xa0
+#define LOONGARCH_CSR_ISR1 0xa1
+#define LOONGARCH_CSR_ISR2 0xa2
+#define LOONGARCH_CSR_ISR3 0xa3
+
+#define LOONGARCH_CSR_IRR 0xa4
+
/*
* CSR_ECFG IM
*/
-#define ECFG0_IM 0x00001fff
+#define ECFG0_IM 0x00005fff
#define ECFGB_SIP0 0
#define ECFGF_SIP0 (_ULCAST_(1) << ECFGB_SIP0)
#define ECFGB_SIP1 1
@@ -1045,6 +1052,7 @@
#define IOCSRF_EIODECODE BIT_ULL(9)
#define IOCSRF_FLATMODE BIT_ULL(10)
#define IOCSRF_VM BIT_ULL(11)
+#define IOCSRF_AVEC BIT_ULL(15)

#define LOONGARCH_IOCSR_VENDOR 0x10

@@ -1055,6 +1063,7 @@
#define LOONGARCH_IOCSR_MISC_FUNC 0x420
#define IOCSR_MISC_FUNC_TIMER_RESET BIT_ULL(21)
#define IOCSR_MISC_FUNC_EXT_IOI_EN BIT_ULL(48)
+#define IOCSR_MISC_FUNC_AVEC_EN BIT_ULL(51)

#define LOONGARCH_IOCSR_CPUTEMP 0x428

@@ -1375,9 +1384,10 @@ __BUILD_CSR_OP(tlbidx)
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
index 1633ed4f692f..834c99cb4df4 100644
--- a/arch/loongarch/kernel/paravirt.c
+++ b/arch/loongarch/kernel/paravirt.c
@@ -97,6 +97,11 @@ static irqreturn_t pv_ipi_interrupt(int irq, void *dev)
info->ipi_irqs[IPI_CALL_FUNCTION]++;
}

+ if (action & SMP_CLEAR_VECT) {
+ complete_irq_moving();
+ info->ipi_irqs[IPI_CLEAR_VECT]++;
+ }
+
return IRQ_HANDLED;
index 9f6f88274bec..1062e713cea4 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -109,7 +109,7 @@ obj-$(CONFIG_LS1X_IRQ) += irq-ls1x.o
obj-$(CONFIG_TI_SCI_INTR_IRQCHIP) += irq-ti-sci-intr.o
obj-$(CONFIG_TI_SCI_INTA_IRQCHIP) += irq-ti-sci-inta.o
obj-$(CONFIG_TI_PRUSS_INTC) += irq-pruss-intc.o
-obj-$(CONFIG_IRQ_LOONGARCH_CPU) += irq-loongarch-cpu.o
+obj-$(CONFIG_IRQ_LOONGARCH_CPU) += irq-loongarch-cpu.o irq-loongarch-avec.o
obj-$(CONFIG_LOONGSON_LIOINTC) += irq-loongson-liointc.o
obj-$(CONFIG_LOONGSON_EIOINTC) += irq-loongson-eiointc.o
obj-$(CONFIG_LOONGSON_HTPIC) += irq-loongson-htpic.o
diff --git a/drivers/irqchip/irq-loongarch-avec.c b/drivers/irqchip/irq-loongarch-avec.c
new file mode 100644
index 000000000000..744f46638def
+ u64 isr;
+
+ raw_spin_lock(&loongarch_avec.lock);
+
+ list_for_each_entry_safe(adata, tmp, &plist->head, entry) {
+ cpu = adata->prev_cpu;
+ vector = adata->prev_vec;
+ bias = vector / VECTORS_PER_REG;
+ switch (bias) {
+ case 0:
+ isr = csr_read64(LOONGARCH_CSR_ISR0);
+ case 1:
+ isr = csr_read64(LOONGARCH_CSR_ISR1);
+ case 2:
+ isr = csr_read64(LOONGARCH_CSR_ISR2);
+ case 3:
+ isr = csr_read64(LOONGARCH_CSR_ISR3);
+ }
+
+ if (isr & (1UL << (vector % VECTORS_PER_REG))) {
2.36.0


Huacai Chen

unread,
Jul 15, 2024, 2:49:28 AMJul 15
to Tianyang Zhang, cor...@lwn.net, al...@kernel.org, siya...@loongson.cn, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
Hi, Tianyang,
You still forgot to update NR_IRQS. And it is better to implement
arch_probe_nr_irqs() as other architectures because NR_IRQS is very
large now.

Since there is no other serious problems now (I think), to save
everyone's time please wait me to fix the above issue and other
bike-sheds. Then you can test my version and submit it as V6.


Huacai

Tianyang Zhang

unread,
Jul 15, 2024, 3:21:24 AMJul 15
to Huacai Chen, cor...@lwn.net, al...@kernel.org, siya...@loongson.cn, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
Hi, Huacai
Considering hardware limitations, we should indeed use
64+256*(MAX_IO_PICS + NR_CPUS) as NR_IRQS.

If we choose CONFIG_SPARSE_IRQ, in my understanding, the value of
nr_irqs is only used as a marker for

the maximum value of the system, even if the value is too large and will
not have a serious negative impact on the system.

Therefore, it may not be necessary to use arch_probe_nr_irqs


Tianyang

Huacai Chen

unread,
Jul 15, 2024, 3:28:50 AMJul 15
to Tianyang Zhang, cor...@lwn.net, al...@kernel.org, siya...@loongson.cn, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
All architectures that provide arch_probe_nr_irqs() have selected SPARSE_IRQ.
Now you need to do nothing except wait me one or two days, take it easy. :)

Huacai

Tianyang Zhang

unread,
Jul 15, 2024, 3:33:27 AMJul 15
to Huacai Chen, cor...@lwn.net, al...@kernel.org, siya...@loongson.cn, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
Ok, I got it

Thank you for your guidance and help


Tianyang

Huacai Chen

unread,
Jul 16, 2024, 5:26:41 AMJul 16
to Tianyang Zhang, cor...@lwn.net, al...@kernel.org, siya...@loongson.cn, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
Hi, Tianyang,

I'm fixing potential bugs; besides NR_IRQS, I found some other issues
that need you to double-check. Please see below carefully.

On Mon, Jul 15, 2024 at 11:34 AM Tianyang Zhang
<zhangt...@loongson.cn> wrote:
>
Maybe it is better to use cpuhotplug callbacks, which is similar to
this method in irq-loongson-eiointc.c

cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_EIOINTC_STARTING,
"irqchip/loongarch/eiointc:starting",
eiointc_router_init, NULL);
Here missing a chained_irq_exit(chip, desc), right?

Moreover, I think it is better to handle all avecintc interrupts in
one dispatch, which means

chained_irq_enter(chip, desc);

while (true) {
vector = csr_read64(LOONGARCH_CSR_IRR);
if (vector & IRR_INVALID_MASK)
goto out;

vector &= IRR_VECTOR_MASK;

d = this_cpu_read(irq_map[vector]);
if (d)
generic_handle_irq_desc(d);
else {
spurious_interrupt();
pr_warn("Unexpected IRQ occurs on CPU#%d
[vector %ld]\n", smp_processor_id(), vector);
}
}

out:
chained_irq_exit(chip, desc);

Do you think so?
I think these lines are also needed, right?
.irq_mask = irq_chip_mask_parent,
.irq_unmask = irq_chip_unmask_parent,
.irq_set_affinity = irq_chip_set_affinity_parent,

Huacai

Tianyang Zhang

unread,
Jul 17, 2024, 11:15:14 PM (12 days ago) Jul 17
to Huacai Chen, cor...@lwn.net, al...@kernel.org, siya...@loongson.cn, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
Hi , Huacai
Okay, I will adapt to this in the next version
Missing chained_irq_exit() is really a potential bug, thanks.

Regarding the second suggestion, the consideration at that time was to
minimize the

granularity of avec interrupt execution as much as possible, so that
higher priority tasks could be executed.

However, this has indeed caused some efficiency reduction. I am
consulting with hardware personnel,

and if the value of the IRR register will not be infinitely refreshed
when the interrupt is turned off,

then I think the above code is more appropriate
The original intention of this design was to use
MSI_FLAG_USE_DEF_CHIP_OPS, and

then update it with the default value in pci_msi_domain_update_chip_ops
& msi_domain_update_chip_ops,

so there is no explicit indication


Tianyang

Huacai Chen

unread,
Jul 18, 2024, 12:38:35 AM (12 days ago) Jul 18
to Tianyang Zhang, cor...@lwn.net, al...@kernel.org, siya...@loongson.cn, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
On Thu, Jul 18, 2024 at 11:12 AM Tianyang Zhang
Unnecessary, I have completed here and you can test with it.
https://github.com/chenhuacai/linux/commits/loongarch-next

If no problems, please submit the last patch as V6, otherwise please
feedback to me.

Huacai

Tianyang Zhang

unread,
Jul 18, 2024, 4:38:19 AM (12 days ago) Jul 18
to Huacai Chen, cor...@lwn.net, al...@kernel.org, siya...@loongson.cn, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
Hi, Huacai
I have received your modifications and will test them as soon as
possible and submit V6 patch based on other feedback

Thank you again


Tianyang

Huacai Chen

unread,
Jul 18, 2024, 5:33:58 AM (12 days ago) Jul 18
to Tianyang Zhang, cor...@lwn.net, al...@kernel.org, siya...@loongson.cn, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org
OK, the changelog may be:
1. Fix definition of NR_IRQS
2. Define arch_probe_nr_irqs()
3. Fix a missing chained_irq_exit() in avecintc_irq_dispatch()
4. Handle all avecintc interrupts in one dispatch
5. Use cpuhotplug callbacks instead of direct call to
avec_online_cpu()/avec_offline_cpu()
6. Rename {SMP,ACTION}_CLEAR_VECT to {SMP,ACTION}_CLEAR_VECTOR
7. Use avecintc_ prefix instead of loongarch_avec_ to keep consistency

And please pay attention to the definition of pch_msi_irq_chip_v2. I
changed the irq_mask/irq_unmask callbacks but may be not correct.

Huacai

Tianyang Zhang

unread,
Jul 23, 2024, 10:08:41 PM (6 days ago) Jul 23
to cor...@lwn.net, al...@kernel.org, siya...@loongson.cn, chenh...@kernel.org, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, zhangt...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org, Huacai Chen
Introduce the advanced extended interrupt controllers (AVECINTC). This
feature will allow each core to have 256 independent interrupt vectors
and MSI interrupts can be independently routed to any vector on any CPU.

Co-developed-by: Jianmin Lv <lvji...@loongson.cn>
Signed-off-by: Jianmin Lv <lvji...@loongson.cn>
Co-developed-by: Liupu Wang <wang...@loongson.cn>
Signed-off-by: Liupu Wang <wang...@loongson.cn>
Co-developed-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Thomas Gleixner <tg...@linutronix.de>
Co-developed-by: Huacai Chen <chenh...@loongson.cn>
Signed-off-by: Huacai Chen <chenh...@loongson.cn>
Signed-off-by: Tianyang Zhang <zhangt...@loongson.cn>
---
.../arch/loongarch/irq-chip-model.rst | 32 ++
.../zh_CN/arch/loongarch/irq-chip-model.rst | 32 ++
arch/loongarch/Kconfig | 1 +
arch/loongarch/include/asm/cpu-features.h | 1 +
arch/loongarch/include/asm/cpu.h | 2 +
arch/loongarch/include/asm/hardirq.h | 3 +-
arch/loongarch/include/asm/irq.h | 25 +-
arch/loongarch/include/asm/loongarch.h | 18 +-
arch/loongarch/include/asm/smp.h | 2 +
arch/loongarch/kernel/cpu-probe.c | 3 +-
arch/loongarch/kernel/irq.c | 14 +-
arch/loongarch/kernel/paravirt.c | 5 +
arch/loongarch/kernel/smp.c | 6 +
drivers/irqchip/Makefile | 2 +-
drivers/irqchip/irq-loongarch-avec.c | 448 ++++++++++++++++++
drivers/irqchip/irq-loongarch-cpu.c | 5 +-
drivers/irqchip/irq-loongson-eiointc.c | 7 +-
drivers/irqchip/irq-loongson-pch-msi.c | 39 ++
include/linux/cpuhotplug.h | 3 +-
19 files changed, 632 insertions(+), 16 deletions(-)
V5->V6:
Fix definition of NR_IRQS
Define arch_probe_nr_irqs()
Handle all avecintc interrupts in one dispatch
Use cpuhotplug callbacks instead of direct call to avec_online_cpu()/avec_offline_cpu()
Rename {SMP,ACTION}_CLEAR_VECT to {SMP,ACTION}_CLEAR_VECTOR
Use avecintc_ prefix instead of loongarch_avec_ to keep consistency

diff --git a/Documentation/arch/loongarch/irq-chip-model.rst b/Documentation/arch/loongarch/irq-chip-model.rst
index 7988f4192363..6dd48256e39f 100644
--- a/Documentation/arch/loongarch/irq-chip-model.rst
+++ b/Documentation/arch/loongarch/irq-chip-model.rst
@@ -85,6 +85,38 @@ to CPUINTC directly::
| Devices |
+---------+

+Advanced Extended IRQ model
+===========================
+
+In this model, IPI (Inter-Processor Interrupt) and CPU Local Timer interrupt go
+to CPUINTC directly, CPU UARTS interrupts go to LIOINTC, PCH-MSI interrupts go
+to AVECINTC, and then go to CPUINTC directly, while all other device interrupts
+go to PCH-PIC/PCH-LPC and are gathered by EIOINTC, and then go to CPUINTC directly::
+
+ +-----+ +-----------------------+ +-------+
+ | IPI | --> | CPUINTC | <-- | Timer |
+ +-----+ +-----------------------+ +-------+
+ ^ ^ ^
+ | | |
+ +---------+ +----------+ +---------+ +-------+
+ | EIOINTC | | AVECINTC | | LIOINTC | <-- | UARTs |
+ +---------+ +----------+ +---------+ +-------+
+ ^ ^
+ | |
+ +---------+ +---------+
+ | PCH-PIC | | PCH-MSI |
+ +---------+ +---------+
+ ^ ^ ^
+ | | |
+ +---------+ +---------+ +---------+
+ | Devices | | PCH-LPC | | Devices |
+ +---------+ +---------+ +---------+
+ ^
+ |
+ +---------+
+ | Devices |
+ +---------+
+
ACPI-related definitions
========================

diff --git a/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst b/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst
index f1e9ab18206c..472761938682 100644
--- a/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst
+++ b/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst
@@ -87,6 +87,38 @@ PCH-LPC/PCH-MSI,然后被EIOINTC统一收集,再直接到达CPUINTC::
| Devices |
+---------+

+高级扩展IRQ模型
+===============
+
+在这种模型里面,IPI(Inter-Processor Interrupt)和CPU本地时钟中断直接发送到CPUINTC,
+CPU串口(UARTs)中断发送到LIOINTC,PCH-MSI中断发送到AVECINTC,而后通过AVECINTC直接
+送达CPUINTC,而其他所有设备的中断则分别发送到所连接的PCH-PIC/PCH-LPC,然后由EIOINTC
+统一收集,再直接到达CPUINTC::
+
+ +-----+ +-----------------------+ +-------+
+ | IPI | --> | CPUINTC | <-- | Timer |
+ +-----+ +-----------------------+ +-------+
+ ^ ^ ^
+ | | |
+ +---------+ +----------+ +---------+ +-------+
+ | EIOINTC | | AVECINTC | | LIOINTC | <-- | UARTs |
+ +---------+ +----------+ +---------+ +-------+
+ ^ ^
+ | |
+ +---------+ +---------+
+ | PCH-PIC | | PCH-MSI |
+ +---------+ +---------+
+ ^ ^ ^
+ | | |
+ +---------+ +---------+ +---------+
+ | Devices | | PCH-LPC | | Devices |
+ +---------+ +---------+ +---------+
+ ^
+ |
+ +---------+
+ | Devices |
+ +---------+
+
ACPI相关的定义
==============

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index ebdb7156560c..e30641fa8070 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -85,6 +85,7 @@ config LOONGARCH
index 1d7feb719515..10da8d6961cb 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -12,12 +12,13 @@
extern void ack_bad_irq(unsigned int irq);
#define ack_bad_irq ack_bad_irq

-#define NR_IPI 3
+#define NR_IPI 4

enum ipi_msg_type {
IPI_RESCHEDULE,
IPI_CALL_FUNCTION,
IPI_IRQ_WORK,
+ IPI_CLEAR_VECTOR,
};

typedef struct {
diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
index 480418bc5071..1b255bf8168f 100644
--- a/arch/loongarch/include/asm/irq.h
+++ b/arch/loongarch/include/asm/irq.h
@@ -39,11 +39,22 @@ void spurious_interrupt(void);

#define NR_IRQS_LEGACY 16

+/*
+ * 256 Vectors Mapping for AVECINTC:
+ *
+ * 0 - 15: Mapping classic IPs, e.g. IP0-12.
+ * 16 - 255: Mapping vectors for external IRQ.
+ *
+ */
+#define NR_VECTORS 256
+#define NR_LEGACY_VECTORS 16
+#define IRQ_MATRIX_BITS NR_VECTORS
+
#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
void arch_trigger_cpumask_backtrace(const struct cpumask *mask, int exclude_cpu);

#define MAX_IO_PICS 2
-#define NR_IRQS (64 + (256 * MAX_IO_PICS))
+#define NR_IRQS (64 + NR_VECTORS * (NR_CPUS + MAX_IO_PICS))

struct acpi_vector_group {
int node;
@@ -65,7 +76,7 @@ extern struct acpi_vector_group msi_group[MAX_IO_PICS];
#define LOONGSON_LPC_LAST_IRQ (LOONGSON_LPC_IRQ_BASE + 15)

#define LOONGSON_CPU_IRQ_BASE 16
-#define LOONGSON_CPU_LAST_IRQ (LOONGSON_CPU_IRQ_BASE + 14)
+#define LOONGSON_CPU_LAST_IRQ (LOONGSON_CPU_IRQ_BASE + 15)

#define LOONGSON_PCH_IRQ_BASE 64
#define LOONGSON_PCH_ACPI_IRQ (LOONGSON_PCH_IRQ_BASE + 47)
@@ -92,15 +103,21 @@ int liointc_acpi_init(struct irq_domain *parent,
struct acpi_madt_lio_pic *acpi_liointc);
int eiointc_acpi_init(struct irq_domain *parent,
struct acpi_madt_eio_pic *acpi_eiointc);
+int avecintc_acpi_init(struct irq_domain *parent);
+
+void complete_irq_moving(void);

int htvec_acpi_init(struct irq_domain *parent,
struct acpi_madt_ht_pic *acpi_htvec);
int pch_lpc_acpi_init(struct irq_domain *parent,
struct acpi_madt_lpc_pic *acpi_pchlpc);
-int pch_msi_acpi_init(struct irq_domain *parent,
- struct acpi_madt_msi_pic *acpi_pchmsi);
int pch_pic_acpi_init(struct irq_domain *parent,
struct acpi_madt_bio_pic *acpi_pchpic);
+int pch_msi_acpi_init(struct irq_domain *parent,
+ struct acpi_madt_msi_pic *acpi_pchmsi);
+int pch_msi_acpi_init_v2(struct irq_domain *parent,
+ struct acpi_madt_msi_pic *acpi_pchmsi);
+
int find_pch_pic(u32 gsi);
struct fwnode_handle *get_pch_msi_handle(int pci_segment);

diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 04a78010fc72..70834a47257d 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -253,8 +253,8 @@
#define CSR_ESTAT_EXC_WIDTH 6
#define CSR_ESTAT_EXC (_ULCAST_(0x3f) << CSR_ESTAT_EXC_SHIFT)
#define CSR_ESTAT_IS_SHIFT 0
-#define CSR_ESTAT_IS_WIDTH 14
-#define CSR_ESTAT_IS (_ULCAST_(0x3fff) << CSR_ESTAT_IS_SHIFT)
+#define CSR_ESTAT_IS_WIDTH 15
+#define CSR_ESTAT_IS (_ULCAST_(0x7fff) << CSR_ESTAT_IS_SHIFT)

#define LOONGARCH_CSR_ERA 0x6 /* ERA */

@@ -649,6 +649,13 @@

#define LOONGARCH_CSR_CTAG 0x98 /* TagLo + TagHi */

+#define LOONGARCH_CSR_ISR0 0xa0
+#define LOONGARCH_CSR_ISR1 0xa1
+#define LOONGARCH_CSR_ISR2 0xa2
+#define LOONGARCH_CSR_ISR3 0xa3
+
+#define LOONGARCH_CSR_IRR 0xa4
+
#define LOONGARCH_CSR_PRID 0xc0

/* Shadow MCSR : 0xc0 ~ 0xff */
@@ -1011,7 +1018,7 @@
/*
* CSR_ECFG IM
*/
-#define ECFG0_IM 0x00001fff
+#define ECFG0_IM 0x00005fff
#define ECFGB_SIP0 0
#define ECFGF_SIP0 (_ULCAST_(1) << ECFGB_SIP0)
#define ECFGB_SIP1 1
@@ -1054,6 +1061,7 @@
#define IOCSRF_EIODECODE BIT_ULL(9)
#define IOCSRF_FLATMODE BIT_ULL(10)
#define IOCSRF_VM BIT_ULL(11)
+#define IOCSRF_AVEC BIT_ULL(15)

#define LOONGARCH_IOCSR_VENDOR 0x10

@@ -1065,6 +1073,7 @@
#define IOCSR_MISC_FUNC_SOFT_INT BIT_ULL(10)
#define IOCSR_MISC_FUNC_TIMER_RESET BIT_ULL(21)
#define IOCSR_MISC_FUNC_EXT_IOI_EN BIT_ULL(48)
+#define IOCSR_MISC_FUNC_AVEC_EN BIT_ULL(51)

#define LOONGARCH_IOCSR_CPUTEMP 0x428

@@ -1387,9 +1396,10 @@ __BUILD_CSR_OP(tlbidx)
#define INT_TI 11 /* Timer */
#define INT_IPI 12
#define INT_NMI 13
+#define INT_AVEC 14

/* ExcCodes corresponding to interrupts */
-#define EXCCODE_INT_NUM (INT_NMI + 1)
+#define EXCCODE_INT_NUM (INT_AVEC + 1)
#define EXCCODE_INT_START 64
#define EXCCODE_INT_END (EXCCODE_INT_START + EXCCODE_INT_NUM - 1)

diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index 50db503f44e3..3383c9d24e94 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -70,10 +70,12 @@ extern int __cpu_logical_map[NR_CPUS];
#define ACTION_RESCHEDULE 1
#define ACTION_CALL_FUNCTION 2
#define ACTION_IRQ_WORK 3
+#define ACTION_CLEAR_VECTOR 4
#define SMP_BOOT_CPU BIT(ACTION_BOOT_CPU)
#define SMP_RESCHEDULE BIT(ACTION_RESCHEDULE)
#define SMP_CALL_FUNCTION BIT(ACTION_CALL_FUNCTION)
#define SMP_IRQ_WORK BIT(ACTION_IRQ_WORK)
+#define SMP_CLEAR_VECTOR BIT(ACTION_CLEAR_VECTOR)

struct secondary_data {
unsigned long stack;
diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c
index 55320813ee08..14f0449f5452 100644
--- a/arch/loongarch/kernel/cpu-probe.c
+++ b/arch/loongarch/kernel/cpu-probe.c
@@ -106,7 +106,6 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
elf_hwcap |= HWCAP_LOONGARCH_CRC32;
}

-
config = read_cpucfg(LOONGARCH_CPUCFG2);
if (config & CPUCFG2_LAM) {
c->options |= LOONGARCH_CPU_LAM;
@@ -174,6 +173,8 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
c->options |= LOONGARCH_CPU_FLATMODE;
if (config & IOCSRF_EIODECODE)
c->options |= LOONGARCH_CPU_EIODECODE;
+ if (config & IOCSRF_AVEC)
+ c->options |= LOONGARCH_CPU_AVECINT;
if (config & IOCSRF_VM)
c->options |= LOONGARCH_CPU_HYPERVISOR;

diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index f4991c03514f..1311546a7b4e 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -87,6 +87,18 @@ static void __init init_vec_parent_group(void)
acpi_table_parse(ACPI_SIG_MCFG, early_pci_mcfg_parse);
}

+/*
+ * Compute nr_irqs at boot instead of relying on the compile-time NR_IRQS:
+ * 64 plus one NR_VECTORS-sized range per bit set in
+ * loongson_sysconf.cores_io_master, and, when the CPU has AVECINTC, one more
+ * NR_VECTORS-sized range per nr_cpu_ids.
+ *
+ * Returns NR_IRQS_LEGACY.
+ */
+int __init arch_probe_nr_irqs(void)
+{
+	int nr_io_pics = bitmap_weight(loongson_sysconf.cores_io_master, NR_CPUS);
+
+	if (cpu_has_avecint)
+		nr_irqs = (64 + NR_VECTORS * (nr_cpu_ids + nr_io_pics));
+	else
+		nr_irqs = (64 + NR_VECTORS * nr_io_pics);
+
+	return NR_IRQS_LEGACY;
+}
+
void __init init_IRQ(void)
{
int i;
@@ -102,7 +114,7 @@ void __init init_IRQ(void)
mp_ops.init_ipi();
#endif

- for (i = 0; i < NR_IRQS; i++)
+ for (i = 0; i < nr_irqs; i++)
irq_set_noprobe(i);

for_each_possible_cpu(i) {
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
index 9c9b75b76f62..4d736a4e488d 100644
--- a/arch/loongarch/kernel/paravirt.c
+++ b/arch/loongarch/kernel/paravirt.c
@@ -134,6 +134,11 @@ static irqreturn_t pv_ipi_interrupt(int irq, void *dev)
info->ipi_irqs[IPI_IRQ_WORK]++;
}

+ if (action & SMP_CLEAR_VECTOR) {
+ complete_irq_moving();
+ info->ipi_irqs[IPI_CLEAR_VECTOR]++;
+ }
+
return IRQ_HANDLED;
}

diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index ca405ab86aae..4adbbef3450a 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -72,6 +72,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
[IPI_RESCHEDULE] = "Rescheduling interrupts",
[IPI_CALL_FUNCTION] = "Function call interrupts",
[IPI_IRQ_WORK] = "IRQ work interrupts",
+ [IPI_CLEAR_VECTOR] = "Clear vector interrupts",
};

void show_ipi_list(struct seq_file *p, int prec)
@@ -248,6 +249,11 @@ static irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
per_cpu(irq_stat, cpu).ipi_irqs[IPI_IRQ_WORK]++;
}

+ if (action & SMP_CLEAR_VECTOR) {
+ complete_irq_moving();
+ per_cpu(irq_stat, cpu).ipi_irqs[IPI_CLEAR_VECTOR]++;
+ }
+
return IRQ_HANDLED;
}

diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index d9dc3d99aaa8..7f4f5637fece 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -108,7 +108,7 @@ obj-$(CONFIG_LS1X_IRQ) += irq-ls1x.o
obj-$(CONFIG_TI_SCI_INTR_IRQCHIP) += irq-ti-sci-intr.o
obj-$(CONFIG_TI_SCI_INTA_IRQCHIP) += irq-ti-sci-inta.o
obj-$(CONFIG_TI_PRUSS_INTC) += irq-pruss-intc.o
-obj-$(CONFIG_IRQ_LOONGARCH_CPU) += irq-loongarch-cpu.o
+obj-$(CONFIG_IRQ_LOONGARCH_CPU) += irq-loongarch-cpu.o irq-loongarch-avec.o
obj-$(CONFIG_LOONGSON_LIOINTC) += irq-loongson-liointc.o
obj-$(CONFIG_LOONGSON_EIOINTC) += irq-loongson-eiointc.o
obj-$(CONFIG_LOONGSON_HTPIC) += irq-loongson-htpic.o
diff --git a/drivers/irqchip/irq-loongarch-avec.c b/drivers/irqchip/irq-loongarch-avec.c
new file mode 100644
index 000000000000..a90971a4262f
--- /dev/null
+++ b/drivers/irqchip/irq-loongarch-avec.c
@@ -0,0 +1,448 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2024 Loongson Technologies, Inc.
+ */
+
+#include <linux/cpuhotplug.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/radix-tree.h>
+#include <linux/spinlock.h>
+
+#include <asm/loongarch.h>
+#include <asm/setup.h>
+
+#define VECTORS_PER_REG 64
+#define IRR_VECTOR_MASK 0xffUL
+#define IRR_INVALID_MASK 0x80000000UL
+#define AVEC_MSG_OFFSET 0x100000
+
+static phys_addr_t msi_base_addr;
+
+#ifdef CONFIG_SMP
+struct pending_list {
+ struct list_head head;
+};
+
+static struct cpumask intersect_mask;
+static DEFINE_PER_CPU(struct pending_list, pending_list);
+#endif
+
+static DEFINE_PER_CPU(struct irq_desc * [NR_VECTORS], irq_map);
+
+/*
+ * Driver-wide AVECINTC state (single instance: loongarch_avec).
+ *
+ * vector_matrix is the irq_matrix that per-CPU vectors are allocated from and
+ * freed to; lock is taken around those matrix operations and around updates
+ * of the per-CPU irq_map / pending_list in this file. fwnode and domain are
+ * not touched in the code shown in this part of the patch.
+ */
+struct avecintc_chip {
+ struct fwnode_handle *fwnode;
+ struct irq_domain *domain;
+ struct irq_matrix *vector_matrix;
+ raw_spinlock_t lock;
+};
+
+static struct avecintc_chip loongarch_avec;
+
+/*
+ * Per-interrupt chip data, allocated in avecintc_domain_alloc().
+ *
+ * entry    - links this interrupt into the pending_list of prev_cpu while a
+ *            move is in flight (see avecintc_sync()).
+ * cpu/vec  - CPU and vector the interrupt is currently routed to.
+ * prev_cpu/prev_vec - CPU and vector used before the last affinity change;
+ *            released in complete_irq_moving(), which then sets them to
+ *            cpu/vec.
+ * moving   - set by avecintc_sync(), cleared once the old vector is released.
+ * managed  - snapshot of irqd_affinity_is_managed() taken at allocation.
+ */
+struct avecintc_data {
+ struct list_head entry;
+ unsigned int cpu;
+ unsigned int vec;
+ unsigned int prev_cpu;
+ unsigned int prev_vec;
+ unsigned int moving : 1,
+ managed : 1;
+};
+
+/* Empty callback, installed as .irq_ack of avec_irq_controller. */
+static inline void avecintc_ack_irq(struct irq_data *d)
+{
+}
+
+/* Empty callback, installed as .irq_mask of avec_irq_controller. */
+static inline void avecintc_mask_irq(struct irq_data *d)
+{
+}
+
+/* Empty callback, installed as .irq_unmask of avec_irq_controller. */
+static inline void avecintc_unmask_irq(struct irq_data *d)
+{
+}
+
+#ifdef CONFIG_SMP
+/* Reset the per-CPU pending_list of @cpu to an empty list. */
+static inline void pending_list_init(int cpu)
+{
+	INIT_LIST_HEAD(&per_cpu_ptr(&pending_list, cpu)->head);
+}
+
+/*
+ * Start releasing the previous vector of @adata: queue it on the pending list
+ * of its previous CPU, mark it as moving and send that CPU an
+ * ACTION_CLEAR_VECTOR IPI. Nothing is done when the previous CPU is offline.
+ */
+static void avecintc_sync(struct avecintc_data *adata)
+{
+	unsigned int old_cpu = adata->prev_cpu;
+
+	if (!cpu_online(old_cpu))
+		return;
+
+	list_add_tail(&adata->entry, &per_cpu_ptr(&pending_list, old_cpu)->head);
+	adata->moving = 1;
+	mp_ops.send_ipi_single(old_cpu, ACTION_CLEAR_VECTOR);
+}
+
+/*
+ * irq_set_affinity callback: route @data to a vector on an online CPU in @dest.
+ *
+ * Returns 0 when the current target CPU is online and already part of @dest,
+ * -EBUSY while an earlier move of this interrupt has not completed, the
+ * negative error from irq_matrix_alloc() when no vector is available, and
+ * IRQ_SET_MASK_OK once a new CPU/vector pair has been installed. @force is
+ * not used.
+ */
+static int avecintc_set_affinity(struct irq_data *data, const struct cpumask *dest,
+				 bool force)
+{
+	unsigned int cpu;
+	/* Signed on purpose: irq_matrix_alloc() reports failure as a negative value. */
+	int ret;
+	unsigned long flags;
+	struct avecintc_data *adata;
+
+	raw_spin_lock_irqsave(&loongarch_avec.lock, flags);
+	adata = irq_data_get_irq_chip_data(data);
+
+	if (adata->vec && cpu_online(adata->cpu) && cpumask_test_cpu(adata->cpu, dest)) {
+		ret = 0;
+		goto out_unlock;
+	}
+
+	/* Must drop the lock here too; returning with it held deadlocks the next taker. */
+	if (adata->moving) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
+	cpumask_and(&intersect_mask, dest, cpu_online_mask);
+
+	ret = irq_matrix_alloc(loongarch_avec.vector_matrix, &intersect_mask, false, &cpu);
+	if (ret < 0)
+		goto out_unlock;
+
+	adata->cpu = cpu;
+	adata->vec = ret;
+	per_cpu_ptr(irq_map, adata->cpu)[adata->vec] = irq_data_to_desc(data);
+	/* Hand the old vector to its CPU for release. */
+	avecintc_sync(adata);
+
+	raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
+	irq_data_update_effective_affinity(data, cpumask_of(cpu));
+
+	return IRQ_SET_MASK_OK;
+
+out_unlock:
+	raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
+	return ret;
+}
+
+/*
+ * Bring the vector matrix online and reset the pending list of @cpu, both
+ * under loongarch_avec.lock. Does nothing when no vector matrix has been set
+ * up. Always returns 0.
+ *
+ * Per the V6 changelog this is intended to run as a cpuhotplug callback; the
+ * registration is not part of the code shown here.
+ */
+static int avecintc_online_cpu(unsigned int cpu)
+{
+	unsigned long irqflags;
+
+	if (loongarch_avec.vector_matrix) {
+		raw_spin_lock_irqsave(&loongarch_avec.lock, irqflags);
+		irq_matrix_online(loongarch_avec.vector_matrix);
+		pending_list_init(cpu);
+		raw_spin_unlock_irqrestore(&loongarch_avec.lock, irqflags);
+	}
+
+	return 0;
+}
+
+/*
+ * Take the vector matrix offline, unless the pending list of @cpu still holds
+ * interrupts whose old vectors have not been released; in that case only a
+ * warning is printed. Does nothing when no vector matrix has been set up.
+ * Always returns 0.
+ */
+static int avecintc_offline_cpu(unsigned int cpu)
+{
+	unsigned long irqflags;
+	struct pending_list *pending;
+
+	if (!loongarch_avec.vector_matrix)
+		return 0;
+
+	pending = per_cpu_ptr(&pending_list, cpu);
+
+	raw_spin_lock_irqsave(&loongarch_avec.lock, irqflags);
+	if (!list_empty(&pending->head))
+		pr_warn("CPU#%d advanced vector is busy\n", cpu);
+	else
+		irq_matrix_offline(loongarch_avec.vector_matrix);
+	raw_spin_unlock_irqrestore(&loongarch_avec.lock, irqflags);
+
+	return 0;
+}
+
+/*
+ * Called from the SMP_CLEAR_VECTOR IPI handlers: release the old vectors of
+ * interrupts that were moved away from this CPU.
+ *
+ * For every entry on this CPU's pending list, the ISR register covering the
+ * entry's previous vector is read. If that vector's bit is set, the entry is
+ * left queued and the IPI is sent again so the check is retried later.
+ * Otherwise the entry is unlinked, the old vector is returned to the matrix,
+ * the irq_map slot is cleared and the move is marked complete.
+ */
+void complete_irq_moving(void)
+{
+	struct pending_list *plist = this_cpu_ptr(&pending_list);
+	struct avecintc_data *adata, *tdata;
+	int cpu, vector, bias;
+	uint64_t isr;
+
+	raw_spin_lock(&loongarch_avec.lock);
+
+	list_for_each_entry_safe(adata, tdata, &plist->head, entry) {
+		cpu = adata->prev_cpu;
+		vector = adata->prev_vec;
+		bias = vector / VECTORS_PER_REG;
+		/*
+		 * Each ISRn covers VECTORS_PER_REG vectors. Every case needs its
+		 * own break; without it all cases fall through and ISR3 is tested
+		 * no matter which register the vector belongs to.
+		 */
+		switch (bias) {
+		case 0:
+			isr = csr_read64(LOONGARCH_CSR_ISR0);
+			break;
+		case 1:
+			isr = csr_read64(LOONGARCH_CSR_ISR1);
+			break;
+		case 2:
+			isr = csr_read64(LOONGARCH_CSR_ISR2);
+			break;
+		case 3:
+			isr = csr_read64(LOONGARCH_CSR_ISR3);
+			break;
+		}
+
+		if (isr & (1UL << (vector % VECTORS_PER_REG))) {
+			mp_ops.send_ipi_single(cpu, ACTION_CLEAR_VECTOR);
+			continue;
+		}
+		list_del(&adata->entry);
+		irq_matrix_free(loongarch_avec.vector_matrix, cpu, vector, adata->managed);
+		this_cpu_write(irq_map[vector], NULL);
+		adata->prev_cpu = adata->cpu;
+		adata->prev_vec = adata->vec;
+		adata->moving = 0;
+	}
+
+	raw_spin_unlock(&loongarch_avec.lock);
+}
+#endif
+
+/*
+ * irq_compose_msi_msg callback: fill @msg for the interrupt described by @d.
+ *
+ * address_lo is msi_base_addr OR-ed with (vec & 0xff) shifted to bit 4 and
+ * the cpu_logical_map() value of the target CPU shifted to bit 12.
+ * address_hi and data are set to zero.
+ */
+static void avecintc_compose_msi_msg(struct irq_data *d, struct msi_msg *msg)
+{
+ struct avecintc_data *adata;
+
+ adata = irq_data_get_irq_chip_data(d);
+
+ msg->address_hi = 0x0;
+ /* NOTE(review): 0xffff masks the argument of cpu_logical_map(), not its result - confirm this is intended. */
+ msg->address_lo = (msi_base_addr | (adata->vec & 0xff) << 4) |
+ ((cpu_logical_map(adata->cpu & 0xffff)) << 12);
+ msg->data = 0x0;
+}
+
+/*
+ * irq_chip installed on every interrupt allocated by avecintc_domain_alloc().
+ * The ack/mask/unmask callbacks are the empty stubs defined earlier in this
+ * file; the affinity callback exists only when CONFIG_SMP is set.
+ */
+static struct irq_chip avec_irq_controller = {
+ .name = "AVECINTC",
+ .irq_ack = avecintc_ack_irq,
+ .irq_mask = avecintc_mask_irq,
+ .irq_unmask = avecintc_unmask_irq,
+#ifdef CONFIG_SMP
+ .irq_set_affinity = avecintc_set_affinity,
+#endif
+ .irq_compose_msi_msg = avecintc_compose_msi_msg,
+};
+
+/*
+ * Chained handler: drain all pending AVECINTC vectors in one invocation.
+ *
+ * LOONGARCH_CSR_IRR is read repeatedly until it reports IRR_INVALID_MASK.
+ * Each valid value is reduced to a vector number and dispatched to the
+ * irq_desc stored in this CPU's irq_map; a vector with no descriptor is
+ * counted as spurious and logged.
+ */
+static void avecintc_irq_dispatch(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+
+	chained_irq_enter(chip, desc);
+
+	for (;;) {
+		unsigned long irr = csr_read64(LOONGARCH_CSR_IRR);
+		unsigned long vector;
+		struct irq_desc *target;
+
+		if (irr & IRR_INVALID_MASK)
+			break;
+
+		vector = irr & IRR_VECTOR_MASK;
+		target = this_cpu_read(irq_map[vector]);
+		if (!target) {
+			spurious_interrupt();
+			pr_warn("Unexpected IRQ occurs on CPU#%d [vector %ld]\n",
+				smp_processor_id(), vector);
+			continue;
+		}
+
+		generic_handle_irq_desc(target);
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+/*
+ * irq_domain .alloc callback: give each of the nr_irqs interrupts starting at
+ * virq a per-interrupt avecintc_data and a (cpu, vector) pair taken from the
+ * vector matrix, and record the descriptor in the target CPU's irq_map.
+ *
+ * Returns 0 on success, -ENOMEM if the per-interrupt data cannot be allocated,
+ * or the negative error from irq_matrix_alloc().
+ *
+ * NOTE(review): vectors handed out by earlier loop iterations are not rolled
+ * back here when a later iteration fails - confirm whether the caller's error
+ * path releases them.
+ */
+static int avecintc_domain_alloc(struct irq_domain *domain,
+				 unsigned int virq, unsigned int nr_irqs, void *arg)
+{
+	unsigned int cpu, i;
+	unsigned long flags;
+	struct irq_data *irqd;
+	struct avecintc_data *adata;
+	int ret;	/* must be signed: irq_matrix_alloc() reports errors as negative values */
+
+	for (i = 0; i < nr_irqs; i++) {
+		irqd = irq_domain_get_irq_data(domain, virq + i);
+
+		/* GFP_KERNEL may sleep, so allocate before taking the raw spinlock. */
+		adata = kzalloc(sizeof(*adata), GFP_KERNEL);
+		if (!adata)
+			return -ENOMEM;
+
+		raw_spin_lock_irqsave(&loongarch_avec.lock, flags);
+
+		ret = irq_matrix_alloc(loongarch_avec.vector_matrix, cpu_online_mask, false, &cpu);
+		if (ret < 0) {
+			raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
+			/* Not yet attached to the irq_data, so it must be freed here. */
+			kfree(adata);
+			return ret;
+		}
+
+		/* Fully initialise the data before publishing it. */
+		adata->prev_cpu = adata->cpu = cpu;
+		adata->prev_vec = adata->vec = ret;
+		adata->managed = irqd_affinity_is_managed(irqd);
+		adata->moving = 0;
+
+		irq_domain_set_info(domain, virq + i, virq + i, &avec_irq_controller,
+				    adata, handle_edge_irq, NULL, NULL);
+		irqd_set_single_target(irqd);
+		irqd_set_affinity_on_activate(irqd);
+
+		per_cpu_ptr(irq_map, adata->cpu)[adata->vec] = irq_data_to_desc(irqd);
+
+		raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
+	}
+
+	return 0;
+}
+
+/*
+ * Release the vector(s) held by an interrupt.
+ *
+ * The current (cpu, vec) slot is cleared in irq_map and returned to the
+ * vector matrix.  On CONFIG_SMP, if a move is still pending, the previous
+ * (prev_cpu, prev_vec) slot is released as well and the entry is unlinked
+ * from its list.
+ *
+ * Released CPU fields are set to UINT_MAX rather than 0, because 0 is a valid
+ * CPU number and would make stale data look like a real assignment.  The
+ * vector fields are set to 0.
+ */
+static void clear_free_vector(struct irq_data *irqd)
+{
+	bool managed = irqd_affinity_is_managed(irqd);
+	struct avecintc_data *adata = irq_data_get_irq_chip_data(irqd);
+
+	per_cpu(irq_map, adata->cpu)[adata->vec] = NULL;
+	irq_matrix_free(loongarch_avec.vector_matrix, adata->cpu, adata->vec, managed);
+	adata->cpu = UINT_MAX;
+	adata->vec = 0;
+
+#ifdef CONFIG_SMP
+	if (!adata->moving)
+		return;
+
+	/* A move was in flight: the old slot is still reserved, release it too. */
+	per_cpu(irq_map, adata->prev_cpu)[adata->prev_vec] = NULL;
+	irq_matrix_free(loongarch_avec.vector_matrix,
+			adata->prev_cpu, adata->prev_vec, adata->managed);
+	adata->moving = 0;
+	adata->prev_vec = 0;
+	adata->prev_cpu = UINT_MAX;
+	list_del_init(&adata->entry);
+#endif
+}
+
+/*
+ * irq_domain .free callback: release the vectors of the nr_irqs interrupts
+ * starting at virq, reset their irq_data and free the per-interrupt
+ * avecintc_data that avecintc_domain_alloc() allocated with kzalloc().
+ */
+static void avecintc_domain_free(struct irq_domain *domain,
+				 unsigned int virq, unsigned int nr_irqs)
+{
+	unsigned int i;
+	unsigned long flags;
+	struct irq_data *d;
+	struct avecintc_data *adata;
+
+	for (i = 0; i < nr_irqs; i++) {
+		d = irq_domain_get_irq_data(domain, virq + i);
+		if (!d)
+			continue;
+
+		/* Fetch the chip data before irq_domain_reset_irq_data() drops it. */
+		adata = irq_data_get_irq_chip_data(d);
+
+		raw_spin_lock_irqsave(&loongarch_avec.lock, flags);
+		clear_free_vector(d);
+		irq_domain_reset_irq_data(d);
+		raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
+
+		/* Free outside the raw spinlock; without this the data is leaked. */
+		kfree(adata);
+	}
+}
+
+/* Domain operations for the AVECINTC irq domain: allocation and release only. */
+static const struct irq_domain_ops avecintc_domain_ops = {
+ .alloc = avecintc_domain_alloc,
+ .free = avecintc_domain_free,
+};
+
+/*
+ * Allocate the global vector matrix, mark the first NR_LEGACY_VECTORS vectors
+ * as system vectors and bring the matrix online for the calling CPU.
+ *
+ * Returns 0 on success or -ENOMEM if the matrix cannot be allocated.
+ *
+ * NOTE(review): the allocation range ends at NR_VECTORS - 1; confirm whether
+ * irq_alloc_matrix() treats that bound as exclusive, in which case the last
+ * vector is never handed out.
+ */
+static int __init irq_matrix_init(void)
+{
+	int vec;
+
+	loongarch_avec.vector_matrix = irq_alloc_matrix(NR_VECTORS, 0, NR_VECTORS - 1);
+	if (!loongarch_avec.vector_matrix)
+		return -ENOMEM;
+
+	for (vec = 0; vec < NR_LEGACY_VECTORS; vec++)
+		irq_matrix_assign_system(loongarch_avec.vector_matrix, vec, false);
+
+	irq_matrix_online(loongarch_avec.vector_matrix);
+
+	return 0;
+}
+
+/*
+ * Set up the AVECINTC irqchip below the given parent domain.
+ *
+ * Steps: create the named fwnode and the tree domain, map INT_AVEC in the
+ * parent and install avecintc_irq_dispatch() as its chained handler,
+ * initialise the vector matrix, set IOCSR_MISC_FUNC_AVEC_EN in
+ * LOONGARCH_IOCSR_MISC_FUNC and register the CPU hotplug callbacks.
+ *
+ * Returns the result of cpuhp_setup_state_nocalls() on the success path, or a
+ * negative errno after undoing the steps that had already completed.
+ */
+static int __init avecintc_init(struct irq_domain *parent)
+{
+	int ret, parent_irq;
+	unsigned long value;
+
+	raw_spin_lock_init(&loongarch_avec.lock);
+
+	loongarch_avec.fwnode = irq_domain_alloc_named_fwnode("AVECINTC");
+	if (!loongarch_avec.fwnode) {
+		pr_err("Unable to allocate domain handle\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	loongarch_avec.domain = irq_domain_create_tree(loongarch_avec.fwnode,
+						       &avecintc_domain_ops, NULL);
+	if (!loongarch_avec.domain) {
+		pr_err("Unable to create IRQ domain\n");
+		ret = -ENOMEM;
+		goto out_free_handle;
+	}
+
+	parent_irq = irq_create_mapping(parent, INT_AVEC);
+	if (!parent_irq) {
+		pr_err("Failed to mapping hwirq\n");
+		ret = -EINVAL;
+		goto out_remove_domain;
+	}
+	irq_set_chained_handler_and_data(parent_irq, avecintc_irq_dispatch, NULL);
+
+	ret = irq_matrix_init();
+	if (ret < 0) {
+		pr_err("Failed to init irq matrix\n");
+		goto out_remove_handler;
+	}
+#ifdef CONFIG_SMP
+	pending_list_init(0);
+#endif
+	value = iocsr_read64(LOONGARCH_IOCSR_MISC_FUNC);
+	value |= IOCSR_MISC_FUNC_AVEC_EN;
+	iocsr_write64(value, LOONGARCH_IOCSR_MISC_FUNC);
+
+	ret = cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_AVECINTC_STARTING,
+					"irqchip/loongarch/avecintc:starting",
+					avecintc_online_cpu, avecintc_offline_cpu);
+	return ret;
+
+out_remove_handler:
+	/*
+	 * irq_matrix_init() only fails when the matrix could not be allocated,
+	 * so there is no matrix to free.  The chained handler, however, must be
+	 * detached before the domain it dispatches into is removed.
+	 */
+	irq_set_chained_handler_and_data(parent_irq, NULL, NULL);
+out_remove_domain:
+	irq_domain_remove(loongarch_avec.domain);
+out_free_handle:
+	irq_domain_free_fwnode(loongarch_avec.fwnode);
+out:
+	return ret;
+}
+
+/*
+ * MADT callback for an MSI_PIC entry: derive msi_base_addr from the entry's
+ * message address minus AVEC_MSG_OFFSET, then create the PCH MSI domain on
+ * top of the AVECINTC domain.
+ */
+static int __init pch_msi_parse_madt(union acpi_subtable_headers *header,
+				     const unsigned long end)
+{
+	struct acpi_madt_msi_pic *msi_pic = (struct acpi_madt_msi_pic *)header;
+
+	msi_base_addr = msi_pic->msg_address - AVEC_MSG_OFFSET;
+
+	return pch_msi_acpi_init_v2(loongarch_avec.domain, msi_pic);
+}
+
+/* Parse at most one MADT MSI_PIC entry and hand it to pch_msi_parse_madt(). */
+static inline int __init acpi_cascade_irqdomain_init(void)
+{
+ return acpi_table_parse_madt(ACPI_MADT_TYPE_MSI_PIC, pch_msi_parse_madt, 1);
+}
+
+/*
+ * ACPI entry point: initialise AVECINTC under the given parent domain, then
+ * set up the cascaded MSI domain from the MADT.
+ *
+ * Returns a negative errno if either step fails, otherwise the value
+ * returned by the MADT parsing step.
+ */
+int __init avecintc_acpi_init(struct irq_domain *parent)
+{
+	int err = avecintc_init(parent);
+
+	if (err < 0) {
+		pr_err("Failed to init IRQ domain\n");
+		return err;
+	}
+
+	err = acpi_cascade_irqdomain_init();
+	if (err < 0)
+		pr_err("Failed to init cascade IRQ domain\n");
+
+	return err;
+}
diff --git a/drivers/irqchip/irq-loongarch-cpu.c b/drivers/irqchip/irq-loongarch-cpu.c
index 9d8f2c406043..4fdc490b94c3 100644
--- a/drivers/irqchip/irq-loongarch-cpu.c
+++ b/drivers/irqchip/irq-loongarch-cpu.c
@@ -138,7 +138,10 @@ static int __init acpi_cascade_irqdomain_init(void)
if (r < 0)
return r;

- return 0;
+ if (cpu_has_avecint)
+ r = avecintc_acpi_init(irq_domain);
+
+ return r;
}

static int __init cpuintc_acpi_init(union acpi_subtable_headers *header,
diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c
index b1f2080be2be..895d15b96669 100644
--- a/drivers/irqchip/irq-loongson-eiointc.c
+++ b/drivers/irqchip/irq-loongson-eiointc.c
@@ -360,6 +360,9 @@ static int __init acpi_cascade_irqdomain_init(void)
if (r < 0)
return r;

+ if (cpu_has_avecint)
+ return 0;
+
r = acpi_table_parse_madt(ACPI_MADT_TYPE_MSI_PIC, pch_msi_parse_madt, 1);
if (r < 0)
return r;
@@ -396,8 +399,8 @@ static int __init eiointc_init(struct eiointc_priv *priv, int parent_irq,

if (nr_pics == 1) {
register_syscore_ops(&eiointc_syscore_ops);
- cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_LOONGARCH_STARTING,
- "irqchip/loongarch/intc:starting",
+ cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_EIOINTC_STARTING,
+ "irqchip/loongarch/eiointc:starting",
eiointc_router_init, NULL);
}

diff --git a/drivers/irqchip/irq-loongson-pch-msi.c b/drivers/irqchip/irq-loongson-pch-msi.c
index dd4d699170f4..74bac8e2e7cb 100644
--- a/drivers/irqchip/irq-loongson-pch-msi.c
+++ b/drivers/irqchip/irq-loongson-pch-msi.c
@@ -268,6 +268,9 @@ struct fwnode_handle *get_pch_msi_handle(int pci_segment)
{
int i;

+ if (cpu_has_avecint)
+ return pch_msi_handle[0];
+
for (i = 0; i < MAX_IO_PICS; i++) {
if (msi_group[i].pci_segment == pci_segment)
return pch_msi_handle[i];
@@ -289,4 +292,40 @@ int __init pch_msi_acpi_init(struct irq_domain *parent,

return ret;
}
+
+/* MSI irq_chip used on top of AVECINTC; acks are forwarded to the parent chip. */
+static struct irq_chip pch_msi_irq_chip_v2 = {
+ .name = "PCH PCI MSI",
+ .irq_ack = irq_chip_ack_parent,
+};
+
+/* MSI domain description for the v2 path: default domain/chip ops, multi-MSI and MSI-X. */
+static struct msi_domain_info pch_msi_domain_info_v2 = {
+ .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX,
+ .chip = &pch_msi_irq_chip_v2,
+};
+
+/*
+ * Create the single PCH MSI domain used with AVECINTC.
+ *
+ * The domain handle is stored in pch_msi_handle[0]; if that slot is already
+ * set, nothing is done and 0 is returned.
+ *
+ * Returns 0 on success or -ENOMEM if the fwnode or the MSI domain cannot be
+ * created.  On failure pch_msi_handle[0] is left NULL, so a later call can
+ * retry and no stale pointer is handed out.
+ */
+int __init pch_msi_acpi_init_v2(struct irq_domain *parent,
+				struct acpi_madt_msi_pic *acpi_pchmsi)
+{
+	struct irq_domain *msi_domain;
+
+	if (pch_msi_handle[0])
+		return 0;
+
+	pch_msi_handle[0] = irq_domain_alloc_fwnode(&acpi_pchmsi->msg_address);
+	if (!pch_msi_handle[0]) {
+		pr_err("Unable to allocate domain handle\n");
+		return -ENOMEM;
+	}
+
+	msi_domain = pci_msi_create_irq_domain(pch_msi_handle[0],
+					       &pch_msi_domain_info_v2, parent);
+	if (!msi_domain) {
+		pr_err("Failed to create PCI MSI domain\n");
+		/* Release with the matching fwnode helper, not a bare kfree(). */
+		irq_domain_free_fwnode(pch_msi_handle[0]);
+		pch_msi_handle[0] = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
#endif
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 7a5785f405b6..c57ad616fee5 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -144,7 +144,8 @@ enum cpuhp_state {
CPUHP_AP_IRQ_ARMADA_XP_STARTING,
CPUHP_AP_IRQ_BCM2836_STARTING,
CPUHP_AP_IRQ_MIPS_GIC_STARTING,
- CPUHP_AP_IRQ_LOONGARCH_STARTING,
+ CPUHP_AP_IRQ_EIOINTC_STARTING,
+ CPUHP_AP_IRQ_AVECINTC_STARTING,
CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
CPUHP_AP_IRQ_RISCV_IMSIC_STARTING,
CPUHP_AP_ARM_MVEBU_COHERENCY,
--
2.43.0


Tianyang Zhang

unread,
Jul 26, 2024, 6:25:45 AM (4 days ago) Jul 26
to cor...@lwn.net, al...@kernel.org, siya...@loongson.cn, chenh...@kernel.org, ker...@xen0n.name, tg...@linutronix.de, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, zhangt...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org, Huacai Chen
Introduce the advanced extended interrupt controllers (AVECINTC). This
feature will allow each core to have 256 independent interrupt vectors
and MSI interrupts can be independently routed to any vector on any CPU.

The whole topology of irqchips in LoongArch machines looks like this if
AVECINTC is supported:

+-----+ +-----------------------+ +-------+
| IPI | --> | CPUINTC | <-- | Timer |
+-----+ +-----------------------+ +-------+
^ ^ ^
| | |
+---------+ +----------+ +---------+ +-------+
| EIOINTC | | AVECINTC | | LIOINTC | <-- | UARTs |
+---------+ +----------+ +---------+ +-------+
^ ^
| |
+---------+ +---------+
| PCH-PIC | | PCH-MSI |
+---------+ +---------+
^ ^ ^
| | |
+---------+ +---------+ +---------+
| Devices | | PCH-LPC | | Devices |
+---------+ +---------+ +---------+
^
|
+---------+
| Devices |
+---------+

Co-developed-by: Jianmin Lv <lvji...@loongson.cn>
Signed-off-by: Jianmin Lv <lvji...@loongson.cn>
Co-developed-by: Liupu Wang <wang...@loongson.cn>
Signed-off-by: Liupu Wang <wang...@loongson.cn>
Co-developed-by: Huacai Chen <chenh...@loongson.cn>
Signed-off-by: Huacai Chen <chenh...@loongson.cn>
Signed-off-by: Tianyang Zhang <zhangt...@loongson.cn>
---
.../arch/loongarch/irq-chip-model.rst | 32 ++
.../zh_CN/arch/loongarch/irq-chip-model.rst | 32 ++
arch/loongarch/Kconfig | 1 +
arch/loongarch/include/asm/cpu-features.h | 1 +
arch/loongarch/include/asm/cpu.h | 2 +
arch/loongarch/include/asm/hardirq.h | 3 +-
arch/loongarch/include/asm/irq.h | 25 +-
arch/loongarch/include/asm/loongarch.h | 18 +-
arch/loongarch/include/asm/smp.h | 2 +
arch/loongarch/kernel/cpu-probe.c | 3 +-
arch/loongarch/kernel/irq.c | 14 +-
arch/loongarch/kernel/paravirt.c | 5 +
arch/loongarch/kernel/smp.c | 6 +
drivers/irqchip/Makefile | 2 +-
drivers/irqchip/irq-loongarch-avec.c | 447 ++++++++++++++++++
drivers/irqchip/irq-loongarch-cpu.c | 5 +-
drivers/irqchip/irq-loongson-eiointc.c | 7 +-
drivers/irqchip/irq-loongson-pch-msi.c | 41 +-
include/linux/cpuhotplug.h | 3 +-
19 files changed, 632 insertions(+), 17 deletions(-)
V6->V7:
Fixed compatibility issue with cpuhp_setup_state_nocalls when CONFIG_SMP is turned off
Rename avecintc_online/offline_cpu as avecintc_cpu_online/offline
Use pch_msi_handle[0] as default value of get_pch_msi_handle
Rework commit-message

diff --git a/Documentation/arch/loongarch/irq-chip-model.rst b/Documentation/arch/loongarch/irq-chip-model.rst
index 7988f41923639..6dd48256e39f7 100644
index f1e9ab18206c3..472761938682c 100644
index ebdb7156560c7..e30641fa80706 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -85,6 +85,7 @@ config LOONGARCH
select GENERIC_ENTRY
select GENERIC_GETTIMEOFDAY
select GENERIC_IOREMAP if !ARCH_IOREMAP
+ select GENERIC_IRQ_MATRIX_ALLOCATOR
select GENERIC_IRQ_MULTI_HANDLER
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_SHOW
diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h
index 2eafe6a6aca81..16a716f88a5ca 100644
--- a/arch/loongarch/include/asm/cpu-features.h
+++ b/arch/loongarch/include/asm/cpu-features.h
@@ -65,5 +65,6 @@
#define cpu_has_guestid cpu_opt(LOONGARCH_CPU_GUESTID)
#define cpu_has_hypervisor cpu_opt(LOONGARCH_CPU_HYPERVISOR)
#define cpu_has_ptw cpu_opt(LOONGARCH_CPU_PTW)
+#define cpu_has_avecint cpu_opt(LOONGARCH_CPU_AVECINT)

#endif /* __ASM_CPU_FEATURES_H */
diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h
index 48b9f7168bcca..843f9c4ec9807 100644
--- a/arch/loongarch/include/asm/cpu.h
+++ b/arch/loongarch/include/asm/cpu.h
@@ -99,6 +99,7 @@ enum cpu_type_enum {
#define CPU_FEATURE_GUESTID 24 /* CPU has GuestID feature */
#define CPU_FEATURE_HYPERVISOR 25 /* CPU has hypervisor (running in VM) */
#define CPU_FEATURE_PTW 26 /* CPU has hardware page table walker */
+#define CPU_FEATURE_AVECINT 27 /* CPU has avec interrupt */

#define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG)
#define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM)
@@ -127,5 +128,6 @@ enum cpu_type_enum {
#define LOONGARCH_CPU_GUESTID BIT_ULL(CPU_FEATURE_GUESTID)
#define LOONGARCH_CPU_HYPERVISOR BIT_ULL(CPU_FEATURE_HYPERVISOR)
#define LOONGARCH_CPU_PTW BIT_ULL(CPU_FEATURE_PTW)
+#define LOONGARCH_CPU_AVECINT BIT_ULL(CPU_FEATURE_AVECINT)

#endif /* _ASM_CPU_H */
diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h
index 1d7feb7195157..10da8d6961cb0 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -12,12 +12,13 @@
extern void ack_bad_irq(unsigned int irq);
#define ack_bad_irq ack_bad_irq

-#define NR_IPI 3
+#define NR_IPI 4

enum ipi_msg_type {
IPI_RESCHEDULE,
IPI_CALL_FUNCTION,
IPI_IRQ_WORK,
+ IPI_CLEAR_VECTOR,
};

typedef struct {
diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
index 480418bc5071a..1b255bf8168f0 100644
index 04a78010fc725..70834a47257de 100644
index 50db503f44e3c..3383c9d24e942 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -70,10 +70,12 @@ extern int __cpu_logical_map[NR_CPUS];
#define ACTION_RESCHEDULE 1
#define ACTION_CALL_FUNCTION 2
#define ACTION_IRQ_WORK 3
+#define ACTION_CLEAR_VECTOR 4
#define SMP_BOOT_CPU BIT(ACTION_BOOT_CPU)
#define SMP_RESCHEDULE BIT(ACTION_RESCHEDULE)
#define SMP_CALL_FUNCTION BIT(ACTION_CALL_FUNCTION)
#define SMP_IRQ_WORK BIT(ACTION_IRQ_WORK)
+#define SMP_CLEAR_VECTOR BIT(ACTION_CLEAR_VECTOR)

struct secondary_data {
unsigned long stack;
diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c
index 55320813ee081..14f0449f54520 100644
--- a/arch/loongarch/kernel/cpu-probe.c
+++ b/arch/loongarch/kernel/cpu-probe.c
@@ -106,7 +106,6 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
elf_hwcap |= HWCAP_LOONGARCH_CRC32;
}

-
config = read_cpucfg(LOONGARCH_CPUCFG2);
if (config & CPUCFG2_LAM) {
c->options |= LOONGARCH_CPU_LAM;
@@ -174,6 +173,8 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
c->options |= LOONGARCH_CPU_FLATMODE;
if (config & IOCSRF_EIODECODE)
c->options |= LOONGARCH_CPU_EIODECODE;
+ if (config & IOCSRF_AVEC)
+ c->options |= LOONGARCH_CPU_AVECINT;
if (config & IOCSRF_VM)
c->options |= LOONGARCH_CPU_HYPERVISOR;

diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index f4991c03514f4..1311546a7b4ee 100644
index 9c9b75b76f62f..4d736a4e488dd 100644
--- a/arch/loongarch/kernel/paravirt.c
+++ b/arch/loongarch/kernel/paravirt.c
@@ -134,6 +134,11 @@ static irqreturn_t pv_ipi_interrupt(int irq, void *dev)
info->ipi_irqs[IPI_IRQ_WORK]++;
}

+ if (action & SMP_CLEAR_VECTOR) {
+ complete_irq_moving();
+ info->ipi_irqs[IPI_CLEAR_VECTOR]++;
+ }
+
return IRQ_HANDLED;
}

diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index ca405ab86aaef..4adbbef3450ac 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -72,6 +72,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
[IPI_RESCHEDULE] = "Rescheduling interrupts",
[IPI_CALL_FUNCTION] = "Function call interrupts",
[IPI_IRQ_WORK] = "IRQ work interrupts",
+ [IPI_CLEAR_VECTOR] = "Clear vector interrupts",
};

void show_ipi_list(struct seq_file *p, int prec)
@@ -248,6 +249,11 @@ static irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
per_cpu(irq_stat, cpu).ipi_irqs[IPI_IRQ_WORK]++;
}

+ if (action & SMP_CLEAR_VECTOR) {
+ complete_irq_moving();
+ per_cpu(irq_stat, cpu).ipi_irqs[IPI_CLEAR_VECTOR]++;
+ }
+
return IRQ_HANDLED;
}

diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 15635812b2d66..e3679ec2b9f76 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -110,7 +110,7 @@ obj-$(CONFIG_LS1X_IRQ) += irq-ls1x.o
obj-$(CONFIG_TI_SCI_INTR_IRQCHIP) += irq-ti-sci-intr.o
obj-$(CONFIG_TI_SCI_INTA_IRQCHIP) += irq-ti-sci-inta.o
obj-$(CONFIG_TI_PRUSS_INTC) += irq-pruss-intc.o
-obj-$(CONFIG_IRQ_LOONGARCH_CPU) += irq-loongarch-cpu.o
+obj-$(CONFIG_IRQ_LOONGARCH_CPU) += irq-loongarch-cpu.o irq-loongarch-avec.o
obj-$(CONFIG_LOONGSON_LIOINTC) += irq-loongson-liointc.o
obj-$(CONFIG_LOONGSON_EIOINTC) += irq-loongson-eiointc.o
obj-$(CONFIG_LOONGSON_HTPIC) += irq-loongson-htpic.o
diff --git a/drivers/irqchip/irq-loongarch-avec.c b/drivers/irqchip/irq-loongarch-avec.c
new file mode 100644
index 0000000000000..0033aafeb1ecf
--- /dev/null
+++ b/drivers/irqchip/irq-loongarch-avec.c
@@ -0,0 +1,447 @@
+/*
+ * CPU hotplug callback for a CPU coming up: bring the vector matrix online
+ * and initialise that CPU's pending list, both under loongarch_avec.lock.
+ * Does nothing if the vector matrix was never set up.  Always returns 0.
+ */
+static int avecintc_cpu_online(unsigned int cpu)
+{
+	unsigned long irqflags;
+
+	if (loongarch_avec.vector_matrix) {
+		raw_spin_lock_irqsave(&loongarch_avec.lock, irqflags);
+
+		irq_matrix_online(loongarch_avec.vector_matrix);
+		pending_list_init(cpu);
+
+		raw_spin_unlock_irqrestore(&loongarch_avec.lock, irqflags);
+	}
+
+	return 0;
+}
+
+static int avecintc_cpu_offline(unsigned int cpu)
+ pr_warn("Unexpected IRQ occurs on CPU#%d [vector %ld]\n", smp_processor_id(), vector);
+ ret = cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_AVECINTC_STARTING,
+ "irqchip/loongarch/avecintc:starting",
+ avecintc_cpu_online, avecintc_cpu_offline);
+#endif
+ value = iocsr_read64(LOONGARCH_IOCSR_MISC_FUNC);
+ value |= IOCSR_MISC_FUNC_AVEC_EN;
+ iocsr_write64(value, LOONGARCH_IOCSR_MISC_FUNC);
+
index b35903a06902f..808d241c481fb 100644
--- a/drivers/irqchip/irq-loongarch-cpu.c
+++ b/drivers/irqchip/irq-loongarch-cpu.c
@@ -140,7 +140,10 @@ static int __init acpi_cascade_irqdomain_init(void)
if (r < 0)
return r;

- return 0;
+ if (cpu_has_avecint)
+ r = avecintc_acpi_init(irq_domain);
+
+ return r;
}

static int __init cpuintc_acpi_init(union acpi_subtable_headers *header,
diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c
index b1f2080be2beb..895d15b96669e 100644
--- a/drivers/irqchip/irq-loongson-eiointc.c
+++ b/drivers/irqchip/irq-loongson-eiointc.c
@@ -360,6 +360,9 @@ static int __init acpi_cascade_irqdomain_init(void)
if (r < 0)
return r;

+ if (cpu_has_avecint)
+ return 0;
+
r = acpi_table_parse_madt(ACPI_MADT_TYPE_MSI_PIC, pch_msi_parse_madt, 1);
if (r < 0)
return r;
@@ -396,8 +399,8 @@ static int __init eiointc_init(struct eiointc_priv *priv, int parent_irq,

if (nr_pics == 1) {
register_syscore_ops(&eiointc_syscore_ops);
- cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_LOONGARCH_STARTING,
- "irqchip/loongarch/intc:starting",
+ cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_EIOINTC_STARTING,
+ "irqchip/loongarch/eiointc:starting",
eiointc_router_init, NULL);
}

diff --git a/drivers/irqchip/irq-loongson-pch-msi.c b/drivers/irqchip/irq-loongson-pch-msi.c
index dd4d699170f4e..e4c22c239628e 100644
--- a/drivers/irqchip/irq-loongson-pch-msi.c
+++ b/drivers/irqchip/irq-loongson-pch-msi.c
@@ -268,11 +268,14 @@ struct fwnode_handle *get_pch_msi_handle(int pci_segment)
{
int i;

+ if (cpu_has_avecint)
+ return pch_msi_handle[0];
+
for (i = 0; i < MAX_IO_PICS; i++) {
if (msi_group[i].pci_segment == pci_segment)
return pch_msi_handle[i];
}
- return NULL;
+ return pch_msi_handle[0];
index 89f5c34ce4df9..287605bd5d4a7 100644

Huacai Chen

unread,
4:53 AM (3 hours ago) 4:53 AM
to Thomas Gleixner, Tianyang Zhang, cor...@lwn.net, al...@kernel.org, siya...@loongson.cn, ker...@xen0n.name, jiaxu...@flygoat.com, gaol...@loongson.cn, wang...@loongson.cn, lvji...@loongson.cn, yi...@loongson.cn, mho...@suse.com, ak...@linux-foundation.org, dian...@chromium.org, mao...@loongson.cn, xry...@xry111.site, zhaot...@loongson.cn, nat...@kernel.org, yangt...@loongson.cn, zhoub...@loongson.cn, loon...@lists.linux.dev, linu...@vger.kernel.org, linux-...@vger.kernel.org, Huacai Chen
Hi, Thomas,

On Fri, Jul 26, 2024 at 11:12 PM Thomas Gleixner <tg...@linutronix.de> wrote:
>
> On Fri, Jul 26 2024 at 18:24, Tianyang Zhang wrote:
> > +static int avecintc_set_affinity(struct irq_data *data, const struct cpumask *dest,
> > + bool force)
>
> Please align arguments according to documentation. In this case don't
> use a line break. Just use the full 100 character width all over the place
>
> > +{
> > + unsigned int cpu, ret, vector;
> > + unsigned long flags;
> > + struct avecintc_data *adata;
>
> Please look for variable declarations in:
>
> https://www.kernel.org/doc/html/latest/process/maintainer-tip.html
>
> and fix it up all over the place.
OK, thanks.

>
> > +
> > + raw_spin_lock_irqsave(&loongarch_avec.lock, flags);
>
> This does not need irqsave because the function is always called with
> interrupts disabled.
OK, thanks.

>
> > + adata = irq_data_get_irq_chip_data(data);
> > +
> > + if (adata->vec && cpu_online(adata->cpu) && cpumask_test_cpu(adata->cpu, dest)) {
> > + raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
> > + return 0;
> > + }
> > +
> > + if (adata->moving)
> > + return -EBUSY;
>
> This leaks loongarch_avec.lock. Please use
>
> scoped_guard(raw_spin_lock)(&loongarch_avec.lock);
OK, thanks.

>
> above so you spare all the unlocks and cannot miss one.
> guard(raw_spin_lock)
>
> interrupts are disabled.
OK, thanks.

>
> > + irq_matrix_online(loongarch_avec.vector_matrix);
> > +
> > + pending_list_init(cpu);
> > +
> > + raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
> > +
> > + return 0;
> > +}
> > +
> > +static int avecintc_cpu_offline(unsigned int cpu)
> > +{
> > + unsigned long flags;
> > + struct pending_list *plist = per_cpu_ptr(&pending_list, cpu);
> > +
> > + if (!loongarch_avec.vector_matrix)
> > + return 0;
> > +
> > + raw_spin_lock_irqsave(&loongarch_avec.lock, flags);
>
> Ditto.
>
> > + if (list_empty(&plist->head))
> > + irq_matrix_offline(loongarch_avec.vector_matrix);
> > + else
> > + pr_warn("CPU#%d advanced vector is busy\n", cpu);
>
> Seriously? You leave the matrix online so allocation can be made from an
> offline CPU?
Yes, we should always call irq_matrix_offline(loongarch_avec.vector_matrix).

>
> > + raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
> > +
> > + return 0;
> > +}
> > +
> > +void complete_irq_moving(void)
> > +{
> > + struct pending_list *plist = this_cpu_ptr(&pending_list);
> > + struct avecintc_data *adata, *tdata;
> > + int cpu, vector, bias;
> > + uint64_t isr;
> > +
> > + raw_spin_lock(&loongarch_avec.lock);
>
> guard()
>
> > + list_for_each_entry_safe(adata, tdata, &plist->head, entry) {
> > + cpu = adata->prev_cpu;
> > + vector = adata->prev_vec;
> > + bias = vector / VECTORS_PER_REG;
> > + switch (bias) {
> > + case 0:
> > + isr = csr_read64(LOONGARCH_CSR_ISR0);
> > + case 1:
> > + isr = csr_read64(LOONGARCH_CSR_ISR1);
> > + case 2:
> > + isr = csr_read64(LOONGARCH_CSR_ISR2);
> > + case 3:
> > + isr = csr_read64(LOONGARCH_CSR_ISR3);
> > + }
> > +
> > + if (isr & (1UL << (vector % VECTORS_PER_REG))) {
> > + mp_ops.send_ipi_single(cpu, ACTION_CLEAR_VECTOR);
>
> Is it guaranteed that the device vector is handled _before_ the next
> IPI is handled? If not this is a live lock.
I was also concerned about this in an early version, but in the end
Tianyang confirmed that it is OK.

>
> > + continue;
> > + }
> > + list_del(&adata->entry);
> > + irq_matrix_free(loongarch_avec.vector_matrix, cpu, vector, adata->managed);
> > + this_cpu_write(irq_map[vector], NULL);
> > + adata->prev_cpu = adata->cpu;
> > + adata->prev_vec = adata->vec;
> > + adata->moving = 0;
> > + }
> > +
> > + raw_spin_unlock(&loongarch_avec.lock);
> > +}
> > +#endif
> > +
> > +static void avecintc_compose_msi_msg(struct irq_data *d, struct msi_msg *msg)
> > +{
> > + struct avecintc_data *adata;
> > +
> > + adata = irq_data_get_irq_chip_data(d);
>
> Move the assignment up to the declaration.
OK, thanks.
> See bracket rules.
Do you mean even if there is only one statement in the if condition,
we still need to do like this?
if (xxx) {
yyy;
} else {
zzz;
}

>
> > + spurious_interrupt();
> > + pr_warn("Unexpected IRQ occurs on CPU#%d [vector %ld]\n", smp_processor_id(), vector);
> > + }
>
> > + }
> > +
> > + chained_irq_exit(chip, desc);
> > +}
> > +
> > +static int avecintc_domain_alloc(struct irq_domain *domain,
> > + unsigned int virq, unsigned int nr_irqs, void *arg)
> > +{
> > + unsigned int cpu, i, ret;
> > + unsigned long flags;
> > + struct irq_data *irqd;
> > + struct avecintc_data *adata;
> > +
> > + raw_spin_lock_irqsave(&loongarch_avec.lock, flags);
>
>
> guard(raw_spinlock_irqsave)(....);
OK, thanks.

>
> > + for (i = 0; i < nr_irqs; i++) {
> > + irqd = irq_domain_get_irq_data(domain, virq + i);
> > + adata = kzalloc(sizeof(*adata), GFP_KERNEL);
> > + if (!adata) {
> > + raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
> > + return -ENOMEM;
> > + }
> > +
> > + ret = irq_matrix_alloc(loongarch_avec.vector_matrix, cpu_online_mask, false, &cpu);
> > + if (ret < 0) {
> > + raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
> > + return ret;
> > + }
> > +
> > + adata->prev_cpu = adata->cpu = cpu;
> > + adata->prev_vec = adata->vec = ret;
> > + adata->managed = irqd_affinity_is_managed(irqd);
> > + irq_domain_set_info(domain, virq + i, virq + i, &avec_irq_controller,
> > + adata, handle_edge_irq, NULL, NULL);
> > + adata->moving = 0;
>
> Initialize first before doing anything else.
OK, thanks.

>
> > + irqd_set_single_target(irqd);
> > + irqd_set_affinity_on_activate(irqd);
> > +
> > + per_cpu_ptr(irq_map, adata->cpu)[adata->vec] = irq_data_to_desc(irqd);
> > + }
> > + raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
> > +
> > + return 0;
> > +}
> > +
> > +static void clear_free_vector(struct irq_data *irqd)
> > +{
> > + bool managed = irqd_affinity_is_managed(irqd);
> > + struct avecintc_data *adata = irq_data_get_irq_chip_data(irqd);
> > +
> > + per_cpu(irq_map, adata->cpu)[adata->vec] = NULL;
> > + irq_matrix_free(loongarch_avec.vector_matrix, adata->cpu, adata->vec, managed);
> > + adata->cpu = 0;
>
> 0 is a valid CPU number, no?
Yes, UINT_MAX is better.

>
> > + adata->vec = 0;
> > +
> > +#ifdef CONFIG_SMP
> > + if (!adata->moving)
> > + return;
> > +
> > + per_cpu(irq_map, adata->prev_cpu)[adata->prev_vec] = NULL;
> > + irq_matrix_free(loongarch_avec.vector_matrix,
> > + adata->prev_cpu, adata->prev_vec, adata->managed);
> > + adata->moving = 0;
> > + adata->prev_vec = 0;
> > + adata->prev_cpu = 0;
> > + list_del_init(&adata->entry);
> > +#endif
> > +}
> > +
> > +static void avecintc_domain_free(struct irq_domain *domain,
> > + unsigned int virq, unsigned int nr_irqs)
> > +{
> > + unsigned int i;
> > + unsigned long flags;
> > + struct irq_data *d;
> > +
> > + raw_spin_lock_irqsave(&loongarch_avec.lock, flags);
>
> guard()
OK, thanks.
> The code above just failed to allocate the matrix, so why are you trying
> to free it?
Yes, this is a mistake.

>
> And this happily fails to uninstall the chained handler.
>
> > +out_remove_domain:
> > + irq_domain_remove(loongarch_avec.domain);
> > +out_free_handle:
> > + irq_domain_free_fwnode(loongarch_avec.fwnode);
> > +out:
> > + return ret;
> > +}
> > +
> Please don't do that. Convert this to use per device MSI domains.
OK, thanks. But it is better to split the conversion to another patch
(so we can convert both V1 and V2).

Huacai
>
> Thanks,
>
> tglx

Reply all
Reply to author
Forward
0 new messages