By reusing allocated space, the following patch reduces the time
to run microcode.ctl on a 1024 cpu system from approximately 80
seconds down to 1 or 2 seconds.
Signed-off-by: Dimitri Sivanich <siva...@sgi.com>
---
arch/x86/kernel/microcode_intel.c | 22 ++++++++++++++++------
1 file changed, 16 insertions(+), 6 deletions(-)
Index: linux/arch/x86/kernel/microcode_intel.c
===================================================================
--- linux.orig/arch/x86/kernel/microcode_intel.c
+++ linux/arch/x86/kernel/microcode_intel.c
@@ -343,10 +343,11 @@ static enum ucode_state generic_load_mic
int (*get_ucode_data)(void *, const void *, size_t))
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- u8 *ucode_ptr = data, *new_mc = NULL, *mc;
+ u8 *ucode_ptr = data, *new_mc = NULL, *mc = NULL;
int new_rev = uci->cpu_sig.rev;
unsigned int leftover = size;
enum ucode_state state = UCODE_OK;
+ unsigned int curr_mc_size = 0;
while (leftover) {
struct microcode_header_intel mc_header;
@@ -361,9 +362,15 @@ static enum ucode_state generic_load_mic
break;
}
- mc = vmalloc(mc_size);
- if (!mc)
- break;
+ /* For performance reasons, reuse mc area when possible */
+ if (!mc || mc_size > curr_mc_size) {
+ if (mc)
+ vfree(mc);
+ mc = vmalloc(mc_size);
+ if (!mc)
+ break;
+ curr_mc_size = mc_size;
+ }
if (get_ucode_data(mc, ucode_ptr, mc_size) ||
microcode_sanity_check(mc) < 0) {
@@ -376,13 +383,16 @@ static enum ucode_state generic_load_mic
vfree(new_mc);
new_rev = mc_header.rev;
new_mc = mc;
- } else
- vfree(mc);
+ mc = NULL; /* trigger new vmalloc */
+ }
ucode_ptr += mc_size;
leftover -= mc_size;
}
+ if (mc)
+ vfree(mc);
+
if (leftover) {
if (new_mc)
vfree(new_mc);
This approach seems reasonable in the scope of the current framework.
Acked-by: Dmitry Adamushko <dmitry.a...@gmail.com>
However, I think a better approach would be to have some kind of
shared storage for loaded microcode updates. Given that on the
majority of SMP systems all the cpus are normally updated to the very
same microcode image, it should be enough to do the search once for
the first cpu, cache that image, and then reuse it for the
others.
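A minimal sketch of that idea, using made-up ucode_cache_* helpers
that do not exist in the current driver, could look roughly like this:

#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/vmalloc.h>

static DEFINE_MUTEX(ucode_cache_mutex);
static void *ucode_cache;		/* image found for the first cpu */
static unsigned int ucode_cache_size;
static unsigned int ucode_cache_rev;

/* Return the cached image if it carries the revision we are looking for. */
static void *ucode_cache_lookup(unsigned int rev)
{
	void *mc = NULL;

	mutex_lock(&ucode_cache_mutex);
	if (ucode_cache && ucode_cache_rev == rev)
		mc = ucode_cache;
	mutex_unlock(&ucode_cache_mutex);

	return mc;
}

/* Remember the image applied to the first cpu so the others can reuse it. */
static void ucode_cache_store(const void *mc, unsigned int size,
			      unsigned int rev)
{
	mutex_lock(&ucode_cache_mutex);
	if (!ucode_cache || ucode_cache_size < size) {
		if (ucode_cache)
			vfree(ucode_cache);
		ucode_cache = vmalloc(size);
		ucode_cache_size = ucode_cache ? size : 0;
	}
	if (ucode_cache) {
		memcpy(ucode_cache, mc, size);
		ucode_cache_rev = rev;
	}
	mutex_unlock(&ucode_cache_mutex);
}

A caller that gets a non-NULL pointer back from ucode_cache_lookup()
would apply it directly and must not vfree() it itself.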
-- Dmitry
And/or update processors in parallel.
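A rough sketch of that variant, assuming a made-up apply_cached_ucode()
callback rather than the driver's real per-cpu apply path, might be as
simple as:

#include <linux/smp.h>

/* Made-up callback: would write the cached image via the usual wrmsr sequence. */
static void apply_cached_ucode(void *info)
{
	/* runs on every online cpu, in parallel, with interrupts disabled */
}

static void update_all_cpus(void)
{
	/* last argument: wait until all cpus have finished */
	on_each_cpu(apply_cached_ucode, NULL, 1);
}

The real driver would still need to check each cpu's signature and
serialize against cpu hotplug; this only illustrates the fan-out.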
Systems that size are probably not booted all that often. Something to consider
before putting a lot of effort into it, I think.
--
Bill Davidsen <davi...@tmr.com>
"We have more to fear from the bungling of the incompetent than from
the machinations of the wicked." - from Slashdot
x86: Improve Intel microcode loader performance
We've noticed that on large SGI UV system configurations,
running microcode.ctl can take very long periods of time. This
is due to the large number of vmalloc/vfree calls made by the
Intel generic_load_microcode() logic.
By reusing allocated space, the following patch reduces the time
to run microcode.ctl on a 1024 cpu system from approximately 80
seconds down to 1 or 2 seconds.
Signed-off-by: Dimitri Sivanich <siva...@sgi.com>
Acked-by: Dmitry Adamushko <dmitry.a...@gmail.com>
Cc: Avi Kivity <a...@redhat.com>
Cc: Bill Davidsen <davi...@tmr.com>
LKML-Reference: <20100305174...@sgi.com>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---
arch/x86/kernel/microcode_intel.c | 22 ++++++++++++++++------
1 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 85a343e..3561702 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -343,10 +343,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
int (*get_ucode_data)(void *, const void *, size_t))
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- u8 *ucode_ptr = data, *new_mc = NULL, *mc;
+ u8 *ucode_ptr = data, *new_mc = NULL, *mc = NULL;
int new_rev = uci->cpu_sig.rev;
unsigned int leftover = size;
enum ucode_state state = UCODE_OK;
+ unsigned int curr_mc_size = 0;
while (leftover) {
struct microcode_header_intel mc_header;
@@ -361,9 +362,15 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
break;
}
- mc = vmalloc(mc_size);
- if (!mc)
- break;
+ /* For performance reasons, reuse mc area when possible */
+ if (!mc || mc_size > curr_mc_size) {
+ if (mc)
+ vfree(mc);
+ mc = vmalloc(mc_size);
+ if (!mc)
+ break;
+ curr_mc_size = mc_size;
+ }
if (get_ucode_data(mc, ucode_ptr, mc_size) ||
microcode_sanity_check(mc) < 0) {
@@ -376,13 +383,16 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,