diff --git a/arch/x86_64/kernel/tce.c b/arch/x86_64/kernel/tce.c
index f61fb8e..afbb951 100644
--- a/arch/x86_64/kernel/tce.c
+++ b/arch/x86_64/kernel/tce.c
@@ -42,7 +42,7 @@ static inline void flush_tce(void* tceaddr)
if (cpu_has_clflush)
asm volatile("clflush (%0)" :: "r" (tceaddr));
else
- asm volatile("wbinvd":::"memory");
+ wbinvd();
}
void tce_build(struct iommu_table *tbl, unsigned long index,
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index 9148f4a..0a75790 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -77,7 +77,7 @@ static void flush_kernel_map(void *arg)
much cheaper than WBINVD. Disable clflush for now because
the high level code is not ready yet */
if (1 || !cpu_has_clflush)
- asm volatile("wbinvd" ::: "memory");
+ wbinvd();
else list_for_each_entry(pg, l, lru) {
void *adr = page_address(pg);
if (cpu_has_clflush)
I guess it can just be removed there. I don't think there are any Calgary
machines without clflush.
> }
>
> void tce_build(struct iommu_table *tbl, unsigned long index,
> diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
> index 9148f4a..0a75790 100644
> --- a/arch/x86_64/mm/pageattr.c
> +++ b/arch/x86_64/mm/pageattr.c
> @@ -77,7 +77,7 @@ static void flush_kernel_map(void *arg)
> much cheaper than WBINVD. Disable clflush for now because
> the high level code is not ready yet */
> if (1 || !cpu_has_clflush)
> - asm volatile("wbinvd" ::: "memory");
> + wbinvd();
> else list_for_each_entry(pg, l, lru) {
> void *adr = page_address(pg);
> if (cpu_has_clflush)
>
This code has changed recently in the queue. Please resubmit later.
-Andi
That seems like an unsafe thing to do (unless we err out somewhere), lest
there be, say, a microcode update disabling clflush due to an erratum.
It also seems to me we should have a macro for clflush(), especially
since it is "clflush (%0)" :: "r" in a number of places, which really
isn't correct; it should be "clflush %0" : "+m", both to allow
addressing modes to be used and to give gcc a modicum of a hint that
something is going on with a specific memory location.
-hpa
This also removes one instance of hard-coded wbinvd, based on a patch
by Bauder de Oliveira Costa.
Cc: Andi Kleen <an...@firstfloor.org>
Cc: Glauber de Oliveira Costa <gco...@redhat.com>
Signed-off-by: H. Peter Anvin <h...@zytor.com>
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index 37992ff..cb4ded4 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -70,10 +70,10 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
static void cache_flush_page(struct page *p)
{
- unsigned long adr = (unsigned long)page_address(p);
+ void *adr = page_address(p);
int i;
for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
- asm volatile("clflush (%0)" :: "r" (adr + i));
+ clflush(adr+i);
}
static void flush_kernel_map(void *arg)
diff --git a/arch/x86_64/kernel/tce.c b/arch/x86_64/kernel/tce.c
index f61fb8e..c01b535 100644
--- a/arch/x86_64/kernel/tce.c
+++ b/arch/x86_64/kernel/tce.c
@@ -40,9 +40,9 @@ static inline void flush_tce(void* tceaddr)
{
/* a single tce can't cross a cache line */
if (cpu_has_clflush)
- asm volatile("clflush (%0)" :: "r" (tceaddr));
+ clflush(tceaddr);
else
- asm volatile("wbinvd":::"memory");
+ wbinvd();
}
void tce_build(struct iommu_table *tbl, unsigned long index,
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index 9148f4a..863e929 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -65,7 +65,7 @@ static void cache_flush_page(void *adr)
{
int i;
for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
- asm volatile("clflush (%0)" :: "r" (adr + i));
+ clflush(adr+i);
}
static void flush_kernel_map(void *arg)
diff --git a/drivers/char/agp/efficeon-agp.c b/drivers/char/agp/efficeon-agp.c
index df8da72..41f46df 100644
--- a/drivers/char/agp/efficeon-agp.c
+++ b/drivers/char/agp/efficeon-agp.c
@@ -221,7 +221,7 @@ static int efficeon_create_gatt_table(struct agp_bridge_data *bridge)
SetPageReserved(virt_to_page((char *)page));
for (offset = 0; offset < PAGE_SIZE; offset += clflush_chunk)
- asm volatile("clflush %0" : : "m" (*(char *)(page+offset)));
+ clflush((char *)page+offset);
efficeon_private.l1_table[index] = page;
@@ -268,15 +268,16 @@ static int efficeon_insert_memory(struct agp_memory * mem, off_t pg_start, int t
*page = insert;
/* clflush is slow, so don't clflush until we have to */
- if ( last_page &&
- ((unsigned long)page^(unsigned long)last_page) & clflush_mask )
- asm volatile("clflush %0" : : "m" (*last_page));
+ if (last_page &&
+ (((unsigned long)page^(unsigned long)last_page) &
+ clflush_mask))
+ clflush(last_page);
last_page = page;
}
if ( last_page )
- asm volatile("clflush %0" : : "m" (*last_page));
+ clflush(last_page);
agp_bridge->driver->tlb_flush(mem);
return 0;
diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h
index 609756c..d05476d 100644
--- a/include/asm-i386/system.h
+++ b/include/asm-i386/system.h
@@ -160,6 +160,10 @@ static inline void native_wbinvd(void)
asm volatile("wbinvd": : :"memory");
}
+static inline void clflush(volatile void *__p)
+{
+ asm volatile("clflush %0" : "+m" (*(char __force *)__p));
+}
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h
index e4f246d..8634067 100644
--- a/include/asm-x86_64/system.h
+++ b/include/asm-x86_64/system.h
@@ -113,6 +113,11 @@ static inline void write_cr4(unsigned long val)
#endif /* __KERNEL__ */
+static inline void clflush(volatile void *__p)
+{
+ asm volatile("clflush %0" : "+m" (*(char __force *)__p));
+}
+
#define nop() __asm__ __volatile__ ("nop")
#ifdef CONFIG_SMP
I don't see much sense in it. CLFLUSH is not privileged, paravirt
doesn't need to change, and this just adds an unnecessary layer of abstraction.
-Andi
The main reason is that everyone seems to invoke it either incorrectly
or suboptimally.
-hpa
>
> Cc: Andi Kleen <an...@firstfloor.org>
> Cc: Glauber de Oliveira Costa <gco...@redhat.com>
> Signed-off-by: H. Peter Anvin <h...@zytor.com>
That would be Mr. Typo!
>> Cc: Andi Kleen <an...@firstfloor.org>
>> Cc: Glauber de Oliveira Costa <gco...@redhat.com>
I got it right here at least :-/
-hpa
> Andi Kleen wrote:
> > On Friday 20 July 2007 00:55:40 Glauber de Oliveira Costa wrote:
> >> This patch uses the already-existant wbinvd() macro to replace
> >> raw assembly to perform this very same task in some .c files
> >>
> >> Signed-off-by: Glauber de Oliveira Costa <gco...@redhat.com>
> >>
> >> diff --git a/arch/x86_64/kernel/tce.c b/arch/x86_64/kernel/tce.c
> >> index f61fb8e..afbb951 100644
> >> --- a/arch/x86_64/kernel/tce.c
> >> +++ b/arch/x86_64/kernel/tce.c
> >> @@ -42,7 +42,7 @@ static inline void flush_tce(void* tceaddr)
> >> if (cpu_has_clflush)
> >> asm volatile("clflush (%0)" :: "r" (tceaddr));
> >> else
> >> - asm volatile("wbinvd":::"memory");
> >> + wbinvd();
> >
> > I guess it can just be removed there. I don't think there are any
> > Calgary machines without clflush.
> >
> That seems like an unsafe thing to do (unless we err out somewhere), lest
> there be, say, a microcode update disabling clflush due to an
> erratum.
I agree.
Cheers,
Muli
Mostly looks good, but see below.
> diff --git a/drivers/char/agp/efficeon-agp.c b/drivers/char/agp/efficeon-agp.c
> index df8da72..41f46df 100644
> --- a/drivers/char/agp/efficeon-agp.c
> +++ b/drivers/char/agp/efficeon-agp.c
> @@ -221,7 +221,7 @@ static int efficeon_create_gatt_table(struct agp_bridge_data *bridge)
> SetPageReserved(virt_to_page((char *)page));
>
> for (offset = 0; offset < PAGE_SIZE; offset += clflush_chunk)
> - asm volatile("clflush %0" : : "m" (*(char *)(page+offset)));
> + clflush((char *)page+offset);
The original code flushes (*(page+offset)) whereas the new code
flushes (page+offset). Assuming this is a bug fix, it should go as a
separate patch.
> @@ -268,15 +268,16 @@ static int efficeon_insert_memory(struct agp_memory * mem, off_t pg_start, int t
> *page = insert;
>
> /* clflush is slow, so don't clflush until we have to */
> - if ( last_page &&
> - ((unsigned long)page^(unsigned long)last_page) & clflush_mask )
> - asm volatile("clflush %0" : : "m" (*last_page));
> + if (last_page &&
> + (((unsigned long)page^(unsigned long)last_page) &
> + clflush_mask))
> + clflush(last_page);
Same thing.
> last_page = page;
> }
>
> if ( last_page )
> - asm volatile("clflush %0" : : "m" (*last_page));
> + clflush(last_page);
Here too.
Cheers,
Muli
No, it doesn't. There is a * in the inline, as there should be.
-hpa
I guess I don't follow. Compare:
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index 37992ff..cb4ded4 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -70,10 +70,10 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
static void cache_flush_page(struct page *p)
{
- unsigned long adr = (unsigned long)page_address(p);
+ void *adr = page_address(p);
int i;
for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
- asm volatile("clflush (%0)" :: "r" (adr + i));
+ clflush(adr+i);
}
Original code: clflush (adr + i).
New code: clflush (adr + i)
diff --git a/drivers/char/agp/efficeon-agp.c b/drivers/char/agp/efficeon-agp.c
index df8da72..41f46df 100644
--- a/drivers/char/agp/efficeon-agp.c
+++ b/drivers/char/agp/efficeon-agp.c
@@ -221,7 +221,7 @@ static int efficeon_create_gatt_table(struct agp_bridge_data *bridge)
SetPageReserved(virt_to_page((char *)page));
for (offset = 0; offset < PAGE_SIZE; offset += clflush_chunk)
- asm volatile("clflush %0" : : "m" (*(char *)(page+offset)));
+ clflush((char *)page+offset);
Original code: clflush *(page + offset)
^
New code: clflush (page + offset)
So it looks like we have a purely syntactic patch that does something
different than the original code in one of the above places. What am I
missing?
Cheers,
Muli
+static inline void clflush(volatile void *__p)
+{
+ asm volatile("clflush %0" : "+m" (*(char __force *)__p));
^
+}
Ok, let's try again:
You're changing this (pageattr.c)
asm volatile("clflush (%0)" :: "r" (adr + i));
into this:
asm volatile("clflush %0" : "+m" (*(char __force*)(adr + i)));
The original one calls clflush with (adr + i), the new one with (*(adr
+ i)). Are these calls equivalent? If not, and I don't think they are,
you are changing the semantics of the code (presumably because it
fixes a bug), and *that should be a separate patch*.
Cheers,
Muli
Yes, they are. The parentheses that are part of the old assembly
string have the same effect as the asterisk operator in C.
The difference between the two is that the latter form allows the C
compiler to select the addressing mode, which allows the full range of
addressing modes, whereas the former forces it to use a single register
indirect.
-hpa
Just to be absolutely obvious about it:
: tazenda 15 ; cat demo.c
#define __force
static inline void clflush1(volatile void *__p)
{
asm volatile("clflush %0" : "+m" (*(char __force *)__p));
}
static inline void clflush2(volatile void *__p)
{
asm volatile("clflush (%0)" :: "r" (__p));
}
void demo(void *q)
{
clflush1(q);
clflush2(q);
}
: tazenda 16 ; gcc -m32 -O3 -S demo.c
: tazenda 17 ; cat demo.s
.file "demo.c"
.text
.p2align 4,,15
.globl demo
.type demo, @function
demo:
pushl %ebp
movl %esp, %ebp
movl 8(%ebp), %eax
#APP
clflush (%eax)
clflush (%eax)
#NO_APP
popl %ebp
ret
.size demo, .-demo
.ident "GCC: (GNU) 4.1.2 20070626 (Red Hat 4.1.2-13)"
.section .note.GNU-stack,"",@progbits
Ah, I see. Thanks for the explanation!
Cheers,
Muli
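One thing the demo above does not show is the addressing-mode point: with a
plain pointer, both forms collapse to the same register-indirect access. A
hypothetical variant (not from the thread; demo2 and the file name are made
up) that uses the adr + i pattern from pageattr.c illustrates it -- the "+m"
form lets gcc fold the index into the addressing mode, while the "r" form has
to compute the sum into a register first (exact output depends on the gcc
version and flags):

#define __force

static inline void clflush1(volatile void *__p)
{
	asm volatile("clflush %0" : "+m" (*(char __force *)__p));
}

static inline void clflush2(volatile void *__p)
{
	asm volatile("clflush (%0)" :: "r" (__p));
}

void demo2(char *base, unsigned long i)
{
	clflush1(base + i);	/* can become: clflush (%eax,%edx)           */
	clflush2(base + i);	/* needs e.g.: addl %edx,%eax; clflush (%eax) */
}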
Where is it incorrect?
-Andi
Using "r" or "m" (as opposed as "+m") gives gcc the wrong dependency
information, and it could, at least in theory, cause memory references
to be moved around it -- especially when using "r".
-hpa
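As a hypothetical illustration of that point (not from the thread; the
function names are made up): the "r"-only form declares only the pointer
value as an input, so as far as the documented constraint semantics go,
nothing tells gcc that the asm touches the byte behind it:

static inline void clflush_r(volatile void *p)
{
	/* only the pointer value is an input; the pointed-to byte is not */
	asm volatile("clflush (%0)" :: "r" (p));
}

void mark_and_flush(char *p)
{
	*p = 1;		/* no declared ordering between this store ...  */
	clflush_r(p);	/* ... and the flush of the line that holds *p  */
}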
That doesn't sound correct. Taking the address should act like an escaping
pointer, be equivalent to a read/write, and effectively disable optimizations
on this memory.
-Andi
It might be in the existing gcc for all I know, but there is no explicit
guarantee to that effect. Remember that inline assembly is really
nothing but the guts of gcc exposed to the programmer, and that the gcc
people are very blunt about not wanting to muck with the former to suit
the latter. This means that there are a lot of things that may
accidentally work in one version of gcc which break in another.
There are two documented ways to tell gcc that you're mucking with
memory, and those are "m" constraints ("m" read, "=m" write, "+m"
read/write) and "memory" clobbers; the latter of which affects all memory.
-hpa
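A sketch of those two mechanisms side by side, as applied to clflush (a
hypothetical illustration, not kernel code; the function names are made up):
a targeted "+m" operand, which names exactly the memory being flushed, versus
an "r" input combined with a "memory" clobber, which is also correct but
pessimizes every memory access around the asm:

static inline void clflush_operand(volatile void *p)
{
	/* gcc knows the asm reads and writes this one location */
	asm volatile("clflush %0" : "+m" (*(volatile char *)p));
}

static inline void clflush_clobber(volatile void *p)
{
	/* gcc must assume the asm may touch any memory at all */
	asm volatile("clflush (%0)" :: "r" (p) : "memory");
}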