Hi. Today I updated my Julia installation (see versioninfo() below) and noticed a big slowdown when using @threads in this simple code:
using Base.Threads
"""
    constVel(n1, n2, v)

Return an `n1`-by-`n2` array whose every entry equals `v`.

`n1`, `n2` and `v` must all be strictly positive; an `AssertionError` is
raised otherwise.
"""
function constVel{T<:AbstractFloat, N<:Integer}(n1::N, n2::N, v::T)
    @assert(n1 > zero(N))
    @assert(n2 > zero(N))
    @assert(v > zero(T))
    # `fill` builds the constant array in one allocation instead of creating a
    # matrix of ones and then a second, scaled copy of it.
    return fill(v, n1, n2)
end
"""
    ricker(fp, t)

Evaluate the wavelet

    w(t) = (1 - 2*(pi*fp)^2*(t - t0)^2) * exp(-(pi*fp*(t - t0))^2),   t0 = 1.05/fp

at every sample of the vector `t` and return the samples as a new vector.

`fp` and the elements of `t` must share the same floating-point type `T`;
the result has element type `T`.
"""
function ricker{T<:AbstractFloat}(fp::T, t::Array{T,1})
    # Convert the literal to T so that wider types are not limited to Float32
    # precision; for Float32 this is exactly 1.05f0.
    t0 = convert(T, 1.05)/fp
    tau = t - t0          # time relative to the wavelet centre
    a = pi*fp
    # The two factors are deliberately evaluated in the same order as before
    # ((a*tau)^2 for the exponent, (2*a^2)*tau^2 for the polynomial) so that
    # Float32 results are unchanged.
    envelope = exp(-((a.*tau).^2))
    return envelope.*(one(T) - (2*a^2).*tau.^2)
end
"""
    fdstar(d1, d2, c2dt2, pp, p, pm, src)

Apply one explicit finite-difference update on the interior of the grid and
store it in `pp`:

    pp = 2p - pm + src + c2dt2 .* (D1(p) + D2(p))

where `D1` and `D2` are the three-point second differences `[1 -2 1]/d^2`
along the first and second dimension (spacings `d1` and `d2`).

Only the indices `2:n1-1`, `2:n2-1` (with `n1, n2 = size(c2dt2)`) are written;
all other entries of `pp` are left untouched. The outer loop over the second
dimension is distributed with `@threads`.

Throws `DimensionMismatch` if `pp`, `p`, `pm` or `src` is smaller than `c2dt2`
in either dimension. Returns `nothing`.
"""
function fdstar{T<:AbstractFloat}(d1::T, d2::T,
                                  c2dt2::Array{T,2},
                                  pp::Array{T,2}, p::Array{T,2},
                                  pm::Array{T,2}, src::Array{T,2})
    n1, n2 = size(c2dt2)

    # The update runs under @inbounds, so an undersized array would be read or
    # written out of bounds silently. Validate once, outside the hot loop.
    for a in (pp, p, pm, src)
        if size(a, 1) < n1 || size(a, 2) < n2
            throw(DimensionMismatch(
                "array of size $(size(a)) is smaller than c2dt2 of size $(size(c2dt2))"))
        end
    end

    # Stencil weights [1 -2 1]/d^2 held as scalars, so no temporary arrays are
    # allocated on each call.
    side1 = one(T)/(d1*d1)
    mid1 = convert(T, -2)/(d1*d1)
    side2 = one(T)/(d2*d2)
    mid2 = convert(T, -2)/(d2*d2)

    @threads for i2 = 2:(n2-1)
        @simd for i1 = 2:(n1-1)
            @inbounds pp[i1,i2] = (2*p[i1,i2] - pm[i1,i2] + src[i1,i2] +
                c2dt2[i1,i2]*(
                    (side1*p[i1+1,i2] + mid1*p[i1,i2] + side1*p[i1-1,i2]) +
                    (side2*p[i1,i2+1] + mid2*p[i1,i2] + side2*p[i1,i2-1])))
        end
    end
    return nothing
end
"""
    modeling(d1, d2, dt, v, w, is1, is2)

Run `length(w)` time steps of `fdstar` on a grid the size of `v`.

The coefficient array is `v[i]^2 * dt^2`. The source samples `w` are scaled by
that coefficient at `(is1, is2)` divided by `d1*d2`, and one scaled sample is
written into the source array at `(is1, is2)` before each step. After every
step the three wavefield buffers are rotated. A summary line of the run
parameters is printed before the time loop starts.

All sizes, spacings and source indices must be strictly positive, and the
source indices must be smaller than the corresponding grid dimensions;
violations raise an `AssertionError`. Returns `nothing`.
"""
function modeling{T<:AbstractFloat, N<:Integer}(d1::T, d2::T, dt::T,
                                                v::Array{T,2}, w::Array{T,1},
                                                is1::N, is2::N)
    @assert(length(v) > zero(N))
    @assert(d1 > zero(T))
    @assert(d2 > zero(T))
    @assert(is1 > zero(N))
    @assert(is2 > zero(N))
    @assert(length(w) > zero(N))

    nsteps::N = length(w)
    n1, n2 = size(v)

    # Three wavefield buffers (next, current, previous) plus the source term.
    nxt = zeros(T, n1, n2)
    cur = zeros(T, n1, n2)
    prv = zeros(T, n1, n2)
    source = zeros(T, n1, n2)

    # Per-cell coefficient v^2 * dt^2.
    coef = similar(v)
    for idx in eachindex(v)
        vel = v[idx]
        coef[idx] = vel*vel*dt*dt
    end

    amp = coef[is1,is2]*(one(T)/(d1*d2))
    scaled = w.*amp

    @printf("c2dt2@src=%f d1=%f d2=%f dt=%f is1=%d is2=%d sscale=%f nt=%d\n",
            coef[is1,is2], d1, d2, dt, is1, is2, amp, nsteps)

    @assert(is1 < n1)
    @assert(is2 < n2 )

    for step in 1:nsteps
        source[is1,is2] = scaled[step]
        fdstar(d1, d2, coef, nxt, cur, prv, source)
        # Rotate the buffers: current -> previous, next -> current, and the old
        # previous buffer is reused as the next output.
        prv, cur, nxt = cur, nxt, prv
    end
    return nothing
end
"""
    main(n1, n2, nt, d1, d2, dt, v0, fp)

Set up and time a single `modeling` run.

Builds an `n1`-by-`n2` constant model with value `v0` via `constVel`, a time
axis `0, dt, ..., (nt-1)*dt`, and the `ricker` wavelet for `fp` on that axis.
The source is placed at `(ceil(n1/2), ceil(n2/2))`. The `modeling` call is
wrapped in `@time`, and its result is returned.

All arguments must be strictly positive (checked with `@assert`).
"""
function main{T<:AbstractFloat, N<:Integer}(n1::N, n2::N, nt::N,
                                            d1::T, d2::T, dt::T,
                                            v0::T, fp::T)
    @assert(n1 > zero(N))
    @assert(n2 > zero(N))
    @assert(d1 > zero(T))
    @assert(d2 > zero(T))
    @assert(dt > zero(T))
    @assert(v0 > zero(T))

    velocity = constVel(n1, n2, v0)
    taxis = collect(0:(nt-1))*dt
    wavelet = ricker(fp, taxis)

    # Source indices at the middle of the grid, converted back to N.
    srcrow::N = ceil(Int, n1/2)
    srccol::N = ceil(Int, n2/2)

    return @time modeling(d1, d2, dt, velocity, wavelet, srcrow, srccol)
end
# Scale factor 1/1000 in Float32 (presumably milliseconds expressed in
# seconds, going by the name).
ms = 1.0f0 / 1000.0f0

# Driver run: 301 x 301 grid, 8001 time steps.
main(301, 301, 8001,             # n1, n2, nt
     5.0f0, 5.0f0, ms * 0.15f0,  # d1, d2, dt
     1500.0f0, 10.0f0)           # v0, fp
Running this code without @threads takes approximately 9 seconds. With @threads I gave up after a little more than two minutes without it finishing. In addition, when using the @threads option
the following message now appears: "WARNING: could not attach metadata for @simd loop".
Previously I was using a Julia build about twenty-two days old (unfortunately I can't remember the exact commit) and was getting moderate speedups.
versioninfo():
Julia Version 0.5.0-dev+2436
Commit d1a2937 (2016-01-31 20:48 UTC)
Platform Info:
System: Linux (x86_64-redhat-linux)
CPU: Intel(R) Core(TM) i7-4500U CPU @ 1.80GHz
WORD_SIZE: 64
BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell)
LAPACK: libopenblas64_
LIBM: libopenlibm
LLVM: libLLVM-3.7.1
thanks.