threads slowdown

129 views
Skip to first unread message

alan souza

unread,
Jan 31, 2016, 10:43:27 PM1/31/16
to julia-dev
Hi.  Today I updated  my Julia installation (versioninfo() below) and I noticed a big slowdown when using @threads in this simple code

using Base.Threads

function constVel{T<:AbstractFloat, N<:Integer}(n1::N, n2::N, v::T)
    @assert(n1 > zero(N)); @assert(n2 > zero(N))
    @assert(v > zero(T));
    return ones(T, n1, n2)*v;
end

ricker(fp::Float32, t::Array{Float32,1}) = ( t0 = 1.05f0/fp;
                                             e.^(-(pi*fp.*(t-t0)).^2).*(1.0f0 -
                                             2.0f0.*(pi*fp)^2.*(t-t0).^2) )

function fdstar{T<:AbstractFloat}(d1::T, d2::T,
                                        c2dt2::Array{T,2},
                                        pp::Array{T,2}, p::Array{T,2},
                                        pm::Array{T,2}, src::Array{T,2})
    n1, n2 = size(c2dt2);

    const c1  = T[1.0 -2.0 1.0]/(d1*d1)
    const c2  = T[1.0 -2.0 1.0]/(d2*d2)
   

                        #for i2=2:(n2-1)  
    @threads for i2=2:(n2-1)
        @simd for i1=2:(n1-1)
            @inbounds pp[i1,i2] = (2*p[i1,i2] - pm[i1,i2] + src[i1,i2] +
                                   c2dt2[i1,i2]*(
                                   (c1[1]*p[i1+1,i2] +
                                    c1[2]*p[i1+0,i2] +
                                    c1[3]*p[i1-1,i2])
                                   +
                                   (c2[1]*p[i1,i2+1] +
                                    c2[2]*p[i1,i2+0] +
                                    c2[3]*p[i1,i2-1]) ))
        end
    end
end

function modeling{T<:AbstractFloat, N<:Integer}(d1::T, d2::T, dt::T,
                                                v::Array{T,2}, w::Array{T,1},
                                                is1::N, is2::N)
    @assert(length(v) > zero(N)); @assert(d1 > zero(T));
    @assert(d2 > zero(T)); @assert(is1 > zero(N)); @assert(is2 > zero(N));
    @assert(length(w) > zero(N));

    const nt::N = length(w)
    const n1, n2      = size(v)
   
    pp    = zeros(T, n1, n2)
    p     = zeros(T, n1, n2)
    pm    = zeros(T, n1, n2)
    c2dt2 = zeros(T, n1, n2)
    src   = zeros(T, n1, n2)

    for i in eachindex(v)
        c2dt2[i] = v[i]*v[i]*dt*dt
    end
   
    const sscale = (c2dt2[is1,is2]*(one(T)/(d1*d2)));
    wav   = w.*sscale
   
    @printf("c2dt2@src=%f d1=%f d2=%f dt=%f is1=%d is2=%d sscale=%f nt=%d\n",
            c2dt2[is1,is2],d1,d2,dt, is1, is2, sscale, nt);
   
    @assert(is1 < n1);            @assert(is2 < n2 );
   
    for it=0:(nt-1)
        src[is1,is2] = wav[it+1]
        fdstar(d1, d2, c2dt2, pp, p, pm, src)
        tmp = pm; pm = p; p = pp; pp = tmp;
    end
end

function main{T<:AbstractFloat, N<:Integer}(n1::N, n2::N, nt::N,
                                            d1::T, d2::T, dt::T,
                                            v0::T, fp::T)
    @assert(n1 > zero(N)); @assert(n2 > zero(N))
    @assert(d1 > zero(T)); @assert(d2 > zero(T))
    @assert(dt > zero(T)); @assert(v0 > zero(T))
   
    const vel  = constVel(n1, n2, v0)
    const time = collect(0:(nt-1))*dt
    const wav  = ricker(fp, time)
    const is1::N =  ceil(Int, n1/2);
    const is2::N =  ceil(Int, n2/2);
   
    @time modeling(d1, d2, dt, vel, wav, is1, is2)
end

ms=1.0f0/1000.0f0

main(301, 301, 8001, 5.0f0, 5.0f0, ms*0.15f0, 1500.0f0, 10.0f0)

Running this code without @threads  take approximately 9 seconds. When using @threads I gave up after a little more of two minutes without ending. Besides  when using the @threads option
now appears the following message: "WARNING: could not attach metadata for @simd loop".

I was using a Julia version twenty two days old (unfortunately I cant remember the exact commit) and I was getting moderate speedups.

versioninfo():

Julia Version 0.5.0-dev+2436
Commit d1a2937 (2016-01-31 20:48 UTC)
Platform Info:
  System: Linux (x86_64-redhat-linux)
  CPU: Intel(R) Core(TM) i7-4500U CPU @ 1.80GHz
  WORD_SIZE: 64
  BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell)
  LAPACK: libopenblas64_
  LIBM: libopenlibm
  LLVM: libLLVM-3.7.1

thanks.
Reply all
Reply to author
Forward
0 new messages