function vecsumNaive(u,a,b,c,d)julia> @time vecsumNaive(u,a,b,c,d); 0.433867 seconds (517.62 k allocations: 68.861 MB, 9.93% gc time)
julia> @time vecsumNaive(u,a,b,c,d); 0.031010 seconds (60.01 k allocations: 48.684 MB, 18.60% gc time)
julia> @time vecsumNaive(u,a,b,c,d); 0.031556 seconds (60.01 k allocations: 48.684 MB, 12.79% gc time)
julia>
julia> @time vecsumUnrolled(u,a,b,c,d); 0.025642 seconds (10.56 k allocations: 510.544 KB)
julia> @time vecsumUnrolled(u,a,b,c,d); 0.003166 seconds (6 allocations: 8.266 KB)
julia> @time vecsumUnrolled(u,a,b,c,d); 0.003533 seconds (6 allocations: 8.266 KB)
# The Kuramoto-Shivashinksy eqn is (subscripts indicate differentiation)## u_t = -u*u_x - u_xx - u_xxxx on domain [0,Lx] with periodic BCs # or # u_t = L(u) + N(u) ## where L(u) = -u_xx - u_xxxx and N(u) = -u u_x
# The numerical integration scheme: Discretize u in space with a Fourier expansion ## u(x,t) = sum_k uf[k] exp(2pi i k x/Lx)## Discretize u in time with Crank-Nicolson Adams-Bashforth time stepping formula# (I - dt/2 L) u^{n+1} = (I + dt/2 L) u^n + 3dt/2 N^n - dt/2 N^{n-1}## u^{n+1} = (I - dt/2 L)^{-1} [(I + dt/2 L) u^n + 3dt/2 N^n - dt/2 N^{n-1}]## let A = (I + dt/2 L)# let B = (I - dt/2 L)^{-1}, then# # u^{n+1} = B ( A u^n + 3dt/2 N^n - dt/2 N^{n-1})
function ksintegrateNaive(u, Lx, dt, Nt);
# integrate Kuramoto-Shivashinsky eqn for inputs# u = initial condition, array of gridpoint values of u(x) on uniform grid# Lx = periodic domain length# dt = time step# Nt = number of time steps
Nx = length(u) # number of gridpoints kx = vcat(0:Nx/2-1, 0, -Nx/2+1:-1) # integer wavenumbers: exp(2*pi*kx*x/L) alpha = 2*pi*kx/Lx # real wavenumbers: exp(alpha*x) D = 1im*alpha; # spectral D = d/dx operator L = alpha.^2 - alpha.^4 # spectral L = -D^2 - D^4 operator G = -0.5*D # spectral -1/2 D operator, to eval -u u_x = 1/2 d/dx u^2
# convenience variables dt2 = dt/2; dt32 = 3*dt/2; A = ones(Nx) + dt2*L; B = (ones(Nx) - dt2*L).^(-1);
# evaluate nonlinear term Nnf = G.*fft(u.^2); # Nnf == -1/2 d/dx (u^2) = -u u_x, spectral Nn1f = Nnf; # use Nnf1 = Nnf at first time step
# compute Fourier coeffs of u uf = fft(u);
# timestepping loop for n = 0:Nt
# compute new nonlinear term Nn1f = copy(Nnf); # shift nonlinear term in time: N^{n-1} <- N^n Nnf = G.*fft(real(ifft(uf)).^2); # compute Nn = -u u_x
############################################################## # use lots of temporary arrays to evaluate timestepping formula uf = B .* (A .* uf + dt32*Nnf - dt2*Nn1f);
end u = real(ifft(u))end
function ksintegrateUnrolled(u, Lx, dt, Nt);# integrate Kuramoto-Shivashinsky eqn for inputs# u = initial condition, array of gridpoint values of u(x) on uniform grid# Lx = periodic domain length# dt = time step# Nt = number of time steps
Nx = length(u) # number of gridpoints kx = vcat(0:Nx/2-1, 0, -Nx/2+1:-1) # integer wavenumbers: exp(2*pi*kx*x/L) alpha = 2*pi*kx/Lx # real wavenumbers: exp(alpha*x) D = 1im*alpha; # spectral D = d/dx operator L = alpha.^2 - alpha.^4 # spectral L = -D^2 - D^4 operator G = -0.5*D # spectral -1/2 D operator, to eval -u u_x = 1/2 d/dx u^2
# convenience variables dt2 = dt/2; dt32 = 3*dt/2; A = ones(Nx) + dt2*L; B = (ones(Nx) - dt2*L).^(-1);
# compute uf == Fourier coeffs of u and Nnf == Fourier coeffs of -u u_x uf = fft(u); Nnf = G.*fft(u.^2); # Nnf == -1/2 d/dx (u^2) = -u u_x, spectral Nn1f = Nnf; # use Nnf1 = Nnf at first time step
# timestepping loop for n = 0:Nt
Nn1f = copy(Nnf); # shift nonlinear term in time: N^{n-1} <- N^n Nnf = G.*fft(real(ifft(uf)).^2); # compute Nn = -u u_x
############################################################## # unroll array operations to compute timestepping formula for n = 1:length(uf) @fastmath @inbounds uf[n] = B[n]* (A[n] * uf[n] + dt32*Nnf[n] - dt2*Nn1f[n]); end
end u = real(ifft(u))end
Nx = 512;Lx = Nx/16*pi # spatial domain [0, L] periodicdt = 1/16; # discrete time step T = 200; # integrate from t=0 to t=Tnplot = round(Int,1/dt); # save every nploth time stepNt = round(Int, T/dt); # total number of timestepsx = Lx*(0:Nx-1)/Nx;u = cos(x/16).*(1+2*sin(x/16)).*(1+0.01*sin(x/32)); # an initial condition
ulia> include("ksintegrate.jl");
julia> @time ksintegrateNaive(u,Lx,dt,Nt); 1.776498 seconds (3.86 M allocations: 466.410 MB, 2.35% gc time)
julia> @time ksintegrateNaive(u,Lx,dt,Nt); 0.287348 seconds (653.28 k allocations: 328.565 MB, 8.34% gc time)
julia> @time ksintegrateNaive(u,Lx,dt,Nt); 0.287859 seconds (653.28 k allocations: 328.565 MB, 8.37% gc time)
julia>
julia> @time ksintegrateUnrolled(u,Lx,dt,Nt); 0.823053 seconds (23.44 M allocations: 774.411 MB, 7.56% gc time)
julia> @time ksintegrateUnrolled(u,Lx,dt,Nt); 0.836033 seconds (23.41 M allocations: 773.040 MB, 7.52% gc time)
julia> @time ksintegrateUnrolled(u,Lx,dt,Nt); 0.800421 seconds (23.41 M allocations: 773.040 MB, 7.91% gc time)
julia> Nx=88
julia> typeof(Nx)Int64
julia> kx = vcat(0:Nx/2-1, 0, -Nx/2+1:-1)8-element Array{Float64,1}: 0.0 1.0 2.0 3.0 0.0 -3.0 -2.0 -1.0
Nn1f = copy(Nnf);
Though unrelated to unrolling, here copy seems to be redundant since Nnf is assigned to another arrray after that.
u = real(ifft(u))u = real(ifft(uf))