function test()
a = float32([1:38192])
b = Array(Complex64, length(a))
tic()
for i = 1:100
b = fft(a)
end
toc();
end
This takes about 0.146 seconds (Matlab takes about 0.06 seconds)
Now, I have implemented three helper functions:
function get_fft_plan(ri)
ro = Array(Complex64, length(ri)>>1 + 1)
r2c = Base.FFTW.Plan(ri, ro, 1, Base.FFTW.PATIENT, Base.FFTW.NO_TIMELIMIT)
return ro, r2c
end
function execute_fft_plan(ri, ro, r2c, symmetry)
Base.FFTW.execute(r2c.plan, ri, ro)
if ~(symmetry)
return ro
else symmetry
roo = make_fft_symmetry(ro)
return roo
end
end
function make_fft_symmetry(ro)
len = 2*(length(ro) - 1)
roo = Array(Complex64, len)
roo[1] = ro[1]
for i = 2:len/2+1
roo[i] = conj(ro[i])
roo[end-i+2] = ro[i]
end
return roo
end
Now, my updated test function looks like
function test()
a = float32([1:38192])
b = Array(Complex64, length(a))
ro, r2c = get_fft_plan(a)
tic()
for i = 1:100
b = execute_fft_plan(a, ro, r2c, true)
end
toc();
endfunction get_fft_plan(ri; nthread=1)
if nthread > 1
Base.FFTW.set_num_threads(nthread)
end
ro = Array(Complex64, length(ri)>>1 + 1)
r2c = Base.FFTW.Plan(ri, ro, 1, Base.FFTW.PATIENT, Base.FFTW.NO_TIMELIMIT)
return ro, r2c
end
Now, the execution time is about 0.05 seconds with nthread=8 (Matlab takes about 0.06 seconds).
function get_fft_plan(ri; nthread=1)
ro = Array(Complex64, length(ri)>>1 + 1)
roo = Array(Complex64, length(ri))
if nthread > 1
Base.FFTW.set_num_threads(nthread)
end
r2c = Base.FFTW.Plan(ri, ro, 1, Base.FFTW.PATIENT, Base.FFTW.NO_TIMELIMIT)
return ro, roo, r2c
end
function execute_fft_plan!(ri, ro, roo, r2c; full=true)
Base.FFTW.execute(r2c.plan, ri, ro)
if full
len = 2*(length(ro) - 1)
roo[1] = ro[1]
for i = 2:len/2+1
roo[i] = conj(ro[i])
roo[end-i+2] = ro[i]
end
end
endfunction test()
FF = fast_fft
a = float32([1:38192])
ro, roo, r2c = FF.get_fft_plan(a, nthread=8)
tic()
for i = 1:100
FF.execute_fft_plan!(a, ro, roo, r2c)
end
toc();
endThe variable "roo" hold the full length fft result.Now the execution time is 0.063 seconds (nthread=1) and 0.032 seconds (nthread=8). This is a big improvement over the standard fft (0.146 seconds, please refer the first posting to this thread) and matlab (0.060 seconds).
What is the point of making the FFT output full length? Anything you might want to do you can do with the half-length array.
If the full-length results are only for validation, it seems a bit pointless to optimize their performance.
I'm guessing Matlab ships with a huge pile of pre-generated wisdom?
Perhaps we should do a blog post on FFTs in julia that captures the discussion in this thread. Some of this stuff can also be included in the performance section of the manual.-viral
Hmm. I wonder who should write that post...
While we're discussing this, I still think it's a bit strange that fft_plan returns an anonymous function, it just somehow doesn't seem very julian. In fact, I think there's room to improve the whole fft interface, perhaps in a way that more closely matches the underlying FFTW interface, rather than mimicking the matlab interface.
The thinking was that the only useful thing to do with a plan, under normal circumstances, is to execute it. But maybe that was the wrong decision.
Is there some technical reason you can't save a single plan? Would it make sense to add the ability to save and load individual plans to FFTW?
How big are wisdom files, in general?
float32([1:38192]) by float64([1:38192]) but the script does not work,Base.FFTW.Plan(ri, ro, 1, Base.FFTW.PATIENT, Base.FFTW.NO_TIMELIMIT),
I was wondering if this just work for float32 arrays? In case to be positive
could anyone recommend me another way to optimize the fft routine?
Thank you so much and I like so much Julia.
I am trying to optimize FFT performance following the suggestion in the discussion .I have the following testing function:
function test()
a = float32([1:38192])
b = Array(Complex64, length(a))
tic()
for i = 1:100
b = fft(a)
end
toc();
end
This takes about 0.146 seconds (Matlab takes about 0.06 seconds)Now, I have implemented three helper functions:
function get_fft_plan(ri)
ro = Array(Complex64, length(ri)>>1 + 1)
r2c = Base.FFTW.Plan(ri, ro, 1, Base.FFTW.PATIENT, Base.FFTW.NO_TIMELIMIT)
return ro, r2c
end
function execute_fft_plan(ri, ro, r2c, symmetry)
Base.FFTW.execute(r2c.plan, ri, ro)
if ~(symmetry)
return ro
else symmetry
roo = make_fft_symmetry(ro)
return roo
end
end
function make_fft_symmetry(ro)
len = 2*(length(ro) - 1)
roo = Array(Complex64, len)
roo[1] = ro[1]
for i = 2:len/2+1
roo[i] = conj(ro[i])
roo[end-i+2] = ro[i]
end
return roo
end
Now, my updated test function looks like
function test()
a = float32([1:38192])
b = Array(Complex64, length(a))
ro, r2c = get_fft_plan(a)
tic()
for i = 1:100
b = execute_fft_plan(a, ro, r2c, true)
end
toc();
end