bool extendframes(THDoubleTensor* input, int fext, THDoubleTensor* output) {
    auto result = false;
    auto inputsize1d = input->size[0];
    auto frameextdim = 2 * fext + 1;
    auto tlen = inputsize1d * frameextdim;
    double* inputdata = THDoubleTensor_data(input);
    double* storage = new double[tlen];
    if (storage) {
        auto k = 0u;
        for (auto i = 0; i < inputsize1d; ++i) {
            for (auto j = -fext; j <= fext; j++) {
                // If the extended index is smaller than 0, take the 0th element
                auto tmpidx = i + j > 0 ? (i + j) : 0;
                // Check if the extended index is in range; if it is, use it,
                // otherwise use the last sample
                tmpidx = (tmpidx > inputsize1d - 1) ? inputsize1d - 1 : tmpidx;
                storage[k++] = inputdata[tmpidx];
            }
        }
    }
    THDoubleStorage* outputstorage = THDoubleStorage_newWithData(storage, tlen);
    if (outputstorage) {
        long sizedata[2]   = { inputsize1d, frameextdim };
        long stridedata[2] = { frameextdim, 1 };
        THLongStorage* size   = THLongStorage_newWithData(sizedata, 2);
        THLongStorage* stride = THLongStorage_newWithData(stridedata, 2);
        THDoubleTensor_setStorage(output, outputstorage, 0LL, size, stride);
        result = true;
    }
    return result;
}
extern "C" {
bool extendframes_d(THDoubleTensor* input, int fext, THDoubleTensor* output) { return extendframes(input, fext, output); }
}
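
To make the intended layout concrete: for each input sample i, row i of the output holds the samples i-fext .. i+fext, with indices clamped to the first and last sample at the borders. A self-contained sketch of just that indexing (plain arrays, made-up values, no TH calls), not the function above itself:

#include <algorithm>
#include <cstdio>

int main() {
    const double input[] = { 10, 20, 30, 40 };  // made-up example signal
    const long n = 4;                           // number of input samples
    const int fext = 1;                         // one frame of context per side
    for (long i = 0; i < n; ++i) {
        for (int j = -fext; j <= fext; ++j) {
            long idx = std::min(std::max(i + j, 0L), n - 1);  // clamp to [0, n-1]
            std::printf("%g ", input[idx]);
        }
        std::printf("\n");  // row i = samples i-fext .. i+fext
    }
    return 0;
}

For input {10, 20, 30, 40} and fext = 1 this prints the rows "10 10 20", "10 20 30", "20 30 40", "30 40 40", i.e. the inputsize1d x frameextdim matrix that the storage above is filled with.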
require 'torch'
require 'xlua'
local ffi = require 'ffi'

package.cpath = package.cpath .. ";./extendframes/?.so"

ffi.cdef[[
bool extendframes_d(THDoubleTensor* input, int fext, THDoubleTensor* output);
]]

local cflua = ffi.load(package.searchpath('libextendframes', package.cpath))

function extendframes(input, fext)
    -- Only FloatTensors and DoubleTensors are supported
    if input:type() ~= 'torch.FloatTensor' and input:type() ~= 'torch.DoubleTensor' then
        xlua.error("Error, currently only torch.FloatTensors or torch.DoubleTensors are supported!")
    end
    local out = nil
    if input:type() == 'torch.DoubleTensor' then
        out = torch.DoubleTensor()
        cflua.extendframes_d(input:contiguous():cdata(), fext, out:cdata())
    end
    return out
end

while (true) do
    local tic = torch.tic()
    local extended = extendframes(torch.Tensor(1000000):fill(5), 5)
    collectgarbage()
    print(collectgarbage('count'), torch.toc(tic))
end

local function foo()
    for i = 1, 1000 do
        local a = extendframes(torch.randn(1000), 5)
    end
end

local function foo2()
    foo()
    collectgarbage(); collectgarbage()
    os.execute('ps ax -o rss,user,command | grep luajit | sort -nr')
end
while (true) do
    -- local tic = torch.tic()
    foo2()
    -- local extended = extf(torch.randn(100000),5)
    -- collectgarbage()
    -- print(extended)
    -- print(collectgarbage('count'),torch.toc(tic))
end

The RSS (in KB) reported for the luajit process keeps growing on every iteration:

99832   185848  280152  371916  459676  548520  636800  724244
811304  898152  984868  1071580 1158248 1244792 1331340 1417884

Variants I also tried: copying the buffer into memory obtained from THAlloc (shown here for the float-tensor path),

void* outputstorage = THAlloc(sizeof(storage));
memcpy(outputstorage, storage, sizeof(storage));
if (outputstorage) {
    long sizedata[2]   = { inputsize1d, frameextdim };
    long stridedata[2] = { frameextdim, 1 };
    THLongStorage* size   = THLongStorage_newWithData(sizedata, 2);
    THLongStorage* stride = THLongStorage_newWithData(stridedata, 2);
    THFloatTensor_setStorage(output, (THFloatStorage*)outputstorage, 0LL, size, stride);
    result = true;
}

and allocating the intermediate buffer with malloc instead of new:

double* storage = new double[tlen];
double* storage = (double*) malloc(tlen*sizeof(double));
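
Two things to note about the THAlloc variant above: sizeof(storage) is the size of the double* pointer itself (typically 8 bytes), not of the buffer it points to, so only a handful of bytes get allocated and copied; the byte count should be tlen * sizeof(double). Also, THAlloc returns a raw buffer, not a storage struct, so the buffer still needs to be wrapped with THFloatStorage_newWithData (or the double equivalent) before being handed to setStorage. A tiny self-contained illustration of the sizeof difference:

#include <cstdio>
#include <cstdlib>

int main() {
    const size_t tlen = 100;
    double* storage = (double*) malloc(tlen * sizeof(double));
    // sizeof(storage) is the size of the pointer, not of the allocation:
    std::printf("sizeof(storage) = %zu, tlen * sizeof(double) = %zu\n",
                sizeof(storage), tlen * sizeof(double));   // e.g. 8 vs 800
    free(storage);
    return 0;
}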
Hi Heini,

I'm late to the party, but you don't free the THStorage; that's why the memory is not getting deallocated.

THDoubleStorage* outputstorage = THDoubleStorage_newWithData(storage, tlen);
// This makes the refcount of outputstorage 1
THDoubleTensor_setStorage(output, outputstorage, 0LL, size, stride);
// This makes the refcount of outputstorage 2
// Next, you need to do
THDoubleStorage_free(outputstorage);
// so that its refcount is correctly back to 1 (so that when the tensor is
// deallocated, the storage is also deallocated)

Also, don't allocate the data with C++ new, but use THAlloc or malloc, because new/delete are not always compatible with malloc/free.
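
For completeness, a rough sketch of how the storage setup in extendframes could look with both suggestions applied (THAlloc instead of new, plus a THDoubleStorage_free once the tensor holds the storage). It also switches to THDoubleTensor_setStorage2d with explicit sizes and strides instead of building separate THLongStorage objects, which is a deviation from the original code; treat it as an illustration, not a drop-in replacement:

// Allocate the intermediate buffer with THAlloc so the TH allocator can
// release it later (new[]'d memory is not guaranteed to be free()-compatible).
double* storage = static_cast<double*>(THAlloc(tlen * sizeof(double)));
// ... fill storage[0 .. tlen-1] with the extended frames exactly as before ...

// Wrap the buffer; the freshly created storage has refcount 1.
THDoubleStorage* outputstorage = THDoubleStorage_newWithData(storage, tlen);

// Attach it to the output tensor as an inputsize1d x frameextdim view;
// setStorage2d retains the storage, so its refcount becomes 2.
THDoubleTensor_setStorage2d(output, outputstorage, 0,
                            inputsize1d, frameextdim,   // size0, stride0
                            frameextdim, 1);            // size1, stride1

// Drop our own reference so the refcount falls back to 1. The tensor is now
// the sole owner, and freeing the tensor on the Lua side frees the data too.
THDoubleStorage_free(outputstorage);
result = true;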