CUDA error Float64 GeForce GTX 850M 4GB (CNMeM is disabled)


Amila Deepal

Oct 3, 2015, 4:08:11 AM
to pylearn-dev
How do I fix the following error?

MemoryError: Error allocating 19360000 bytes of device memory (out of memory).
Apply node that caused the error: GpuElemwise{maximum,no_inplace}(GpuElemwise{maximum,no_inplace}.0, GpuSubtensor{::, ::, int64:int64:int64, int64:int64:int64}.0)
Toposort index: 293
Inputs types: [CudaNdarrayType(float32, 4D), CudaNdarrayType(float32, 4D)]
Inputs shapes: [(50, 200, 22, 22), (50, 200, 22, 22)]
Inputs strides: [(96800, 484, 22, 1), (561800, 2809, 106, 2)]
Inputs values: ['not shown', 'not shown']
Outputs clients: [[GpuElemwise{Composite{Cast{float32}(EQ(i0, i1))},no_inplace}(GpuElemwise{maximum,no_inplace}.0, GpuSubtensor{::, ::, int64:int64:int64, int64:int64:int64}.0), GpuElemwise{Composite{Cast{float32}(EQ(i0, i1))},no_inplace}(GpuElemwise{maximum,no_inplace}.0, GpuElemwise{maximum,no_inplace}.0), GpuElemwise{maximum,no_inplace}(GpuElemwise{maximum,no_inplace}.0, GpuSubtensor{::, ::, int64:int64:int64, int64:int64:int64}.0), GpuElemwise{Composite{Cast{float32}(EQ(i0, i1))},no_inplace}(GpuElemwise{maximum,no_inplace}.0, GpuElemwise{maximum,no_inplace}.0)]]

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
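For scale: the 19,360,000 bytes being requested is exactly one of the (50, 200, 22, 22) float32 inputs listed above, i.e. only about 18.5 MB, so the card's memory must already be almost fully committed by the time this node runs. A quick check of the arithmetic:

# One (50, 200, 22, 22) tensor of float32 values (4 bytes each):
print 50 * 200 * 22 * 22 * 4  # -> 19360000 bytes, ~18.5 MB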




This is my notebook:


import theano
print theano.config.device

output

gpu0
Using gpu device 0: GeForce GTX 850M (CNMeM is disabled)
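As a sanity check (not part of the original notebook), the card's actual free memory can be queried before training starts; this assumes the installed driver's nvidia-smi supports the --query-gpu interface:

import subprocess
# Ask the NVIDIA driver how much device memory is used vs. available.
print subprocess.check_output(
    ['nvidia-smi', '--query-gpu=memory.used,memory.total', '--format=csv'])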

train = open('conv.yaml', 'r').read()
train_params = {'batch_size': 50,
                'output_channels_h2': 200,
                'output_channels_h3': 150,
                'max_epochs': 500,
                'save_path': '.'}
train = train % (train_params)
print train

 

output

!obj:pylearn2.train.Train {
    dataset: &train !obj:xor.XOR {
        which_set: 'train'
    },
    model: !obj:pylearn2.models.mlp.MLP {
        batch_size: 50,
        input_space: !obj:pylearn2.space.Conv2DSpace {
            shape: [64, 64],
            num_channels: 1
        },
        layers: [ !obj:pylearn2.models.mlp.ConvRectifiedLinear {
                     layer_name: 'h2',
                     output_channels: 200,
                     irange: .05,
                     kernel_shape: [12, 12],
                     pool_shape: [11, 11],
                     pool_stride: [2, 2],
                     max_kernel_norm: 1.9365
                 }, !obj:pylearn2.models.mlp.ConvRectifiedLinear {
                     layer_name: 'h3',
                     output_channels: 150,
                     irange: .05,
                     kernel_shape: [12, 12],
                     pool_shape: [11, 11],
                     pool_stride: [2, 2],
                     max_kernel_norm: 1.9365
                 }, !obj:pylearn2.models.mlp.Softmax {
                     max_col_norm: 1.9365,
                     layer_name: 'y',
                     n_classes: 2,
                     istdev: .05
                 }
                ],
    },
    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
        batch_size: 50,
        learning_rate: .01,
        learning_rule: !obj:pylearn2.training_algorithms.learning_rule.Momentum {
            init_momentum: .5
        },
        monitoring_dataset:
            {
                'train' : *train,
                'valid' : !obj:xor.XOR {
                              which_set: 'valid',
                          },
                'test'  : !obj:xor.XOR {
                              which_set: 'test',
                          }
            },
        cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [
            !obj:pylearn2.costs.cost.MethodCost {
                method: 'cost_from_X'
            }, !obj:pylearn2.costs.mlp.WeightDecay {
                coeffs: [.00005, .00005, .00005 ]
            }
            ]
        },
        termination_criterion: !obj:pylearn2.termination_criteria.And {
            criteria: [
                !obj:pylearn2.termination_criteria.MonitorBased {
                    channel_name: "valid_y_misclass",
                    prop_decrease: 0.50,
                    N: 10
                },
                !obj:pylearn2.termination_criteria.EpochCounter {
                    max_epochs: 500
                },
            ]
        },
    },
    extensions:
        [ !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest {
             channel_name: 'valid_y_misclass',
             save_path: "./convolutional_network_best.pkl"
        }, !obj:pylearn2.training_algorithms.learning_rule.MomentumAdjustor {
            start: 1,
            saturate: 10,
            final_momentum: .99
        }
    ]
}
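For a rough sense of scale: the shapes pylearn2 prints below follow from this YAML (detector = input - kernel + 1 for a 'valid' convolution; output = (detector - pool) // stride + 1 after pooling), and one copy of the h2 activations is already a (50, 200, 22, 22) float32 tensor. Several such copies coexist during training (forward pass, pooling, gradients), so shrinking batch_size or output_channels_h2 in train_params is the usual fix. A sketch with my own helper (not pylearn2 API) and hypothetical smaller settings:

def output_side(in_side, kernel, pool, stride):
    # Shape rule matching ConvRectifiedLinear: 'valid' conv, then max pooling.
    detector = in_side - kernel + 1          # 64 -> 53, 22 -> 11
    return (detector - pool) // stride + 1   # 53 -> 22, 11 -> 1

for params in ({'batch_size': 50, 'output_channels_h2': 200},
               {'batch_size': 25, 'output_channels_h2': 100}):  # hypothetical smaller run
    side = output_side(64, 12, 11, 2)
    n_bytes = params['batch_size'] * params['output_channels_h2'] * side * side * 4
    print params, n_bytes  # 19360000 vs 4840000 bytes per copy of h2's output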

 

 

 

from pylearn2.config import yaml_parse
train = yaml_parse.load(train)
train.main_loop()

 

output

Input shape: (64, 64)
/home/amila/Documents/Algorithm/git/pylearn2/pylearn2/model_extensions/norm_constraint.py:96: UserWarning: MaxL2FilterNorm is deprecated and may be removed on or after 2016-01-31. Use ConstrainFilterL2Norm.
  warnings.warn("MaxL2FilterNorm is deprecated and may be removed on or"
Detector space: (53, 53)
WARNING (theano.gof.compilelock): Overriding existing lock by dead process '9016' (I am process '10095')
WARNING:theano.gof.compilelock:Overriding existing lock by dead process '9016' (I am process '10095')
Output space: (22, 22)
Input shape: (22, 22)
Detector space: (11, 11)
Output space: (1, 1)
***************************
<open file 'Dataset/m1.pkl', mode 'rb' at 0x7fd726108d20>
***************************
<open file 'Dataset/m1.pkl', mode 'rb' at 0x7fd726108d20>
***************************
<open file 'Dataset/m1.pkl', mode 'rb' at 0x7fd726108d20>
Parameter and initial learning rate summary:
/home/amila/Documents/Algorithm/git/pylearn2/pylearn2/costs/mlp/__init__.py:105: UserWarning: Coefficients should be given as a dictionary with layer names as key. The support of coefficients as list would be deprecated from 03/06/2015
  warnings.warn("Coefficients should be given as a dictionary "
        h2_W: 0.00999999977648
        h2_b: 0.00999999977648
        h3_W: 0.00999999977648
        h3_b: 0.00999999977648
        softmax_b: 0.00999999977648
        softmax_W: 0.00999999977648
Compiling sgd_update...
Compiling sgd_update done. Time elapsed: 0:03:00.145412
compiling begin_record_entry...
compiling begin_record_entry done. Time elapsed: 0.302745 seconds

Monitored channels:
        learning_rate
        momentum
        test_h2_kernel_norms_max
        test_h2_kernel_norms_mean
        test_h2_kernel_norms_min
        test_h2_max_x_max_u
        test_h2_max_x_mean_u
        test_h2_max_x_min_u
        test_h2_mean_x_max_u
        test_h2_mean_x_mean_u
        test_h2_mean_x_min_u
        test_h2_min_x_max_u
        test_h2_min_x_mean_u
        test_h2_min_x_min_u
        test_h2_range_x_max_u
        test_h2_range_x_mean_u
        test_h2_range_x_min_u
        test_h3_kernel_norms_max
        test_h3_kernel_norms_mean
        test_h3_kernel_norms_min
        test_h3_max_x_max_u
        test_h3_max_x_mean_u
        test_h3_max_x_min_u
        test_h3_mean_x_max_u
        test_h3_mean_x_mean_u
        test_h3_mean_x_min_u
        test_h3_min_x_max_u
        test_h3_min_x_mean_u
        test_h3_min_x_min_u
        test_h3_range_x_max_u
        test_h3_range_x_mean_u
        test_h3_range_x_min_u
        test_objective
        test_term_0
        test_term_1_weight_decay
        test_y_col_norms_max
        test_y_col_norms_mean
        test_y_col_norms_min
        test_y_max_max_class
        test_y_mean_max_class
        test_y_min_max_class
        test_y_misclass
        test_y_nll
        test_y_row_norms_max
        test_y_row_norms_mean
        test_y_row_norms_min
        total_seconds_last_epoch
        train_h2_kernel_norms_max
        train_h2_kernel_norms_mean
        train_h2_kernel_norms_min
        train_h2_max_x_max_u
        train_h2_max_x_mean_u
        train_h2_max_x_min_u
        train_h2_mean_x_max_u
        train_h2_mean_x_mean_u
        train_h2_mean_x_min_u
        train_h2_min_x_max_u
        train_h2_min_x_mean_u
        train_h2_min_x_min_u
        train_h2_range_x_max_u
        train_h2_range_x_mean_u
        train_h2_range_x_min_u
        train_h3_kernel_norms_max
        train_h3_kernel_norms_mean
        train_h3_kernel_norms_min
        train_h3_max_x_max_u
        train_h3_max_x_mean_u
        train_h3_max_x_min_u
        train_h3_mean_x_max_u
        train_h3_mean_x_mean_u
        train_h3_mean_x_min_u
        train_h3_min_x_max_u
        train_h3_min_x_mean_u
        train_h3_min_x_min_u
        train_h3_range_x_max_u
        train_h3_range_x_mean_u
        train_h3_range_x_min_u
        train_objective
        train_term_0
        train_term_1_weight_decay
        train_y_col_norms_max
        train_y_col_norms_mean
        train_y_col_norms_min
        train_y_max_max_class
        train_y_mean_max_class
        train_y_min_max_class
        train_y_misclass
        train_y_nll
        train_y_row_norms_max
        train_y_row_norms_mean
        train_y_row_norms_min
        training_seconds_this_epoch
        valid_h2_kernel_norms_max
        valid_h2_kernel_norms_mean
        valid_h2_kernel_norms_min
        valid_h2_max_x_max_u
        valid_h2_max_x_mean_u
        valid_h2_max_x_min_u
        valid_h2_mean_x_max_u
        valid_h2_mean_x_mean_u
        valid_h2_mean_x_min_u
        valid_h2_min_x_max_u
        valid_h2_min_x_mean_u
        valid_h2_min_x_min_u
        valid_h2_range_x_max_u
        valid_h2_range_x_mean_u
        valid_h2_range_x_min_u
        valid_h3_kernel_norms_max
        valid_h3_kernel_norms_mean
        valid_h3_kernel_norms_min
        valid_h3_max_x_max_u
        valid_h3_max_x_mean_u
        valid_h3_max_x_min_u
        valid_h3_mean_x_max_u
        valid_h3_mean_x_mean_u
        valid_h3_mean_x_min_u
        valid_h3_min_x_max_u
        valid_h3_min_x_mean_u
        valid_h3_min_x_min_u
        valid_h3_range_x_max_u
        valid_h3_range_x_mean_u
        valid_h3_range_x_min_u
        valid_objective
        valid_term_0
        valid_term_1_weight_decay
        valid_y_col_norms_max
        valid_y_col_norms_mean
        valid_y_col_norms_min
        valid_y_max_max_class
        valid_y_mean_max_class
        valid_y_min_max_class
        valid_y_misclass
        valid_y_nll
        valid_y_row_norms_max
        valid_y_row_norms_mean
        valid_y_row_norms_min

Compiling accum...
graph size: 617
graph size: 601
graph size: 601
Compiling accum done. Time elapsed: 45.237817 seconds

Monitoring step:
        Epochs seen: 0
        Batches seen: 0
        Examples seen: 0
        learning_rate: 0.00999999977648
        momentum: 0.5
        test_h2_kernel_norms_max: 0.387725800276
        test_h2_kernel_norms_mean: 0.345946192741
        test_h2_kernel_norms_min: 0.305102437735
        test_h2_max_x_max_u: 230.06086731
        test_h2_max_x_mean_u: 31.6456222534
        test_h2_max_x_min_u: 0.0
        test_h2_mean_x_max_u: 91.5726013184
        test_h2_mean_x_mean_u: 12.9892511368
        test_h2_mean_x_min_u: 0.0
        test_h2_min_x_max_u: 20.9689464569
        test_h2_min_x_mean_u: 2.2737891674
        test_h2_min_x_min_u: 0.0
        test_h2_range_x_max_u: 220.946548462
        test_h2_range_x_mean_u: 29.3718338013
        test_h2_range_x_min_u: 0.0
        test_h3_kernel_norms_max: 1.93650007248
        test_h3_kernel_norms_mean: 1.93649983406
        test_h3_kernel_norms_min: 1.93649971485
        test_h3_max_x_max_u: 283.14831543
        test_h3_max_x_mean_u: 51.1744270325
        test_h3_max_x_min_u: 0.0
        test_h3_mean_x_max_u: 120.546844482
        test_h3_mean_x_mean_u: 21.9558963776
        test_h3_mean_x_min_u: 0.0
        test_h3_min_x_max_u: 23.8653907776
        test_h3_min_x_mean_u: 4.29808712006
        test_h3_min_x_min_u: 0.0
        test_h3_range_x_max_u: 262.027435303
        test_h3_range_x_mean_u: 46.8763389587
        test_h3_range_x_min_u: 0.0
        test_objective: 31.9130859375
        test_term_0: 31.88372612
        test_term_1_weight_decay: 0.0293594188988
        test_y_col_norms_max: 0.633233249187
        test_y_col_norms_mean: 0.597740232944
        test_y_col_norms_min: 0.562247216702
        test_y_max_max_class: 1.0
        test_y_mean_max_class: 0.999999940395
        test_y_min_max_class: 0.999995112419
        test_y_misclass: 0.479999989271
        test_y_nll: 31.88372612
        test_y_row_norms_max: 0.157307714224
        test_y_row_norms_mean: 0.0602008700371
        test_y_row_norms_min: 0.00178229133599
        total_seconds_last_epoch: 0.0
        train_h2_kernel_norms_max: 0.387725800276
        train_h2_kernel_norms_mean: 0.345946192741
        train_h2_kernel_norms_min: 0.305102437735
        train_h2_max_x_max_u: 243.726852417
        train_h2_max_x_mean_u: 33.7648925781
        train_h2_max_x_min_u: 0.0
        train_h2_mean_x_max_u: 99.983215332
        train_h2_mean_x_mean_u: 14.0688905716
        train_h2_mean_x_min_u: 0.0
        train_h2_min_x_max_u: 23.7867488861
        train_h2_min_x_mean_u: 2.76674509048
        train_h2_min_x_min_u: 0.0
        train_h2_range_x_max_u: 230.56652832
        train_h2_range_x_mean_u: 30.9981498718
        train_h2_range_x_min_u: 0.0
        train_h3_kernel_norms_max: 1.93650007248
        train_h3_kernel_norms_mean: 1.93649983406
        train_h3_kernel_norms_min: 1.93649971485
        train_h3_max_x_max_u: 300.967773438
        train_h3_max_x_mean_u: 53.1630554199
        train_h3_max_x_min_u: 0.0
        train_h3_mean_x_max_u: 130.112106323
        train_h3_mean_x_mean_u: 23.4053134918
        train_h3_mean_x_min_u: 0.0
        train_h3_min_x_max_u: 31.2767448425
        train_h3_min_x_mean_u: 5.46948862076
        train_h3_min_x_min_u: 0.0
        train_h3_range_x_max_u: 271.967102051
        train_h3_range_x_mean_u: 47.6935653687
        train_h3_range_x_min_u: 0.0
        train_objective: 41.5000610352
        train_term_0: 41.470703125
        train_term_1_weight_decay: 0.0293594188988
        train_y_col_norms_max: 0.633233249187
        train_y_col_norms_mean: 0.597740232944
        train_y_col_norms_min: 0.562247216702
        train_y_max_max_class: 1.0
        train_y_mean_max_class: 1.0
        train_y_min_max_class: 0.999998927116
        train_y_misclass: 0.534999966621
        train_y_nll: 41.470703125
        train_y_row_norms_max: 0.157307714224
        train_y_row_norms_mean: 0.0602008700371
        train_y_row_norms_min: 0.00178229133599
        training_seconds_this_epoch: 0.0
        valid_h2_kernel_norms_max: 0.387725800276
        valid_h2_kernel_norms_mean: 0.345946192741
        valid_h2_kernel_norms_min: 0.305102437735
        valid_h2_max_x_max_u: 238.583999634
        valid_h2_max_x_mean_u: 33.3813781738
        valid_h2_max_x_min_u: 0.0
        valid_h2_mean_x_max_u: 99.798789978
        valid_h2_mean_x_mean_u: 14.0398483276
        valid_h2_mean_x_min_u: 0.0
        valid_h2_min_x_max_u: 27.0806751251
        valid_h2_min_x_mean_u: 3.11637234688
        valid_h2_min_x_min_u: 0.0
        valid_h2_range_x_max_u: 221.638320923
        valid_h2_range_x_mean_u: 30.2650108337
        valid_h2_range_x_min_u: 0.0
        valid_h3_kernel_norms_max: 1.93650007248
        valid_h3_kernel_norms_mean: 1.93649983406
        valid_h3_kernel_norms_min: 1.93649971485
        valid_h3_max_x_max_u: 296.574981689
        valid_h3_max_x_mean_u: 52.8513298035
        valid_h3_max_x_min_u: 0.0
        valid_h3_mean_x_max_u: 129.056564331
        valid_h3_mean_x_mean_u: 23.3177261353
        valid_h3_mean_x_min_u: 0.0
        valid_h3_min_x_max_u: 33.7482261658
        valid_h3_min_x_mean_u: 5.89528799057
        valid_h3_min_x_min_u: 0.0
        valid_h3_range_x_max_u: 264.845550537
        valid_h3_range_x_mean_u: 46.9560432434
        valid_h3_range_x_min_u: 0.0
        valid_objective: 31.3763618469
        valid_term_0: 31.3470020294
        valid_term_1_weight_decay: 0.0293594188988
        valid_y_col_norms_max: 0.633233249187
        valid_y_col_norms_mean: 0.597740232944
        valid_y_col_norms_min: 0.562247216702
        valid_y_max_max_class: 1.0
        valid_y_mean_max_class: 1.0
        valid_y_min_max_class: 1.0
        valid_y_misclass: 0.419999986887
        valid_y_nll: 31.3470020294
        valid_y_row_norms_max: 0.157307714224
        valid_y_row_norms_mean: 0.0602008700371
        valid_y_row_norms_min: 0.00178229133599

Saving to ./convolutional_network_best.pkl...
Saving to ./convolutional_network_best.pkl done. Time elapsed: 1.796074 seconds

---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
<ipython-input-3-a29d25125a51> in <module>()
      1 from pylearn2.config import yaml_parse
      2 train = yaml_parse.load(train)
----> 3 train.main_loop()

/home/amila/Documents/Algorithm/git/pylearn2/pylearn2/train.pyc in main_loop(self, time_budget)
    218                         callbacks=[self.training_seconds.set_value]
    219                     ):
--> 220                         rval = self.algorithm.train(dataset=self.dataset)
    221                     if rval is not None:
    222                         raise ValueError(

/home/amila/Documents/Algorithm/git/pylearn2/pylearn2/training_algorithms/sgd.pyc in train(self, dataset)
    453             for callback in on_load_batch:
    454                 callback(*batch)
--> 455             self.sgd_update(*batch)
    456             # iterator might return a smaller batch if dataset size
    457             # isn't divisible by batch_size

/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    849                         self.fn.nodes[self.fn.position_of_error],
    850                         self.fn.thunks[self.fn.position_of_error],
--> 851                         storage_map=self.fn.storage_map)
    852                 else:
    853                     # For the c linker We don't have access from

/usr/local/lib/python2.7/dist-packages/theano/gof/link.pyc in raise_with_op(node, thunk, exc_info, storage_map)
    312         # extra long error message in that case.
    313         pass
--> 314     reraise(exc_type, exc_value, exc_trace)
    315
    316

/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    838         t0_fn = time.time()
    839         try:
--> 840             outputs = self.fn()
    841         except Exception:
    842             if hasattr(self.fn, 'position_of_error'):

MemoryError: Error allocating 19360000 bytes of device memory (out of memory).
Apply node that caused the error: GpuElemwise{maximum,no_inplace}(GpuElemwise{maximum,no_inplace}.0, GpuSubtensor{::, ::, int64:int64:int64, int64:int64:int64}.0)
Toposort index: 293
Inputs types: [CudaNdarrayType(float32, 4D), CudaNdarrayType(float32, 4D)]
Inputs shapes: [(50, 200, 22, 22), (50, 200, 22, 22)]
Inputs strides: [(96800, 484, 22, 1), (561800, 2809, 106, 2)]
Inputs values: ['not shown', 'not shown']
Outputs clients: [[GpuElemwise{Composite{Cast{float32}(EQ(i0, i1))},no_inplace}(GpuElemwise{maximum,no_inplace}.0, GpuSubtensor{::, ::, int64:int64:int64, int64:int64:int64}.0), GpuElemwise{Composite{Cast{float32}(EQ(i0, i1))},no_inplace}(GpuElemwise{maximum,no_inplace}.0, GpuElemwise{maximum,no_inplace}.0), GpuElemwise{maximum,no_inplace}(GpuElemwise{maximum,no_inplace}.0, GpuSubtensor{::, ::, int64:int64:int64, int64:int64:int64}.0), GpuElemwise{Composite{Cast{float32}(EQ(i0, i1))},no_inplace}(GpuElemwise{maximum,no_inplace}.0, GpuElemwise{maximum,no_inplace}.0)]]

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
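To act on these hints without editing .theanorc, the flags can also be set in the environment before Theano is first imported; a minimal sketch (Theano only reads THEANO_FLAGS at import time):

import os
os.environ['THEANO_FLAGS'] = 'optimizer=fast_compile,exception_verbosity=high'
import theano  # the flags above must be set before this first import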

 

 

My .theanorc is:

[global]
device=gpu0
floatX=float32
mode=FAST_RUN

[nvcc]
fastmath=True

[cuda]
root=/usr/local/cuda-7.5
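Since the log reports "CNMeM is disabled", one possible mitigation (in addition to lowering batch_size) is enabling Theano's CNMeM memory pool, which pre-allocates a fraction of the card up front and can reduce fragmentation; it cannot create memory the 4 GB card does not have. A possible addition to .theanorc (0.8 reserves 80% of GPU memory):

[lib]
cnmem=0.8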
