CUDA 8 pooling_layer.cu:212 Check failed: error == cudaSuccess (8 vs. 0) invalid device function

Venkat

Nov 12, 2016, 9:01:54 AM
to Caffe Users
Hi All,


I built Caffe with the Dockerfile below.

FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04
MAINTAINER caffe...@googlegroups.com

RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        git \
        wget \
        libatlas-base-dev \
        libboost-all-dev \
        libgflags-dev \
        libgoogle-glog-dev \
        libhdf5-serial-dev \
        libleveldb-dev \
        liblmdb-dev \
        libopencv-dev \
        libprotobuf-dev \
        libsnappy-dev \
        protobuf-compiler \
        python-dev \
        python-numpy \
        python-pip \
        python-scipy && \
    rm -rf /var/lib/apt/lists/*

ENV CAFFE_ROOT=/opt/caffe
WORKDIR $CAFFE_ROOT

# FIXME: clone a specific git tag and use ARG instead of ENV once DockerHub supports this.
ENV CLONE_TAG=master

RUN git clone -b ${CLONE_TAG} --depth 1 https://github.com/BVLC/caffe.git . && \
    for req in $(cat python/requirements.txt) pydot; do pip install $req; done && \
    mkdir build && cd build && \
    cmake -DUSE_CUDNN=1 .. && \
    make -j"$(nproc)"

ENV PYCAFFE_ROOT $CAFFE_ROOT/python
ENV PYTHONPATH $PYCAFFE_ROOT:$PYTHONPATH
ENV PATH $CAFFE_ROOT/build/tools:$PYCAFFE_ROOT:$PATH
RUN echo "$CAFFE_ROOT/build/lib" >> /etc/ld.so.conf.d/caffe.conf && ldconfig

WORKDIR /workspace



I then started training inside the container with the following command:


nvidia-docker run -v `pwd`:`pwd` -w `pwd` -i -t caffe:gpu caffe train --solver=solver.prototxt -weights Emotiw.caffemodel



I am getting the following error:


I1111 04:25:39.775985     1 net.cpp:228] data does not need backward computation.
I1111 04:25:39.775993     1 net.cpp:270] This network produces output accuracy
I1111 04:25:39.776005     1 net.cpp:270] This network produces output loss
I1111 04:25:39.776032     1 net.cpp:283] Network initialization done.
I1111 04:25:39.776175     1 solver.cpp:60] Solver scaffolding done.
I1111 04:25:39.776871     1 caffe.cpp:155] Finetuning from Emotiw.caffemodel
I1111 04:25:48.997071     1 net.cpp:761] Ignoring source layer fc8_cat
I1111 04:27:41.813612     1 net.cpp:761] Ignoring source layer fc8_cat
I1111 04:27:41.852115     1 caffe.cpp:251] Starting Optimization
I1111 04:27:41.852200     1 solver.cpp:279] Solving PainwildNet
I1111 04:27:41.852221     1 solver.cpp:280] Learning Rate Policy: step
I1111 04:27:41.871546     1 solver.cpp:337] Iteration 0, Testing net (#0)
F1111 04:27:41.975798     1 pooling_layer.cu:212] Check failed: error == cudaSuccess (8 vs. 0)  invalid device function
*** Check failure stack trace: ***
    @     0x7fcd40cf25cd  google::LogMessage::Fail()
    @     0x7fcd40cf4433  google::LogMessage::SendToLog()
    @     0x7fcd40cf215b  google::LogMessage::Flush()
    @     0x7fcd40cf4e1e  google::LogMessageFatal::~LogMessageFatal()
    @     0x7fcd41488851  caffe::PoolingLayer<>::Forward_gpu()
    @     0x7fcd4128a652  caffe::Net<>::ForwardFromTo()
    @     0x7fcd4128a777  caffe::Net<>::Forward()
    @     0x7fcd412c244a  caffe::Solver<>::Test()
    @     0x7fcd412c30ce  caffe::Solver<>::TestAll()
    @     0x7fcd412c31ec  caffe::Solver<>::Step()
    @     0x7fcd412c3f19  caffe::Solver<>::Solve()
    @           0x40cf5f  train()
    @           0x4088d8  main
    @     0x7fcd3f205830  __libc_start_main
    @           0x4091a9  _start
    @              (nil)  (unknown)
*** Aborted at 1478838462 (unix time) try "date -d @1478838462" if you are using GNU date ***
PC: @     0x7fcd3f21c186 abort
*** SIGSEGV (@0x0) received by PID 1 (TID 0x7fcd41d84ac0) from PID 0; stack trace: ***
    @     0x7fcd3f21a4a0 (unknown)
    @     0x7fcd3f21c186 abort
    @     0x7fcd40cfb12c (unknown)
    @     0x7fcd40cf25cd google::LogMessage::Fail()
    @     0x7fcd40cf4433 google::LogMessage::SendToLog()
    @     0x7fcd40cf215b google::LogMessage::Flush()
    @     0x7fcd40cf4e1e google::LogMessageFatal::~LogMessageFatal()
    @     0x7fcd41488851 caffe::PoolingLayer<>::Forward_gpu()
    @     0x7fcd4128a652 caffe::Net<>::ForwardFromTo()
    @     0x7fcd4128a777 caffe::Net<>::Forward()
    @     0x7fcd412c244a caffe::Solver<>::Test()
    @     0x7fcd412c30ce caffe::Solver<>::TestAll()
    @     0x7fcd412c31ec caffe::Solver<>::Step()
    @     0x7fcd412c3f19 caffe::Solver<>::Solve()
    @           0x40cf5f train()
    @           0x4088d8 main
    @     0x7fcd3f205830 __libc_start_main
    @           0x4091a9 _start
    @                0x0 (unknown)



My GPU configuration is:

Sat Nov 12 08:57:32 2016       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 367.57                 Driver Version: 367.57                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  GeForce GTX 1080    Off  | 0000:03:00.0      On |                  N/A |
| 27%   36C    P8    10W / 180W |    753MiB /  8106MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID  Type  Process name                               Usage      |
|=============================================================================|
|    0      1187    G   /usr/lib/xorg/Xorg                              69MiB |
|    0      6681    G   unity-control-center                            10MiB |
|    0      7777    G   /usr/lib/xorg/Xorg                              44MiB |
|    0     11850    G   /usr/lib/xorg/Xorg                             296MiB |
|    0     12459    G   compiz                                         205MiB |
|    0     12722    G   ...chForDocWrittenScriptsInMainFrame/Default   112MiB |
+-----------------------------------------------------------------------------+
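
The GTX 1080 is a Pascal card, so it should report compute capability 6.1. For completeness, here is a minimal standalone check of what the device reports (my own sketch using only the plain CUDA runtime API, nothing Caffe-specific):

// cc_check.cu: print each device's compute capability.
// Whatever it prints is the architecture the Caffe kernels must be built for.
#include <cstdio>
#include <cuda_runtime.h>

int main() {
  int count = 0;
  cudaGetDeviceCount(&count);
  for (int i = 0; i < count; ++i) {
    cudaDeviceProp prop;
    cudaGetDeviceProperties(&prop, i);
    std::printf("GPU %d: %s, compute capability %d.%d\n",
                i, prop.name, prop.major, prop.minor);
  }
  return 0;
}

Built and run with: nvcc cc_check.cu -o cc_check && ./cc_check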


My solver.prototxt is:

test_iter: 100
test_interval: 1000
base_lr: 0.00001
lr_policy: "step"
gamma: 0.1
stepsize: 20000
display: 20
max_iter: 13000
momentum: 0.9
weight_decay: 0.0005
snapshot: 10000
solver_mode: GPU

My train_val prototxt:


 name: "TempWLDNET"

    layer {

      name: "data"

      type: "ImageData"

      top: "data"

      top: "label"

      include {

        phase: TRAIN

      }

      transform_param {

        mirror: true

        crop_size: 224 

        mean_file: "mean.binaryproto"

      }

      image_data_param {

        source: "train.txt"

        batch_size: 25

        new_height: 256 

        new_width: 256 

      }

    }

    layer {

      name: "data"

      type: "ImageData"

      top: "data"

      top: "label"

      include {

        phase: TEST

      }

      transform_param {

        mirror: false

        crop_size: 224 

        mean_file: "mean.binaryproto"

      }

      image_data_param {

        source: "test.txt"

        batch_size: 25

        new_height: 256 

        new_width: 256 

      }

    }

    layer {

      name: "conv1"

      type: "Convolution"

      bottom: "data"

      top: "conv1"

      param {

        lr_mult: 1

        decay_mult: 1

      }

      param {

        lr_mult: 2

        decay_mult: 0

      }

      convolution_param {

        num_output: 96

        kernel_size: 7

        stride: 2

        weight_filler {

          type: "gaussian"

          std: 0.01

        }

        bias_filler {

          type: "constant"

          value: 0

        }

      }

    }

    layer {

      name: "relu1"

      type: "ReLU"

      bottom: "conv1"

      top: "conv1"

    }

    layer {

      name: "norm1"

      type: "LRN"

      bottom: "conv1"

      top: "norm1"

      lrn_param {

        local_size: 5

        alpha: 0.0005

        beta: 0.75

      }

    }

    layer {

      name: "pool1"

      type: "Pooling"

      bottom: "norm1"

      top: "pool1"

      pooling_param {

        pool: MAX

        kernel_size: 3

        stride: 3

      }

    }

    layer {

      name: "conv2"

      type: "Convolution"

      bottom: "pool1"

      top: "conv2"

      param {

        lr_mult: 1

        decay_mult: 1

      }

      param {

        lr_mult: 2

        decay_mult: 0

      }

      convolution_param {

        num_output: 256

        pad: 2

        kernel_size: 5

        weight_filler {

          type: "gaussian"

          std: 0.01

        }

        bias_filler {

          type: "constant"

          value: 1

        }

      }

    }

    layer {

      name: "relu2"

      type: "ReLU"

      bottom: "conv2"

      top: "conv2"

    }

    layer {

      name: "pool2"

      type: "Pooling"

      bottom: "conv2"

      top: "pool2"

      pooling_param {

        pool: MAX

        kernel_size: 2

        stride: 2

      }

    }

    layer {

      name: "conv3"

      type: "Convolution"

      bottom: "pool2"

      top: "conv3"

      param {

        lr_mult: 1

        decay_mult: 1

      }

      param {

        lr_mult: 2

        decay_mult: 0

      }

      convolution_param {

        num_output: 512

        pad: 1

        kernel_size: 3

        weight_filler {

          type: "gaussian"

          std: 0.01

        }

        bias_filler {

          type: "constant"

          value: 0

        }

      }

    }

    layer {

      name: "relu3"

      type: "ReLU"

      bottom: "conv3"

      top: "conv3"

    }

    layer {

      name: "conv4"

      type: "Convolution"

      bottom: "conv3"

      top: "conv4"

      param {

        lr_mult: 1

        decay_mult: 1

      }

      param {

        lr_mult: 2

        decay_mult: 0

      }

      convolution_param {

        num_output: 512

        pad: 1

        kernel_size: 3

        weight_filler {

          type: "gaussian"

          std: 0.01

        }

        bias_filler {

          type: "constant"

          value: 1

        }

      }

    }

    layer {

      name: "relu4"

      type: "ReLU"

      bottom: "conv4"

      top: "conv4"

    }

    layer {

      name: "conv5"

      type: "Convolution"

      bottom: "conv4"

      top: "conv5"

      param {

        lr_mult: 1

        decay_mult: 1

      }

      param {

        lr_mult: 2

        decay_mult: 0

      }

      convolution_param {

        num_output: 512

        pad: 1

        kernel_size: 3

        weight_filler {

          type: "gaussian"

          std: 0.01

        }

        bias_filler {

          type: "constant"

          value: 0

        }

      }

    }

    layer {

      name: "relu5"

      type: "ReLU"

      bottom: "conv5"

      top: "conv5"

    }

    layer {

      name: "pool5"

      type: "Pooling"

      bottom: "conv5"

      top: "pool5"

      pooling_param {

        pool: MAX

        kernel_size: 3

        stride: 3

      }

    }

    layer {

      name: "fc6"

      type: "InnerProduct"

      bottom: "pool5"

      top: "fc6"

      param {

        lr_mult: 1

        decay_mult: 1

      }

      param {

        lr_mult: 2

        decay_mult: 0

      }

      inner_product_param {

        num_output: 4048

        weight_filler {

          type: "gaussian"

          std: 0.005

        }

        bias_filler {

          type: "constant"

          value: 1

        }

      }

    }

    layer {

      name: "relu6"

      type: "ReLU"

      bottom: "fc6"

      top: "fc6"

    }

    layer {

      name: "drop6"

      type: "Dropout"

      bottom: "fc6"

      top: "fc6"

      dropout_param {

        dropout_ratio: 0.5

      }

    }

    layer {

      name: "fc7"

      type: "InnerProduct"

      bottom: "fc6"

      top: "fc7"

      # Note that lr_mult can be set to 0 to disable any fine-tuning of this, and any other, layer

      param {

        lr_mult: 1

        decay_mult: 1

      }

      param {

        lr_mult: 2

        decay_mult: 0

      }

      inner_product_param {

        num_output: 4048

        weight_filler {

          type: "gaussian"

          std: 0.005

        }

        bias_filler {

          type: "constant"

          value: 1

        }

      }

    }

    layer {

      name: "relu7"

      type: "ReLU"

      bottom: "fc7"

      top: "fc7"

    }

    layer {

      name: "drop7"

      type: "Dropout"

      bottom: "fc7"

      top: "fc7"

      dropout_param {

        dropout_ratio: 0.5

      }

    }

    layer {

      name: "fc8_temp"

      type: "InnerProduct"

      bottom: "fc7"

      top: "fc8_temp"

      # lr_mult is set to higher than for other layers, because this layer is starting from random while the others are already trained

      param {

        lr_mult: 10

        decay_mult: 1

      }

      param {

        lr_mult: 20

        decay_mult: 0

      }

      inner_product_param {

        num_output: 16

        weight_filler {

          type: "gaussian"

          std: 0.01

        }

        bias_filler {

          type: "constant"

          value: 0

        }

      }

    }

    layer {

      name: "accuracy"

      type: "Accuracy"

      bottom: "fc8_temp"

      bottom: "label"

      top: "accuracy"

      include {

        phase: TEST

      }

    }

    layer {

      name: "loss"

      type: "SoftmaxWithLoss"

      bottom: "fc8_temp"

      bottom: "label"

      top: "loss"

    }



Can someone please help me understand why I am seeing this error and how to fix it? Please let me know if you need any additional information. Thank you in advance.

Felix Abecassis

Nov 12, 2016, 8:51:42 PM
to Caffe Users
You're very close. The problem is that the CMake build doesn't compile for CUDA compute capability 6.0+ (the Pascal architecture) by default, so the resulting binary has no kernels your GTX 1080 can run. See this discussion:
https://github.com/NVIDIA/nvidia-docker/issues/236
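
For example, one possible workaround (a sketch along the lines of that thread, assuming the CUDA_ARCH_NAME / CUDA_ARCH_BIN / CUDA_ARCH_PTX options exposed by Caffe's cmake/Cuda.cmake) is to tell CMake explicitly which architectures to generate code for in the Dockerfile's build step:

# Sketch: build Caffe for Pascal (sm_61 for the GTX 1080) as well as older GPUs.
RUN git clone -b ${CLONE_TAG} --depth 1 https://github.com/BVLC/caffe.git . && \
    for req in $(cat python/requirements.txt) pydot; do pip install $req; done && \
    mkdir build && cd build && \
    cmake -DUSE_CUDNN=1 \
          -DCUDA_ARCH_NAME=Manual \
          -DCUDA_ARCH_BIN="52 60 61" \
          -DCUDA_ARCH_PTX="61" .. && \
    make -j"$(nproc)"

Rebuilding the image with kernels compiled for sm_61 should make the "invalid device function" check in pooling_layer.cu go away.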

par...@unlv.nevada.edu

Aug 14, 2017, 4:18:40 PM
to Caffe Users
Thank you.