前橋さま
グループ各位
お世話になります。
ご指示ありがとうございました。
一通り、cupy, chainerをuninstallした後、
pip install cupy --no-cache-dir
pip install chainer --no-cache-dir
を行い、とりあえずはcupyのインストールが出来たようです。
次の問題ですが……(スレッドを変えたほうが良いならば、変更いたします)
mnistのサンプルを試しに動かしたところ以下のように、重み行列Wの初期化
(正規乱数を代入する部分?)で止まります。
python3 train_mnist.py -g 0
GPU: 0
# unit: 1000
# Minibatch-size: 100
# epoch: 20
Exception in main training loop: CURAND_STATUS_LAUNCH_FAILURE
Traceback (most recent call last):
File "/usr/local/lib/python3.5/dist-packages/chainer/training/trainer.py", line 309, in run
entry.extension(self)
File "/usr/lib/python3.5/contextlib.py", line 77, in __exit__
self.gen.throw(type, value, traceback)
File "/usr/local/lib/python3.5/dist-packages/chainer/reporter.py", line 101, in scope
yield
File "/usr/local/lib/python3.5/dist-packages/chainer/training/trainer.py", line 306, in run
update()
File "/usr/local/lib/python3.5/dist-packages/chainer/training/updaters/standard_updater.py", line 149, in update
self.update_core()
File "/usr/local/lib/python3.5/dist-packages/chainer/training/updaters/standard_updater.py", line 160, in update_core
optimizer.update(loss_func, *in_arrays)
File "/usr/local/lib/python3.5/dist-packages/chainer/optimizer.py", line 650, in update
loss = lossfun(*args, **kwds)
File "/usr/local/lib/python3.5/dist-packages/chainer/links/model/classifier.py", line 134, in __call__
self.y = self.predictor(*args, **kwargs)
File "train_mnist.py", line 23, in __call__
h1 = F.relu(self.l1(x))
File "/usr/local/lib/python3.5/dist-packages/chainer/links/connection/linear.py", line 128, in __call__
self._initialize_params(in_size)
File "/usr/local/lib/python3.5/dist-packages/chainer/links/connection/linear.py", line 114, in _initialize_params
self.W.initialize((self.out_size, in_size))
File "/usr/local/lib/python3.5/dist-packages/chainer/variable.py", line 1411, in initialize
data = initializers.generate_array(self.initializer, shape, xp)
File "/usr/local/lib/python3.5/dist-packages/chainer/initializers/__init__.py", line 46, in generate_array
initializer(array)
File "/usr/local/lib/python3.5/dist-packages/chainer/initializers/normal.py", line 68, in __call__
Normal(s)(array)
File "/usr/local/lib/python3.5/dist-packages/chainer/initializers/normal.py", line 36, in __call__
array[...] = xp.random.normal(**args)
File "/usr/local/lib/python3.5/dist-packages/cupy/random/distributions.py", line 93, in normal
x = rs.normal(0, 1, size, dtype)
File "/usr/local/lib/python3.5/dist-packages/cupy/random/generator.py", line 116, in normal
return self._generate_normal(func, size, dtype, loc, scale)
File "/usr/local/lib/python3.5/dist-packages/cupy/random/generator.py", line 79, in _generate_normal
func(self._generator, out.data.ptr, out.size, *args)
File "cupy/cuda/curand.pyx", line 174, in cupy.cuda.curand.generateNormalDouble
File "cupy/cuda/curand.pyx", line 183, in cupy.cuda.curand.generateNormalDouble
File "cupy/cuda/curand.pyx", line 83, in cupy.cuda.curand.check_status
Will finalize trainer extensions and updater before reraising the exception.
Traceback (most recent call last):
File "train_mnist.py", line 124, in <module>
main()
File "train_mnist.py", line 120, in main
trainer.run()
File "/usr/local/lib/python3.5/dist-packages/chainer/training/trainer.py", line 320, in run
six.reraise(*sys.exc_info())
File "/usr/lib/python3/dist-packages/six.py", line 686, in reraise
raise value
File "/usr/local/lib/python3.5/dist-packages/chainer/training/trainer.py", line 309, in run
entry.extension(self)
File "/usr/lib/python3.5/contextlib.py", line 77, in __exit__
self.gen.throw(type, value, traceback)
File "/usr/local/lib/python3.5/dist-packages/chainer/reporter.py", line 101, in scope
yield
File "/usr/local/lib/python3.5/dist-packages/chainer/training/trainer.py", line 306, in run
update()
File "/usr/local/lib/python3.5/dist-packages/chainer/training/updaters/standard_updater.py", line 149, in update
self.update_core()
File "/usr/local/lib/python3.5/dist-packages/chainer/training/updaters/standard_updater.py", line 160, in update_core
optimizer.update(loss_func, *in_arrays)
File "/usr/local/lib/python3.5/dist-packages/chainer/optimizer.py", line 650, in update
loss = lossfun(*args, **kwds)
File "/usr/local/lib/python3.5/dist-packages/chainer/links/model/classifier.py", line 134, in __call__
self.y = self.predictor(*args, **kwargs)
File "train_mnist.py", line 23, in __call__
h1 = F.relu(self.l1(x))
File "/usr/local/lib/python3.5/dist-packages/chainer/links/connection/linear.py", line 128, in __call__
self._initialize_params(in_size)
File "/usr/local/lib/python3.5/dist-packages/chainer/links/connection/linear.py", line 114, in _initialize_params
self.W.initialize((self.out_size, in_size))
File "/usr/local/lib/python3.5/dist-packages/chainer/variable.py", line 1411, in initialize
data = initializers.generate_array(self.initializer, shape, xp)
File "/usr/local/lib/python3.5/dist-packages/chainer/initializers/__init__.py", line 46, in generate_array
initializer(array)
File "/usr/local/lib/python3.5/dist-packages/chainer/initializers/normal.py", line 68, in __call__
Normal(s)(array)
File "/usr/local/lib/python3.5/dist-packages/chainer/initializers/normal.py", line 36, in __call__
array[...] = xp.random.normal(**args)
File "/usr/local/lib/python3.5/dist-packages/cupy/random/distributions.py", line 93, in normal
x = rs.normal(0, 1, size, dtype)
File "/usr/local/lib/python3.5/dist-packages/cupy/random/generator.py", line 116, in normal
return self._generate_normal(func, size, dtype, loc, scale)
File "/usr/local/lib/python3.5/dist-packages/cupy/random/generator.py", line 79, in _generate_normal
func(self._generator, out.data.ptr, out.size, *args)
File "cupy/cuda/curand.pyx", line 174, in cupy.cuda.curand.generateNormalDouble
File "cupy/cuda/curand.pyx", line 183, in cupy.cuda.curand.generateNormalDouble
File "cupy/cuda/curand.pyx", line 83, in cupy.cuda.curand.check_status
cupy.cuda.curand.CURANDError: CURAND_STATUS_LAUNCH_FAILURE
こちらでは、cuda-9.2を使用しております。
/usr/local/cuda/lib64/
の中には、
libcurand.so (libcurand.so.9.2 へ、シンボリックリンク)
libcurand.so.7.5
libcurand.so.7.5.18
libcurand.so.9.2(libcurand.so.9.2.88 へ、シンボリックリンク)
libcurand.so.9.2.88
libcurand_static.a
これらのファイルが存在します。
依存関係の問題でしょうか?
追加の質問となってしまい恐縮ですが、
このエラーを解消する方法をご教示お願いいたします。