While training a chain model on my Bengali dataset, I am plagued by instability issues (errors like "Tridiagonalizing matrix that is too large or has NaNs" and "Cholesky decomposition failed. Maybe matrix is not positive definite"). Has anyone faced such issues? According to the Google Groups archives, reducing the learning rate or changing the model topology could help.
--
Go to http://kaldi-asr.org/forums.html to find out how to join the kaldi-help group
---
You received this message because you are subscribed to the Google Groups "kaldi-help" group.
To unsubscribe from this group and stop receiving emails from it, send an email to kaldi-help+...@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/kaldi-help/78189510-3ddb-4815-a715-7e9a6e90d715n%40googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/kaldi-help/1bf79454-79ea-4377-9d59-3847439cdaabn%40googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/kaldi-help/c7e7611a-4fc7-4e66-a820-4f3bb276e7acn%40googlegroups.com.
// Warp reduce to 1 element. Threads implicitly synchronized within a warp.
if (tid < warpSize) {
# pragma unroll
for (int shift = warpSize; shift > 0; shift >>= 1) {
ssum[tid] += ssum[tid + shift];
}
}
... so basically we'd now need to add, on the line below the "for" statement:
__syncwarp();
but this would have to be done in many other kernels.
Perhaps you can help with this?
To view this discussion on the web visit https://groups.google.com/d/msgid/kaldi-help/57a0ecd0-dec7-423e-b621-fe0faf0950c7n%40googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/kaldi-help/9d8abd06-955c-4f85-9833-3e9a0c2398fcn%40googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/kaldi-help/616ce305-fd65-4d27-82f3-ec532d1f3c14n%40googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/kaldi-help/d98c3419-dfc7-4246-a524-4baf1db85908n%40googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/kaldi-help/900befde-ce85-4bb2-a843-e7665a980ba7n%40googlegroups.com.
trace1 = TraceMatMat(mat3, mat2, kNoTrans);
trace2 = TraceMatSmat(mat3, cu_smat2, kNoTrans);
To view this discussion on the web visit https://groups.google.com/d/msgid/kaldi-help/de7b4da6-b6f7-4979-9ade-821a42b37612n%40googlegroups.com.
if (tid < warpSize) {
for (int shift = warpSize; shift > 0; shift >>= 1) {
// here: __syncwarp(), cu-kernels.cu:1798
sdata[tid] = op.Reduce(sdata[tid], sdata[tid + shift]);
}
}
To view this discussion on the web visit https://groups.google.com/d/msgid/kaldi-help/e2d6d42b-bb36-4dde-9495-3b1ead145927n%40googlegroups.com.
git diff -b
diff --git a/src/cudamatrix/cu-kernels.cu b/src/cudamatrix/cu-kernels.cu
index 8044ff699..78def14d9 100644
--- a/src/cudamatrix/cu-kernels.cu
+++ b/src/cudamatrix/cu-kernels.cu
@@ -2087,13 +2087,13 @@ static void _group_transform_reduce(
x_idx += threads_per_group;
}
sreduction[tid] = treduction;
- if (threads_per_group > warpSize) {
+
__syncthreads();
- }
// tree-reduce to 2x warpSize elements per group
# pragma unroll
- for (int shift = threads_per_group / 2; shift > warpSize; shift >>= 1) {
+ int shift = threads_per_group / 2;
+ for (; shift > warpSize; shift >>= 1) {
if (threadIdx.x < shift) {
sreduction[tid] = op.Reduce(sreduction[tid], sreduction[tid + shift]);
}
@@ -2101,14 +2101,12 @@ static void _group_transform_reduce(
}
// Warp-reduce to 1 element per group.
- // Threads implicitly synchronized within the warp.
- const int warp_reduce_size =
- threads_per_group / 2 < warpSize ? threads_per_group / 2 : warpSize;
- if (threadIdx.x < warp_reduce_size) {
# pragma unroll
- for (int shift = warp_reduce_size; shift > 0; shift >>= 1) {
+ for (; shift > 0; shift >>= 1) {
+ if (threadIdx.x < shift) {
sreduction[tid] = op.Reduce(sreduction[tid], sreduction[tid + shift]);
}
+ __syncwarp();
}
// Store the result.
To view this discussion on the web visit https://groups.google.com/d/msgid/kaldi-help/60af00ca-0e0f-41d3-b1d5-a46efdab716fn%40googlegroups.com.