# Stage 9: train a chain (TDNN) model on top of the GMM system.
# FIX: quote $stage so an empty/unset value gives a clear "integer expression
# expected" error instead of a mis-parsed test (ShellCheck SC2086).
if [ "$stage" -le 9 ]; then
  local/chain/run_tdnn.sh --stage 0
fi
# Online GMM decoding from wav input, writing compressed lattices.
# NOTE(review): the JOB placeholder in lat.JOB.gz is only substituted when this
# command is launched via a parallelization wrapper (run.pl/queue.pl); if run
# directly, replace JOB with a literal job number — TODO confirm invocation.
# NOTE(review): the online2 feature pipeline may additionally require
# --global-cmvn-stats (global CMVN stats, e.g. summed training cmvn stats) —
# verify against $srcdir/conf/online_decoding.conf before running.
online2-wav-gmm-latgen-faster --do-endpointing=$do_endpointing \
--config=$srcdir/conf/online_decoding.conf \
--max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
--acoustic-scale=$acwt --word-symbol-table=$graphdir/words.txt \
$graphdir/HCLG.fst $spk2utt_rspecifier "$wav_rspecifier" \
"ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
Is online2-wav-gmm-latgen-faster the right binary for decoding, and does it require .wav input, or would .flac files also be OK?
6) Also, the above code snippet does not pass the --global-cmvn-stats option, but I got an error about it when I tried to run the snippet. If that option is required, what value should I pass for --global-cmvn-stats?
Sorry for so many questions — I just want to make sure I am doing this right.
Thanks in advance.
#!/bin/bash
# Mini-LibriSpeech GMM recipe: download data, prepare dict/LMs, train
# monophone through SAT triphone systems, then set up online decoding.

data=data/                              # root directory for downloaded corpora
data_url=www.openslr.org/resources/31   # mini_librispeech audio
lm_url=www.openslr.org/resources/11     # pre-built language models

. ./cmd.sh
. ./path.sh

stage=0
. utils/parse_options.sh  # parses --stage etc.; must come after the defaults

set -euo pipefail  # fail fast once option parsing is done

mkdir -p "$data"   # FIX: quote to be safe against spaces/globs (SC2086)
# Download the audio subsets and the pre-built language models.
for part in dev-clean-2 train-clean-5; do
  local/download_and_untar.sh "$data" "$data_url" "$part"
done

local/download_lm.sh "$lm_url" data/local/lm
# Format the data as Kaldi data directories.
for part in dev-clean-2 train-clean-5; do
  # Use underscore-separated names in data directories.
  # (Parameter expansion ${part//-/_} replaces the former `echo | sed` pipe.)
  local/data_prep.sh "$data/LibriSpeech/$part" "data/${part//-/_}"
done  # BUG FIX: this 'done' was missing, leaving the 'for' loop unterminated.

# Build the pronunciation dictionary and the no-silence-prob lang directory.
local/prepare_dict.sh --stage 3 --nj 30 --cmd "$train_cmd" \
  data/local/lm data/local/lm data/local/dict_nosp

utils/prepare_lang.sh data/local/dict_nosp \
  "<UNK>" data/local/lang_tmp_nosp data/lang_nosp

local/format_lms.sh --src-dir data/lang_nosp data/local/lm

# Create ConstArpaLm format language model for full 3-gram and 4-gram LMs.
utils/build_const_arpa_lm.sh data/local/lm/lm_tglarge.arpa.gz \
  data/lang_nosp data/lang_nosp_test_tglarge
featdir=mfcc

# Extract MFCC features and compute per-speaker CMVN stats.
for x in dev_clean_2 train_clean_5; do  # BUG FIX: ';' before 'do' was missing
  steps/make_mfcc.sh --nj 8 --cmd "$train_cmd" "data/$x" "exp/make_feat/$x" "$featdir"
  steps/compute_cmvn_stats.sh "data/$x" "exp/make_feat/$x" "$featdir"
done
# Get the shortest 500 utterances first because those are more likely
# to have accurate alignments.
utils/subset_data_dir.sh --shortest data/train_clean_5 500 data/train_500short

# Train a monophone system on the short utterances.
# BUG FIX: data/lang does not exist at this point (it is only created later,
# after pronunciation probabilities are estimated); use data/lang_nosp.
steps/train_mono.sh --nj 4 --cmd "$train_cmd" data/train_500short data/lang_nosp exp/mono

# Decode the monophone system with the small trigram LM.
utils/mkgraph.sh data/lang_nosp_test_tgsmall exp/mono exp/mono/graph_nosp_tgsmall
for test in dev_clean_2; do
  steps/decode.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
    exp/mono/graph_nosp_tgsmall "data/$test" "exp/mono/decode_nosp_tgsmall_$test"
done
# Get alignments from the monophone system.
steps/align_si.sh --nj 8 --cmd "$train_cmd" \
  data/train_clean_5 data/lang_nosp exp/mono exp/mono_ali_train_clean_5

# Train tri1 [first triphone pass, delta+delta-delta features].
steps/train_deltas.sh --cmd "$train_cmd" \
  2000 10000 data/train_clean_5 data/lang_nosp exp/mono_ali_train_clean_5 exp/tri1

# Decode tri1 and rescore with the medium/large LMs.
# (Brace expansions like decode_nosp_{tgsmall,tgmed}_$test must stay unquoted.)
utils/mkgraph.sh data/lang_nosp_test_tgsmall exp/tri1 exp/tri1/graph_nosp_tgsmall
for test in dev_clean_2; do
  steps/decode.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
    exp/tri1/graph_nosp_tgsmall "data/$test" "exp/tri1/decode_nosp_tgsmall_$test"
  steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \
    "data/$test" exp/tri1/decode_nosp_{tgsmall,tgmed}_$test
  steps/lmrescore_const_arpa.sh \
    --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \
    "data/$test" exp/tri1/decode_nosp_{tgsmall,tglarge}_$test
done

# Align with tri1, re-using its training graphs.
steps/align_si.sh --nj 8 --cmd "$train_cmd" \
  --use-graphs true data/train_clean_5 data/lang_nosp exp/tri1 exp/tri1_ali_train_clean_5
# Train and decode tri2b [LDA+MLLT].
steps/train_lda_mllt.sh --cmd "$train_cmd" \
  --splice-opts "--left-context=3 --right-context=3" \
  2500 15000 data/train_clean_5 data/lang_nosp exp/tri1_ali_train_clean_5 exp/tri2b

# Decode using the LDA+MLLT model and rescore with the larger LMs.
utils/mkgraph.sh data/lang_nosp_test_tgsmall exp/tri2b exp/tri2b/graph_nosp_tgsmall
for test in dev_clean_2; do
  steps/decode.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
    exp/tri2b/graph_nosp_tgsmall "data/$test" "exp/tri2b/decode_nosp_tgsmall_$test"
  steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \
    "data/$test" exp/tri2b/decode_nosp_{tgsmall,tgmed}_$test
  steps/lmrescore_const_arpa.sh \
    --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \
    "data/$test" exp/tri2b/decode_nosp_{tgsmall,tglarge}_$test
done

# Align all data with the LDA+MLLT system (tri2b).
steps/align_si.sh --nj 8 --cmd "$train_cmd" --use-graphs true \
  data/train_clean_5 data/lang_nosp exp/tri2b exp/tri2b_ali_train_clean_5
# Do MMI on top of LDA+MLLT.
steps/make_denlats.sh --nj 8 --cmd "$train_cmd" \
  data/train_clean_5 data/lang_nosp exp/tri2b exp/tri2b_denlats
steps/train_mmi.sh data/train_clean_5 data/lang_nosp \
  exp/tri2b_ali_train_clean_5 exp/tri2b_denlats exp/tri2b_mmi

# Decode MMI iterations 3 and 4.
# NOTE(review): decode dirs are not suffixed with _$test, so results from
# multiple test sets would overwrite each other; harmless while only
# dev_clean_2 is decoded.
for test in dev_clean_2; do
  steps/decode.sh --config conf/decode.config --iter 4 --nj 20 --cmd "$decode_cmd" \
    exp/tri2b/graph_nosp_tgsmall "data/$test" exp/tri2b_mmi/decode_it4
  steps/decode.sh --config conf/decode.config --iter 3 --nj 20 --cmd "$decode_cmd" \
    exp/tri2b/graph_nosp_tgsmall "data/$test" exp/tri2b_mmi/decode_it3
done

# Do the same with boosting.
steps/train_mmi.sh --boost 0.05 data/train_clean_5 data/lang_nosp \
  exp/tri2b_ali_train_clean_5 exp/tri2b_denlats exp/tri2b_mmi_b0.05
for test in dev_clean_2; do
  steps/decode.sh --config conf/decode.config --iter 4 --nj 20 --cmd "$decode_cmd" \
    exp/tri2b/graph_nosp_tgsmall "data/$test" exp/tri2b_mmi_b0.05/decode_it4
  steps/decode.sh --config conf/decode.config --iter 3 --nj 20 --cmd "$decode_cmd" \
    exp/tri2b/graph_nosp_tgsmall "data/$test" exp/tri2b_mmi_b0.05/decode_it3
done
# Do MPE.
steps/train_mpe.sh data/train_clean_5 data/lang_nosp \
  exp/tri2b_ali_train_clean_5 exp/tri2b_denlats exp/tri2b_mpe
for test in dev_clean_2; do
  steps/decode.sh --config conf/decode.config --iter 4 --nj 20 --cmd "$decode_cmd" \
    exp/tri2b/graph_nosp_tgsmall "data/$test" exp/tri2b_mpe/decode_it4
  # BUG FIX: was exp/tri2b/graph_tgsmall, which is never built; the graph
  # created for tri2b above is graph_nosp_tgsmall.
  steps/decode.sh --config conf/decode.config --iter 3 --nj 20 --cmd "$decode_cmd" \
    exp/tri2b/graph_nosp_tgsmall "data/$test" exp/tri2b_mpe/decode_it3
done
## Do LDA+MLLT+SAT, and decode.
steps/train_sat.sh 2500 15000 data/train_clean_5 data/lang_nosp \
  exp/tri2b_ali_train_clean_5 exp/tri3b

# Decode using the tri3b model (speaker-adapted, fMLLR).
utils/mkgraph.sh data/lang_nosp_test_tgsmall exp/tri3b exp/tri3b/graph_nosp_tgsmall
for test in dev_clean_2; do
  steps/decode_fmllr.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
    exp/tri3b/graph_nosp_tgsmall "data/$test" "exp/tri3b/decode_nosp_tgsmall_$test"
  steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \
    "data/$test" exp/tri3b/decode_nosp_{tgsmall,tgmed}_$test
  steps/lmrescore_const_arpa.sh \
    --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \
    "data/$test" exp/tri3b/decode_nosp_{tgsmall,tglarge}_$test
done

### Optional unigram decoding; disabled, not needed for the standard recipe.
### (Fixed the graph dir to match the mkgraph output on the previous line.)
#(
# utils/mkgraph.sh data/lang_ug exp/tri3b exp/tri3b/graph_ug
# steps/decode_fmllr.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
#   exp/tri3b/graph_ug data/test exp/tri3b/decode_ug
#)
# Align all data with LDA+MLLT+SAT system (tri3b).
# BUG FIX: the lang dir was misspelled as data/lang_nosp_tg_small, which does
# not exist; alignment must use the lang dir the model was trained with.
steps/align_fmllr.sh --nj 8 --cmd "$train_cmd" --use-graphs true \
  data/train_clean_5 data/lang_nosp exp/tri3b exp/tri3b_ali_train_clean_5

# Now we compute the pronunciation and silence probabilities from training data,
# and re-create the lang directory.
steps/get_prons.sh --cmd "$train_cmd" \
  data/train_clean_5 data/lang_nosp exp/tri3b
utils/dict_dir_add_pronprobs.sh --max-normalize true \
  data/local/dict_nosp \
  exp/tri3b/pron_counts_nowb.txt exp/tri3b/sil_counts_nowb.txt \
  exp/tri3b/pron_bigram_counts_nowb.txt data/local/dict

utils/prepare_lang.sh data/local/dict \
  "<UNK>" data/local/lang_tmp data/lang
local/format_lms.sh --src-dir data/lang data/local/lm
utils/build_const_arpa_lm.sh \
  data/local/lm/lm_tglarge.arpa.gz data/lang data/lang_test_tglarge

# Re-align using the new (pron-prob) lang dir.
# NOTE(review): this overwrites exp/tri3b_ali_train_clean_5 produced above with
# a different --nj — confirm nothing still depends on the earlier alignments.
steps/align_fmllr.sh --nj 5 --cmd "$train_cmd" \
  data/train_clean_5 data/lang exp/tri3b exp/tri3b_ali_train_clean_5
# Test the tri3b system with the silprobs and pron-probs.
utils/mkgraph.sh data/lang_test_tgsmall \
  exp/tri3b exp/tri3b/graph_tgsmall
for test in dev_clean_2; do
  steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \
    exp/tri3b/graph_tgsmall "data/$test" \
    "exp/tri3b/decode_tgsmall_$test"
  steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
    "data/$test" exp/tri3b/decode_{tgsmall,tgmed}_$test
  steps/lmrescore_const_arpa.sh \
    --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
    "data/$test" exp/tri3b/decode_{tgsmall,tglarge}_$test
done
## MMI on top of tri3b (i.e. LDA+MLLT+SAT+MMI).
# NOTE(review): --nj here should match the num_jobs of the --transform-dir
# (the last align_fmllr run used --nj 5); a mismatch triggers the
# "mismatch in number of jobs" warning from make_denlats — verify.
steps/make_denlats.sh --config conf/decode.config \
  --nj 8 --cmd "$train_cmd" --transform-dir exp/tri3b_ali_train_clean_5 \
  data/train_clean_5 data/lang_nosp exp/tri3b exp/tri3b_denlats
steps/train_mmi.sh data/train_clean_5 data/lang_nosp \
  exp/tri3b_ali_train_clean_5 exp/tri3b_denlats exp/tri3b_mmi

for test in dev_clean_2; do
  steps/decode_fmllr.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
    --alignment-model exp/tri3b/final.alimdl --adapt-model exp/tri3b/final.mdl \
    exp/tri3b/graph_nosp_tgsmall "data/$test" "exp/tri3b_mmi/decode_nosp_tgsmall_$test"
  # Do a decoding that uses the exp/tri3b/decode directory to get transforms from.
  steps/decode.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
    --transform-dir "exp/tri3b/decode_nosp_tgsmall_$test" \
    exp/tri3b/graph_nosp_tgsmall "data/$test" "exp/tri3b_mmi/decode2_nosp_tgsmall_$test"
done
# Prepare the tri3b model for online decoding.
# BUG FIX: prepare_online_decoding.sh takes <data> <lang> <gmm-dir> <dest-dir>;
# the extra positional argument exp/tri3b_mmi/final.mdl made its usage check
# fail. To decode with the MMI model instead, point the gmm-dir at a directory
# containing that final.mdl.
steps/online/prepare_online_decoding.sh --cmd "$train_cmd" data/train_clean_5 data/lang_nosp \
  exp/tri3b exp/tri3b_online/ || exit 1;

# Online decoding.
for test in dev_clean_2; do
  steps/online/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
    exp/tri3b/graph_nosp_tgsmall "data/$test" "exp/tri3b_online/decode_$test"
done
To unsubscribe from this group and stop receiving emails from it, send an email to kaldi-help+unsubscribe@googlegroups.com.
For more options, visit https://groups.google.com/d/optout.
Daniel Galvez
To unsubscribe from this group and stop receiving emails from it, send an email to kaldi-help+unsubscribe@googlegroups.com.
## MMI on top of tri3b (i.e. LDA+MLLT+SAT+MMI)
# NOTE(review): the "mismatch in number of jobs" message in the log below comes
# from --nj 8 here differing from the num_jobs recorded in
# exp/tri3b_ali_train_clean_5 (the last align_fmllr run used --nj 5) —
# make the two match.
steps/make_denlats.sh --config conf/decode.config \
--nj 8 --cmd "$train_cmd" --transform-dir exp/tri3b_ali_train_clean_5 \
data/train_clean_5 data/lang_nosp exp/tri3b exp/tri3b_denlats
...
...
steps/make_denlats.sh: feature type is lda
steps/make_denlats.sh: using fMLLR transforms from exp/tri3b_ali_train_clean_5
steps/make_denlats.sh: mismatch in number of jobs with exp/tri3b_ali_train_clean_5
#call prepare_online_decoding.sh
# NOTE(review): four positional args (<data> <lang> <gmm-dir> <dest-dir>) match
# the script's usage; the earlier variant that also passed final.mdl fails the
# argument check.
steps/online/prepare_online_decoding.sh --cmd "$train_cmd" data/train_clean_5 data/lang_nosp \
exp/tri3b exp/tri3b_online/ || exit 1;
#online decoding
for test in dev_clean_2; do
steps/online/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 exp/tri3b/graph_nosp_tgsmall \
data/$test exp/tri3b_online/decode_$test
done
%WER 48.51 [ 9768 / 20138, 447 ins, 2260 del, 7061 sub ] exp/mono/decode_nosp_tgsmall_dev_clean_2/wer_8_0.0
%WER 21.01 [ 4230 / 20138, 489 ins, 575 del, 3166 sub ] exp/tri1/decode_nosp_tglarge_dev_clean_2/wer_13_0.0
%WER 24.98 [ 5030 / 20138, 398 ins, 948 del, 3684 sub ] exp/tri1/decode_nosp_tgmed_dev_clean_2/wer_15_0.0
%WER 27.52 [ 5541 / 20138, 433 ins, 1053 del, 4055 sub ] exp/tri1/decode_nosp_tgsmall_dev_clean_2/wer_14_0.0
%WER 18.67 [ 3759 / 20138, 452 ins, 516 del, 2791 sub ] exp/tri2b/decode_nosp_tglarge_dev_clean_2/wer_15_0.0
%WER 22.42 [ 4515 / 20138, 397 ins, 820 del, 3298 sub ] exp/tri2b/decode_nosp_tgmed_dev_clean_2/wer_16_0.0
%WER 24.55 [ 4944 / 20138, 375 ins, 965 del, 3604 sub ] exp/tri2b/decode_nosp_tgsmall_dev_clean_2/wer_16_0.0
%WER 13.39 [ 2696 / 20138, 380 ins, 328 del, 1988 sub ] exp/tri3b/decode_nosp_tglarge_dev_clean_2/wer_16_0.0
%WER 16.34 [ 3291 / 20138, 351 ins, 481 del, 2459 sub ] exp/tri3b/decode_nosp_tgmed_dev_clean_2/wer_16_0.0
%WER 17.88 [ 3600 / 20138, 363 ins, 561 del, 2676 sub ] exp/tri3b/decode_nosp_tgsmall_dev_clean_2/wer_15_0.0
%WER 24.70 [ 4974 / 20138, 430 ins, 907 del, 3637 sub ] exp/tri3b/decode_nosp_tgsmall_dev_clean_2.si/wer_16_0.0
%WER 13.14 [ 2647 / 20138, 377 ins, 309 del, 1961 sub ] exp/tri3b/decode_tglarge_dev_clean_2/wer_15_0.5
%WER 15.87 [ 3195 / 20138, 376 ins, 413 del, 2406 sub ] exp/tri3b/decode_tgmed_dev_clean_2/wer_17_0.0
%WER 17.36 [ 3495 / 20138, 371 ins, 500 del, 2624 sub ] exp/tri3b/decode_tgsmall_dev_clean_2/wer_17_0.0
%WER 23.81 [ 4794 / 20138, 519 ins, 701 del, 3574 sub ] exp/tri3b/decode_tgsmall_dev_clean_2.si/wer_15_0.0
%WER 21.87 [ 4405 / 20138, 629 ins, 504 del, 3272 sub ] exp/tri3b_online/decode_dev_clean_2/wer_17_0.0
>>> >>>...
To unsubscribe from this group and stop receiving emails from it, send an email to kaldi-help+unsubscribe@googlegroups.com.
...
--
Go to http://kaldi-asr.org/forums.html find out how to join
---
You received this message because you are subscribed to the Google Groups "kaldi-help" group.
To unsubscribe from this group and stop receiving emails from it, send an email to kaldi-help+unsubscribe@googlegroups.com.
For more options, visit https://groups.google.com/d/optout.
%WER 13.35 [ 2689 / 20138, 318 ins, 491 del, 1880 sub ] exp/chain/tdnn1a_sp/decode_tglarge_dev_clean_2/wer_9_0.5
%WER 16.17 [ 3256 / 20138, 391 ins, 482 del, 2383 sub ] exp/chain/tdnn1c_sp/decode_tglarge_dev_clean_2/wer_10_0.5
%WER 16.06 [ 3234 / 20138, 354 ins, 532 del, 2348 sub ] exp/chain/tdnn1c_sp_online/decode_tglarge_dev_clean_2/wer_11_0.5
exp/chain/tdnn1c_sp: num-iters=30 nj=1..1 num-params=7.0M dim=40+100->2337 combine=-0.039->-0.037 xent:train/valid[19,29,final]=(-0.972,-0.741,-0.718/-1.62,-1.64,-1.60) logprob:train/valid[19,29,final]=(-0.043,-0.035,-0.034/-0.108,-0.117,-0.113)
exp/chain/tdnn1c_sp: num-iters=17 nj=2..5 num-params=7.0M dim=40+100->2353 combine=-0.061->-0.050 xent:train/valid[10,16,final]=(-1.56,-1.17,-1.06/-1.85,-1.53,-1.46) logprob:train/valid[10,16,final]=(-0.081,-0.053,-0.046/-0.120,-0.096,-0.090)
WER dev_clean_2 (tglarge) 10.45 <---- mine is 16.17
[online:] 10.56 <---- mine is 16.06
exp/chain/tdnn1c_sp: num-iters=30 nj=1..1 num-params=7.0M dim=40+100->2337 combine=-0.046->-0.044 xent:train/valid[19,29,final]=(-0.972,-0.741,-0.968/-1.62,-1.64,-1.63) logprob:train/valid[19,29,final]=(-0.043,-0.035,-0.039/-0.108,-0.117,-0.104)
%WER 14.23 [ 2865 / 20138, 328 ins, 436 del, 2101 sub ] exp/chain/tdnn1c_sp/decode_tglarge_dev_clean_2/wer_9_1.0
%WER 14.32 [ 2884 / 20138, 327 ins, 440 del, 2117 sub ] exp/chain/tdnn1c_sp_online/decode_tglarge_dev_clean_2/wer_9_1.0
>>> >> >> >> >