The following source code shows a simple console program that takes a wav file
and recognizes its speech content into text.
However, the statement
" hr = cpRecoResult->GetPhrase((SPPHRASE**)&pPhrase); "
crashes the program with a segmentation violation error.
Does anyone know why?
Thanks
Hai
I've removed all status checks and comments to make it shorter.
================
#include "stdafx.h"
#include <windows.h>
#include <sapi.h>
#include <spdebug.h>
// SAPI Header Files
#include <sphelper.h>
#include <spddkhlp.h>
// Console entry point: binds "test.wav" to a SAPI stream, feeds it to an
// in-process recognizer with a dictation grammar, and prints the text of each
// recognition event until the end-of-stream event arrives.
// Every call stores its HRESULT in hr, but hr is never inspected, so a failed
// step is silently ignored and execution continues.
int main(int argc, char* argv[])
{
HRESULT hr;
CComPtr<ISpStream> cpInputStream;
CComPtr<ISpRecognizer> cpRecognizer;
CComPtr<ISpRecoContext> cpRecoContext;
CComPtr<ISpRecoGrammar> cpRecoGrammar;
CComPtr<ISpRecoResult> cpRecoResult;
CComPtr<ISpPhrase> pPhrase;
WCHAR *pwszText;
CoInitialize(NULL);
// Create the stream object and bind it to the wav file, read-only, using the
// 22kHz 8-bit mono format descriptor built below.
hr = cpInputStream.CoCreateInstance(CLSID_SpStream);
CSpStreamFormat sInputFormat(SPSF_22kHz8BitMono, &hr);
hr = cpInputStream->BindToFile(L"test.wav",
SPFM_OPEN_READONLY,
&sInputFormat.FormatId(),
sInputFormat.WaveFormatExPtr(),
SPFEI_ALL_EVENTS); // SPFEI_ALL_EVENTS
// Create the in-process recognizer, point it at the file stream, then create
// a context and a grammar with dictation loaded.
hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer);
hr = cpRecognizer->SetInput(cpInputStream, TRUE);
hr = cpRecognizer->CreateRecoContext(&cpRecoContext);
hr = cpRecoContext->CreateGrammar(NULL, &cpRecoGrammar);
hr = cpRecoGrammar->LoadDictation(NULL,SPLO_STATIC);
// Only recognition and end-of-stream events are requested; the same mask is
// passed for both SetInterest arguments.
hr = cpRecoContext->SetInterest(SPFEI(SPEI_RECOGNITION) |
SPFEI(SPEI_END_SR_STREAM), SPFEI(SPEI_RECOGNITION) |
SPFEI(SPEI_END_SR_STREAM));
hr = cpRecoContext->SetNotifyWin32Event();
hr = cpRecoGrammar->SetDictationState(SPRS_ACTIVE);
BOOL fEndStreamReached = FALSE;
// Outer loop: block until the context signals, then drain the queued events.
// The wait's HRESULT is stored but not checked before the drain loop runs.
while (!fEndStreamReached)
{
hr = cpRecoContext->WaitForNotifyEvent(INFINITE);
CSpEvent spEvent;
while (!fEndStreamReached && S_OK == spEvent.GetFrom(cpRecoContext))
{
switch (spEvent.eEventId)
{
case SPEI_RECOGNITION:
// NOTE(review): this is the statement reported to crash. cpRecoResult is
// declared above but nothing in this function ever assigns it, so the call
// goes through a smart pointer that was never given an object. Presumably the
// result has to be taken from the event first (e.g. spEvent.RecoResult()) --
// confirm against the SAPI documentation.
// NOTE(review): the C-style cast also hides a type mismatch: pPhrase is a
// CComPtr<ISpPhrase>, so its address is not an SPPHRASE**.
hr = cpRecoResult->GetPhrase((SPPHRASE**)&pPhrase);
hr = pPhrase->GetText(SP_GETWHOLEPHRASE,
SP_GETWHOLEPHRASE, TRUE, &pwszText, NULL);
// NOTE(review): pwszText is a WCHAR* but is printed with narrow printf and %s.
// Nothing in this function frees pwszText -- TODO confirm whether the caller
// owns the returned buffer.
printf("result ==> %s\n", pwszText);
break;
case SPEI_END_SR_STREAM:
// End of the input stream: stop both loops.
fEndStreamReached = TRUE;
break;
}
spEvent.Clear();
}
}
hr = cpRecoGrammar->SetDictationState(SPRS_INACTIVE);
hr = cpRecoGrammar->UnloadDictation();
hr = cpInputStream->Close();
cpRecognizer.Release();
// NOTE(review): only cpRecognizer is released explicitly. The other CComPtr
// locals are still in scope here and release their interfaces when main
// returns, i.e. after CoUninitialize() -- confirm this ordering is acceptable.
CoUninitialize();
return 0;
}
===============================
and then one after processing that result to release the result
cpRecoResult.Release();
--
This posting is provided "AS IS" with no warranties, and confers no rights.
"Hai Xu" <h...@macrosoftinc.com> wrote in message
news:ehuXTOl2...@TK2MSFTNGP11.phx.gbl...
Any idea why ?
Thanks
Hai
"Dave Wood [MS]" <dave...@online.microsoft.com> wrote in message
news:O3mvEfs2...@tk2msftngp13.phx.gbl...
--
This posting is provided "AS IS" with no warranties, and confers no rights.
"Hai Xu" <h...@macrosoftinc.com> wrote in message
news:eXnFQyt2...@tk2msftngp13.phx.gbl...
Thank you very much for your great help.
Well, I now get some results; however, they are so poor that it makes me think this
may not be a good way to go.
Also, WaitForNotifyEvent(INFINITE) keeps returning S_OK, which makes
spEvent.GetFrom() crash. Do you
have any suggestions for how to do this simple task?
The basic job I want to do is to take a wave file and then recognize its content as
text.
Is my code mostly OK, or did I miss anything?
Thanks again
Hai
"Dave Wood [MS]" <dave...@online.microsoft.com> wrote in message
news:OzblH$t2DHA...@TK2MSFTNGP09.phx.gbl...
==================
// WaveASR.cpp : take a wav file
// reco the content into text
// print out the result into the console
#include <windows.h>
// SAPI Header Files
#include <sapi.h>
#include <spdebug.h>
#include <sphelper.h>
#include <spddkhlp.h>
int main(int argc, char* argv[])
{
HRESULT hr;
printf("Start wav file recognizing ...\n");
CComPtr<ISpStream> cpInputStream;
CComPtr<ISpRecognizer> cpRecognizer;
CComPtr<ISpRecoContext> cpRecoContext;
CComPtr<ISpRecoGrammar> cpRecoGrammar;
CComPtr<ISpRecoResult> cpRecoResult;
CComPtr<ISpPhrase> pPhrase;
WCHAR *pwszText;
CoInitialize(NULL);
// Create basic SAPI stream object
// NOTE: The helper SpBindToFile can be used to perform the following
operations
hr = cpInputStream.CoCreateInstance(CLSID_SpStream);
// Check hr
// set wav format as 22kHz, 16-bit, Mono
CSpStreamFormat sInputFormat(SPSF_22kHz16BitMono, &hr);
// setup stream object with a wav file name
// for read-only access, since it will only be access by the SR engine
// if wav file is in other directory, use L"C:\\temp\\test.wav"
// L specifies unicode, same to (LPCWSTR)
hr = cpInputStream->BindToFile(L"test.wav",
SPFM_OPEN_READONLY,
&sInputFormat.FormatId(),
sInputFormat.WaveFormatExPtr(),
SPFEI_ALL_EVENTS);
if(hr == S_OK)
{
}
else if(hr == E_INVALIDARG)
{
printf("E_INVALIDARG\n");
}
else if(hr == E_OUTOFMEMORY)
{
printf("E_OUTOFMEMORY\n");
}
else if(hr == STG_E_FILENOTFOUND)
{
printf("STG_E_FILENOTFOUND\n");
}
else if(hr == SPERR_ALREADY_INITIALIZED)
{
printf("SPERR_ALREADY_INITIALIZED\n");
}
else if(FAILED(hr))
{
printf("cpInputStream->BindToFile failed !\n");
}
// Create in-process speech recognition engine
hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer);
// Check hr
if(hr == S_OK)
{
}
else if(hr == E_INVALIDARG)
{
printf("E_INVALIDARG\n");
}
else if(hr == SPERR_ENGINE_BUSY)
{
printf("SPERR_ENGINE_BUSY\n");
}
else if(FAILED(hr))
{
printf("cpRecognizer.CoCreateInstance failed !\n");
}
// connect wav input to recognizer
// SAPI will negotiate mismatched engine/input audio formats
// using system audio codecs, so second parameter is not
// important - use default of TRUE
hr = cpRecognizer->SetInput(cpInputStream, TRUE);
// Create recognition context to receive events
hr = cpRecognizer->CreateRecoContext(&cpRecoContext);
// Create grammar, and load dictation
// ignore grammar ID for simplicity's sake
// NOTE: Voice command apps would load CFG here
hr = cpRecoContext->CreateGrammar(NULL, &cpRecoGrammar);
hr = cpRecoGrammar->LoadDictation(NULL,SPLO_STATIC);
// check for recognitions and end of stream event
hr = cpRecoContext->SetInterest(SPFEI(SPEI_RECOGNITION) |
SPFEI(SPEI_END_SR_STREAM), SPFEI(SPEI_RECOGNITION) |
SPFEI(SPEI_END_SR_STREAM));
// use Win32 events for command-line style application
hr = cpRecoContext->SetNotifyWin32Event();
// activate dictation, and begin recognition
hr = cpRecoGrammar->SetDictationState(SPRS_ACTIVE);
// Check hr
if(hr == S_OK)
{
}
else if(hr == E_INVALIDARG)
{
printf("E_INVALIDARG\n");
}
else if(hr == SP_STREAM_UNINITIALIZED)
{
printf("SP_STREAM_UNINITIALIZED\n");
}
else if(hr == SPERR_UNINITIALIZED)
{
printf("SPERR_UNINITIALIZED\n");
}
else if(hr == SPERR_UNSUPPORTED_FORMAT)
{
printf("SPERR_UNSUPPORTED_FORMAT\n");
}
else if(FAILED(hr))
{
printf("cpRecoGrammar->SetDictationState(SPRS_ACTIVE) failed !\n");
}
// while events occur, continue processing
// timeout should be greater than the audio stream length,
// or a reasonable amount of time expected to pass before
// no more recognitions are expected in an audio stream
// INFINITE is a little risky for hanging the program
BOOL fEndStreamReached = FALSE;
while (!fEndStreamReached && S_OK ==
cpRecoContext->WaitForNotifyEvent(INFINITE)) // set time out 60 seconds here
{
CSpEvent spEvent;
// pull all queued events from the reco context's event queue
while (!fEndStreamReached && S_OK == spEvent.GetFrom(cpRecoContext))
{
// Check event type
switch (spEvent.eEventId)
{
// speech recognition engine recognized some audio
case SPEI_RECOGNITION:
// get result in even queue
cpRecoResult = spEvent.RecoResult();
//hr = cpRecoResult->GetPhrase((SPPHRASE**)&pPhrase);
// get the phrase's entire text string, including replacements
//hr = pPhrase->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE,
&pwszText, NULL);
hr = cpRecoResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE,
&pwszText, NULL);
// get the phrase's first 2 words, excluding replacements
//hr = pPhrase->GetText(pPhrase->Rule.ulFirstElement, 2, FALSE,
&pwszText, NULL);
//int nLen = WideCharToMultiByte(CP_ACP, 0, pwszText, -1, 0, 0, 0, 0);
wprintf(L"result ==> %s\n", pwszText);
cpRecoResult.Release();
break;
// end of the wav file was reached by the speech recognition engine
case SPEI_END_SR_STREAM:
fEndStreamReached = TRUE;
break;
}
// clear any event data/object references
spEvent.Clear();
}// END event pulling loop - break on empty event queue OR end stream
}// END event polling loop - break on event timeout OR end stream
// deactivate dictation
hr = cpRecoGrammar->SetDictationState(SPRS_INACTIVE);
// unload dictation topic
hr = cpRecoGrammar->UnloadDictation();
// close the input stream, since we're done with it
// NOTE: smart pointer will call SpStream's destructor,
// and consequently ::Close, but code may want to check
// for errors on ::Close operation
hr = cpInputStream->Close();
cpRecognizer.Release();
CoUninitialize();
return 0;
}
==================
Hai
"Hai Xu" <h...@macrosoftinc.com> wrote in message
news:eGpNPau2...@tk2msftngp13.phx.gbl...