GSMConverter.h
// Fixed-width aliases used throughout the converter.
// NOTE(review): these widths hold on ILP32 and Windows LLP64 targets;
// on LP64 platforms `unsigned long` is 8 bytes — confirm if ever ported.
typedef unsigned char u8; /* Exactly one byte (8 bits) */
typedef unsigned short u16; /* Exactly 2 bytes (16 bits) */
typedef unsigned long u32; /* Exactly 4 bytes (32 bits) */
// Bidirectional PCM <-> GSM 6.10 audio converter built on the Windows
// Audio Compression Manager (ACM) stream API.
// Usage: InitConverter(direction) -> Convert(...) repeatedly -> TermConverter().
class CGSMConverter
{
public:
// Direction of the conversion chosen at InitConverter time.
enum EConvertDirection { PCM_TO_GSM, GSM_TO_PCM };
CGSMConverter( void );
virtual ~CGSMConverter( void );
// Opens the ACM stream and allocates the work buffers.
AudioError InitConverter( EConvertDirection eDirection );
// Frees the work buffers and closes the ACM stream.
void TermConverter( void );
// Return number of bytes converted, stored in m_pDestBuffer
u32 Convert( const u8* pData, u32 iDataLength );
// Read-only view of the last conversion's output (valid until the next
// Convert call or TermConverter).
inline const u8* GetDestBuffer() { return m_pDestBuffer; }
protected:
HACMSTREAM m_hAcmStream;          // ACM conversion stream handle (NULL when closed)
WAVEFORMATEX m_PCMStruct;         // 8 kHz / 16-bit / mono PCM format descriptor
GSM610WAVEFORMAT m_GSMStruct;     // GSM 6.10 format descriptor (65-byte blocks)
PWAVEFORMATEX m_pSrcFormat;       // points at one of the two structs above
LPWAVEFORMATEX m_pDstFormat;      // points at the other, per direction
private:
u8* m_pSrcBuffer;                 // staging buffer for input audio
u8* m_pDestBuffer;                // output buffer exposed via GetDestBuffer()
u32 m_iSrcBufferLength;           // capacity of m_pSrcBuffer in bytes
u32 m_iDestBufferLength;          // capacity of m_pDestBuffer in bytes
};
GSMConverter.cpp
// Number of 20 ms PCM frames batched into one conversion packet.
#define PREFERRED_FRAMES_PER_PACKET 4
// One PCM frame: 160 samples * 2 bytes (16-bit mono) = 320 bytes.
#define PCM_FRAME_SIZE 320
// One GSM 6.10 ACM block (65 bytes) holds TWO 32.5-byte GSM frames —
// hence "double frame"; the ACM codec cannot address half bytes.
#define GSM_DOUBLE_FRAME_SIZE 65
#include "StdAfx.h"
#include <cstring> // memcpy / memset
#include "GSMConverter.h"
// Zero-initializes all handles/buffers and fills in the two fixed
// WAVEFORMATEX descriptors (8 kHz mono 16-bit PCM, and GSM 6.10).
CGSMConverter::CGSMConverter( void )
: m_hAcmStream( NULL )
, m_pSrcFormat( NULL )
, m_pDstFormat( NULL )
, m_pSrcBuffer( NULL )
, m_pDestBuffer( NULL )
, m_iSrcBufferLength( 0 )
, m_iDestBufferLength( 0 )
{
// PCM struct initialization
m_PCMStruct.wFormatTag = WAVE_FORMAT_PCM;
m_PCMStruct.nChannels = 1;
m_PCMStruct.nSamplesPerSec = 8000;
// 8000 samples/s * 2 bytes/sample = 16000 bytes/s.
m_PCMStruct.nAvgBytesPerSec = 16000;
// One block = one mono 16-bit sample = 2 bytes.
m_PCMStruct.nBlockAlign = 2;
m_PCMStruct.wBitsPerSample = 16;
m_PCMStruct.cbSize = 0;
// GSM struct initialization
//m_GSMStruct
m_GSMStruct.wfx.wFormatTag = WAVE_FORMAT_GSM610;
m_GSMStruct.wfx.nChannels = 1;
m_GSMStruct.wfx.nSamplesPerSec = 8000;
// 8000 samples/s / 320 samples per block * 65 bytes per block = 1625.
m_GSMStruct.wfx.nAvgBytesPerSec = 1625;
// One GSM 6.10 block is always 65 bytes.
m_GSMStruct.wfx.nBlockAlign = 65;
// Compressed format: bits-per-sample is not meaningful, so it is 0.
m_GSMStruct.wfx.wBitsPerSample = 0;
// 2 extra bytes follow WAVEFORMATEX: the wSamplesPerBlock field below.
m_GSMStruct.wfx.cbSize = 2;
// Each 65-byte GSM block decodes to 320 PCM samples.
m_GSMStruct.wSamplesPerBlock = 320;
}
// NOTE(review): the destructor does NOT release the buffers or close the
// ACM stream — the owner must call TermConverter() first. Calling
// TermConverter() here would double-delete if the owner already called it,
// because TermConverter() does not NULL the pointers; fix that first.
CGSMConverter::~CGSMConverter( void )
{
}
// Sizes and allocates the source/destination buffers for the requested
// direction and opens the ACM conversion stream.
//
// eDirection: PCM_TO_GSM or GSM_TO_PCM.
// Returns AudioErrorNone on success, AudioErrorInitConverter if the ACM
// stream could not be opened (no codec, bad formats, ...).
//
// Safe to call more than once: any state left from a previous call is
// released first, so repeated initialization no longer leaks.
AudioError CGSMConverter::InitConverter( EConvertDirection eDirection )
{
	AudioError iInitRes = AudioErrorNone;

	// FIX: release any previous buffers/stream so calling InitConverter
	// twice does not leak memory or an open ACM stream handle.
	delete [] m_pSrcBuffer;
	m_pSrcBuffer = NULL;
	delete [] m_pDestBuffer;
	m_pDestBuffer = NULL;
	if ( NULL != m_hAcmStream )
	{
		acmStreamClose( m_hAcmStream, 0 );
		m_hAcmStream = NULL;
	}

	if ( PCM_TO_GSM == eDirection )
	{
		// 4 PCM frames (320 bytes each) in -> 2 GSM blocks (65 bytes each) out.
		m_iSrcBufferLength = PCM_FRAME_SIZE * PREFERRED_FRAMES_PER_PACKET;
		m_iDestBufferLength = GSM_DOUBLE_FRAME_SIZE *
			( PREFERRED_FRAMES_PER_PACKET / 2 );
		m_pSrcFormat = &m_PCMStruct;
		m_pDstFormat = reinterpret_cast< LPWAVEFORMATEX >( &m_GSMStruct );
	}
	else
	{
		// Mirror of the above for decompression.
		m_iSrcBufferLength = GSM_DOUBLE_FRAME_SIZE *
			( PREFERRED_FRAMES_PER_PACKET / 2 );
		m_iDestBufferLength = PCM_FRAME_SIZE * PREFERRED_FRAMES_PER_PACKET;
		m_pSrcFormat = reinterpret_cast< LPWAVEFORMATEX >( &m_GSMStruct );
		m_pDstFormat = &m_PCMStruct;
	}

	// Allocating memory for destination and source buffers.
	m_pSrcBuffer = new u8[ m_iSrcBufferLength ];
	m_pDestBuffer = new u8[ m_iDestBufferLength ];

	DWORD dwOpenFlags = ACM_STREAMOPENF_NONREALTIME;
	MMRESULT openStreamResult = acmStreamOpen(
		&m_hAcmStream  // stream handle
		, NULL         // handle to ACM driver (let ACM pick a codec)
		, m_pSrcFormat // source WAVEFORMATEX struct
		, m_pDstFormat // destination WAVEFORMATEX struct
		, NULL         // filter
		, NULL         // callback
		, NULL         // instance data
		, dwOpenFlags  // open flags
		);
	if ( MMSYSERR_NOERROR != openStreamResult )
	{
		// FIX: do not leak the freshly allocated buffers on failure, and
		// leave the object in a clean "not initialized" state.
		delete [] m_pSrcBuffer;
		m_pSrcBuffer = NULL;
		delete [] m_pDestBuffer;
		m_pDestBuffer = NULL;
		m_hAcmStream = NULL;
		iInitRes = AudioErrorInitConverter;
	}
	return iInitRes;
}
// Converts one packet of audio through the open ACM stream.
//
// pData / iDataLength: input bytes (PCM or GSM depending on the direction
// given to InitConverter). Input larger than the staging buffer is clamped.
// Returns the number of converted bytes now stored in m_pDestBuffer,
// or 0 on any failure (not initialized, NULL/empty input, codec error).
u32 CGSMConverter::Convert( const u8* pData, u32 iDataLength )
{
	u32 iConvertResult = 0;

	// Guard against use before InitConverter or with no input.
	if ( NULL == m_hAcmStream || NULL == m_pSrcBuffer
		|| NULL == pData || 0 == iDataLength )
	{
		return iConvertResult;
	}

	// BUG FIX: the original never copied the caller's data into
	// m_pSrcBuffer, so the codec always converted stale/uninitialized
	// memory. Copy the input (clamped to the buffer capacity) first.
	if ( iDataLength > m_iSrcBufferLength )
	{
		iDataLength = m_iSrcBufferLength;
	}
	memcpy( m_pSrcBuffer, pData, iDataLength );

	DWORD dwConvertFlags = 0;
	ACMSTREAMHEADER streamHeader;
	memset( &streamHeader, 0, sizeof( ACMSTREAMHEADER ) );
	streamHeader.cbStruct = sizeof( ACMSTREAMHEADER );
	streamHeader.pbSrc = m_pSrcBuffer;
	// BUG FIX: cbSrcLength must be the number of valid input bytes, not
	// the whole buffer capacity. cbSrcLengthUsed and cbDstLengthUsed are
	// OUTPUT fields filled in by the driver and must not be preset.
	streamHeader.cbSrcLength = iDataLength;
	streamHeader.dwSrcUser = 0;
	streamHeader.pbDst = m_pDestBuffer;
	streamHeader.cbDstLength = m_iDestBufferLength;
	streamHeader.dwDstUser = 0;

	MMRESULT streamPrepareHeader = acmStreamPrepareHeader( m_hAcmStream,
		&streamHeader, 0 );
	if ( MMSYSERR_NOERROR == streamPrepareHeader )
	{
		MMRESULT streamConvertRes = acmStreamConvert( m_hAcmStream,
			&streamHeader, dwConvertFlags );
		if ( MMSYSERR_NOERROR == streamConvertRes )
		{
			// Bytes the codec actually wrote into m_pDestBuffer.
			iConvertResult = streamHeader.cbDstLengthUsed;
		}
	}
	// Unprepare only if the header was successfully prepared.
	if ( streamHeader.fdwStatus & ACMSTREAMHEADER_STATUSF_PREPARED )
	{
		acmStreamUnprepareHeader( m_hAcmStream, &streamHeader, 0 );
	}
	return iConvertResult;
}
void CGSMConverter::TermConverter( void )
{
// Releasing memory allocated for destination and source buffers
delete [] m_pSrcBuffer;
delete [] m_pDestBuffer;
// Closing stream
MMRESULT closeStreamResult = acmStreamClose( m_hAcmStream, 0 );
}
I don't do Windows Mobile or C, but apart from the buffer sizes it
looks OK to me. I presume that the conversion code is called multiple
times in a loop. As such, there is no need to Prepare & UnPrepare the
header for each converted frame. Unless you are increasing the
specified memory, you can use the same Prepared header throughout.
Your buffer sizing appears correct, but while you've got the correct
relationship, the calculation appears "magical". Basically 320 samples
of PCM convert to 1 block of GSM. PCM (Mono 16 bits/sample) uses 2
bytes per block (sample). GSM uses 65 bytes / block. So 640 PCM bytes
convert to 65 bytes of GSM.
So the calculations are ...
GSMBytesPerBuffer = GSMBytesPerGSMBlock * GSMBlocksPerBuffer
(I think PreferredFramesPerPacket is GSMBlocksPerBuffer)
260 = 65 * 4
PCMBytesPerBuffer = PCMBytesPerPCMBlock * PCMBlocksPerGSMBlock *
GSMBlocksPerBuffer
(I think PCMBlocksPerGSMBlock are PCMSamplesPerGSMBlock)
2560 = 2 * 320 * 4
... or 130 and 1280 with only 2 GSMBlocksPerBuffer.
Alan Lloyd
Hi Alan!
Thank you for your answer.
I already found an error. It was (as usual) a small logical error,
which I hadn't seen.
About calculating buffer sizes:
If I understand correctly, two GSM frames have a size of 65 bytes
(32.5 bytes for one GSM frame).
So, for 4 PCM frames the buffer size = 320 * 4 = 1280 bytes,
and for 4 GSM frames the buffer size = 65 * ( 4 / 2 ) = 130 bytes.
I think you missed the " / 2 " in my code for calculating the GSM buffer size.
Thank you again for your attention.
My point was that you got the correct answer using the wrong
calculation. GSM has a block (frame) of 65 bytes and is only mono. You
would get the same sized GSM output whether your input of a constant
number of samples (ie same time of record) were stereo or mono PCM.
So your "double-frame" concept which implies a GSM block of 32.5 bytes
for mono is a fallacy. Your calculation should have used a multiple of
2 (for PCM bytes/block) when calculating the PCM buffer, not using a
false divisor of 2 on the GSM buffer calculation.
Is your ring finger shorter than your index finger ? <g>
Alan Lloyd
Sorry Alan, but I still don't understand.
I have mono PCM, I want to convert it to mono GSM.
I thought that 65 bytes is the size of two GSM frames — is that wrong?
One 320-sample PCM block = one GSM frame of size X, so what is the meaning of X?
Sorry for my dullness, Alan — I am just learning.
Thank you.
GSM is always & only mono. A block (maybe the same as your frame) is
the smallest chunk of the format.
For PCM a block is one sample and may be 8 or 16 bits and may be mono
(one channel) or stereo (two channels), or even more in "surround
sound". So a PCM block is ...
BitsPerSample / 8 * ChannelCount bytes, and is 4 BytesPerBlock for 16-
bit Stereo, 2 BytesPerBlock for 16-bit Mono
For GSM each block is 65 bytes and always mono. Each block contains
320 samples of PCM. So a GSM block is always 65 bytes of GSM data
derived from 320 blocks of PCM data.
So it holds 320 * PCMBytesPerBlock of PCM data. Which for 16-bit mono
PCM will be 640 bytes.
If you deal in two GSM blocks per buffer that means that 1280 buffer
bytes of PCM 16-bit Mono would fit into 130 bytes of GSM buffer.
It is inaccurate to talk about a GSM_DOUBLE_FRAME, there is only a
GSM_BLOCK of 65 bytes. Dividing the PREFERRED_FRAMES_PER_PACKET by 2
instead of including the PCM BytesPerBlock in the PCM buffer
calculation, gets the right answer using the wrong method.
There is no concept of "frames" in either PCM or GSM, only blocks. One
could, I suppose, talk about frames in GSM because each block (could
be a frame) holds a multiplicity of PCM blocks. But it is not a
conventionally descriptive term.
If you put the correct intermediate elements in your calculation they
work out to be in the correct units (ie bytes/buffer). the terms you
used do not work out to that correct unit.
Using correct terminology and clear coding & calculation does not, I
suppose, matter much as of now. But when you have learnt more about
processes, and six-months later you look at your past code, you will
say "What on earth was I doing in that piece of code". That's the
result of writing code which has poor clarity.
Alan Lloyd
> For GSM each block is 65 bytes and always mono. Each block contains
> 320 samples of PCM. So a GSM block is always 65 bytes of GSM data
> derived from 320 blocks of PCM data.
Actually Galian is correct about the GSM frame size. Each GSM frame
represents 160 PCM samples (20ms) at a frame rate of 50 frames a second.
The compressed data is packed into 260 bits, or 32.5 bytes. In PC land we
usually work with double frames as the .5 is tough to manage.
--
http://www.chrisnet.net/code.htm
[MS MVP for DirectShow / MediaFoundation]
I'm probably just repeating what you were stating - interesting to me
nonetheless, thanks!
Mark
> That may be true of GSM in general, but the GSM ACM codec included with the
> MS operating systems has a block size of 65 bytes which is always 320 samples.
> In other words, ACM doesn't let us work with 260-bit blocks.
>
> I'm probably just repeating what you were stating - interesting to me
> nonetheless, thanks!
Exactly. Hence my statement "In PC land we usually work with double frames
as the .5 is tough to manage.", a double frame is 320 samples.
I didn't know that, and it confirms my uncertainty of the definition
of "frames".
A "block" I can understand as the smallest chunk of data in the
subject format.
The logical definition of "frame" would be the chunk of many blocks a
particular application is using.
But is that so, and how does it fit with a situation (as in PC GSM)
where the smallest chunk of data one _can_ use consists of two-blocks.
And in domestic interlaced TV a frame is half a block of visual data.
And if "block" & "frame" is interchangeable, why have the two.
Alan Lloyd
> I didn't know that, and it confirms my uncertainty of the definition
> of "frames".
>
> A "block" I can understand as the smallest chunk of data in the
> subject format.
>
> The logical definition of "frame" would be the chunk of many blocks a
> particular application is using.
>
> But is that so, and how does it fit with a situation (as in PC GSM)
> where the smallest chunk of data one _can_ use consists of two-blocks.
> And in domestic interlaced TV a frame is half a block of visual data.
>
> And if "block" & "frame" is interchangeable, why have the two.
Frame refers to the transmission size where as block refers to the smallest
representable data size. So for the PC GSM one block = 2 frames.
For interlaced video it gets a little confusing as a frame is 2 blocks,
i.e. 2 fields.
P.S. Sorry for my bad English.