I am looking for a range coder because it is claimed to be free.
I found the carryless range coder of Dmitry Subbotin at the following url:
http://cpansearch.perl.org/src/SALVA/Compress-PPMd-0.10/Coder.hpp
The source code is at the end of this message.
I need to implement the range coder in Delphi.
Unfortunately, I am not used to programming in C/C++, and there are
several things I would like to understand.
1.
#define DO(n) for (int _=0; _<n; _++)
...
void FinishEncode () { DO(4) OutByte(low>>24), low<<=8; }
is this the same thing as the following:
void FinishEncode ()
{
for ( int i=0; i<4 ; i++)
{
OutByte(low>>24) ;
low = low <<8 ;
}
}
2.
assert(cumFreq+freq<totFreq && freq && totFreq<=BOT);
is this the same thing as:
if (! ( (cumFreq+freq<totFreq) &&
(freq<>0) &&
(totFreq<=BOT)
)
)
{
printf("error !") ;
exit(0) ;
}
3.
in "void Encode" I don't understand the line:
while ((low ^ low+range)<TOP || range<BOT && ((range= -low & BOT-1),1))
OutByte(low>>24), range<<=8, low<<=8;
the first thing I don't understand is this:
((range= -low & BOT-1),1))
What does this ",1" represent?
second thing:
is it the same thing as the following ?
while ((low ^ low+range)<TOP || range<BOT && ((range= -low & BOT-1),1))
{
OutByte(low>>24) ;
range = range << 8 ;
low = low << 8;
}
Thank you for your help
/****************************************************************************
* This file is part of PPMd project
*
* Contents: 'Carryless rangecoder' by Dmitry Subbotin
*
* Comments: this implementation is claimed to be a public domain
*
****************************************************************************/
/********************** Original text
*************************************
//////// Carryless rangecoder (c) 1999 by Dmitry Subbotin ////////
typedef unsigned int uint;
typedef unsigned char uc;
#define DO(n) for (int _=0; _<n; _++)
#define TOP (1<<24)
#define BOT (1<<16)
class RangeCoder
{
uint low, code, range, passed;
FILE *f;
void OutByte (uc c) { passed++; fputc(c,f); }
uc InByte () { passed++; return fgetc(f); }
public:
uint GetPassed () { return passed; }
void StartEncode (FILE *F) { f=F; passed=low=0; range= (uint) -1; }
void FinishEncode () { DO(4) OutByte(low>>24), low<<=8; }
void StartDecode (FILE *F) { passed=low=code=0; range= (uint) -1;
f=F; DO(4) code= code<<8 | InByte();
}
void Encode (uint cumFreq, uint freq, uint totFreq) {
assert(cumFreq+freq<totFreq && freq && totFreq<=BOT);
low += cumFreq * (range/= totFreq);
range*= freq;
while ((low ^ low+range)<TOP || range<BOT && ((range= -low & BOT-1),1))
OutByte(low>>24), range<<=8, low<<=8;
}
uint GetFreq (uint totFreq) {
uint tmp= (code-low) / (range/= totFreq);
if (tmp >= totFreq) throw ("Input data corrupt"); // or force it to return
return tmp; // a valid value :)
}
void Decode (uint cumFreq, uint freq, uint totFreq) {
assert(cumFreq+freq<totFreq && freq && totFreq<=BOT);
low += cumFreq*range;
range*= freq;
while ((low ^ low+range)<TOP || range<BOT && ((range= -low & BOT-1),1))
code= code<<8 | InByte(), range<<=8, low<<=8;
}
};
*****************************************************************************/
Yes. Pretty much obfuscated. "_" is a valid variable name. "i" would
have been better.
> 2.
> assert(cumFreq+freq<totFreq && freq && totFreq<=BOT);
>
>
> is this the same thing as:
> if (! ( (cumFreq+freq<totFreq) &&
> (freq<>0) &&
> (totFreq<=BOT)
> )
> )
> {
> printf("error !") ;
> exit(0) ;
> }
Approximately, except that "assert" prints an implementation-defined
message and calls abort() (which may cause a core dump) rather than
exit(0), and it is compiled out entirely when NDEBUG is defined.
>
>
> 3.
> in "void Encode" I don't understand the line:
> while ((low ^ low+range)<TOP || range<BOT && ((range= -low & BOT-1),1))
> OutByte(low>>24), range<<=8, low<<=8;
>
> the first thing I don't understand is this:
> ((range= -low & BOT-1),1))
>
> what does represent this ",1" ???
More obfuscation. The result of the comma operator is its second
operand; the first operand is evaluated, but its value is discarded.
Thus ((range= -low & BOT-1),1) has the side effect of assigning the
value of (-low & BOT-1) to the variable "range", and has the value "1"
(the second operand of the comma operator). IOW, the assignment happens
as part of the conditional evaluation of the while-loop, but the actual
value of the assignment is discarded and the result is always true.
Terrifying programming style IMHO.
>
> second thing:
> is it the same thing as the following ?
>
> while ((low ^ low+range)<TOP || range<BOT && ((range= -low & BOT-1),1))
> {
> OutByte(low>>24) ;
> range = range << 8 ;
> low = low << 8;
> }
Yes. Apparently, some people believe that if you write more condensed
code, the output will be more optimized. (-;
Greetings,
Thomas
thank you for your answer
>>
>> 3.
>> in "void Encode" I don't understand the line:
>> while ((low ^ low+range)<TOP || range<BOT && ((range= -low &
>> BOT-1),1))
>> OutByte(low>>24), range<<=8, low<<=8;
>>
>> the first thing I don't understand is this:
>> ((range= -low & BOT-1),1))
>>
>> what does represent this ",1" ???
>
> More obfuscation. The result of the comma operator is the second
> operand, the first is ignored. Thus ((range= -low & BOT-1),1)) has a
> side-effect of assigning the value of (-low & BOT-1) to the variable
> "range", and has the value of "1" (the second operand of the comma
> operator). IOW, the assignment happens as part of the conditional
> evaluation of the while-loop, but the actual value of the assignment is
> ignored and the result is always true. Terrifying programming style IMHO.
>
If I understood correctly, the "range=..." assignment can be moved inside
the braces (the loop body), and what remains in the condition is just 1:
while ((low ^ low+range)<TOP || range<BOT && 1)
that is to say
while ( ((low ^ low+range)<TOP) || (range<BOT) )
so the whole thing becomes:
while ( ((low ^ low+range)<TOP) || (range<BOT) )
{
range= -low & (BOT-1) ;
OutByte(low>>24) ;
range = range << 8 ;
low = low << 8;
}
am I right ?
thank you
No, you forgot the short-circuit evaluation of C. If the left-hand side
of || evaluates to non-zero, the right-hand side is not evaluated. If the
left-hand side of && evaluates to zero, the right-hand side is not
evaluated.
Thus, the loop is repeated if either:
(low ^ (low + range)) < TOP
or range < BOT,
but range is set to -low & (BOT-1) only in the latter case, i.e. when
the first condition is false and range < BOT, because otherwise
short-circuit evaluation bypasses the evaluation of the third condition.
So long,
Thomas