Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

Rosario: wildcard matching exercise: solution 1

201 views
Skip to first unread message

Rosario19

unread,
Aug 17, 2018, 12:49:14 AM8/17/18
to
On Sat, 11 Aug 2018 15:49:53 +0300, Anton Shepelev wrote:
>Hello, all
>
>I invite all interested parties to participate in a small
>programming exercise: write a function that tests whether a
>string matches a standard wildcard pattern. The only
>special characters in a pattern are '*' and '?', with the
>conventional semantics of any string and any character. The
>other characters must match exactly in a case-sensitive
>manner.
>
>The prototype is:
>
> char /* returns 1 upon match and 0 upon mismatch */
> wcmatch
> ( char const * const pat, /* the pattern, e.g. "*.txt" */
> char const * const text /* the string to match, e.g. "reame.txt" */
> )
>
>I will not provide any futher requirements to encourage
>variety in approaches from different participants.
>
>In order to avoid mutual influence, let us abstain from
>posting solutions until Monday. It would be ideal if you
>wrote code entires by yourself without consulting existing
>implementations.

the function would be wcmatch()
The problem is can be bugged, and it can be one couple of string
can make that function exponential (so it would be better find them)

The bug can be the function for some type of input begin one infinite
loop or return the wrong result (due that i don't know what happen
100%).

The questions are:
1) are 900 call itself too much few, for preventing stack overflow?
2) what are the strings that make that function behave as one
exponential functions?
3) there is some test case in what that function return wrong result?
4) someone see some bug?

for the question 3 here it passed all test of the function
testString()

#include <stdio.h>
#include <time.h>

#define F for
#define R return
#define P printf
#define GG(a,b) if(a)goto b

/*
Input: a, b pointer to C array 0 terminated (strings)
Search the string "a" that can contain too wildcard:
"*" as generic substring that can be null too
"?" as generic substring of 1 char
in the string "b";
if error it return -1
(to many recursive call or too long strings or too much time)
if "a" match "b", it return 1 else
if "a" do not match "b", it return 0
NB: For Big O it could be exponential
in the wrost case (if someone find the right string i don't know)
If find one calculus take> 10 seconds time => return -1
*/
wcmatch(char* a, char* b)
{static time_t ti, tf;
static int nc,r; // nc means number of call of function
unsigned ns;
char *pa, *pb, ch;

if(nc==0)tf=ti=time(0);
++nc; // if recursivly call >900 times =>return -1
if(nc>= 900){re: --nc;R -1;}// return -1 (error)
if(a== 0){r1: --nc;R 1;}// return match result
if(b== 0){r0: --nc;R 0;}// return not match result
// begin code workaround for case input
// generate exponentials behaviour
if(nc==1) // only at start
{F(pa=a;*pa;++pa);F(pb=b;*pb;++pb);//here *pa==*pb==0
F(;pa>a&&pb>b;--pa,--pb)
{if(*pa=='*')
{F(;pa>a&&(*pa=='*'||*pa=='?');--pa)
if(*pa=='?'){GG(pb<=b,r0);--pb;}
GG(pa==a&&*a=='*',r1);
GG(pa==a&&*a=='?',r1);
GG(pa==a&&*b==*pa,r1);
F(ch=*pa;pb>b&&ch!=*pb;--pb);
if(pb==b&&*b==ch) continue;
GG(pb<=b, r0);
continue;
}
if(*pa!='?')
GG(*pa!=*pb, r0);
}
}
// end code workaround for case input
// generate exponentials behaviour
tf=time(0); //if calculate too much(e.g.exponential)return -1
GG(difftime(tf,ti)>10.0, re);//10 seconds of calculum:too much
F(ns=0;;++a,++b,++ns)
{//P("%c %c, %d %d\n", *a, *b, *a, *b);
GG(ns>=0xFFFFFF, re); //string too much long R -1
if(*a=='*'){F(;*a=='*';++a); --a;} //if*a=='*'goto the last '*'
if(!*b){GG(*a=='*'&&a[1]==0, r1);--nc;R !*a;}// if *b==0 =>exit
switch(*a)
{case '?': break;
case '*': ++a; GG(*a==0, r1); //in the case of "..*" => match
F(ch=*a,pb=b,pa=0;*pb;++pb) //search ch in b until\0
{if(*pb==ch||ch=='?')
{pa=pb;
if(a[1])
{GG((r=wcmatch(a+1,pb+1))==1, r1);
GG(r==-1,re); //stack overflow
} //exponential calc
}
}
GG(pa==0, r0); // ch not found
b=pa;
break; // [here *a!=0]
default: GG(*a!=*b, r0); //here *b!=0;
} //if*a==0=>*a=0!=*b return 0
}
}

testString(void)
{char *a1[]={"","" ,"*","?","a*" , "*n" ,"*n*n" ,"*a*n*t*o*n*"
,"******","*","**","**","a**","a*a*a*a*a*a*a*a*a*a*a*a*a*a*c"
,0};
char *b1[]={"","1","" ,"" ,"anton", "anton","anton","anton"
,"anton" ,"*","**","*" ,
"a","aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab",0};
int c1[]={ 1, 0, 1, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1,
0,0};
char *a2[]={"123", "1?3", "1??", "?", "??", "?1", "a*", "a*", "a*b",
"?*a", "???*???", 0};
char *b2[]={"123", "123", "145", "1", "34", "31", "a" , "ab",
"acjgofb", "1*a", "123123" , 0};
char *b3[]={"129", "1234", "14", "", "2", "111", "b" , "n" ,
"acjgofbf", "1ab", "12312" , 0};
char *a3[]={"a*?b","a","?","*?" ,"ab", "?b","b?","*c" ,"c*","a*b"
,"a*b" , "?*", "*s?" ,0};
char *b4[]={"acb" ,"a","1","huhu","ab", "1b","b5","c"
,"c1","a2iiibssb","a123b", "12", "abcs?",0};
char *b5[]={"ab" ,"" ,"" ,"" ,"abv","b", "b" ,"acb","1c","a"
,"abc" , "" , "s" ,0};
int i;

for(i=0;a1[i];++i)
{if(c1[i]!=wcmatch(a1[i], b1[i]))
{printf("Error: [%s][%s]->%d ok=%d\n", a1[i], b1[i],
wcmatch(a1[i], b1[i]),c1[i]);
return 0;
}
}
for(i=0;a2[i];++i)
{if(1!=wcmatch(a2[i], b2[i]))
{printf("Error: [%s][%s]->%d ok=%d\n", a2[i], b2[i],
wcmatch(a2[i], b2[i]),1);
return 0;
}
}
for(i=0;a2[i];++i)
{if(0!=wcmatch(a2[i], b3[i]))
{printf("Error: [%s][%s]->%d ok=%d\n", a2[i], b3[i],
wcmatch(a2[i], b3[i]),0);
return 0;
}
}

for(i=0;a3[i];++i)
{if(1!=wcmatch(a3[i], b4[i]))
{printf("Error: [%s][%s]->%d ok=%d\n", a3[i], b4[i],
wcmatch(a3[i], b4[i]),1);
return 0;
}
}

for(i=0;a3[i];++i)
{if(0!=wcmatch(a3[i], b5[i]))
{printf("Error: [%s][%s]->%d ok=%d\n", a3[i], b5[i],
wcmatch(a3[i], b5[i]),0);
return 0;
}
}
printf("TestString: All ok\n");
return 1;
}

main()
{char *pa, *pb, a[1024], b[1024];
testString();
//P("Inserisci I stringa > "); pa=gets(a); if(pa==0)R 0;
//P("Inserisci II stringa > "); pb=gets(b); if(pb==0)R 0;
//P("[%s][%s]=%d\n", pa, pb, wcmatch(pa, pb));
R 0;
}


Rosario19

unread,
Aug 17, 2018, 3:32:01 AM8/17/18
to
On Fri, 17 Aug 2018 06:54:17 +0200, Rosario19 <R...@invalid.invalid>
wrote:

>wcmatch(char* a, char* b)
>{static time_t ti, tf;
> static int nc,r; // nc means number of call of function
> unsigned ns;
> char *pa, *pb, ch;
>
> if(nc==0)tf=ti=time(0);
> ++nc; // if recursivly call >900 times =>return -1
> if(nc>= 900){re: --nc;R -1;}// return -1 (error)
> if(a== 0){r1: --nc;R 1;}// return match result
> if(b== 0){r0: --nc;R 0;}// return not match result
> // begin code workaround for case input
> // generate exponentials behaviour
> if(nc==1) // only at start
> {F(pa=a;*pa;++pa);F(pb=b;*pb;++pb);//here *pa==*pb==0

better

F(pa=a,ns=0;*pa&&ns<0xFFFFFF;++pa,++ns);
GG(ns>=0xFFFFFF,re);
F(pb=b,ns=0;*pb&&ns<0xFFFFFF;++pb,++ns);//here *pa==*pb==0
GG(ns>=0xFFFFFF,re);

Rosario19

unread,
Aug 17, 2018, 5:34:56 AM8/17/18
to
On Fri, 17 Aug 2018 06:54:17 +0200, Rosario19 wrote:

>the function would be wcmatch()

the last version..., there is something rendundance (cheked 2 or more
time the same)

#include <stdio.h>
#include <time.h>

#define F for
#define R return
#define P printf
#define GG(a,b) if(a)goto b

/*
Input: a, b pointer to C array 0 terminated (strings)
Search the string "a" that can contain too wildcard:
"*" as generic substring that can be null too
"?" as generic substring of 1 char
in the string "b";
if error it return -1
(to many recursive call or too long strings or too much time)
if "a" match "b", it return 1 else
if "a" do not match "b", it return 0
NB: Errors possible 98%; For Big O it could be O(2^n)exponential
in the wrost case (if someone find the right string i don't know)
If find one calculus take > 10 seconds time => return -1
*/
wcmatchRosario(char* a, char* b)
{static time_t ti, tf;
static int nc,r; // nc means number of call of this function
unsigned ns;
char *pa, *pb, ch;

if(nc==0)tf=ti=time(0); //nc==0 at start
++nc; // if recursivly call >900 times => error
if(nc>= 900){re: --nc;R -1;}// return error
if(a== 0){r1: --nc;R 1;}// return match result
if(b== 0){r0: --nc;R 0;}// return not match result
// begin code workaround on case input
// generate exponentials behaviour
if(nc==1) // only at start
{F(pa=a,ns=0;*pa&&ns<0xFFFFFF;++pa,++ns);GG(ns>=0xFFFFFF,re);
F(pb=b,ns=0;*pb&&ns<0xFFFFFF;++pb,++ns);GG(ns>=0xFFFFFF,re);
// at end *pa==*pb==0
F(;pa>a&&pb>b;--pa,--pb)
{if(*pa=='*')
{F(;pa>a&&(*pa=='*'||*pa=='?');--pa)
if(*pa=='?'){GG(pb<=b,r0);--pb;}
GG(pa==a&&*a=='*',r1);
GG(pa==a&&*a=='?',r1);
GG(pa==a&&*b==*pa,r1);
F(ch=*pa;pb>b&&ch!=*pb;--pb);
if(pb==b&&*b==ch) continue;
GG(pb<=b, r0);
continue;
}
if(*pa!='?')
GG(*pa!=*pb, r0);
}
}
// end code workaround on case input
// generate exponentials behaviour
tf=time(0); // if calculate too much(e.g. exponential)return -1
GG(difftime(tf,ti)>10.0, re); //10 seconds of calculum:too much
F(ns=0;;++a,++b,++ns)
{//P("%c %c, %d %d\n", *a, *b, *a, *b);
GG(ns>=0xFFFFFF, re); //string too much long R -1
if(*a=='*'){F(;*a=='*';++a); --a;} //if*a=='*'goto the last '*'
if(!*b){GG(*a=='*'&&a[1]==0, r1);--nc;R !*a;}// if *b==0 =>exit
switch(*a)
{case '?': break;
case '*': ++a; GG(*a==0, r1); //in the case of "..*" => match
F(ch=*a,pb=b,pa=0;*pb;++pb) //search ch in b until\0
{if(*pb==ch||ch=='?')
{pa=pb;
if(a[1])
{
GG((r=wcmatchRosario(a+1,pb+1))==1, r1);
GG(r==-1,re); //stack overflow
} //exponential calc
}
}
GG(pa==0, r0); // ch not found
b=pa;
break; // [here *a!=0]
default: GG(*a!=*b, r0); //here *b!=0;
} //if*a==0=>*a=0!=*b return not match
}
}

Rosario19

unread,
Aug 17, 2018, 10:37:00 AM8/17/18
to
On Fri, 17 Aug 2018 11:39:59 +0200, Rosario19 <R...@invalid.invalid>
wrote:

>On Fri, 17 Aug 2018 06:54:17 +0200, Rosario19 wrote:
>
>>the function would be wcmatch()

yes it is possible i don't know what happen...
yes it is easy to criticize

#include <stdio.h>
#include <time.h>

#define F for
#define R return
#define P printf
#define G goto
{GG(*a=='*'&&a[1]==0, r1);
F(pa=a,ns=0;*pa&&ns<0xFFFFFF;++pa,++ns);GG(ns>=0xFFFFFF,re);
F(pb=b,ns=0;*pb&&ns<0xFFFFFF;++pb,++ns);GG(ns>=0xFFFFFF,re);
// at end *pa==*pb==0
F(;pa>a&&pb>b;--pa,--pb)
{if(*pa=='*')
{F(;pa>a&&(*pa=='*'||*pa=='?');--pa)
if(*pa=='?'){GG(pb<=b,r0);--pb;}
GG(pa==a&&*a=='*',r1);
GG(pa==a&&*a=='?',r1); // ????
GG(pa==a&&*b==*pa,r1);
F(ch=*pa;pb>b&&ch!=*pb;--pb);
if(pb==b&&*b==ch) continue;
GG(pb<=b, r0);
continue;
}
if(*pa!='?')
GG(*pa!=*pb, r0);
}

GG(pa==a&&*a=='*',r1);
if(pb==b&&pa==a)
{GG(*a=='?'||*a=='*'||*a==*b,r1);
G r0;
}
GG(*pb==0&&pa==a&&*a==0,r0);
GG(*pb==0&&pa>a, r0);
//P("x");

Rick C. Hodgin

unread,
Aug 17, 2018, 10:54:23 AM8/17/18
to
On 08/17/2018 10:42 AM, Rosario19 wrote:
> [snip]
> #define F for
> #define R return
> #define P printf
> #define G goto
> #define GG(a,b) if(a)goto b
> [snip]
> [snip]

Rosario, you have a remarkable mind to be able to code like that.
I am serious. It's quite the powerhouse display to be able to
keep it all sorted.

--
Rick C. Hodgin

Keith Thompson

unread,
Aug 17, 2018, 12:03:20 PM8/17/18
to
Rosario19 <R...@invalid.invalid> writes:
> On Fri, 17 Aug 2018 06:54:17 +0200, Rosario19 wrote:
>
>>the function would be wcmatch()
>
> the last version..., there is something rendundance (cheked 2 or more
> time the same)
>
> #include <stdio.h>
> #include <time.h>
>
> #define F for
> #define R return
> #define P printf
> #define GG(a,b) if(a)goto b

Nope.

[...]

--
Keith Thompson (The_Other_Keith) ks...@mib.org <http://www.ghoti.net/~kst>
Working, but not speaking, for JetHead Development, Inc.
"We must do something. This is something. Therefore, we must do this."
-- Antony Jay and Jonathan Lynn, "Yes Minister"

David Brown

unread,
Aug 17, 2018, 12:40:30 PM8/17/18
to
On 17/08/18 18:03, Keith Thompson wrote:
> Rosario19 <R...@invalid.invalid> writes:
>> On Fri, 17 Aug 2018 06:54:17 +0200, Rosario19 wrote:
>>
>>> the function would be wcmatch()
>>
>> the last version..., there is something rendundance (cheked 2 or more
>> time the same)
>>
>> #include <stdio.h>
>> #include <time.h>
>>
>> #define F for
>> #define R return
>> #define P printf
>> #define GG(a,b) if(a)goto b
>
> Nope.
>

If Rosario used rot13 to hide his code or hints, as some other posters
have done, would anyone be able to tell the difference?




Rosario19

unread,
Aug 17, 2018, 3:05:33 PM8/17/18
to
On Fri, 17 Aug 2018 16:42:05 +0200, Rosario19 <R...@invalid.invalid>
wrote:

>On Fri, 17 Aug 2018 11:39:59 +0200, Rosario19 <R...@invalid.invalid>
>wrote:
>
all my others were bugged
this should be ok with limitation on exponential calculation
return -1

#include <stdio.h>
#include <time.h>

#define F for
#define R return
#define P printf
#define GG(a,b) if(a)goto b

/*
Input: a, b pointer to C array 0 terminated (strings)
Search the string "a" that can contain too wildcard:
"*" as generic substring that can be null too
"?" as generic substring of 1 char
in the string "b";
if error it return -1
(to many recursive call or too long strings or too much time)
if "a" match "b", it return 1 else
if "a" do not match "b", it return 0
NB: For Big O it could be exponential in the wrost case;
If find one calculus take > 10 seconds time => return -1
*/
wcmatchR(char* a, char* b)
{static time_t ti, tf;
static int nc,r; // nc means number of call of this function
unsigned ns;
char *pa, *pb, ch;

if(nc==0)tf=ti=time(0); //nc==0 at start
++nc; // if recursivly call >900 times => error
if(nc>= 900){re: --nc;R -1;}// return error
if(a== 0){r1: --nc;R 1;}// return match result
if(b== 0){r0: --nc;R 0;}// return not match result
tf=time(0); // if calculate too much(e.g. exponential)return -1
GG(difftime(tf,ti)>10.0, re); //10 seconds of calculum:too much
F(ns=0;;++a,++b,++ns)
{//P("%c %c, %d %d\n", *a, *b, *a, *b);
GG(ns>=0xFFFFFF, re); //string too much long R -1
if(*a=='*'){F(;*a=='*';++a); --a;} //if*a=='*'goto the last '*'
if(!*b){GG(*a=='*'&&a[1]==0, r1);--nc;R !*a;}// if *b==0 =>exit
switch(*a)
{case '?': break;
case '*': ++a; GG(*a==0, r1); //in the case of "..*" => match
F(ch=*a,pb=b,pa=0;*pb;++pb) //search ch in b until\0
{if(*pb==ch||ch=='?')
{pa=pb;
if(a[1])
{GG((r=wcmatchR(a+1,pb+1))==1, r1);

Keith Thompson

unread,
Aug 17, 2018, 3:27:00 PM8/17/18
to
Rosario19 <R...@invalid.invalid> writes:
[...]
> #define F for
> #define R return
> #define P printf
> #define GG(a,b) if(a)goto b

No.

Kenny McCormack

unread,
Aug 17, 2018, 4:04:25 PM8/17/18
to
In article <ln7ekos...@kst-u.example.com>,
Keith Thompson <ks...@mib.org> dealt the death knell to "Rosario19" thusly:
>Rosario19 <R...@invalid.invalid> writes:
>[...]
>> #define F for
>> #define R return
>> #define P printf
>> #define GG(a,b) if(a)goto b
>
>No.

Wouldn't it a far, far better world if Kiki's opinions determined whether
or not something actually was valid C code?

Then problems like this could be caught by the compiler.

It might involve embedding his email address inside each and every compiler
binary. And it would slow down compilation considerably.

But it would clearly be worth it.

--
The randomly chosen signature file that would have appeared here is more than 4
lines long. As such, it violates one or more Usenet RFCs. In order to remain
in compliance with said RFCs, the actual sig can be found at the following URL:
http://user.xmission.com/~gazelle/Sigs/God

Rosario19

unread,
Aug 19, 2018, 12:04:47 AM8/19/18
to
On Fri, 17 Aug 2018 21:10:33 +0200, Rosario19 wrote:

>all my others were bugged

this not seems bugged and it seems not have problem
with string as "a*a*a*a*a*a*a*a*a*a*a*a*a*c"
and "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"

#include <stdio.h>
#include <time.h>

#define F for
#define R return
#define P printf
#define G goto
#define GG(a,b) if(a)goto b

/*
Input: a, b pointer to C array 0 terminated (strings)
Search the string "a" that can contain too wildcard:
"*" as generic substring that can be null too
"?" as generic substring of 1 char
in the string "b"; if error it return -1
(to many recursive call or too long strings or too much time)
if "a" match "b", it return 1 else
if "a" do not match "b", it return 0
NB: For Big O it could be exponential in the wrost case
(if someone find the right string) but
if find one calculus take > 10 seconds time => return -1
so one exponential calculus here is not possible because
it would return -1 (error)
*/
wcmatchRx(char* a, char* b)
{static time_t ti, tf;
static int nc,r; // nc means number of call of this function
unsigned ns;
char *pa, *pb, ch;

if(nc==0)tf=ti=time(0); //nc==0 at start 1 call
++nc; // if recursivly call >900 times => error
if(nc>= 900){re: --nc;R -1;}// return error
if(a== 0){r1: --nc;R 1;}// return match result
if(b== 0){r0: --nc;R 0;}// return not match result

// This code is for find problematic string, that make algo
// behaviour as exponential
// Each substring of "a" enclose by ** or */0 must be in "b"
// This is the code return the result 99% of times in the
// case not match
if(nc==1) // check only for the first call
{GG(*a=='*'&&a[1]==0, r1);
F(pa=a,pb=b,ns=0,r=1;*pa&&*pb;++pb,++ns)
{GG(ns>=0xFFFFFF, re);
if(*pa=='*')
{F(;*pa=='*';++pa,++ns)
GG(ns>=0xFFFFFF, re);
r=0;
}
F(;*pa&&*pb&&*pa!='*';++pa,++pb,++ns)
{GG(ns>=0xFFFFFF, re);
if(*pa=='?'||*pa==*pb)continue;
break;
}
if(*pa=='*'&&pb>b&&*pb!=0)--pb;
GG(r==1&&*pa!='*'&&*pa!=*pb, r0);
if(*pa=='*')
{F(;*pa=='*';++pa,++ns)
GG(ns>=0xFFFFFF, re);
r=0;
}
GG(*pb==0&&*pa!=0, r0);
}
GG(*pb==0&&*pa!=0, r0);
}

tf=time(0); // if calculate too much(e.g. exponential)return -1
GG(difftime(tf,ti)>10.0, re); //10 seconds of calculum:too much
F(ns=0;;++a,++b,++ns)
{//P("%c %c, %d %d\n", *a, *b, *a, *b);
GG(ns>=0xFFFFFF, re); //string too much long R -1
if(*a=='*'){F(;*a=='*';++a); --a;} //if*a=='*'goto the last '*'
if(!*b){GG(*a=='*'&&a[1]==0, r1);--nc;R !*a;}// if *b==0 =>exit
switch(*a)
{case '?': break;
case '*': ++a; GG(*a==0, r1); //in the case of "..*" => match
F(ch=*a,pb=b,pa=0;*pb;++pb) //search ch in b until\0
{if(*pb==ch||ch=='?')
{pa=pb;
if(a[1])
{GG((r=wcmatchRx(a+1,pb+1))==1, r1);

Rosario19

unread,
Aug 19, 2018, 12:04:56 AM8/19/18
to
On Fri, 17 Aug 2018 12:26:53 -0700, Keith Thompson wrote:
>Rosario19 <R...@invalid.invalid> writes:
>[...]
>> #define F for
>> #define R return
>> #define P printf
>> #define GG(a,b) if(a)goto b
>
>No.
>
>[...]

why not? it is a probelm in the compiler not right traslate macro?
it is a problem of the human reader that can not follow in good way
macro?

Chris M. Thomasson

unread,
Aug 19, 2018, 12:26:10 AM8/19/18
to
Agreed. For some reason the code makes me want to think of the following
song:

https://youtu.be/ByqYEzugleE

;^)

Keith Thompson

unread,
Aug 19, 2018, 1:24:54 AM8/19/18
to
Since you ask (and I've said this before), these macros do nothing but
make the code more difficult to read. I know what "return" means when I
see it in a program. If I see "R", I have to look it up.

Saving 5 characters is not an advantage.

Kenny McCormack

unread,
Aug 19, 2018, 1:42:02 AM8/19/18
to
In article <lno9dzq...@kst-u.example.com>,
Keith Thompson <ks...@mib.org> wrote:
>Rosario19 <R...@invalid.invalid> writes:
>> On Fri, 17 Aug 2018 12:26:53 -0700, Keith Thompson wrote:
>>>Rosario19 <R...@invalid.invalid> writes:
>>>[...]
>>>> #define F for
>>>> #define R return
>>>> #define P printf
>>>> #define GG(a,b) if(a)goto b
>>>
>>>No.
>>>
>>>[...]
>>
>> why not? it is a probelm in the compiler not right traslate macro?
>> it is a problem of the human reader that can not follow in good way
>> macro?
>
>Since you ask (and I've said this before), these macros do nothing but
>make the code more difficult to read. I know what "return" means when I
>see it in a program. If I see "R", I have to look it up.
>
>Saving 5 characters is not an advantage.

Rosario:

Future versions of the C standard will prevent this sort of thing
(stuff that Kiki doesn't like) from compiling at all. So, you'd best
adapt now. As mentioned earlier, this will be implemented by embedding K's
email address in the compiler binary - and checking with him as part of
each and every compilation. This will slow down compilation speeds all
over the world, but it will certainly be worthwhile in the long run.

--
http://www.rollingstone.com/politics/news/the-10-dumbest-things-ever-said-about-global-warming-20130619

Rosario19

unread,
Aug 19, 2018, 2:16:17 AM8/19/18
to
On Sun, 19 Aug 2018 06:09:54 +0200, Rosario19 <R...@invalid.invalid>
wrote:

>On Fri, 17 Aug 2018 21:10:33 +0200, Rosario19 wrote:
>
>>all my others were bugged
>
>this not seems bugged and it seems not have problem
>with string as "a*a*a*a*a*a*a*a*a*a*a*a*a*c"
>and "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"

this seems to be fast too
(note that in normal use string not match 99%? 80%? times)
ti=time(0);

Rosario19

unread,
Aug 19, 2018, 2:21:31 AM8/19/18
to
On Sat, 18 Aug 2018 22:24:44 -0700, Keith Thompson wrote:

>Since you ask (and I've said this before), these macros do nothing but
>make the code more difficult to read. I know what "return" means when I
>see it in a program. If I see "R", I have to look it up.
>
>Saving 5 characters is not an advantage.

Saving 5 bytes means that in one line can enter more instructions
so the programmer can be more free of indent better

Rosario19

unread,
Aug 19, 2018, 2:24:08 AM8/19/18
to
for example what is clearer

f(unsigned long log a, unsigned long long b, unsigned long long c)

or

f(u64 a, u64 b, u64 c)

?

Bart

unread,
Aug 19, 2018, 6:21:05 AM8/19/18
to
f(u64 a, b, c)

is clearer (although not C).

I don't have a problem with typing keywords provided it's not over the
top like writing 'unsigned long long int' three times, although these
days you can use 'uint64_t'.

It's lots of mixed punctuation that I have trouble with, especially when
shifted. 'R' for 'return' is also shifted.

--
bart

fir

unread,
Aug 19, 2018, 7:43:03 AM8/19/18
to
W dniu niedziela, 19 sierpnia 2018 12:21:05 UTC+2 użytkownik Bart napisał:
> On 19/08/2018 07:29, Rosario19 wrote:
> > On Sun, 19 Aug 2018 08:26:44 +0200, Rosario19 wrote:
> >
> >> On Sat, 18 Aug 2018 22:24:44 -0700, Keith Thompson wrote:
> >>
> >>> Since you ask (and I've said this before), these macros do nothing but
> >>> make the code more difficult to read. I know what "return" means when I
> >>> see it in a program. If I see "R", I have to look it up.
> >>>
> >>> Saving 5 characters is not an advantage.
> >>
> >> Saving 5 bytes means that in one line can enter more instructions
> >> so the programmer can be more free of indent better
> >
> > for example what is clearer
> >
> > f(unsigned long log a, unsigned long long b, unsigned long long c)
> >
> > or
> >
> > f(u64 a, u64 b, u64 c)
> >
> > ?
> >
>
> f(u64 a, b, c)
>
> is clearer (although not C).
>

obviously its better (unles you consider versions of c that have default types but on a ground of present c int a,b,c is better

i recantly im physically agraviated i
cant retype

void foo()
{
char* out = foo();

char* out = boo();

}

both possible options i mean making second instance of out or reusing the same
seem better (at leas sometimes) than aggreviating compiler errors)

[nopt to mention some other close things i got on mind recently but i forgot at the moment, but there is a realy space in c to improve it]


on the other side when you redefine a
function of given name, same as the one you prviously used from dll it goes silently

Ben Bacarisse

unread,
Aug 19, 2018, 7:54:10 AM8/19/18
to
Rosario19 <R...@invalid.invalid> writes:
<snip>
> for example what is clearer
>
> f(unsigned long log a, unsigned long long b, unsigned long long c)
>
> or
>
> f(u64 a, u64 b, u64 c)
>
> ?

It depends. The first has the advantage that I know immediately what
the types are. In the second, I have to go and check. I might assume
that u64 is an unsigned int but is it, like long long, at least 64 bit
or exactly 64 bits? And is that assumption safe? Maybe I should check
the declaration of u64.

Anyway, let's grant that the second is obviously clearer than the
first. That does not mean that

if(!*b){GG(*a=='*'&&a[1]==0, r1);--nc;R !*a;}

is obviously clearer than

if (!*b) {
if (*a == '*' && a[1] == 0)
goto r1;
--nc;
return !*a;
}

That some cases of shortening are good does not mean that all cases of
shortening are good. That's just bad reasoning. And if it were true,
why did you waste space with ==0 for a[1] when ! does for the others?
And why did you leave two redundant spaces in there? Aren't all
redundant spaces a waste?

I don't post to try to change your style -- I am sure you don't care
about such remarks, but you might like to know why you get so few
comments on your code. I, for one, just want to read it.

--
Ben.

fir

unread,
Aug 19, 2018, 7:59:51 AM8/19/18
to
also as to what was recently a bit mentioned (and discussed back then afair)
but may reapeat is as i dont eremembered it enough

when you got structure s which have a pointer p pointing on structure x

s.p[0]=x is okay

but instead of s.*p = x working
you got *s.p=x working

i understand that s.p is like "returning a p" so *(s.p) = x but it seems to me that those oparator priorities here are mistaken
(besides if im not wrong such assigning structure to structure works ok but somparing them by == not compiles, which also seems to be as some inconsequence)


fir

unread,
Aug 19, 2018, 8:34:11 AM8/19/18
to
W dniu niedziela, 19 sierpnia 2018 13:43:03 UTC+2 użytkownik fir napisał:
> void foo()
> {
> char* out = foo();
>
> char* out = boo();
>
> }
>
> both possible options i mean making second instance of out or reusing the same
> seem better (at leas sometimes) than aggreviating compiler errors)
>

if some would instantly change in c hovever it seems that at first draft reusing would be better

> [nopt to mention some other close things i got on mind recently but i forgot at the moment, but there is a realy space in c to improve it]
>
duh, i seems remembered some of them

1) sporadic need for a post-working assigment, i mean w=2 works always in place like ++a where sporadically it could ne useful to work like a++

2) && working with p[arenthasis i mean like if (a==2) && (b==2) {/*smthng*/ }
this looks weird and i at all dont like && and say it should be replaced by and
word but it some tiny remark to consider

[not to say combined comparisons are imo better so if (a==2) && (b==2) should be more like if(a==2==b) taking for consideration it should be -= it shuld be if(a=b=2)

3) consideration if

a=b=c=d;

works the good way, it works c=d, b=c,a=b where im not sure is the alternative wouldnt be better (maybe not but also some tiny remerk to consider)
(present form seems more usefull as you usually want only tos et 2 or 3 variables to zero, but outside of that im not sure) (this not present form look like shift left and shift left is sometimes useful a[0]=a[1]=a[2]=a[3]
also you could write something like this then x=y=x whatever it is (depending on rules it may be swap or last =x is useless) it amay not be mauch important here but if you generalise such combinations on not only pure assigments but something more eleborated it may show some difference)


got possibly also some other tiny thoughts on present c syntax but forgot





Bart

unread,
Aug 19, 2018, 8:58:24 AM8/19/18
to
On 19/08/2018 12:54, Ben Bacarisse wrote:
> Rosario19 <R...@invalid.invalid> writes:
> <snip>
>> for example what is clearer
>>
>> f(unsigned long log a, unsigned long long b, unsigned long long c)
>>
>> or
>>
>> f(u64 a, u64 b, u64 c)
>>
>> ?
>
> It depends. The first has the advantage that I know immediately what
> the types are.

I don't. I need to parse in my head several tokens to know what the type
is (bear in mind they can also be written in any order, with and without
'int'). And then you have to consider whether your implementation
actually does use a 64-bit width for that type.

Regarding that typo, imagine if it occurred here:

extern void f(unsigned long log);

Now it looks like the parameter type is 'unsigned long', with an
optional parameter name of 'log'. That's a harder error to make when the
type specifier is a single identifier.

Meanwhile the parameter names a, b and c of the top example are lost in
the noise. And it gets worse when people start adding in *, const and
restrict.

> In the second, I have to go and check.

No, only when it doesn't work.

Although I'm surprised you wouldn't trust the programmer to do the
sensible thing for something so straightforward, but presumably you are
OK with layer upon layer of gratuitous macros and typedefs.

(And it you say you use a tool to quickly unravel the latter, you can
use the same tool for u64.)

--
bart

fir

unread,
Aug 19, 2018, 10:04:07 AM8/19/18
to
lulz, i may add i discovered/noticed one funny thing here: (i mean when thinking on core c syntax thingz)

such things like 1 2 23 56434 are in fact not ints; so what they are ? (s p o i l y)
they are enums (those my favourite kinds of them i mean those ad-hoc ones) (and that possibly makes some interestin possibilities [coz i probably could make enums uniformly in some ontextes])


i noticed yet second think (which seem sorta obvious (almost) but wasnt noticed by me before (at least not clear like here now): "switch is a bigger extended form of if " switch is just if but extended [specificaly my note that this default is like else in normal if and
all thet 'positive' (esp as it is called by me as i define positive (including zero) as true negative as false) cases are like in positive case of if - interestink remark to know it, esp as you may look on switch in different
eyes (like baroque if) :/

fir

unread,
Aug 19, 2018, 10:24:06 AM8/19/18
to
W dniu niedziela, 19 sierpnia 2018 16:04:07 UTC+2 użytkownik fir napisał:
>[coz i probably could make enums uniformly in some ontextes])

here instead of meke should be 'treat' (erratum, i make a lot of typos - its becouse you write something you got
a few versions of possible sentences
that can be written and sometimes one
overlaps/overwrites another... thats the source of various typos... some others are fingermisses etc.. hovever if not focusing on typos the thought is ok ;c)

Ben Bacarisse

unread,
Aug 19, 2018, 10:32:33 AM8/19/18
to
Bart <b...@freeuk.com> writes:

> On 19/08/2018 12:54, Ben Bacarisse wrote:
>> Rosario19 <R...@invalid.invalid> writes:
>> <snip>
>>> for example what is clearer
>>>
>>> f(unsigned long log a, unsigned long long b, unsigned long long c)
>>>
>>> or
>>>
>>> f(u64 a, u64 b, u64 c)
>>>
>>> ?
>>
>> It depends. The first has the advantage that I know immediately what
>> the types are.
>
> I don't. I need to parse in my head several tokens to know what the
> type is (bear in mind they can also be written in any order, with and
> without 'int').

I think you are missing the point. Maybe I was not clear. There is a
big difference between a standard type which, even if it takes you a few
fractions of a second to read, is not a mystery like u64. Immediate (as
I intended it) carries a meaning of "here and now" and "directly" rather
than simply "quickly".

> And then you have to consider whether your
> implementation actually does use a 64-bit width for that type.
>
> Regarding that typo, imagine if it occurred here:
>
> extern void f(unsigned long log);

Imagine if a typo had occurred in the typedef of u64. Imagine if
there's a type i64 (with U and I so close on the keyboard) and i64 had
been intended? These sorts of claims don't really go anywhere.

>> In the second, I have to go and check.
>
> No, only when it doesn't work.

What does that mean? The code may work but be wrong. I think this is a
fundamental difference between your view of what a program is and mine.
I've spent a lot of time fixing code that presumably once worked which
would have been trivial to write correctly the first time round.

That last part is key because you've dismissed my complaints about
non-portable code as simply the usual cost of program maintenance. You
say: fix code when it breaks you don't waste time before that. But
writing

memcpy(&x, ptr, sizeof x);

is not really harder than writing

x = *(T *)ptr;

for a potentially unaligned access.

> Although I'm surprised you wouldn't trust the programmer to do the
> sensible thing for something so straightforward,

If reading the code for fun, I don't care, but why am wondering about
these types? Often it is because the code has broken and some unwritten
assumption has turned out to less universal than originally thought.

The most important reader of the code is the person who has to fix it
when it no longer works. Everyone else can trust the programmer, as
would I in that situation. After all, there's presumably lots of test
evidence that it works.

> but presumably you
> are OK with layer upon layer of gratuitous macros and typedefs.

That's the politician speaking again. Oh yes, if value human rights you
must be fine with terrorists coming into the country. I don't know why
you think I am OK with "layer upon layer of gratuitous macros and
typedefs".

--
Ben.

Ben Bacarisse

unread,
Aug 19, 2018, 10:35:40 AM8/19/18
to
Ben Bacarisse <ben.u...@bsb.me.uk> writes:
<snip>
> I don't post to try to change your style -- I am sure you don't care
> about such remarks, but you might like to know why you get so few
> comments on your code. I, for one, just want to read it.

Bad typo there -- a missing "don't": "I, for one, just don't want to
read it".

--
Ben.

Joe Pfeiffer

unread,
Aug 19, 2018, 10:51:43 AM8/19/18
to
It makes it unreadable for anyone other than the person who wrote the
code.

Joe Pfeiffer

unread,
Aug 19, 2018, 11:18:33 AM8/19/18
to
In this example, the u64 version is clearer to me -- that it is unsigned
is implicit in the u, and the 64 tells me how long the type is (make
some assumptions about the macro or typedef defining u64, of course).
uint64_t would have been even clearer, since that's a macro that appears
in a standard header so I've seen it before.

A better comparison to trying to read your code would be to compare

f(unsigned long long a, unsigned long long b, unsigned long long c)

to

#define U(x) unsigned long long x
f(U(a),U(b),U(c))

in which the single U macro isn't really bad in isolation, but doesn't
help either. If the definition of U were in a wad of a dozen macros,
some of which were defining new types and others were replacing "for" as
you do, it would contribute to making it less readable.

Bart

unread,
Aug 19, 2018, 2:08:08 PM8/19/18
to
On 19/08/2018 15:32, Ben Bacarisse wrote:
> Bart <b...@freeuk.com> writes:

>> No, only when it doesn't work.
>
> What does that mean? The code may work but be wrong. I think this is a
> fundamental difference between your view of what a program is and mine.
> I've spent a lot of time fixing code that presumably once worked which
> would have been trivial to write correctly the first time round.
>
> That last part is key because you've dismissed my complaints about
> non-portable code as simply the usual cost of program maintenance. You
> say: fix code when it breaks you don't waste time before that. But
> writing
>
> memcpy(&x, ptr, sizeof x);
>
> is not really harder than writing
>
> x = *(T *)ptr;
>
> for a potentially unaligned access.

If going back to that issue, the original source is not in C. While it
is possible to use memcpy there too, I try not to pander to the demands
of C too much, so that there is a clear dividing line between the
languages. (Otherwise it's a can of worms.)

There are other targets to consider after all.

>> but presumably you
>> are OK with layer upon layer of gratuitous macros and typedefs.
>
> That's the politician speaking again. Oh yes, if value human rights you
> must be fine with terrorists coming into the country.


I don't know why
> you think I am OK with "layer upon layer of gratuitous macros and
> typedefs".

I've rarely seen you complain about such code.

Except here, where the mildest use of typedef to define aliases like i32
and u64, which are commonly used and instantly understood, and which are
actually built-in to some languages (eg Rust), elicited a response from
you. (And it wasn't even used in a real program as far as I could see.)

Meanwhile I've had to do battle with code like this:

struct stat {
__dev_t st_dev;
__ino_t st_ino;
__mode_t st_mode;
__nlink_t st_nlink;
__uid_t st_uid;
__gid_t st_gid;
__dev_t st_rdev;
__off64_t st_size;
__blksize_t st_blksize;
__blkcnt64_t st_blocks;
};

where every int member has its own private typedef, which is frankly
ridiculous. I would have loved that such a struct used i16 and u32!

And actually, while I would never use Rosario's R and F macros and don't
think much of the practice, they are amongst the simplest macros I've
come across other than defining a constant: no parameters, and mapping
to a single keyword, for most of them.

Applying -E will fix the source code. But try that with truly
convoluted, nested macros, and you just get a sea of parentheses.


--
bart

Keith Thompson

unread,
Aug 19, 2018, 3:20:06 PM8/19/18
to
The first. Anyone who knows C can understand it. To understand the
second, I have to know what "u64" is. Where is it defined, and how?
It's probably meant to be a 64-bit unsigned integer type, but is it
defined correctly?

But it's not nearly as bad as using "R" for "return".

Replacing u64 by uint64_t would make the second one clearer, but it
would still have a different meaning than the first.

(This is ignoring the misspelling of "long", the use of implicit
int, and the meaningless function name "f", which is ok for an example
but not in real code.)

David Brown

unread,
Aug 19, 2018, 3:46:16 PM8/19/18
to
They are subtly different things - "unsigned long long" is an unsigned
type at least 64 bits long, while (presumably) your non-standard u64
type is /exactly/ 64 bits. In practice, "unsigned long long" is going
to be exactly 64-bits - although so might "unsigned long".

The standard has a way to write this:

f(uint64_t a, uint64_t b, uint64_t c);

That seems clear to me - short enough, standard, understood by all, and
no need for extra typedefs that /look/ standard, but aren't.


And if you think that a standard typename is too long (and I agree that
"unsigned long long" is unwieldy), then use a typedef:

typedef unsigned long long ull;

f(ull a, ull b, ull c);

That's fine. Using macros to obfuscate common keywords or function
names, however, is not fine. Using them to obfuscate some hideous
unstructured coding with goto's is /definitely/ not fine.

David Brown

unread,
Aug 19, 2018, 4:08:58 PM8/19/18
to
On 19/08/18 20:07, Bart wrote:
> On 19/08/2018 15:32, Ben Bacarisse wrote:
>> Bart <b...@freeuk.com> writes:
>
<snip>
>>> but presumably you
>>> are OK with layer upon layer of gratuitous macros and typedefs.
>>
>> That's the politician speaking again.  Oh yes, if value human rights you
>> must be fine with terrorists coming into the country.
>
>
>   I don't know why
>> you think I am OK with "layer upon layer of gratuitous macros and
>> typedefs".
>
> I've rarely seen you complain about such code.
>
> Except here, where the mildest use of typedef to define aliases like i32
> and u64, which are commonly used and instantly understood, and which are
> actually built-in to some languages (eg Rust), elicited a response from
> you. (And it wasn't even used in a real program as far as I could see.)

There was no typedef here. There was just an assumption about what u64
meant. And when you see "u64" in code, there are many, many things it
/could/ mean. Obviously context will narrow this down somewhat - we
could see here, for example, that it must be a type.

But consider the possibilities for what "u64" means:

A typedef for "unsigned long long" on a 64-bit cpu.

A typedef for "unsigned long", on 64-bit *nix.

A macro instead of a typedef.

A typedef for uint64_t.

A typedef for uint_least64_t.

A typedef for uint_fast64_t.

A typedef for "unsigned long" or "unsigned long long" with unwritten and
unchecked assumptions about it being exactly 64 bits.

An application-specific type for holding counts of people under 64 years
old.


On the other hand, if you see "unsigned long long", you know /exactly/
what that means - including knowing exactly what it /doesn't/ say (it
does not say precisely 64 bits) if the platform is not given.
Similarly, if you see "uint64_t" you know /exactly/ what it means,
including the restrictions that the platform has.



>
> Meanwhile I've had to do battle with code like this:
>
>     struct stat {
>         __dev_t         st_dev;
>         __ino_t         st_ino;
>         __mode_t        st_mode;
>         __nlink_t       st_nlink;
>         __uid_t         st_uid;
>         __gid_t         st_gid;
>         __dev_t         st_rdev;
>         __off64_t       st_size;
>         __blksize_t     st_blksize;
>         __blkcnt64_t    st_blocks;
>     };
>
> where every int member has its own private typedef, which is frankly
> ridiculous. I would have loved that such a struct used i16 and u32!

I don't think it is very fair to blame /Ben/ for that structure!

There are, however, very good reasons for having lots of individual
typedefs here. The sizes of these sorts of type have changed through
the history of *nix systems. Early *nix systems had 16-bit counters for
user ids - later ones have 32-bit. By making "__uid_t" a typedef, the
change for supporting large numbers of users could be done by changing
/one/ typedef line in /one/ header, and re-compiling the kernel,
libraries and utilities. If someone had used "i16" instead, so that it
now had to be changed to "i32", it would need to be changed in tens of
thousands of lines scattered through thousands of programs - anything
that tracked a user id. And that applies only to the kernel, standard
libraries and standard utilities - all user programs that needed to
track a user id would be broken.

The success and longevity of *nix comes partly because the people
designing it and coding it thought about flexibility and maintainability
from the start - they did not think "640 KB is enough for anyone" and
use fixed sizes.


Other reasons for typedefs are to improve code readability and perhaps
also the use of automated tools. For critical types, Linux (and a lot
of other code) uses struct wrappers around the real value type, to get
stronger checking from the compiler.

Anton Shepelev

unread,
Aug 19, 2018, 6:03:05 PM8/19/18
to
Joe Pfeiffer to Rosario19:

> > why not? it is a probelm in the compiler not right
> > traslate macro? it is a problem of the human reader
> > that can not follow in good way macro?
>
> It makes it unreadable for anyone other than the person
> who wrote the code.

Mickle useful for keeping a job, innit?

--
() ascii ribbon campaign -- against html e-mail
/\ http://preview.tinyurl.com/qcy6mjc [archived]

Anton Shepelev

unread,
Aug 19, 2018, 6:06:08 PM8/19/18
to
David Brown:

> But consider the possibilities for what "u64" means:
>
> A typedef for "unsigned long long" on a 64-bit cpu.
>
> A typedef for "unsigned long", on 64-bit *nix.
>
> A macro instead of a typedef.
>
> A typedef for uint64_t.
>
> A typedef for uint_least64_t.
>
> A typedef for uint_fast64_t.
>
> A typedef for "unsigned long" or "unsigned long long" with unwritten and
> unchecked assumptions about it being exactly 64 bits.
>
> An application-specific type for holding counts of people under 64 years
> old.

From my understanding of C philosophy, it cannot mean
anything but a 64-bit unsigned integer.

Bart

unread,
Aug 19, 2018, 6:15:54 PM8/19/18
to
On 19/08/2018 21:08, David Brown wrote:
> On 19/08/18 20:07, Bart wrote:

>> Except here, where the mildest use of typedef to define aliases like
>> i32 and u64,


> There was no typedef here.  There was just an assumption about what u64
> meant.  And when you see "u64" in code, there are many, many things it
> /could/ mean.  Obviously context will narrow this down somewhat - we
> could see here, for example, that it must be a type.
>
> But consider the possibilities for what "u64" means:
>
> A typedef for "unsigned long long" on a 64-bit cpu.
>
> A typedef for "unsigned long", on 64-bit *nix.
>
> A macro instead of a typedef.
>
> A typedef for uint64_t.
>
> A typedef for uint_least64_t.
>
> A typedef for uint_fast64_t.
>
> A typedef for "unsigned long" or "unsigned long long" with unwritten and
> unchecked assumptions about it being exactly 64 bits.
>
> An application-specific type for holding counts of people under 64 years
> old.

That's C for you. You want an unsigned 64-bit type for your program;
specifying that should be the easy bit before you proceed to write the
actual code.

Instead you have a bewildering selection of denotations: "unsigned long
long", "unsigned long long int", which may not even be 64 bits,
"uint64_t" with "#include <stdint_h>", which has the printf issue.

But you can't just create a short and sweet "u64" typedef, which seems
the perfect application for it, because of those considerations above.

When /I/ write "u64" (not often as I prefer "word64") outside of C,
there is none of this; "u64" is a built-in type and can't be redefined:

u64 a := 18 billion billion

println u64.bytes # 8 (sizeof)
println u64.bitwidth # 64
println u64.typestr # u64 ('official' internal name)
println u64.minvalue # 0
println u64.maxvalue # 18446744073709551615
println u64.type, a.type # 9 9 (9 is internal code for u64)
println word64.typestr # u64 (word64/u64 equivalent)
println a # 18000000000000000000

You don't even need to worry about the printf specifier for 'a' as on
that last line.

Can you see how that can make life simpler?

>>      struct stat {
>>          __dev_t         st_dev;
>>          __ino_t         st_ino;
>>          __mode_t        st_mode;
>>          __nlink_t       st_nlink;
>>          __uid_t         st_uid;

> the history of *nix systems.  Early *nix systems had 16-bit counters for
> user ids - later ones have 32-bit.  By making "__uid_t" a typedef, the
> change for supporting large numbers of users could be done by changing
> /one/ typedef line in /one/ header, and re-compiling the kernel,
> libraries and utilities.  If someone had used "i16" instead, so that it
> now had to be changed to "i32", it would need to be changed in tens of
> thousands of lines scattered through thousands of programs - anything
> that tracked a user id.

It might need to be changed in struct stat{...}, which is in a system
header file specific to a platform.

Would there be any reason to have a stand-alone variable destined to
contain __uid_t type actually have a __uid_t type, rather than just a
regular int which can manage either size?

If people routinely used private types for each int variable, it would
be chaotic.

Anyway, at roughly the point where things had to become 32 bits rather
than 16, you will probably find that 'int' also became 32 bits rather
than 16, and an 'int st_ino' might not have needed changing.

--
bart

Rosario19

unread,
Aug 19, 2018, 6:32:47 PM8/19/18
to
On Sun, 19 Aug 2018 08:21:27 +0200, Rosario19 wrote:

>On Sun, 19 Aug 2018 06:09:54 +0200, Rosario19 <R...@invalid.invalid>
>wrote:
>
>>On Fri, 17 Aug 2018 21:10:33 +0200, Rosario19 wrote:
>>
>>>all my others were bugged
>>
>>this not seems bugged and it seems not have problem
>>with string as "a*a*a*a*a*a*a*a*a*a*a*a*a*c"
>>and "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
>
>this seems to be fast too
>(note that in normal use string not match 99%? 80%? times)

there was a problem "**" and "" return 0 instead of 1(bug)
this would be the final version that not show bugs to me
(but full of goto instructions)
// This code below it is one workaround for problematic string,
// in input that make calculus exponential; it is based on the
// fact that each substring of "a" enclose by ** or */0 must
// be in "b".
// So this find one possible subset of solution that can be
// very big
if(nc==1) // check only for the first call
{pa=a; pb=b; ns=0;
if(*pa=='*') //eliminate *****
{F(;*pa=='*';++pa,++ns)
GG(ns>=0xFFFFFF, re);
GG(*pa==0,r1); // only *=>r1
GG(*pb==0,r0);
G li;
} //check literaly the I string
F(;*pa&&*pb&&*pa!='*';++pa,++pb,++ns)
{GG(ns>=0xFFFFFF, re);
if(*pa=='?'||*pa==*pb)continue;
break;
}
if(*pa=='*')
{if(pb>b&&*pb!=0)--pb;
F(;*pa=='*';++pa,++ns)
GG(ns>=0xFFFFFF, re);
}
else GG(*pa!=*pb, r0);
if(*pb==0){GG(*pa==0, r1); G r0;}
if(*pa==0&&pa>a)
{GG(*(pa-1)!='*', r0); G r1;}
F(;*pa;++pb,++ns)
{GG(ns>=0xFFFFFF, re);
li:;
F(;*pa&&*pb&&*pa!='*';++pa,++pb,++ns)
{GG(ns>=0xFFFFFF, re);
if(*pa=='?'||*pa==*pb)continue;
break;
}
if(*pa=='*')
{if(pb>b&&*pb!=0)--pb;
F(;*pa=='*';++pa,++ns)
GG(ns>=0xFFFFFF, re);
}
if(*pb==0){GG(*pa!=0, r0);break;}
}
ti=time(0);
}
// if above code not find one solution: than the recursive
// code for find it
if((nc & 0x3)==0) // this for make faster the code
{tf=time(0); // if calculate too much(e.g. exponential)return -1
GG(difftime(tf,ti)>10.0, re); //10 seconds of calculum:too much
}
F(ns=0;;++a,++b,++ns)
{//P("%c %c, %d %d\n", *a, *b, *a, *b);
GG(ns>=0xFFFFFF, re); //string too much long R -1
if(*a=='*') //if*a=='*'goto the last '*'
{F(;*a=='*';++a,++ns)GG(ns>=0xFFFFFF, re);
--a;
}
if(!*b){GG(*a=='*'&&a[1]==0, r1);--nc;R !*a;}// if *b==0 =>exit
switch(*a)
{case '?': break;
case '*': ++a; GG(*a==0, r1); //in the case of "..*" => match
F(ch=*a,pb=b,pa=0;*pb;++pb,++ns) //search ch in b
{GG(ns>=0xFFFFFF, re);

David Kleinecke

unread,
Aug 19, 2018, 7:27:07 PM8/19/18
to
You could use my
f(U3 a, U3 b, U3 c)

Everybody hates the idea.

Bart

unread,
Aug 19, 2018, 8:13:25 PM8/19/18
to
On 19/08/2018 15:32, Ben Bacarisse wrote:
> Bart <b...@freeuk.com> writes:

>> Regarding that typo, imagine if it occurred here:
>>
>> extern void f(unsigned long log);
>
> Imagine if a typo had occurred in the typedef of u64. Imagine if
> there's a type i64 (with U and I so close on the keyboard) and i64 had
> been intended? These sorts of claims don't really go anywhere.

That's the trouble with claiming something is error-prone. People will
make counter-suggestions.

To the extent that you wonder if it's worth considering susceptibility
to typos at all, since there will always be other kinds of
hard-to-detect typos that can be made, according to people who try to
make light of the problems.

In the case of 'i64' versus 'u64', the tokens differ by 33%, which makes
differences more obvious.

(Compare with 13% for 'int64_t' vs 'uint64_t', where the '64_t' bit
tends to draw the eye, and 'ui' can sometimes be typed when you try and
do 'i'.)

Remember the original example was along these lines:

unsigned long long int fnname(unsigned long long param1,
signed long long int param2, unsigned long int param3);
....
unsigned long long int fnname(unsigned long long param1,
unsigned long long int param2, unsigned long long param3) {}

Lots of longs in there! Assume the second is correct. The first has an
'un' missing, and a 'long', but it's not easy to spot.

Compare with the same errors in the short format:

u64 fnname(u64 param1, i64 param2, u32 param3);
....
u64 fnname(u64 param1, u64 param2, u64 param3) {}

Using stdint.h:

uint64_t fnname(uint64_t param1, int64_t param2, uint32_t param3);
....
uint64_t fnname(uint64_t param1, uint64_t param2, u64int_t param3) {}

A definitive improvement on that longwindedness; however, I don't know
about you, but I have trouble seeing past all those "_t"s.

(Finally in my syntax as I might write it, when three parameters are of
the same type:

function fnname(word64 param1, param2, param3)=>word64 ...

Now it is impossible for (1) parameters to inadvertently not match in
type; and (2) for there to be mismatch with a prototype, as there aren't
any.

And, when the return type is the same as the parameters, I can use this
trick I saw in the C++ group:

function fnname(word64 param1, param2, param3)=>typeof(param1) ...

Now you don't have up to EIGHT unwieldy types to manage and ensure they
all match. And you can't change the one type by pressing one key, as the
signed version is 'int64'. That's how you reduce typos.)

--
bart

Keith Thompson

unread,
Aug 19, 2018, 10:19:32 PM8/19/18
to
Bart <b...@freeuk.com> writes:
[...]
> That's C for you. You want an unsigned 64-bit type for your program;
> specifying that should be the easy bit before you proceed to write the
> actual code.

It is. If it needs to be exactly 64 bits, with no padding bits, you
add #include <stdint.h> and use uint64_t. Using it with printf is
slightly tricky; I usually cast to unsigned long long and use "%ull"
(or "0x%llx").

> Instead you have a bewildering selection of denotations: "unsigned long
> long", "unsigned long long int", which may not even be 64 bits,

Correct, it might not be 64 bits -- so why bring it up?

[...]

> But you can't just create a short and sweet "u64" typedef, which seems
> the perfect application for it, because of those considerations above.

Sure you can. Who's stopping you? I've explained why I find it
"u64" less readable than the alternatives, but you don't care about
my opinion, so why is that a problem?

> When /I/ write "u64" (not often as I prefer "word64") outside of C,

... you still insist on talking about it in comp.lang.c.

> there is none of this; "u64" is a built-in type and can't be redefined:
[...]
> You don't even need to worry about the printf specifier for 'a' as on
> that last line.
>
> Can you see how that can make life simpler?

Sure, using an unsigned integer type that's exactly 64 bits is slightly
easier in your language than in C. And you're willing to learn your
own language, which is a big advantage over C for you.

[...]

James Kuyper

unread,
Aug 19, 2018, 10:55:26 PM8/19/18
to
On 08/19/2018 06:06 PM, Anton Shepelev wrote:
> David Brown:
>
>> But consider the possibilities for what "u64" means:
>>
>> A typedef for "unsigned long long" on a 64-bit cpu.
>>
>> A typedef for "unsigned long", on 64-bit *nix.
>>
>> A macro instead of a typedef.
>>
>> A typedef for uint64_t.
>>
>> A typedef for uint_least64_t.
>>
>> A typedef for uint_fast64_t.
>>
>> A typedef for "unsigned long" or "unsigned long long" with unwritten and
>> unchecked assumptions about it being exactly 64 bits.
>>
>> An application-specific type for holding counts of people under 64 years
>> old.
>
> From my understanding of C philosophy, it cannot mean
> anything but a 64-bit unsigned integer.

From my understanding, it could be any of the things listed above, and
any of the things listed above could be described, by some people who
tend to be careless about such distinctions, as "a 64-bit unsigned integer".

James Kuyper

unread,
Aug 19, 2018, 10:57:59 PM8/19/18
to
On 08/19/2018 06:02 PM, Anton Shepelev wrote:
> Joe Pfeiffer to Rosario19:
>
>>> why not? it is a probelm in the compiler not right
>>> traslate macro? it is a problem of the human reader
>>> that can not follow in good way macro?
>>
>> It makes it unreadable for anyone other than the person
>> who wrote the code.
>
> Mickle useful for keeping a job, innit?

It could be; it could also ensure loosing your job, depending upon your
working environment.

luser droog

unread,
Aug 20, 2018, 12:48:00 AM8/20/18
to
On Sunday, August 19, 2018 at 5:21:05 AM UTC-5, Bart wrote:
> On 19/08/2018 07:29, Rosario19 wrote:
> > On Sun, 19 Aug 2018 08:26:44 +0200, Rosario19 wrote:
> >
> >> On Sat, 18 Aug 2018 22:24:44 -0700, Keith Thompson wrote:
> >>
> >>> Since you ask (and I've said this before), these macros do nothing but
> >>> make the code more difficult to read. I know what "return" means when I
> >>> see it in a program. If I see "R", I have to look it up.
> >>>
> >>> Saving 5 characters is not an advantage.
> >>
> >> Saving 5 bytes means that in one line can enter more instructions
> >> so the programmer can be more free of indent better
> >
> > for example what is clearer
> >
> > f(unsigned long log a, unsigned long long b, unsigned long long c)
> >
> > or
> >
> > f(u64 a, u64 b, u64 c)
> >
> > ?
> >
>
> f(u64 a, b, c)
>
> is clearer (although not C).
>

K&R to the rescue!

f(a, b, c) unsigned long long a, b, c; {

David Brown

unread,
Aug 20, 2018, 2:17:23 AM8/20/18
to
On 20/08/18 00:06, Anton Shepelev wrote:
> David Brown:
>
>> But consider the possibilities for what "u64" means:
>>
>> A typedef for "unsigned long long" on a 64-bit cpu.
>>
>> A typedef for "unsigned long", on 64-bit *nix.
>>
>> A macro instead of a typedef.
>>
>> A typedef for uint64_t.
>>
>> A typedef for uint_least64_t.
>>
>> A typedef for uint_fast64_t.
>>
>> A typedef for "unsigned long" or "unsigned long long" with unwritten and
>> unchecked assumptions about it being exactly 64 bits.
>>
>> An application-specific type for holding counts of people under 64 years
>> old.
>
> From my understanding of C philosophy, it cannot mean
> anything but a 64-bit unsigned integer.
>

Unfortunately, you are wrong.

Certainly when "u64" is used in the context of a type, it /looks/ like
it would be a 64-bit unsigned integer. And in most cases, it will be one.

But there is no requirement for it to be one. There is no law in C
programming that says the programmer is smart and does the right thing -
indeed, using "u64" when there are standard types for the purpose makes
me suspicious about how good the programmer is. (The exception, of
course, is for pre-C99 code, before uint64_t.)

So yes, you would almost certainly be correct in assuming that the
programmer /meant/ a 64-bit unsigned integer. And that in the context
of compiling with the compiler the programmer used on the target used,
it almost certainly /is/ a 64-bit unsigned integer type. But if you are
looking at using this code in a wider context, be a little wary.

The situation is a lot worse with other types, like i8 or u32. Very
often, these "home made" types are done in a lax manner - they are
typedefs (or sometimes macros) for basic types that happen to be the
right size on the one platform. And those basic types vary in size on
different implementations. (I've also seen "typedef char i8;", which of
course causes big laughs on platforms with default unsigned chars.)

It is entirely possible, of course, that these types are defined using
conditional compilation based on the sizes in <limits.h> so that they
are sure to be correct. Or they could be based on <stdint.h> types, or
on a common "port.h" header designed to be adapted to suit the target.
But you don't /know/ that - not without investigating the code a lot
more thoroughly.




David Brown

unread,
Aug 20, 2018, 5:24:49 AM8/20/18
to
On 20/08/18 00:15, Bart wrote:
> On 19/08/2018 21:08, David Brown wrote:
>> On 19/08/18 20:07, Bart wrote:
>
>>> Except here, where the mildest use of typedef to define aliases like
>>> i32 and u64,
>
>
>> There was no typedef here. There was just an assumption about what
>> u64 meant. And when you see "u64" in code, there are many, many
>> things it /could/ mean. Obviously context will narrow this down
>> somewhat - we could see here, for example, that it must be a type.
>>
>> But consider the possibilities for what "u64" means:
>>
>> A typedef for "unsigned long long" on a 64-bit cpu.
>>
>> A typedef for "unsigned long", on 64-bit *nix.
>>
>> A macro instead of a typedef.
>>
>> A typedef for uint64_t.
>>
>> A typedef for uint_least64_t.
>>
>> A typedef for uint_fast64_t.
>>
>> A typedef for "unsigned long" or "unsigned long long" with unwritten
>> and unchecked assumptions about it being exactly 64 bits.
>>
>> An application-specific type for holding counts of people under 64
>> years old.
>
> That's C for you. You want an unsigned 64-bit type for your program;
> specifying that should be the easy bit before you proceed to write the
> actual code.

When designing a programming language, you need to make certain
decisions about types (let's stick to integer types here). The main
choice is between types with tightly defined sizes and properties, and
abstract types based on usage or loser properties. Tightly defined
types are close to the hardware - such as int32_t, or types used in
assembly. Python's integer type is an example of a very abstract type -
it grows as necessary to suit the values used. C's standard integer
types are a compromise - they are abstract and not tied to machine
types, but can be implemented efficiently as though they were low-level
types.

Sometimes you really want fixed and locked types. Sometimes you really
want very abstract types. It is all a compromise, and it is impossible
to get an ideal system that is always best.

Since C99, C has had fixed size integer types as well as the standard,
more abstract types. These do pretty much everything that you might
want for coding where the higher level types are not ideal. I use them
in most of my coding.

A big problem with the C types is not caused by the language itself, but
by users of the language - there are far too many C programmers who make
unwarranted assumptions about the basic integer types instead of using
the <stdint.h> types appropriately.

>
> Instead you have a bewildering selection of denotations: "unsigned long
> long", "unsigned long long int", which may not even be 64 bits,
> "uint64_t" with "#include <stdint_h>", which has the printf issue.

printf with the <stdint.h> types is certainly ugly. Workable, but ugly.
A better system would involve a replacement of printf (perhaps with a
generic variadic macro).

For my own use, I will typically just use the specifiers like "%lu" in
printf along with types like uint32_t. It is not entirely portable, but
it is portable enough for me. A critical point here is that if I use
the code in a situation where it is not valid (perhaps uint32_t is
"unsigned int" rather than "unsigned long int"), I will get compile-time
errors. This means it is safe non-portability - unlike, say, messing
with unaligned accesses that are only detectable at runtime testing.

>
> But you can't just create a short and sweet "u64" typedef, which seems
> the perfect application for it, because of those considerations above.
>

You get "uint64_t", which is short and sweet enough for me. I prefer it
to "u64".

> When /I/ write "u64" (not often as I prefer "word64") outside of C,
> there is none of this; "u64" is a built-in type and can't be redefined:
>
> u64 a := 18 billion billion
>

That code is going to be non-portable across the Atlantic!


> println u64.bytes # 8 (sizeof)
> println u64.bitwidth # 64
> println u64.typestr # u64 ('official' internal name)
> println u64.minvalue # 0
> println u64.maxvalue # 18446744073709551615
> println u64.type, a.type # 9 9 (9 is internal code for u64)
> println word64.typestr # u64 (word64/u64 equivalent)
> println a # 18000000000000000000
>
> You don't even need to worry about the printf specifier for 'a' as on
> that last line.

Such a "print" system is entirely possible to implement in C11. I'm not
sure it is worth doing - I would prefer that if C were to get a
replacement for printf then it should be expandable to user types.
(Like "cout <<" in C++ - a system that has some disadvantages of its own.)

On the other hand, your system here makes it rather difficult to
separate the format string with its fixed characters and % specifiers
from the variable data. Such a separation is extremely useful when
translating programs and messages to different languages.

It turns out to be impossible to get a "perfect" printing system that is
suitable for all uses. You seem to think you have made the ideal type
and print system and that C's is inferior. You are wrong. Your system
is better in some ways, and worse in others - if it suits the
programming you do, then great, but it would not be as good for some of
the programming other people do.

(On the other hand, I do like your "attribute" feature for getting
information about a type. It is much like the system Ada has, and is
neater than <limits.h> or C++'s numeric_limit<> template.)

>
> Can you see how that can make life simpler?

I can see how it can make some things simpler - and some things harder.

>
>>> struct stat {
>>> __dev_t st_dev;
>>> __ino_t st_ino;
>>> __mode_t st_mode;
>>> __nlink_t st_nlink;
>>> __uid_t st_uid;
>
>> the history of *nix systems. Early *nix systems had 16-bit counters
>> for user ids - later ones have 32-bit. By making "__uid_t" a typedef,
>> the change for supporting large numbers of users could be done by
>> changing /one/ typedef line in /one/ header, and re-compiling the
>> kernel, libraries and utilities. If someone had used "i16" instead,
>> so that it now had to be changed to "i32", it would need to be changed
>> in tens of thousands of lines scattered through thousands of programs
>> - anything that tracked a user id.
>
> It might need to be changed in struct stat{...}, which is in a system
> header file specific to a platform.
>
> Would there be any reason to have a stand-alone variable destined to
> contain __uid_t type actually have a __uid_t type, rather than just a
> regular int which can manage either size?

Yes - see above.

>
> If people routinely used private types for each int variable, it would
> be chaotic.

No, it would be excessive, but not chaotic (assuming they picked
sensible names). Like most aspects of coding, there is a happy medium
to be found. I use such private types occasionally, but not everywhere.
Use them where they are useful and not too costly in terms of
understanding the code.

In languages with stronger type checking, private types are extremely
useful for ensuring code correctness. If you have types "meter",
"kilometer", "second" and "hour" in a strongly typed language, then a
mistake such as trying to add a time to a distance, or mixing up length
units, will be a compile-time error. It would involve more verbose
coding, of course - there is always a price to pay.

>
> Anyway, at roughly the point where things had to become 32 bits rather
> than 16, you will probably find that 'int' also became 32 bits rather
> than 16, and an 'int st_ino' might not have needed changing.
>

Eh, no. "int" has always been 32-bit in *nix, while UID sizes have
changed. All sorts of things change at times - and they do not all
change at once.


Bart

unread,
Aug 20, 2018, 6:18:19 AM8/20/18
to
On 20/08/2018 03:19, Keith Thompson wrote:
> Bart <b...@freeuk.com> writes:

>> But you can't just create a short and sweet "u64" typedef, which seems
>> the perfect application for it, because of those considerations above.
>
> Sure you can. Who's stopping you?

People say that. But if you try and use it, then they will say why you
shouldn't. At least when posting code here.

The answer, as I've learnt, is not to post code here.

(I haven't destroyed by C code generator drivers as I said; they were a
lot of work and are far too useful. But are now strictly for private
use. And they work beautifully despite the nay-sayers here.)

I've explained why I find it
> "u64" less readable than the alternatives,

And there you go...

but you don't care about
> my opinion, so why is that a problem?

> When /I/ write "u64" (not often as I prefer "word64") outside of C,
>
> ... you still insist on talking about it in comp.lang.c.

I'm showing what it is like when it is a total non-issue.

If you don't care for my language, look at C#, Go, D and Rust where such
a family of int types are also solidly defined and succinctly denoted.
(I'd include Java but I think that omits the unsigned versions.)

>> Can you see how that can make life simpler?
>
> Sure, using an unsigned integer type that's exactly 64 bits is slightly
> easier in your language than in C.

It's a LOT easier. And there is no one telling you it's a much better
idea to use 'unsigned long long int' despite there being doubts about
the actual width.

And you're willing to learn your
> own language, which is a big advantage over C for you.

In C, learning it is not the problem, it is figuring what denotation to
use. Some are long winded and imprecise. Others are shorter but need
"include <stdint.h>" (an annoyance when writing short throwaway code,
which is precisely when you need short forms), they have this ugly "_t"
suffix, and raise questions about the printf formats that need to be used.

Everyone knows this; it is still an annoyance.

If it was generally accepted that many people like to make their own
definitions which might be like i32, u64, or maybe int32, uint64, with
no questions asked, then that would be much better.

--
bart

Ben Bacarisse

unread,
Aug 20, 2018, 6:36:45 AM8/20/18
to
David Brown <david...@hesbynett.no> writes:
<snip>
> Eh, no. "int" has always been 32-bit in *nix,

!!! Surely you jest!

--
Ben.

Bart

unread,
Aug 20, 2018, 6:39:51 AM8/20/18
to
On 20/08/2018 10:24, David Brown wrote:
> On 20/08/18 00:15, Bart wrote:

>> That's C for you. You want an unsigned 64-bit type for your program;
>> specifying that should be the easy bit before you proceed to write the
>> actual code.
>
> When designing a programming language, you need to make certain
> decisions about types (let's stick to integer types here). The main
> choice is between types with tightly defined sizes and properties, and
> abstract types based on usage or loser properties. Tightly defined
> types are close to the hardware - such as int32_t, or types used in
> assembly. Python's integer type is an example of a very abstract type -
> it grows as necessary to suit the values used. C's standard integer
> types are a compromise - they are abstract and not tied to machine
> types, but can be implemented efficiently as though they were low-level
> types.

As in my reply to Keith, look at C#, D, Go, Rust and Java, which
although less machine-oriented than C, have a tidier set of types which
are more solidly defined and based on power-of-two widths.

In C, the nearest to a 64-bit unsigned type is unsigned long long int,
which is 64 bits or wider.

If you want it exact, you have to use uint64_t. But (on Mingw for
example), uint64_t is a typedef for unsigned long long int!

C's built-in types are /not/ solidly defined.

> A big problem with the C types is not caused by the language itself, but
> by users of the language - there are far too many C programmers who make
> unwarranted assumptions about the basic integer types instead of using
> the <stdint.h> types appropriately.

Why not take such assumptions and make them part of the language
standard? I notice stdint.h is fill of int typedefs with power-of-two
bit-widths, so at least it is acknowledging that such specific widths
are important.

>> u64 a := 18 billion billion
>
> That code is going to be non-portable across the Atlantic!

A billion is 1e9 everywhere now.

--
bart

David Brown

unread,
Aug 20, 2018, 6:59:13 AM8/20/18
to
It was not a jest - it was a brain fart. I believe /POSIX/ has always
required 32-bit (minimum) for int, but Unix did not.

Still the point is that different things have changed sizes over time,
and having them as typedefs meant that they could be changed mostly
independently.

David Brown

unread,
Aug 20, 2018, 8:43:39 AM8/20/18
to
On 20/08/18 12:39, Bart wrote:
> On 20/08/2018 10:24, David Brown wrote:
>> On 20/08/18 00:15, Bart wrote:
>
>>> That's C for you. You want an unsigned 64-bit type for your program;
>>> specifying that should be the easy bit before you proceed to write the
>>> actual code.
>>
>> When designing a programming language, you need to make certain
>> decisions about types (let's stick to integer types here). The main
>> choice is between types with tightly defined sizes and properties, and
>> abstract types based on usage or loser properties. Tightly defined
>> types are close to the hardware - such as int32_t, or types used in
>> assembly. Python's integer type is an example of a very abstract type -
>> it grows as necessary to suit the values used. C's standard integer
>> types are a compromise - they are abstract and not tied to machine
>> types, but can be implemented efficiently as though they were low-level
>> types.
>
> As in my reply to Keith, look at C#, D, Go, Rust and Java, which
> although less machine-oriented than C, have a tidier set of types which
> are more solidly defined and based on power-of-two widths.
>

When C was designed, power-of-two width sizes were not ubiquitous. Even
now, there are plenty of processors with different sizes. 24-bit sizes
are common on audio DSPs. 20-bit and 40-bit sizes are often found as
extended integer types on DSPs or as extended address sizes. 18-bit
sizes can be found in some soft processors, and there are cpu families
with all sorts of different sizes. These kinds of chips are mostly
programming in C (somewhat specialised programming, and often very
non-portable programming, but still basically C).

In designing a language /now/ for big processors - as all those
languages are - you'd fix sizes at 32-bit for your basic number type,
and other powers of 2 for bigger and smaller types. But that would not
be a language that replaces C. (I first looked at D some 10-15 years
ago. I thought it was an interesting language, but impractical for my
type of work because it required 32-bit integers.)

Of course you could argue that a good deal of the kind of programming
that is currently done in C, could be done in Go, Rust, or whatever,
using fixed size integers. That would be correct - and I could well
agree that they are often better choices (but /not/ because of the size
of integers - that is an almost irrelevant issue).

And if I were designing a language that actually replaced C, I would
probably define the equivalent of "int" as "int_fast16_t" - define the
fixed types first, and have the abstract types being
implementation-dependent typedefs for the most appropriate concrete
type. But I would also have very abstract types - a "num" type that had
no limits.

But we are not designing a new language - we are using an existing
language - C. And the types in standard C work fine.


> In C, the nearest to a 64-bit unsigned type is unsigned long long int,
> which is 64 bits or wider.
>
> If you want it exact, you have to use uint64_t. But (on Mingw for
> example), uint64_t is a typedef for unsigned long long int!

I would expect uint64_t to be defined that way on MinGW - anything else
would be a bit surprising. On 64-bit *nix, on the other hand, it might
be a typedef for "unsigned long" rather than "unsigned long long". And
- you'll love this one - on the 8-bit AVR gcc port it is:

typedef unsigned int uint64_t __attribute__((__mode__(__DI__)));

Of course it doesn't matter how an implementation chooses to define it -
it merely matters that it works as the standards dictate.

>
> C's built-in types are /not/ solidly defined.

They are somewhat abstract, yes. There is nothing wrong with that - it
has slightly different pros and cons compared to more concrete types.

>
>> A big problem with the C types is not caused by the language itself, but
>> by users of the language - there are far too many C programmers who make
>> unwarranted assumptions about the basic integer types instead of using
>> the <stdint.h> types appropriately.
>
> Why not take such assumptions and make them part of the language
> standard? I notice stdint.h is fill of int typedefs with power-of-two
> bit-widths, so at least it is acknowledging that such specific widths
> are important.

You can't retroactively change the definition of language features that
have been in use for decades! We are not talking about a private little
language for one user here, we are talking about a language with
/enormous/ legacy. Backwards compatibility is the linchpin of C - it is
its greatest strength, and greatest weakness.

The best C can do here is exactly what it /did/ do in C99 - see that
people had need of fixed size types, and create them in <stdint.h>.
There is not a lot that can be done about people who continue to get
things wrong about the C basic integer types, except try to correct them
and educate them when the point arises.

(Note about terminology - the five types "char", "short", "int", "long"
and "long long", along with their unsigned versions and "signed char",
are actually called the "standard integer types". I have been referring
to them as "C basic integer types" to make clear the distinction from
the <stdint.h> types which are also integer types defined in the standard.)

>
>>> u64 a := 18 billion billion
>>
>> That code is going to be non-portable across the Atlantic!
>
> A billion is 1e9 everywhere now.
>

Except in the places where it 10^12. It has been 50 years since the
short scale became mostly standard in the UK, but in most of Europe and
about half the world, long scale is the standard. "Billion" in
Norwegian is 10^12 - it is not even spelt differently. I suppose it's
fine to use it in a one-person language, but it would be silly to invite
confusion if it were more wide-scale. (IIRC your language has digit
separators - a fine idea - which makes such names unnecessary.)


fir

unread,
Aug 20, 2018, 9:04:45 AM8/20/18
to
>
> A billion is 1e9 everywhere now.
>

in poland bilion is 1e12,
( 1e9 is miliard, 1e15 is biliard)

Ben Bacarisse

unread,
Aug 20, 2018, 9:06:00 AM8/20/18
to
Bart <b...@freeuk.com> writes:

> In C, the nearest to a 64-bit unsigned type is unsigned long long int,
> which is 64 bits or wider.

No, the nearest is uint64_t. C does not go so far as to insist that
this type be emulated on hardware that does not have it, but you don't
care about such hardware, surely? In what situations is uint64_t not
suitable for whatever it you want this type for?

> If you want it exact, you have to use uint64_t. But (on Mingw for
> example), uint64_t is a typedef for unsigned long long int!

What's the "but" and the "!"? Is there there that suggests this
implementation is not providing the type as it is supposed to?

--
Ben.

fir

unread,
Aug 20, 2018, 9:41:51 AM8/20/18
to
as base is 10 it should be rather 1d12 not 1e12 (coz e is euler number in mathematics 2.71828..)

1b64 is btw about 18.4 trillions, round 16 exbibytes (those two scales are not comformant in names 16 * 10bits^6, 16MMM (16-mega-maga-maga or 16GG 16-giga-giga)

Bart

unread,
Aug 20, 2018, 9:45:37 AM8/20/18
to
Some units of measure are famously different, for example 1 UK pint
versus 1 US pint (ie. 20 versus 16 fluid ounces, and fluid ounces are
themselves slightly different).

The analogy with uint64_t would be like defining a metric litre in terms
of ambiguously-sized pints rather than something more universally constant.

That's the reason for the exclamation mark.

(Not all implementation will do that. Some might have an actual built-in
type for unsigned 64 bits which can be used to define uint64_t, but
stdint.h does commonly seem to define uint64_t etc exactly as I said.

If you go poking around inside Linux header files, things get even more
confusing. Look for example in stdint.h, sys/types.h and bits/types.h
where you lose track of what is being defined in terms of what.)

--
bart

David Brown

unread,
Aug 20, 2018, 10:21:54 AM8/20/18
to
On 20/08/18 15:45, Bart wrote:
> On 20/08/2018 14:05, Ben Bacarisse wrote:
>> Bart <b...@freeuk.com> writes:
>>
>>> In C, the nearest to a 64-bit unsigned type is unsigned long long int,
>>> which is 64 bits or wider.
>>
>> No, the nearest is uint64_t. C does not go so far as to insist that
>> this type be emulated on hardware that does not have it, but you don't
>> care about such hardware, surely? In what situations is uint64_t not
>> suitable for whatever it you want this type for?
>>
>>> If you want it exact, you have to use uint64_t. But (on Mingw for
>>> example), uint64_t is a typedef for unsigned long long int!
>>
>> What's the "but" and the "!"? Is there there that suggests this
>> implementation is not providing the type as it is supposed to?
>
> Some units of measure are famously different, for example 1 UK pint
> versus 1 US pint (ie. 20 versus 16 fluid ounces, and fluid ounces are
> themselves slightly different).
>
> The analogy with uint64_t would be like defining a metric litre in terms
> of ambiguously-sized pints rather than something more universally constant.
>
> That's the reason for the exclamation mark.

Ah, so you are saying you don't know how uint64_t is defined - is that
it? Let me help you out, since reading a couple of paragraphs from the
standards is likely to be deemed too much effort.

An implementation (C99 or above) can define uint64_t in <stdint.h> if
and only if the type is exactly 64 bits, with no padding (and therefore
a maximum value of 2^64 - 1). int64_t must be a signed integer type
with exactly 64 bits and no padding, and two's complement
representation. If implementation can define both these types, it must
do so - if it can't define one of them, it may not define either.

This means that /if/ a compiler has a "uint64_t" type in <stdint.h>,
then it is precisely 64 bits. There is no ambiguity, no question of
interpretation or complexity - it is as simple and concrete as you can
possibly get in any language. The same applies to "int64_t", and the
other sizes 8, 16 and 32 bit.

If an implementation has, say, a 72-bit "unsigned long long int" and
does not have an alternative suitable 64-bit type, then it simply cannot
define uint64_t.

>
> (Not all implementation will do that. Some might have an actual built-in
> type for unsigned 64 bits which can be used to define uint64_t, but
> stdint.h does commonly seem to define uint64_t etc exactly as I said.
>
> If you go poking around inside Linux header files, things get even more
> confusing. Look for example in stdint.h, sys/types.h and bits/types.h
> where you lose track of what is being defined in terms of what.)
>

Open up the keyboard of your computer. (If it doesn't have screws, try
an axe.) Poke around a bit. Can you figure out exactly what it is
doing? Are you sure that when you press the "A" key, you'll get a
letter "A" out of it? If /you/ can't verify that by a quick look then
clearly you can't trust it and should stop typing.

Or perhaps you could just assume that other people know what they are
doing even if you don't, and the <stdint.h> types work they way they are
defined and documented to work?

Just for fun, though, I /did/ have a poke at "stdint.h" on my Linux
system (details will vary slightly according to the age of the system
and the processor). After the copyright notices there is a section
defining the exact-width integer types. It is perfectly simple, and has
this:

#if __WORDSIZE == 64
typedef unsigned long int uint64_t;
#else
__extension__
typedef unsigned long long int uint64_t;
#endif

What is __WORDSIZE? Well, in case it is not blindingly obvious, you can
see "#include <bits/wordsize.h>" at the top of the file. This leads to
a file less than a dozen lines long defining __WORDSIZE as 64 or 32
depending on the target processor (x86_64 or x86). It means everything
will work out fine, with the right sizes and typedefs, regardless of the
target you ask for, the C standard you choose, or anything else.

How can you possibly find this confusing? It cannot take more than 30
seconds to look at those files and see /exactly/ how "uint64_t" is
defined on this platform. How can you lose track here?


There are lots of other things that /are/ complicated in the standard
headers in a typical Linux installation. Part of this is that the C
standard headers and the OS standard headers are mixed together, with
some of these headers coming from the compiler, some from the C standard
library, and some from the OS. (I think it would have been neater if
these were separated, but it's /way/ to late for that.) And it's all
designed for these headers to support a very wide range of compilers, C
standards, target OS's, host OS's, processors, processor variations,
etc. I am confident that there is also a certain amount of "historical
baggage" there, with parts that are no longer relevant, but which do no
serious harm.


Ben Bacarisse

unread,
Aug 20, 2018, 11:18:43 AM8/20/18
to
Bart <b...@freeuk.com> writes:

> On 20/08/2018 14:05, Ben Bacarisse wrote:
>> Bart <b...@freeuk.com> writes:
>>
>>> In C, the nearest to a 64-bit unsigned type is unsigned long long int,
>>> which is 64 bits or wider.
>>
>> No, the nearest is uint64_t. C does not go so far as to insist that
>> this type be emulated on hardware that does not have it, but you don't
>> care about such hardware, surely? In what situations is uint64_t not
>> suitable for whatever it you want this type for?
>>
>>> If you want it exact, you have to use uint64_t. But (on Mingw for
>>> example), uint64_t is a typedef for unsigned long long int!
>>
>> What's the "but" and the "!"? Is there there that suggests this
>> implementation is not providing the type as it is supposed to?
>
> Some units of measure are famously different, for example 1 UK pint
> versus 1 US pint (ie. 20 versus 16 fluid ounces, and fluid ounces are
> themselves slightly different).
>
> The analogy with uint64_t would be like defining a metric litre in
> terms of ambiguously-sized pints rather than something more
> universally constant.

No, that's not the analogy. Mingw is a C implementation. unsigned long
long int to known to that implementation. (If you insist on an analogy,
Mingw is the landlord of a UK pub asking himself for a pint.)

> That's the reason for the exclamation mark.

"But" suggests a problem and the "!" suggests the problem is a surprise.
I don't see a problem and I don't see anything to be surprised about.

--
Ben.

Bart

unread,
Aug 20, 2018, 11:21:17 AM8/20/18
to
Have another look. On mine, int64_t (not uint64_t) depends on __int8_t
being defined. That stdint says there is some overlap with sys/types.h.
Inside the latter, there are also definitions for int64_t. And one for
u_int64_t (with an extra underscore).

sys/types.h also includes bits/types.h. Inside that one, there are
definitions for __int64_t, and __uint64_t.

There is also __uquad_t which is 64 bits. And __U64_TYPE defined on top
of __uquad_t (or sometimes on top of unsigned long int).

How many variations of a 64-bit type does one language need?

BTW, __uid_t is defined in terms of __UID_T_TYPE (some sort of typedef I
think, but it uses '_STD_TYPE' in place of typedef).

__UID_T_TYPE is defined in terms of __U32_TYPE (using #define not
typedef). And __U32_TYPE in terms of unsigned int, again using #define.
I /assume/ that int in this context is 32 bits wide (so where it really
matters, the language chooses to be cagey).


(My non-C compiler was intended to support all combinations of
Windows/Linux, 32/64 bits, and x86/x64/ARM, with just one short internal
table of type definitions.

User programs see nothing of that except, in the docs, the list of int
types, which in short form are i8, i16, i32, i64, i128, u8, u16, u32,
u64, u128 when specifying exact widths.

Can you get any simpler and tidier than that?)

--
bart

Keith Thompson

unread,
Aug 20, 2018, 12:09:22 PM8/20/18
to
luser droog <luser...@gmail.com> writes:
[...]
> K&R to the rescue!
>
> f(a, b, c) unsigned long long a, b, c; {

Sure, if you don't mind `f(10, 20, 30)` having undefined behavior.

Keith Thompson

unread,
Aug 20, 2018, 12:11:56 PM8/20/18
to
David Brown <david...@hesbynett.no> writes:
> On 20/08/18 00:06, Anton Shepelev wrote:
>> David Brown:
>>> But consider the possibilities for what "u64" means:
[...]
>> From my understanding of C philosophy, it cannot mean
>> anything but a 64-bit unsigned integer.
>
> Unfortunately, you are wrong.
>
> Certainly when "u64" is used in the context of a type, it /looks/ like
> it would be a 64-bit unsigned integer. And in most cases, it will be one.
>
> But there is no requirement for it to be one. There is no law in C
> programming that says the programmer is smart and does the right thing -
> indeed, using "u64" when there are standard types for the purpose makes
> me suspicious about how good the programmer is. (The exception, of
> course, is for pre-C99 code, before uint64_t.)

And if you're using two different libraries, both of which define "u64"
in headers, your program might not even compile. uint64_t solves this
because it's defined in one place. (Actually two, <stdint.h> and
<inttypes.h>, but that's handled.)

Keith Thompson

unread,
Aug 20, 2018, 12:15:57 PM8/20/18
to
David Brown <david...@hesbynett.no> writes:
[...]
> printf with the <stdint.h> types is certainly ugly. Workable, but ugly.
> A better system would involve a replacement of printf (perhaps with a
> generic variadic macro).
>
> For my own use, I will typically just use the specifiers like "%lu" in
> printf along with types like uint32_t. It is not entirely portable, but
> it is portable enough for me.

It's not portable enough for me. I would use "%lu" *and* cast the
argument to unsigned long.

> A critical point here is that if I use
> the code in a situation where it is not valid (perhaps uint32_t is
> "unsigned int" rather than "unsigned long int"), I will get compile-time
> errors. This means it is safe non-portability - unlike, say, messing
> with unaligned accesses that are only detectable at runtime testing.

No diagnostic is guaranteed. gcc happens to warn about incorrect format
strings, but other compilers may or may not.

[...]

Bart

unread,
Aug 20, 2018, 1:04:22 PM8/20/18
to
On 20/08/2018 17:11, Keith Thompson wrote:
> David Brown <david...@hesbynett.no> writes:
>> On 20/08/18 00:06, Anton Shepelev wrote:
>>> David Brown:
>>>> But consider the possibilities for what "u64" means:
> [...]
>>> From my understanding of C philosophy, it cannot mean
>>> anything but a 64-bit unsigned integer.
>>
>> Unfortunately, you are wrong.
>>
>> Certainly when "u64" is used in the context of a type, it /looks/ like
>> it would be a 64-bit unsigned integer. And in most cases, it will be one.
>>
>> But there is no requirement for it to be one. There is no law in C
>> programming that says the programmer is smart and does the right thing -
>> indeed, using "u64" when there are standard types for the purpose makes
>> me suspicious about how good the programmer is. (The exception, of
>> course, is for pre-C99 code, before uint64_t.)
>
> And if you're using two different libraries, both of which define "u64"
> in headers, your program might not even compile. uint64_t solves this
> because it's defined in one place. (Actually two, <stdint.h> and
> <inttypes.h>, but that's handled.)

That's an argument against using any typedefs, macros, struct tags, enum
tags, enum names, function names and variable names, if they are in
headers which can be used by other programs.

Possibly even header names.

In particular, with much touted macros such as MIN and MAX.

--
bart

Scott Lurndal

unread,
Aug 20, 2018, 1:25:37 PM8/20/18
to
Bart <b...@freeuk.com> writes:
>On 20/08/2018 15:21, David Brown wrote:

>>
>> How can you possibly find this confusing? It cannot take more than 30
>> seconds to look at those files and see /exactly/ how "uint64_t" is
>> defined on this platform. How can you lose track here?
>
>Have another look. On mine, int64_t (not uint64_t) depends on __int8_t
>being defined. That stdint says there is some overlap with sys/types.h.
>Inside the latter, there are also definitions for int64_t. And one for
>u_int64_t (with an extra underscore).

[elided internal compiler implementation of header files]

>User programs see nothing of that except, in the docs, the list of int
>types, which in short form are i8, i16, i32, i64, i128, u8, u16, u32,
>u64, u128 when specifying exact widths.

And C programmers (and programs) only see uint64_t, et. al (when the
correct header file is included).

Not a single one of them cares what is in stdint.h, or how it actually
defines the type; they ask for a 64-bit unsigned integer and that's
what they get.

Bart

unread,
Aug 20, 2018, 2:45:20 PM8/20/18
to
Sure. That's why you never see people using variations such as i32 or u64.


And uint64_t etc are of course completely free of problems, such as:

* Whether or not uint64_t* is compatible with long* (sometime is
is, sometimes not)

* What print format to use for int64_t, whether %ld or %lld

* What suffix do you add on the end of a integer constant to make
it int64_t, whether L or LL

--
bart

Scott Lurndal

unread,
Aug 20, 2018, 3:07:59 PM8/20/18
to
Bart <b...@freeuk.com> writes:
>On 20/08/2018 18:25, Scott Lurndal wrote:

>> Not a single one of them cares what is in stdint.h, or how it actually
>> defines the type; they ask for a 64-bit unsigned integer and that's
>> what they get.
>
>Sure. That's why you never see people using variations such as i32 or u64.

Non sequitor, for sure.

>
>
>And uint64_t etc are of course completely free of problems, such as:
>
>* Whether or not uint64_t* is compatible with long* (sometime is
> is, sometimes not)

In C, uint64_t is a 64-bit unsigned type. C simply requires that
long can at a minimum, cover the range of integers supported by
the int type.

No C programmer cares if uint64_t is compatible with long, so you've
created another strawman.

>
>* What print format to use for int64_t, whether %ld or %lld

Clearly, PRIx64 is the correct printf format string for uint64_t,
where x is one of {i, d, o, u, x, X}. See
<inttypes.h>

>
>* What suffix do you add on the end of a integer constant to make
> it int64_t, whether L or LL

If in doubt, cast LL. In reality use either one. The compiler will
complain if you screw up.

Ben Bacarisse

unread,
Aug 20, 2018, 3:08:03 PM8/20/18
to
Bart <b...@freeuk.com> writes:

> On 20/08/2018 18:25, Scott Lurndal wrote:
<snip>
>> And C programmers (and programs) only see uint64_t, et. al (when the
>> correct header file is included).
>>
>> Not a single one of them cares what is in stdint.h, or how it actually
>> defines the type; they ask for a 64-bit unsigned integer and that's
>> what they get.
>
> Sure. That's why you never see people using variations such as i32 or
> u64.

Sarcasm is not usually effective in technical discussions. Except to
end them!

> And uint64_t etc are of course completely free of problems, such as:
>
> * Whether or not uint64_t* is compatible with long* (sometime is
> is, sometimes not)

That's a problem with any typedef, other than one that is obviously a
synonym. And that incompatibility is there for a reason. You should
not be accessing uint64_t values through unsigned long * pointers.
Sometimes you have no choice, but you definitely want that flagged as
dangerous and so requiring a cast is not bad thing.

> * What print format to use for int64_t, whether %ld or %lld

You use PRId64 (example later).

> * What suffix do you add on the end of a integer constant to make
> it int64_t, whether L or LL

You write INT64_C(42).

printf("Answer = %"PRId64"\n", INT64_C(42));

--
Ben.

Keith Thompson

unread,
Aug 20, 2018, 3:25:26 PM8/20/18
to
sc...@slp53.sl.home (Scott Lurndal) writes:
[...]
> In C, uint64_t is a 64-bit unsigned type. C simply requires that
> long can at a minimum, cover the range of integers supported by
> the int type.

And that it's at least 32 bits wide (and int is a least 16 bits wide).

(POSIX additionally requires int to be at least 32 bits wide.)

Anton Shepelev

unread,
Aug 20, 2018, 4:06:48 PM8/20/18
to
David Kleinecke to Rosario:

> > f(u64 a, u64 b, u64 c)
>
> You could use my
> f(U3 a, U3 b, U3 c)
>
> Everybody hates the idea.

I don't, although I prefer Bart's u64. What does the number
after 'U' mean -- a power of two times eight, i.e.:

8 * 2^3 = 64 ?

--
() ascii ribbon campaign -- against html e-mail
/\ http://preview.tinyurl.com/qcy6mjc [archived]

Ben Bacarisse

unread,
Aug 20, 2018, 4:35:54 PM8/20/18
to
sc...@slp53.sl.home (Scott Lurndal) writes:

> Bart <b...@freeuk.com> writes:
<snip>
>>* What print format to use for int64_t, whether %ld or %lld
>
> Clearly, PRIx64 is the correct printf format string for uint64_t,
> where x is one of {i, d, o, u, x, X}. See
> <inttypes.h>

Technically, only o, u, x and X for uint64_t and only i or d for
int64_t, but I say "technically" because it's so very unlikely to be a
problem.

--
Ben.

David Brown

unread,
Aug 20, 2018, 5:40:51 PM8/20/18
to
You are nearly right - it depends on __int8_t (or, more accurately,
"__int8_t_defined") /not/ being defined. And it is not defined earlier
in <stdint.h> or the headers involved, as confirmed by a very quick check.

This guard check is there /precisely/ because the type might be defined
in a different header as well. The same thing is done in most
implementations of libraries and headers to cover duplicate definitions.

(It took about 10 seconds to use a grep to see that __int8_t_defined is
mentioned only in stdint.h and sys/types.h. In a complicated situation,
searching and manual checking would of course take longer, but here it
is very simple.)


These headers would have been a little simpler if C had allowed
duplicate identical declarations and definitions of typedefs, macros,
etc. It doesn't - so library header writers need to make a little extra
effort. Users - normal programmers - don't care. None of this matters
in the slightest to programmers that are not involved in developing the
standard library, or the OS headers. If you want a 64-bit fixed width
type, use #include <stdint.h> or #include <inttypes.h>, and uint64_t is
there and does what you want. It is /irrelevant/ how many other headers
are needed, or where it is defined. (What /is/ relevant is that if you
use another header that itself needs the type, it gets defined once and
only once and everything works.)



>
> sys/types.h also includes bits/types.h. Inside that one, there are
> definitions for __int64_t, and __uint64_t.
>
> There is also __uquad_t which is 64 bits. And __U64_TYPE defined on top
> of __uquad_t (or sometimes on top of unsigned long int).
>
> How many variations of a 64-bit type does one language need?

Several, apparently. I don't know why the glibc library, and the Linux
headers, need so many - but I am sure there are reasons for it. (Some
are mentioned in comments.) One reason is that if some headers need
64-bit types, then they have to use ones with reserved names rather than
the <stdint.h> names - because the user is allowed to define those names
themselves.

>
> BTW, __uid_t is defined in terms of __UID_T_TYPE (some sort of typedef I
> think, but it uses '_STD_TYPE' in place of typedef).

_STD_TYPE is a macro used to allow 64-bit types to be available even if
you are compiling for 32-bit systems in C90 mode, using the
"__extension__" gcc extension (otherwise there would be no 64-bit types
available). It is all about giving you the features needed no matter
what choices you make about compilation modes.

>
> __UID_T_TYPE is defined in terms of __U32_TYPE (using #define not
> typedef). And __U32_TYPE in terms of unsigned int, again using #define.
> I /assume/ that int in this context is 32 bits wide (so where it really
> matters, the language chooses to be cagey).
>
>
> (My non-C compiler was intended to support all combinations of
> Windows/Linux, 32/64 bits, and x86/x64/ARM, with just one short internal
> table of type definitions.

gcc, on the other hands, support many dozens of OS's, 8-bit, 16-bit,
32-bit and 64-bit processors from several dozen different families.
glibc supports a fair number of OS's - and supports them with lots of
standard and extra functions - and large numbers of processors of
different sizes, as well as a number of compilers. Similarly, the Linux
kernel headers work with various compilers, lots of C libraries, piles
of processor types. And they all work, with different versions of
libraries, compilers, and OS's.

It's hardly surprising that this means a hugely more complex set of
headers, types and macros.

>
> User programs see nothing of that except, in the docs, the list of int
> types, which in short form are i8, i16, i32, i64, i128, u8, u16, u32,
> u64, u128 when specifying exact widths.
>
> Can you get any simpler and tidier than that?)
>

You could have better names for your types. But yes, you have a simple
and limited set of types for simple usage.

David Brown

unread,
Aug 20, 2018, 5:50:07 PM8/20/18
to
Usually you don't need to bother with suffixes at all. You write your
integer constant, and it gets a suitable type automatically. When you
assign it to variable, use it for initialisation, use it in an
expression, etc., it gets converted.

David Kleinecke

unread,
Aug 20, 2018, 5:58:45 PM8/20/18
to
On Monday, August 20, 2018 at 1:06:48 PM UTC-7, Anton Shepelev wrote:
> David Kleinecke to Rosario:
>
> > > f(u64 a, u64 b, u64 c)
> >
> > You could use my
> > f(U3 a, U3 b, U3 c)
> >
> > Everybody hates the idea.
>
> I don't, although I prefer Bart's u64. What does the number
> after 'U' mean -- a power of two times eight, i.e.:
>
> 8 * 2^3 = 64 ?

U0 1 byte
U1 2 bytes
U2 4 bytes
U3 8 bytes

signed

S0 1 byte
S1 2 bytes
S2 4 bytes
S3 8 bytes



David Brown

unread,
Aug 20, 2018, 5:59:34 PM8/20/18
to
On 20/08/18 18:15, Keith Thompson wrote:
> David Brown <david...@hesbynett.no> writes:
> [...]
>> printf with the <stdint.h> types is certainly ugly. Workable, but ugly.
>> A better system would involve a replacement of printf (perhaps with a
>> generic variadic macro).
>>
>> For my own use, I will typically just use the specifiers like "%lu" in
>> printf along with types like uint32_t. It is not entirely portable, but
>> it is portable enough for me.
>
> It's not portable enough for me. I would use "%lu" *and* cast the
> argument to unsigned long.

Sure, but you need more portability than me. Most of my code is tightly
tied to a specific platform. It is generally sufficient for me that
code would fail to compile in an obvious way if the assumptions I make
about the target don't hold.

>
>> A critical point here is that if I use
>> the code in a situation where it is not valid (perhaps uint32_t is
>> "unsigned int" rather than "unsigned long int"), I will get compile-time
>> errors. This means it is safe non-portability - unlike, say, messing
>> with unaligned accesses that are only detectable at runtime testing.
>
> No diagnostic is guaranteed. gcc happens to warn about incorrect format
> strings, but other compilers may or may not.
>

Yes, but I would not use other compilers (or at least, significantly
weaker or incompatible compilers) for the code. Again, I don't need
such wide portability. And a non-gcc compatible compiler trying my code
is sure to baulk at some gcc'ism long before failing to give an error
about printf formatting!

I am /not/ suggesting this applies to everyone - I'm just saying what
/I/ do.

Ian Collins

unread,
Aug 20, 2018, 6:04:53 PM8/20/18
to
Lucky you, we have to support X86 Linux, OSX, Windows while trying to
get the best out of a 32 bit ARM/Linux controller!

--
Ian.

David Brown

unread,
Aug 20, 2018, 6:36:27 PM8/20/18
to
Well, some of my code has to run on Linux too, for testing - but it's
all gcc. And it's all got easier since we moved more to ARM
microcontrollers away from smaller devices for most of our systems.

Bart

unread,
Aug 20, 2018, 6:43:37 PM8/20/18
to
On 20/08/2018 22:40, David Brown wrote:
> On 20/08/18 17:21, Bart wrote:

>> (My non-C compiler was intended to support all combinations of
>> Windows/Linux, 32/64 bits, and x86/x64/ARM, with just one short
>> internal table of type definitions.
>
> gcc, on the other hands, support many dozens of OS's, 8-bit, 16-bit,
> 32-bit and 64-bit processors from several dozen different families.

That doesn't really affect the basic types available in a language.
(I've supported 8 and 16-bit targets before, and the difference might be
that 'int' is 16-bits rather than 32, with poor support for wider types.).

C has made a hash of that and it shows, especially when stdint.h with
its macros like INT64_C and PRd64 has been so obviously bolted on.

> glibc supports a fair number of OS's - and supports them with lots of
> standard and extra functions - and large numbers of processors of
> different sizes, as well as a number of compilers.  Similarly, the Linux
> kernel headers work with various compilers, lots of C libraries, piles
> of processor types.  And they all work, with different versions of
> libraries, compilers, and OS's.
>
> It's hardly surprising that this means a hugely more complex set of
> headers, types and macros.

It has done in the case of C, but do you also find such systems of
headers, types and macros with other languages. I bet only C, and maybe
C++, requires a special header (stdint.h) just to use fixed width types.

>> Can you get any simpler and tidier than that?)
>>
>
> You could have better names for your types.

Those are short forms. Most of the time I use 'int', 'byte' or 'word'
(i32, u8 or u32).

> But yes, you have a simple
> and limited set of types for simple usage.

What would be advanced usage? But whatever it is, you want the basic use
of simple types to be effortless.

--
bart

Scott Lurndal

unread,
Aug 20, 2018, 6:59:09 PM8/20/18
to
However, consider this:

uint64_t fred = 0x1531 << 48;

suffix is definitely required to get correct behavior.

Keith Thompson

unread,
Aug 20, 2018, 7:05:55 PM8/20/18
to
David Brown <david...@hesbynett.no> writes:
> On 20/08/18 18:15, Keith Thompson wrote:
>> David Brown <david...@hesbynett.no> writes:
>> [...]
>>> printf with the <stdint.h> types is certainly ugly. Workable, but ugly.
>>> A better system would involve a replacement of printf (perhaps with a
>>> generic variadic macro).
>>>
>>> For my own use, I will typically just use the specifiers like "%lu" in
>>> printf along with types like uint32_t. It is not entirely portable, but
>>> it is portable enough for me.
>>
>> It's not portable enough for me. I would use "%lu" *and* cast the
>> argument to unsigned long.
>
> Sure, but you need more portability than me.

I don't think I *need* more portability that you do (I can safely
assume either gcc or clang for almost all the code I work on.).
But I apparently *want* more portability that you do -- especially
when I can get it for a minimal cost.

And I don't necessarily control how the compiler is invoked or where any
diagnostics will end up.

> Most of my code is tightly
> tied to a specific platform. It is generally sufficient for me that
> code would fail to compile in an obvious way if the assumptions I make
> about the target don't hold.

This:
uint32_t n = 42;
printf("%lu\n", n);
won't fail to compile unless you compile with "-Werror" or equivalent.

On the other hand, if I write this:
uint32_t n = 42;
printf("%lu\n", (unsigned long)n);
I don't have to think about whether the compiler I'm using will warn
about it.

I find it easier to write portable code than to figure out what I can
get away with.

Ian Collins

unread,
Aug 20, 2018, 7:25:47 PM8/20/18
to
On 21/08/18 10:43, Bart wrote:
> On 20/08/2018 22:40, David Brown wrote:
>>
>> It's hardly surprising that this means a hugely more complex set of
>> headers, types and macros.
>
> It has done in the case of C, but do you also find such systems of
> headers, types and macros with other languages. I bet only C, and maybe
> C++, requires a special header (stdint.h) just to use fixed width types.

If you can find another 40+ year old language that supports as many
current and legacy targets as C, you could do a comparison, couldn't you?

--
Ian.

David Brown

unread,
Aug 20, 2018, 7:45:48 PM8/20/18
to
Yes, or a cast, or an intermediary variable or const. But /usually/ you
don't need to bother with suffixes.

David Brown

unread,
Aug 20, 2018, 7:50:33 PM8/20/18
to
On 21/08/18 00:43, Bart wrote:
> On 20/08/2018 22:40, David Brown wrote:
>> On 20/08/18 17:21, Bart wrote:
>
>>> (My non-C compiler was intended to support all combinations of
>>> Windows/Linux, 32/64 bits, and x86/x64/ARM, with just one short
>>> internal table of type definitions.
>>
>> gcc, on the other hands, support many dozens of OS's, 8-bit, 16-bit,
>> 32-bit and 64-bit processors from several dozen different families.
>
> That doesn't really affect the basic types available in a language.
> (I've supported 8 and 16-bit targets before, and the difference might be
> that 'int' is 16-bits rather than 32, with poor support for wider types.).

I was talking about the complications of the headers on your system.

>
> C has made a hash of that and it shows, especially when stdint.h with
> its macros like INT64_C and PRd64 has been so obviously bolted on.

These were added in C99, extending the language from C90 but keeping
maximal backwards compatibility (including for printf). Yes, they were
"bolted on".

>
>> glibc supports a fair number of OS's - and supports them with lots of
>> standard and extra functions - and large numbers of processors of
>> different sizes, as well as a number of compilers.  Similarly, the
>> Linux kernel headers work with various compilers, lots of C libraries,
>> piles of processor types.  And they all work, with different versions
>> of libraries, compilers, and OS's.
>>
>> It's hardly surprising that this means a hugely more complex set of
>> headers, types and macros.
>
> It has done in the case of C, but do you also find such systems of
> headers, types and macros with other languages. I bet only C, and maybe
> C++, requires a special header (stdint.h) just to use fixed width types.
>

Only C (and C++) have such extensive support. There are no comparable
languages.

(Note that I am not saying it is a great thing that we have all these
headers with all their conditional compilations - I am tying to explain
some of the reasons /why/ we have them, why the /result/ is good even if
the details are complicated, and why the details don't matter to anyone
who uses them.)

>>> Can you get any simpler and tidier than that?)
>>>
>>
>> You could have better names for your types.
>
> Those are short forms. Most of the time I use 'int', 'byte' or 'word'
> (i32, u8 or u32).
>
>> But yes, you have a simple and limited set of types for simple usage.
>
> What would be advanced usage? But whatever it is, you want the basic use
> of simple types to be effortless.
>

And in C, it is effortless.

David Brown

unread,
Aug 20, 2018, 7:54:24 PM8/20/18
to
On 21/08/18 01:05, Keith Thompson wrote:
> David Brown <david...@hesbynett.no> writes:
>> On 20/08/18 18:15, Keith Thompson wrote:
>>> David Brown <david...@hesbynett.no> writes:
>>> [...]
>>>> printf with the <stdint.h> types is certainly ugly. Workable, but ugly.
>>>> A better system would involve a replacement of printf (perhaps with a
>>>> generic variadic macro).
>>>>
>>>> For my own use, I will typically just use the specifiers like "%lu" in
>>>> printf along with types like uint32_t. It is not entirely portable, but
>>>> it is portable enough for me.
>>>
>>> It's not portable enough for me. I would use "%lu" *and* cast the
>>> argument to unsigned long.
>>
>> Sure, but you need more portability than me.
>
> I don't think I *need* more portability that you do (I can safely
> assume either gcc or clang for almost all the code I work on.).
> But I apparently *want* more portability that you do -- especially
> when I can get it for a minimal cost.
>
> And I don't necessarily control how the compiler is invoked or where any
> diagnostics will end up.
>

Ah, I control that too. I even control /exactly/ which compiler and
library versions are used. (I code so that I /could/ change these
without problems - except due to using new features in newer tools - but
I still keep toolchains absolutely fixed for projects.)

>> Most of my code is tightly
>> tied to a specific platform. It is generally sufficient for me that
>> code would fail to compile in an obvious way if the assumptions I make
>> about the target don't hold.
>
> This:
> uint32_t n = 42;
> printf("%lu\n", n);
> won't fail to compile unless you compile with "-Werror" or equivalent.
>

And I always have -Werror enabled, once a project has got past its
initial stages.

> On the other hand, if I write this:
> uint32_t n = 42;
> printf("%lu\n", (unsigned long)n);
> I don't have to think about whether the compiler I'm using will warn
> about it.
>
> I find it easier to write portable code than to figure out what I can
> get away with.
>

I know what I can get away with on the platforms I use. Again, this is
just how /I/ do it - I fully appreciate the way you do it. In fact,
this particular issue is seldom very relevant for me because I rarely
use printf (or friends) in my code - it's just not that kind of programming.

Bart

unread,
Aug 20, 2018, 8:13:24 PM8/20/18
to
On 21/08/2018 00:50, David Brown wrote:
> On 21/08/18 00:43, Bart wrote:
>> On 20/08/2018 22:40, David Brown wrote:
>>> On 20/08/18 17:21, Bart wrote:
>>
>>>> (My non-C compiler was intended to support all combinations of
>>>> Windows/Linux, 32/64 bits, and x86/x64/ARM, with just one short
>>>> internal table of type definitions.
>>>
>>> gcc, on the other hands, support many dozens of OS's, 8-bit, 16-bit,
>>> 32-bit and 64-bit processors from several dozen different families.
>>
>> That doesn't really affect the basic types available in a language.
>> (I've supported 8 and 16-bit targets before, and the difference might
>> be that 'int' is 16-bits rather than 32, with poor support for wider
>> types.).
>
> I was talking about the complications of the headers on your system.

I still don't get why it needs to involve headers.

I can only guess that a dependency on such headers has grown so much
that it's not possible to get away from that model.

Try compiling this complete module:

char a;
short b;
int c;
long d;
long long e;

on a compiler where int is 24 bits. Will it compile?

If so then all those headers are unnecessary.


>> What would be advanced usage? But whatever it is, you want the basic
>> use of simple types to be effortless.
>>
>
> And in C, it is effortless.

Yes, apart from a list of things you have to, be aware of or need to
sort out, that doesn't apply in any other language.

You still haven't explained 'advanced usage'.

--
bart

David Brown

unread,
Aug 21, 2018, 3:03:51 AM8/21/18
to
On 21/08/18 02:13, Bart wrote:
> On 21/08/2018 00:50, David Brown wrote:
>> On 21/08/18 00:43, Bart wrote:
>>> On 20/08/2018 22:40, David Brown wrote:
>>>> On 20/08/18 17:21, Bart wrote:
>>>
>>>>> (My non-C compiler was intended to support all combinations of
>>>>> Windows/Linux, 32/64 bits, and x86/x64/ARM, with just one short
>>>>> internal table of type definitions.
>>>>
>>>> gcc, on the other hands, support many dozens of OS's, 8-bit, 16-bit,
>>>> 32-bit and 64-bit processors from several dozen different families.
>>>
>>> That doesn't really affect the basic types available in a language.
>>> (I've supported 8 and 16-bit targets before, and the difference might
>>> be that 'int' is 16-bits rather than 32, with poor support for wider
>>> types.).
>>
>> I was talking about the complications of the headers on your system.
>
> I still don't get why it needs to involve headers.

C (and especially C++) take that attitude that you don't add new things
to the core language if you can write them in code - and thus put them
in headers and library implementation files. This is a /good/ attitude.

Implementing fixed-size integer types did not need a change to the
language, just a single header (from the viewpoint of the user - the
implementation details don't really matter). That is the best way to do it.

C /could/ have made the fixed size types the fundamental types, and had
headers defining "int", and all the rest. That would have been a bit
more awkward at the time of C's design, but would probably be a good
idea for a language designed now. The end result is not much different,
however.

>
> I can only guess that a dependency on such headers has grown so much
> that it's not possible to get away from that model.
>
> Try compiling this complete module:
>
> char a;
> short b;
> int c;
> long d;
> long long e;
>
> on a compiler where int is 24 bits. Will it compile?

I don't have such a compiler on hand, and the one I briefly used long
ago was C90 (no "long long"). IIRC "long" was 48 bit, all the other
types were 24 bit.

>
> If so then all those headers are unnecessary.
>

That makes no sense.

>
>>> What would be advanced usage? But whatever it is, you want the basic
>>> use of simple types to be effortless.
>>>
>>
>> And in C, it is effortless.
>
> Yes, apart from a list of things you have to, be aware of or need to
> sort out, that doesn't apply in any other language.
>

That applies to /every/ language - you have to know the language.

> You still haven't explained 'advanced usage'.
>

Having fast general purpose integers portable across different systems?
There are plenty of processors for which "i16" would be a lot more
efficient than "i32" - and plenty where "i32" would be a lot more
efficient than "i16". In C, it's all just "int".

Supporting systems that don't have everything in neat power-of-two
sizes, or don't have 8-bit chars?

As I have said several times, in making a new, modern language suitable
for (amongst other tasks) systems programming, you'd probably have
started with fixed-size integer types. And you'd probably ignore the
processors that don't have 8-bit bytes, 32-bit registers, etc. - it
would keep things a little simpler. C did not have that option when it
was designed, and it has not done it any harm since.

Scott Lurndal

unread,
Aug 21, 2018, 9:06:53 AM8/21/18
to
I've gotten in the habit of always using them, having been bitten
once or twice by the example above. They also document the programmers
intention, which is never, in my mind, superfluous.

David Brown

unread,
Aug 21, 2018, 9:13:57 AM8/21/18
to
If you feel it makes the programmer's intention clearer, then that is an
excellent reason to use them. And of course if your code uses shifts
that needs types bigger than int, suffixes will be needed if you don't
want to re-arrange the code or use a cast.


Anton Shepelev

unread,
Aug 21, 2018, 4:19:45 PM8/21/18
to
David Kleinecke to Anton Shepelev:

> > What does the number after 'U' mean -- a power of two
> > times eight, i.e.:
> >
> > 8 * 2^3 = 64 ?
>
> U0 1 byte
> U1 2 bytes
> U2 4 bytes
> U3 8 bytes

Then it is as I suggested:

U0: 8 * 2^0 = 8 (bits), &c.

David Kleinecke

unread,
Aug 21, 2018, 5:18:17 PM8/21/18
to
On Tuesday, August 21, 2018 at 1:19:45 PM UTC-7, Anton Shepelev wrote:
> David Kleinecke to Anton Shepelev:
>
> > > What does the number after 'U' mean -- a power of two
> > > times eight, i.e.:
> > >
> > > 8 * 2^3 = 64 ?
> >
> > U0 1 byte
> > U1 2 bytes
> > U2 4 bytes
> > U3 8 bytes
>
> Then it is as I suggested:
>
> U0: 8 * 2^0 = 8 (bits), &c.

If you must have a formula.
Ux is 2^x bytes

Anton Shepelev

unread,
Aug 21, 2018, 5:27:30 PM8/21/18
to
David Brown:

> And if I were designing a language that actually replaced
> C, I would probably define the equivalent of "int" as
> "int_fast16_t" - define the fixed types first, and have
> the abstract types being implementation-dependent typedefs
> for the most appropriate concrete type.

Fixed-size types are abstract, because decoupled from the
underlying implementation and machine-level support and
embodying a mathematical concept of a number within certain
limits and with certain properties. One could, for example,
implement a 64-bit integer on a 32-bit machine.

Variable-sized types are concrete, because they correspond
to machine-level types and are not meant to embody any
mathematical concept independently of the CPU. They seem
useless unless one is so hard put to it by considerations of
efficiency that one is ready to sacrifice portability and
stability, i.e. the guarrantee that the behavior of a
program is determined by its code and does not depend on a
machine where it is compiled as long as it does not run into
a resource limit.

> But we are not designing a new language -- we are using an
> existing language -- C.

Would not it be possible, using macros, smoothly to pass to
a type system based on types with fixed size and properties,
e.g. to express int conditionally via i16, i32, and i64, &c?
Why do you think to C this way is closed?

People keep proposing modificatins to C, most of which
increase bloat through addition of new features or extension
of existing ones, but rarely does anybody suggest the
counterbalance of removal of features or hardening of
constraints -- things that help a language stay slim and not
explode like C++.

> And the types in standard C work fine.

I was shocked when I read that char can be either signed or
unsigned, and that obtaining a normal abstract fixed-with
type is in classic C a non-trivial task.

As you probably understand, I know very little of C, and
reading the standard from the middle made me dizzy. It must
take a tremendous effort to read from cover to cover. And
what about this crazy idea of having an open standard hidden
behind a paywall? It defeats the very purpose of an open
standard.

Bart

unread,
Aug 21, 2018, 5:39:17 PM8/21/18
to
The problem with this scheme is that can only represent the following
bit widths:

8, 16, 32, 64, ...

(Although that will do for most of the types likely to be of interest).

Using a byte count instead will allow:

8, 16, 24, 32, 40, 48, 56, 64, ...

While a bit count allows:

1, 2, 3, .. 8, 9, 10, 11, 12,..... 64, ...

So any odd width. Most of them are unlikely to ever be used, although I
have used U1, U2, U4 myself (1, 2 and 4 bits).

(With something so general, using dedicated identifiers for each
arbitrary width would be unwieldy, but that's another matter. Probably
you would use something like 'uint:27' instead of 'uint27'.)

--
bart

Bart

unread,
Aug 21, 2018, 5:48:59 PM8/21/18
to
On 21/08/2018 22:25, Anton Shepelev wrote:
> David Brown:

>> And the types in standard C work fine.
>
> I was shocked when I read that char can be either signed or
> unsigned,

In that case you'd better sit down before reading the next bit. Because
in some circumstances, a char type is considered neither signed nor
unsigned. Both of these are true:

char* is not compatible with signed char*
char* is not compatible with unsigned char*

Compile:

int main(void) {
char *p;
signed char* ps;
unsigned char* pu;

p=ps;
p=pu;
}

with:

gcc -Wpedantic -std=c11 -c c.c

It might say:

c.c: In function 'main':
c.c:13:2: warning: pointer targets in assignment differ in signedness
[-Wpointer-sign]
p=ps;
^
c.c:14:2: warning: pointer targets in assignment differ in signedness
[-Wpointer-sign]
p=pu;
^

--
bart

Anton Shepelev

unread,
Aug 21, 2018, 5:56:33 PM8/21/18
to
Bart:

> in some circumstances, a char type is considered neither
> signed nor unsigned. Both of these are true:
>
> char* is not compatible with signed char*
> char* is not compatible with unsigned char*
>
> Compile:
>
> int main(void) {
> char *p;
> signed char* ps;
> unsigned char* pu;
>
> p=ps;
> p=pu;
> }
>
> with:
>
> gcc -Wpedantic -std=c11 -c c.c
>
> It might say:
>
> c.c: In function 'main':
> c.c:13:2: warning: pointer targets in assignment differ in signedness
> [-Wpointer-sign]
> p=ps;
> ^
> c.c:14:2: warning: pointer targets in assignment differ in signedness
> [-Wpointer-sign]
> p=pu;
> ^

I am glad I was not standing. Is it a bug in gcc or
standard behavior?

Bart

unread,
Aug 21, 2018, 6:13:54 PM8/21/18
to
Well, several compilers can be made to report it, usually as a warning.
So I guess it must be standard (the experts here will probably confirm
that).

(My own compiler is non-conforming, and only one of those assignments
will be an error, as 'char' is a synonym for 'unsigned char'.)

--
bart

Ben Bacarisse

unread,
Aug 21, 2018, 6:29:46 PM8/21/18
to
Bart <b...@freeuk.com> writes:

> On 21/08/2018 22:25, Anton Shepelev wrote:
>> David Brown:
>
>>> And the types in standard C work fine.
>>
>> I was shocked when I read that char can be either signed or
>> unsigned,
>
> In that case you'd better sit down before reading the next
> bit. Because in some circumstances, a char type is considered neither
> signed nor unsigned. Both of these are true:
>
> char* is not compatible with signed char*
> char* is not compatible with unsigned char*

That's a bit muddled. There are /no/ circumstances in which char is
neither signed nor unsigned, and in /every/ circumstance char is
incompatible with both signed char and unsigned char. This is because
char is considered to be a different type from both signed char and
unsigned char.

Yes, those pointers are incompatible, but its far simpler and more
helpful to say that that the pointed-to types are incompatible. The
incompatibility of the pointer types follows from that.

It's important to distinguish between "char is a signed type" from "char
is the same type as signed char". The first is often true, the second
never is. And one of "char is a signed type" and "char is an unsigned
type" is always true.

--
Ben.

Tim Rentsch

unread,
Aug 21, 2018, 6:38:47 PM8/21/18
to
Bart is making mountains out of molehills. C has three character
types: char, unsigned char, and signed char. The type char must
have the same signed-ness and range of values as either unsigned
char or signed char; each implementation gets to choose which
one is the case, but it must make one of those two choices.

Despite char resembling one of the other two character types, the
three character types are all distinct types. This is somewhat
like the situation where an implementation has a 32-bit int and a
64-bit long long. Assuming no non-power-of-two sizes, the type
long must be the same size as int or the same size as long long
(and obviously all three types are signed). Despite the overlap,
the three types int, long, and long long are all distinct types.
So it is with char, unsigned char, and signed char. And because
the three character types are distinct, each of their pointer
types is not compatible with any of the others. That is just
like 'long *' not being compatible with 'int *', even if int
and long happen to have the same size.

james...@alumni.caltech.edu

unread,
Aug 21, 2018, 6:53:02 PM8/21/18
to
It is standard behavior:

"The three types char, signed char, and unsigned char are collectively
called the character types. The implementation shall define char to have
the same range, representation, and behavior as either signed char or
unsigned char.45)" (6.2.5p15). Note that it fails to say that char is
the same type as either of the other two. That clause ends with a
reference to footnote 45, which makes it clear that this ommission was
intentional: "char is a separate type from the other two and is not
compatible with either."

"Two types have compatible type if their types are the same. Additional
rules for determining whether two types are compatible are described
[elsewhere]" (6.2.7p1). None of those "Additional rules" servers to make
char compatible with either signed char or unsigned char, which is why
footnote 45 is correct about their incompatibility.

"For two pointer types to be compatible, both shall be identically
qualified and both shall be pointers to compatible types." (6.7.6.1p2)

6.5.16.1p1 gives the constraints that apply to simple assignment, saying
that "One of the following shall hold". None of the following options
covers a pointer to one type being assigned to a pointer to an
incompatible type, unless one of the two types is a pointer to void.

As far back as K&R C, the signedness of plain char was explicitly left
up to to the implementation to decide, because there were systems where
it made more sense for char to be signed, and other systems where it made
sense for it to be unsigned. The peculiar characteristics of that type
were the committee's solution to that problem, and backwards
compatibility issues prevent any significant change in that regard.

Keith Thompson

unread,
Aug 21, 2018, 6:54:31 PM8/21/18
to
Bart <b...@freeuk.com> writes:
> On 21/08/2018 22:25, Anton Shepelev wrote:
>> David Brown:
>
>>> And the types in standard C work fine.
>>
>> I was shocked when I read that char can be either signed or
>> unsigned,
>
> In that case you'd better sit down before reading the next bit. Because
> in some circumstances, a char type is considered neither signed nor
> unsigned. Both of these are true:
>
> char* is not compatible with signed char*
> char* is not compatible with unsigned char*

It's not "in some circumstances". Types char, signed char, and
unsigned char are three distinct types. Plain char has the same
size and representation as either signed char or unsigned char; the
choice is implementation-defined.

(Plain char is not one of the *signed integer types* or one of the
*unsigned integer types*. This is admittedly a little confusing, but
it's not much of a problem in practice.)

> Compile:
>
> int main(void) {
> char *p;
> signed char* ps;
> unsigned char* pu;
>
> p=ps;
> p=pu;
> }

Both assignments are constraint violations.

Assignments among the three character types themselves are ok;
they're distinct and incompatible types, but they're numeric so
any needed conversions are done implicitly. There are no implicit
conversions between incompatible pointer types other than void*.

If you want byte-oriented character data, use char. If you want
raw byte data, use unsigned char. If you want narrow integers,
use signed char.

Similarly, int and long are incompatible even if they happen to
have the same size and representation.

As usual, Bart is trying to make this sound as confusing as possible.

mark.b...@gmail.com

unread,
Aug 22, 2018, 3:28:05 AM8/22/18
to
On Friday, 17 August 2018 17:40:30 UTC+1, David Brown wrote:
>
> If Rosario used rot13 to hide his code or hints, as some other posters
> have done, would anyone be able to tell the difference?

I regard Rosario's coding "style" as functionally equivalent to rot13, except
that rot13 is used when someone is trying to communicate.

David Brown

unread,
Aug 22, 2018, 3:38:43 AM8/22/18
to
On 21/08/18 23:25, Anton Shepelev wrote:
> David Brown:
>
>> And if I were designing a language that actually replaced
>> C, I would probably define the equivalent of "int" as
>> "int_fast16_t" - define the fixed types first, and have
>> the abstract types being implementation-dependent typedefs
>> for the most appropriate concrete type.
>
> Fixed-size types are abstract, because decoupled from the
> underlying implementation and machine-level support and
> embodying a mathematical concept of a number within certain
> limits and with certain properties. One could, for example,
> implement a 64-bit integer on a 32-bit machine.
>

Abstract/concrete is not a binary feature. Fixed size types like
"int32_t" are /more/ concrete than variable types like "int" because
more of their implementation details are fixed - the size, range and
representation. They are still more abstract than an assembly-level
signed integer as they have different overflow characteristics from the
(usual) underlying hardware, and they may not match the register or cpu
size.

> Variable-sized types are concrete, because they correspond
> to machine-level types and are not meant to embody any
> mathematical concept independently of the CPU.

Incorrect.

"int" is supposed to be a type that is efficient to implement on a given
machine - it does not correspond directly or specifically to any
machine-level type or characteristic, its specifications are vaguer, and
it /does/ embody mathematical concepts that are independent (indeed,
directly contrary) to those of the cpu.

In particular, "int" will generally be 16-bit on 8-bit processors - such
cpus do not have direct support for 16-bit values, registers, loads,
stores, or arithmetic. They will generally be 32-bit on 64-bit
machines, even though they do not match the width of the registers or
ALU on the systems, and the cpu may not be able to do atomic loads or
stores of 32-bit objects.

And "int" embodies mathematical principles such as adding a positive
number to an int will always give you a larger int - compilers use such
rules for optimisations, even though the underlying hardware does not
guarantee it.


> They seem
> useless unless one is so hard put to it by considerations of
> efficiency that one is ready to sacrifice portability and
> stability, i.e. the guarrantee that the behavior of a
> program is determined by its code and does not depend on a
> machine where it is compiled as long as it does not run into
> a resource limit.

Some people find variable sizes - int, long, etc. - suit their coding
better than fixed sizes - int32_t, uint64_t, etc. Others have the
opposite view. That's fine - C gives you both, and you can choose.

Other languages give you the ability to create range types, which are
also somewhat abstract and which I find can be very handy. C doesn't
have these, but I can live without them.

>
>> But we are not designing a new language -- we are using an
>> existing language -- C.
>
> Would not it be possible, using macros, smoothly to pass to
> a type system based on types with fixed size and properties,
> e.g. to express int conditionally via i16, i32, and i64, &c?
> Why do you think to C this way is closed?

It would be entirely possible to do this - but the language of C is
slightly too limited for that. Given some fundamental fixed-size types
like those you could define "int", "short" and "long" in those terms
with typedefs or macros. But you could not define "unsigned int", "long
long int", etc. - C's language does not let you make types that require
multiple specifiers.

In a new language, you might avoid the issue entirely by having types
like "long_int" or "unsigned_short" (if you thought they were nice).

>
> People keep proposing modificatins to C, most of which
> increase bloat through addition of new features or extension
> of existing ones, but rarely does anybody suggest the
> counterbalance of removal of features or hardening of
> constraints -- things that help a language stay slim and not
> explode like C++.

I haven't been proposing modifications or extensions to C here - just
having a discussion about alternatives that could have existed. (There
certainly /are/ a few things I would like to add to C.)

And I have seen quite a number of suggestions for removing features or
hardening constraints in this group. It's not long since there was a
thread about removing the obsolete non-prototype function declarations
and function definitions.

>
>> And the types in standard C work fine.
>
> I was shocked when I read that char can be either signed or
> unsigned, and that obtaining a normal abstract fixed-with
> type is in classic C a non-trivial task.

As long as people realise that "char" is a type for holding a character,
and that anything numerical should use "signed char" or "unsigned char"
explicitly, there really is no problem.

Getting fixed-width types in standard C90 (is that what you mean by
"classic C" ?) is not only difficult, it can be impossible. If your
implementation supports the required types as char, short, int, and long
then you can do it with a bit of mess of conditional compilation using
<limits.h>. If it doesn't have the types, then you can't make them.

Prior to C99, the usual method of getting your fixed-size types was an
implementation-specific header. With C99, you used the standardised
<stdint.h> so that your compiler supplier delivered the
implementation-specific header and as a normal programmer, you no longer
had to make it yourself.

>
> As you probably understand, I know very little of C, and
> reading the standard from the middle made me dizzy. It must
> take a tremendous effort to read from cover to cover. And
> what about this crazy idea of having an open standard hidden
> behind a paywall? It defeats the very purpose of an open
> standard.
>

The C standard is not a tutorial or a good way to learn about C - it is
a reference document. It takes a /long/ time, and a special interest,
to be familiar with it. And there is a lot in it that could have been
explained in far easier language and with more logical organisation. If
you want a good reference website, I recommend
<https://en.cppreference.com/w/c>. The site covers both C and C++, and
is very accurate but a bit more accessible than the standards.

"Open standard" simply means that anyone can get the document, and
anyone can implement it. It doesn't mean you can get the document for
free. (Nor does it mean you get the "source" of the document and can
change it yourself!) But all the draft documents for C and C++
standards are available freely, and generally there are no more than
tiny layout changes between final drafts and published standards. So
for C, the document you want is "N1570" - the final draft of the C11
standard. It is what practically everyone uses, rather than buying the
official C11 standard document.


David Brown

unread,
Aug 22, 2018, 3:43:39 AM8/22/18
to
It is standard behaviour. Don't worry about Bart's dramatisation -
there are no bugs here, nor any particular difficulties or challenges.
Use "char" for characters, use "unsigned char" for "very small unsigned
integer" or raw memory byte, and "signed char" for "very small signed
integer".

If you know your code is only in use in systems that have 8-bit char,
you can also use "uint8_t" and "int8_t". That limits portability
somewhat, but in my line of work (small embedded programming) such small
number types are so very useful that this is appropriate.

It is loading more messages.
0 new messages