Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

Text editor project in C

148 views
Skip to first unread message

jacobnavia

unread,
Apr 22, 2021, 12:58:41 PM4/22/21
to
Recently, somebody started a thread about how C wasn't a simple
language. The excuse was a text editor project for a beginner.

Attached is the source code for the 10th edition of Unix of the "ed"
text editor, published by Brian Kernighan for his CS classes at
Princeton in 2001.

Maybe C is not a simple language but one of its incredible strengths is
its stability. This code is around 40 (yes FORTY) years old and you can
compile it without any trouble today and it will work in any UNIX system.

----------------------------------------------------------------cut here
/* This file contains the source for the 10th Edition Unix version of
ed, which is
1700 lines long. It dates from about 1989, but is typical of Unix
code from the
mid 1970's: terse, tight, efficient, and largely uncommented
This is a slightly modified version that compiles without any
warnings under gcc in 2021
See https://www.cs.princeton.edu/courses/archive/spring01/cs333/ed.c
Compile with gcc -O2 -o ed -Wall -Wno-parentheses ed.c
*/
#include <signal.h>
#include <stdlib.h>
#include <setjmp.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <wait.h>
#include <string.h>
/* Sizes of the editor's fixed buffers and tables. */
#define BLKSIZE 4096 /* make BLKSIZE and LBSIZE 512 for smaller machines */
#define NBLK 2047 /* getblock() rejects block numbers >= NBLK */
#define FNSIZE 128 /* size of the savedfile and file name buffers */
#define LBSIZE 4096 /* size of linebuf and genbuf */
#define ESIZE 256 /* limit on the compiled regular expression in expbuf */
#define GBSIZE 256 /* size of the command buffer used by global() */
#define NBRA 5 /* maximum number of \( \) groups in a pattern */
#define KSIZE 9 /* NOTE(review): not referenced in the visible code */
/* Opcodes written to expbuf by compile() and interpreted by advance(). */
#define CBRA 1 /* \( : start of a group */
#define CCHR 2 /* literal character, followed by the character */
#define CDOT 4 /* . */
#define CCL 6 /* [...] followed by a count and the member characters */
#define NCCL 8 /* [^...] */
#define CDOL 10 /* $ at the end of the pattern */
#define CEOF 11 /* end of the compiled expression */
#define CKET 12 /* \) : end of a group */
#define CBACK 14 /* \1 .. \5 back-reference */
#define CCIRC 15 /* ^ at the start of the pattern */
#define STAR 01 /* OR-ed into the preceding opcode by a following * */
/* File-scope editor state shared by all functions below. */
static unsigned char Q[] = ""; /* empty message: error(Q) prints just "?" */
static unsigned char T[] = "TMP"; /* message for temporary-file errors */
#define READ 0 /* direction argument for getblock()/blkio() */
#define WRITE 1
static int peekc; /* one character of pushback, consumed by getchr() */
static int lastc; /* last character returned by getchr() */
static unsigned char savedfile[FNSIZE]; /* remembered file name */
static unsigned char file[FNSIZE]; /* file name for the current command */
static unsigned char linebuf[LBSIZE]; /* text of the line being worked on */
static unsigned char rhsbuf[LBSIZE/2]; /* replacement text of the s command */
static unsigned char expbuf[ESIZE+4]; /* compiled regular expression */
static int given; /* nonzero when the command had an explicit address */
static unsigned int *addr1, *addr2; /* address range of the current command */
static unsigned int *dot, *dol, *zero; /* current line, last line, base of line table */
static unsigned char genbuf[LBSIZE]; /* scratch buffer: file I/O, join, substitution */
static long count; /* character counter; also the number printed by putd() */
static unsigned char *nextip; /* getfile()'s read position in genbuf */
static unsigned char *linebp; /* text after an embedded newline, set by putline() */
static int ninbuf; /* bytes left in genbuf for getfile() */
static int io; /* descriptor of the file being read or written */
static int pflag; /* print the current line before the next command */
static int vflag = 1; /* cleared by the "-" option; gates counts and "!" */
static int oflag; /* set by -o: putchr() writes to descriptor 2 */
static int listf; /* list mode (l): putchr() escapes special characters */
static int listn; /* number mode (n): print() prefixes line numbers */
static int col; /* output column tracked by putchr() in list mode */
static unsigned char *globp; /* when set, getchr() reads commands from here */
static int tfile = -1; /* descriptor of the temporary file */
static int tline; /* next free address in the temporary file */
static char tfname[50]; /* name of the temporary file */
static unsigned char *loc1; /* start of the most recent match */
static unsigned char *loc2; /* end of the most recent match */
static unsigned char ibuff[BLKSIZE]; /* temp-file block buffer used for reads */
static int iblock = -1; /* block number held in ibuff, -1 if none */
static unsigned char obuff[BLKSIZE]; /* temp-file block buffer used for writes */
static int oblock = -1; /* block number held in obuff, -1 if none */
static int ichanged; /* ibuff was handed out for writing */
static int nleft; /* bytes left in the block after getblock()'s result */
static char WRERR[] = "WRITE ERROR";
static int names[26]; /* marks 'a'..'z': line address with the low bit cleared */
static int anymarks; /* set once any mark has been placed */
static unsigned char *braslist[NBRA]; /* start of each \( \) group in the last match */
static unsigned char *braelist[NBRA]; /* end of each \( \) group in the last match */
static int nbra; /* number of groups in the compiled pattern */
static int subnewa; /* address of the line written by the last substitute */
static int subolda; /* address it replaced; restored by the u command */
static int fchange; /* buffer modified since the last write */
static int wrapp; /* set by W: append to the file instead of creating it */
static unsigned nlall = 128; /* number of slots allocated in the line table */
static char tmpXXXXX[50] = "/tmp/eXXXXXX"; /* mkstemp() template for tfname */
/* Forward declarations for every function defined in this file. */
static unsigned char *getblock(unsigned int atl, int iof);
static unsigned char *GetLine(unsigned int tl);
static unsigned char *place(unsigned char *sp,unsigned char *l1,unsigned
char *l2);
static void add(int i);
static int advance(unsigned char *lp, unsigned char *ep);
static int append(int (*f)(void), unsigned int *a);
static int backref(int i,unsigned char *lp);
static void blkio(int b, unsigned char *buf, int iof);
static void callunix(void);
static int cclass(unsigned char *set, int c, int af);
static void commands(void);
static void compile(int eof);
static int compsub(void);
static void dosub(void);
static void error(unsigned char *s);
static int execute(unsigned int *addr);
static void exfile(void);
static void filename(int comm);
static void gdelete(void);
static int getchr(void);
static int getcopy(void);
static int getfile(void);
static int getnum(void);
static int getsub(void);
static int gettty(void);
static int gety(void);
static void global(int k);
static void init(void);
static unsigned int *address(void);
static void join(void);
static void move(int cflag);
static void newline(void);
static void nonzero(void);
static void onhup(int n);
static void onintr(int n);
static void print(void);
static void putchr(int ac);
static void putd(void);
static void putfile(void);
static int putline(void);
static void quit(int n);
static void rdelete(unsigned int *ad1, unsigned int *ad2);
static void reverse(unsigned int *a1, unsigned int *a2);
static void setwide(void);
static void setnoaddr(void);
static void squeeze(int i);
static void substitute(int inglob);
/* Recovery point set in main(); error() longjmps back to it. */
static jmp_buf savej;
/* Signal handler type, and the dispositions saved at startup for callunix(). */
typedef void (*SIG_TYP)(int);
static SIG_TYP oldhup;
static SIG_TYP oldquit;
/*
 * SIGHUP and SIGQUIT are not part of ISO C, so a strict <signal.h> may omit
 * them. Supply the traditional values only when the header did not, so a
 * platform definition is never overridden.
 */
#ifndef SIGHUP
#define SIGHUP 1 /* hangup */
#endif
#ifndef SIGQUIT
#define SIGQUIT 3 /* quit (ASCII FS) */
#endif

/*
 * Entry point: parse the options, remember the file name argument, allocate
 * the line table, create the temporary file, install the signal handlers
 * and run the command loop.
 *
 * Options: "-" clears vflag; "-q" restores the default SIGQUIT action and
 * sets vflag; "-o" sets oflag and uses "/dev/stdout" as the saved file.
 * A file name argument is copied (truncated to fit) into savedfile and an
 * initial "r" command is queued through globp.
 */
int main(int argc, char *argv[])
{
	unsigned char *p1, *p2;
	SIG_TYP oldintr;
	int fd;

	oldquit = signal(SIGQUIT, SIG_IGN);
	oldhup = signal(SIGHUP, SIG_IGN);
	oldintr = signal(SIGINT, SIG_IGN);
	if (signal(SIGTERM, SIG_IGN) == SIG_DFL)
		signal(SIGTERM, quit);
	argv++;
	while (argc > 1 && **argv=='-') {
		switch((*argv)[1]) {

		case '\0':
			vflag = 0;
			break;

		case 'q':
			signal(SIGQUIT, SIG_DFL);
			vflag = 1;
			break;

		case 'o':
			oflag = 1;
			break;
		}
		argv++;
		argc--;
	}
	if (oflag) {
		p1 = (unsigned char *)"/dev/stdout";
		p2 = savedfile;
		while (*p2++ = *p1++)
			;
	}
	if (argc>1) {
		p1 = (unsigned char *)*argv;
		p2 = savedfile;
		/* Once the buffer is full, keep overwriting the last slot so the
		 * terminating NUL still lands inside savedfile. */
		while (*p2++ = *p1++)
			if (p2 >= &savedfile[sizeof(savedfile)])
				p2--;
		globp = (unsigned char *)"r";
	}
	zero = (unsigned *)malloc(nlall*sizeof(unsigned));
	if (zero == NULL) {
		/* Nothing can work without the line table; savej is not set yet,
		 * so error() cannot be used here. */
		fputs("ed: out of memory\n", stderr);
		exit(1);
	}
	strcpy(tfname,tmpXXXXX);
	fd = mkstemp(tfname);
	if (fd < 0) {
		perror("ed: cannot create temporary file");
		exit(1);
	}
	/* init() reopens the file by name; do not leak this descriptor. */
	close(fd);

	init();
	if (oldintr!=SIG_IGN)
		signal(SIGINT, onintr);
	if (oldhup!=SIG_IGN)
		signal(SIGHUP, onhup);
	/* error() longjmps here; the command loop is then simply restarted. */
	setjmp(savej);
	commands();
	quit(0);
	return 0;
}
/*
 * Main command loop.
 *
 * Each iteration first honours a pending pflag (prints the current line),
 * then reads up to two addresses separated by ',' or ';' into addr1/addr2
 * (setting `given` when an address was present) and dispatches on the
 * command character. A command that is not recognised falls out of the
 * switch into error(Q). Returns when getchr() yields EOF.
 */
static void commands(void)
{
	unsigned int *a1;
	int c;
	int temp;
	unsigned char lastsep;
	unsigned char *sp;

	for (;;) {
		if (pflag) {
			pflag = 0;
			addr1 = addr2 = dot;
			print();
		}
		c = '\n';
		for (addr1 = 0;;) {
			lastsep = c;
			a1 = address();
			c = getchr();
			if (c!=',' && c!=';')
				break;
			if (lastsep==',')
				error(Q);
			if (a1==0) {
				/* missing first address defaults to line 1 */
				a1 = zero+1;
				if (a1>dol)
					a1--;
			}
			addr1 = a1;
			if (c==';')
				dot = a1;
		}
		/* missing address after a separator defaults to the last line */
		if (lastsep!='\n' && a1==0)
			a1 = dol;
		if ((addr2=a1)==0) {
			given = 0;
			addr2 = dot;
		}
		else
			given = 1;
		if (addr1==0)
			addr1 = addr2;
		switch(c) {
		case 'a':
			add(0);
			continue;
		case 'c':
			nonzero();
			newline();
			rdelete(addr1, addr2);
			append(gettty, addr1-1);
			continue;
		case 'd':
			nonzero();
			newline();
			rdelete(addr1, addr2);
			continue;
		case 'E':
			fchange = 0;
			c = 'e';
			/* fall through */
		case 'e':
			setnoaddr();
			if (vflag && fchange) {
				/* first attempt only warns; repeating the command works */
				fchange = 0;
				error(Q);
			}
			filename(c);
			init();
			addr2 = zero;
			goto caseread;
		case 'f':
			setnoaddr();
			filename(c);
			/* Emit through putchr() so the name shares the buffer and the
			 * descriptor used by all other editor output. */
			for (sp = savedfile; *sp; sp++)
				putchr(*sp);
			putchr('\n');
			continue;
		case 'g':
			global(1);
			continue;
		case 'i':
			add(-1);
			continue;
		case 'j':
			if (!given)
				addr2++;
			newline();
			join();
			continue;
		case 'k':
			nonzero();
			if ((c = getchr()) < 'a' || c > 'z')
				error(Q);
			newline();
			names[c-'a'] = *addr2 & ~01;
			anymarks |= 01;
			continue;
		case 'm':
			move(0);
			continue;
		case 'n':
			listn++;
			newline();
			print();
			continue;
		case '\n':
			if (a1==0) {
				/* bare newline: advance to and print the next line */
				a1 = dot+1;
				addr2 = a1;
				addr1 = a1;
			}
			if (lastsep==';')
				addr1 = a1;
			print();
			continue;
		case 'l':
			listf++;
			/* fall through */
		case 'p':
		case 'P':
			newline();
			print();
			continue;
		case 'Q':
			fchange = 0;
			/* fall through */
		case 'q':
			setnoaddr();
			newline();
			/* quit() either exits or leaves through error(); it does not
			 * return, so the 'r' case below is not entered from here. */
			quit(0);
		case 'r':
			filename(c);
		caseread:
			if ((io = open((const char *)file, 0)) < 0) {
				lastc = '\n';
				error(file);
			}
			setwide();
			squeeze(0);
			ninbuf = 0;
			/* reading into an empty buffer does not count as a change */
			c = zero != dol;
			append(getfile, addr2);
			exfile();
			fchange = c;
			continue;
		case 's':
			nonzero();
			substitute(globp!=0);
			continue;
		case 't':
			move(1);
			continue;
		case 'u':
			nonzero();
			newline();
			if ((*addr2&~01) != subnewa)
				error(Q);
			*addr2 = subolda;
			dot = addr2;
			continue;
		case 'v':
			global(0);
			continue;
		case 'W':
			wrapp++;
			/* fall through */
		case 'w':
			setwide();
			squeeze(dol>zero);
			/* "wq" / "wQ": remember the suffix, otherwise push it back */
			if ((temp = getchr()) != 'q' && temp != 'Q') {
				peekc = temp;
				temp = 0;
			}
			filename(c);
			/* W appends to an existing file; otherwise (or when the file
			 * cannot be opened or positioned) create it afresh. */
			if(!wrapp ||
			  ((io = open((const char *)file,1)) == -1) ||
			  ((lseek(io, 0L, 2)) == -1))
				if ((io = creat((const char *)file, 0666)) < 0)
					error(file);
			wrapp = 0;
			if (dol > zero)
				putfile();
			exfile();
			if (addr1<=zero+1 && addr2==dol)
				fchange = 0;
			if (temp == 'Q')
				fchange = 0;
			if (temp)
				quit(0);
			continue;
		case '=':
			setwide();
			squeeze(0);
			newline();
			count = addr2 - zero;
			putd();
			putchr('\n');
			continue;
		case '!':
			callunix();
			continue;
		case EOF:
			return;
		}
		error(Q);
	}
}
static void print(void)
{
unsigned int *a1;

nonzero();
a1 = addr1;
do {
if (listn) {
count = a1-zero;
putd();
putchr('\t');
}
puts((const char *)GetLine(*a1++));
} while (a1 <= addr2);
dot = addr2;
listf = 0;
listn = 0;
pflag = 0;
}
/*
 * Parse one line address from the command input.
 *
 * Accepts numbers, '.', '$', 'x marks, /re/ and ?re? searches, and
 * '+', '-', '^' offsets. Returns a pointer into the line table, or 0 when
 * no address was present (the character that ended the scan is pushed back
 * through peekc). Calls error(Q) when the result falls outside zero..dol
 * or the syntax is invalid.
 */
static unsigned int * address(void)
{
int sign;
unsigned int *a, *b;
int opcnt, nextopand;
int c;

nextopand = -1;
sign = 1;
opcnt = 0;
a = dot;
do {
/* skip blanks between address components */
do c = getchr(); while (c==' ' || c=='\t');
if ('0'<=c && c<='9') {
peekc = c;
/* a leading number is absolute; a later one is an offset */
if (!opcnt) a = zero;
a += sign*getnum();
} else switch (c) {
case '$':
a = dol;
/* fall through */
case '.':
if (opcnt) error(Q);
break;
case '\'':
c = getchr();
if (opcnt || c<'a' || 'z'<c) error(Q);
/* find the line whose address matches the mark; running past dol
 * ends the outer loop and reports an error */
a = zero;
do a++; while (a<=dol && names[c-'a']!=(*a&~01));
break;
case '?':
sign = -sign;
/* fall through */
case '/':
compile(c);
b = a;
/* search in the direction of sign, wrapping around the buffer,
 * until a match or until we are back where we started */
for (;;) {
a += sign;
if (a<=zero) a = dol;
if (a>dol) a = zero;
if (execute(a)) break;
if (a==b) error(Q);
}
break;
default:
/* an operator with no operand after it means an offset of one */
if (nextopand == opcnt) {
a += sign;
if (a<zero || dol<a)
continue; /* error(Q); */
}
if (c!='+' && c!='-' && c!='^') {
/* end of the address: push the character back */
peekc = c;
if (opcnt==0) a = 0;
return (a);
}
sign = 1;
if (c!='+') sign = -sign;
nextopand = ++opcnt;
continue;
}
sign = 1;
opcnt++;
} while (zero<=a && a<=dol);
error(Q);
/*NOTREACHED*/
return 0;
}
/*
 * Read a run of decimal digits from the command input and return its value
 * (0 when there are no digits). The first non-digit is pushed back via peekc.
 */
static int getnum(void)
{
	int value = 0;
	int ch;

	for (;;) {
		ch = getchr();
		if (ch < '0' || ch > '9')
			break;
		value = value*10 + ch - '0';
	}
	peekc = ch;
	return value;
}
/*
 * When the command had no explicit address, make it cover the whole buffer:
 * line 1 (or zero when the buffer is empty) through dol.
 */
static void setwide(void)
{
	if (given)
		return;
	addr1 = (dol > zero) ? zero + 1 : zero;
	addr2 = dol;
}
static void setnoaddr(void)
{
if (given) error(Q);
}
static void nonzero(void)
{
squeeze(1);
}
/*
 * Validate the current address range: addr1 must be at least zero+i,
 * addr2 at most dol, and addr1 must not exceed addr2. Calls error(Q)
 * when any of these fails.
 */
static void squeeze(int i)
{
	if (addr1 >= zero+i && addr2 <= dol && addr1 <= addr2)
		return;
	error(Q);
}
/*
 * Consume the end of a command line.
 *
 * Accepts a newline or EOF directly, or one of the suffixes p, l, n
 * followed by a newline. A suffix sets pflag (and listf for l, listn for n)
 * so the current line is printed afterwards. Anything else is an error.
 */
static void newline(void)
{
	int c;

	c = getchr();
	if (c == '\n' || c == EOF)
		return;
	switch (c) {
	case 'l':
		pflag++;
		listf++;
		break;
	case 'n':
		pflag++;
		listn++;
		break;
	case 'p':
		pflag++;
		break;
	default:
		error(Q);
	}
	if (getchr() != '\n')
		error(Q);
}
static void filename(int comm)
{
unsigned char *p1, *p2;
int c;

count = 0;
c = getchr();
if (c=='\n' || c==EOF) {
p1 = savedfile;
if (*p1==0 && comm!='f') error(Q);
p2 = file;
while (*p2++ = *p1++)
;
return;
}
if (c!=' ')
error(Q);
while ((c = getchr()) == ' ')
;
if (c=='\n') error(Q);
p1 = file;
do {
if (p1 >= &file[sizeof(file)-1] || c==' ' || c==EOF) error(Q);
*p1++ = c;
} while ((c = getchr()) != '\n');
*p1++ = 0;
if (savedfile[0]==0 || comm=='e' || comm=='f') {
p1 = savedfile;
p2 = file;
while (*p1++ = *p2++)
;
}
}
/*
 * Finish a file transfer: close io and, unless vflag is clear, print the
 * value of count followed by a newline.
 */
static void exfile(void)
{
	close(io);
	io = -1;
	if (!vflag)
		return;
	putd();
	putchr('\n');
}
static void onintr(int n)
{
signal(SIGINT, onintr);
putchr('\n');
lastc = '\n';
error(Q);
}
/*
 * SIGHUP handler: ignore further interrupts and hangups, save a non-empty
 * buffer to "ed.hup" when that file can be created, then quit without the
 * unsaved-changes check. The signal number is not used.
 */
static void onhup(int n)
{
	int have_lines;

	(void)n;
	signal(SIGINT, SIG_IGN);
	signal(SIGHUP, SIG_IGN);
	have_lines = dol > zero;
	if (have_lines) {
		addr1 = zero+1;
		addr2 = dol;
		io = creat("ed.hup", 0600);
	}
	if (have_lines && io > 0)
		putfile();
	fchange = 0;
	quit(0);
}
/*
 * Report an error and abandon the current command.
 *
 * Prints "?" followed by the message s and a newline, resets the per-command
 * state (wrapp, listf, listn, count, pflag, globp), discards the rest of the
 * current input line, closes io if it is open, and longjmps to savej.
 * This function does not return.
 *
 * The whole message goes through putchr() so that the "?" and the text
 * appear together, in order, on the descriptor selected by oflag.
 */
static void error(unsigned char *s)
{
	int c;
	unsigned char *sp;

	wrapp = 0;
	listf = 0;
	listn = 0;
	putchr('?');
	for (sp = s; *sp; sp++)
		putchr(*sp);
	putchr('\n');
	count = 0;
	lseek(0, (long)0, 2);
	pflag = 0;
	if (globp)
		lastc = '\n';
	globp = 0;
	peekc = lastc;
	/* skip the remainder of the offending input line */
	if(lastc)
		while ((c = getchr()) != '\n' && c != EOF)
			;
	if (io > 0) {
		close(io);
		io = -1;
	}
	longjmp(savej, 1);
}
/*
 * Return the next command character and record it in lastc.
 *
 * Sources, in order: the pushback character peekc; the in-memory command
 * string globp (EOF is returned, and globp cleared, at its terminating NUL);
 * one byte read from descriptor 0, masked to 7 bits. Returns EOF when the
 * read fails or reaches end of file.
 */
static int getchr(void)
{
	char ch;

	lastc = peekc;
	if (lastc != 0) {
		peekc = 0;
		return lastc;
	}
	if (globp != 0) {
		lastc = *globp++;
		if (lastc == 0) {
			globp = 0;
			return EOF;
		}
		return lastc;
	}
	if (read(0, &ch, 1) <= 0) {
		lastc = EOF;
		return EOF;
	}
	lastc = ch & 0177;
	return lastc;
}
/*
 * Read one line of input text into linebuf for append().
 * Returns gety()'s result when it is nonzero, EOF when the line is a lone
 * ".", and 0 when a text line was read.
 */
static int gettty(void)
{
	int rc = gety();

	if (rc != 0)
		return rc;
	return (linebuf[0] == '.' && linebuf[1] == 0) ? EOF : 0;
}
/*
 * Read characters up to a newline into linebuf as a NUL-terminated string.
 *
 * NUL characters are dropped. On EOF the function returns EOF (pushing the
 * EOF back through peekc when input was coming from globp on entry).
 * A line that would not fit in linebuf is rejected with error(Q).
 * Returns 0 when a full line was read.
 */
static int gety(void)
{
	unsigned char *from_glob = globp;
	unsigned char *out = linebuf;
	int c;

	for (;;) {
		c = getchr();
		if (c == '\n')
			break;
		if (c == EOF) {
			if (from_glob)
				peekc = c;
			return c;
		}
		c &= 0177;
		if (c == 0)
			continue;
		*out++ = c;
		if (out >= &linebuf[LBSIZE-2])
			error(Q);
	}
	*out = 0;
	return 0;
}
/*
 * Read the next line of the file open on io into linebuf.
 *
 * Input is buffered in genbuf (ninbuf bytes remaining, nextip the read
 * position). NUL bytes are skipped; a byte with the high bit set or a line
 * longer than linebuf is an error. When the input ends without a final
 * newline, one is supplied and a notice is printed. count is incremented
 * for every character stored. Returns 0 for a line, EOF at end of input.
 *
 * The notice is emitted through putchr() so it stays in order with the rest
 * of the editor's output and honours oflag.
 */
static int getfile(void)
{
	int c;
	unsigned char *lp, *fp;
	const char *msg;

	lp = linebuf;
	fp = nextip;
	do {
		if (--ninbuf < 0) {
			if ((ninbuf = read(io, genbuf, LBSIZE)-1) < 0) {
				if (lp == linebuf)
					return(EOF);
				/* partial last line: terminate it ourselves */
				for (msg = "'\\n' appended"; *msg; msg++)
					putchr(*msg);
				putchr('\n');
				*genbuf = '\n';
			}
			fp = genbuf;
		}
		c = *fp++;
		if (c=='\0')
			continue;
		if (c&0200 || lp >= &linebuf[LBSIZE]) {
			lastc = '\n';
			error(Q);
		}
		*lp++ = c;
		count++;
	} while (c != '\n');
	/* replace the stored newline with the string terminator */
	*--lp = 0;
	nextip = fp;
	return(0);
}
/*
 * Write the first n bytes of genbuf to io. On a short or failed write,
 * print WRERR (through putchr(), so it is ordered with other output and
 * honours oflag) and abandon the command with error(Q).
 */
static void putfile_flush(int n)
{
	const char *msg;

	if (write(io, genbuf, n) == n)
		return;
	for (msg = WRERR; *msg; msg++)
		putchr(*msg);
	putchr('\n');
	error(Q);
}

/*
 * Write lines addr1..addr2 to the file open on io, each terminated by a
 * newline. Output is collected in genbuf and flushed a block at a time.
 * count is incremented for every byte produced.
 */
static void putfile(void)
{
	unsigned int *a1;
	unsigned char *fp, *lp;
	int nib;

	nib = BLKSIZE;
	fp = genbuf;
	a1 = addr1;
	do {
		lp = GetLine(*a1++);
		for (;;) {
			if (--nib < 0) {
				/* buffer full: flush it and start over */
				putfile_flush(fp-genbuf);
				nib = BLKSIZE-1;
				fp = genbuf;
			}
			count++;
			if ((*fp++ = *lp++) == 0) {
				/* end of line: store a newline instead of the NUL */
				fp[-1] = '\n';
				break;
			}
		}
	} while (a1 <= addr2);
	putfile_flush(fp-genbuf);
}
static int append(int (*f)(void), unsigned int *a)
{
unsigned int *a1, *a2, *rdot;
int nline, tl;

nline = 0;
dot = a;
while ((*f)() == 0) {
if ((dol-zero)+1 >= nlall) {
unsigned *ozero = zero;

nlall += 1024;
if ((zero = (unsigned *)realloc((unsigned char *)zero,
nlall*sizeof(unsigned)))==NULL) {
error((unsigned char *)"MEM?");
onhup(0);
}
dot += zero - ozero;
dol += zero - ozero;
}
tl = putline();
nline++;
a1 = ++dol;
a2 = a1+1;
rdot = ++dot;
while (a1 > rdot)
*--a2 = *--a1;
*rdot = tl;
}
return(nline);
}
/*
 * Implement the a (i == 0) and i (i != 0) commands: read text lines and
 * append them after addr2. For insert, the range is first moved up one
 * line when an address was given or the buffer is not empty.
 */
static void add(int i)
{
	int shift = i && (given || dol > zero);

	if (shift) {
		addr1 -= 1;
		addr2 -= 1;
	}
	squeeze(0);
	newline();
	append(gettty, addr2);
}
static void callunix(void)
{
SIG_TYP savint;
int pid, rpid;
int retcode;

setnoaddr();
if ((pid = fork()) == 0) {
signal(SIGHUP, oldhup);
signal(SIGQUIT, oldquit);
execl("/bin/sh", "sh", "-t", NULL);
exit(0100);
}
savint = signal(SIGINT, SIG_IGN);
while ((rpid = wait(&retcode)) != pid && rpid != -1)
;
signal(SIGINT, savint);
if (vflag) {
puts("!");
}
}
/*
 * Leave the editor. When vflag is set and a non-empty buffer has unsaved
 * changes, the first attempt clears fchange and reports an error instead.
 * Otherwise the temporary file is removed and the process exits with
 * status 0. The argument is not used (the function also serves as the
 * SIGTERM handler).
 */
static void quit(int n)
{
	int unsaved;

	(void)n;
	unsaved = vflag && fchange && dol != zero;
	if (unsaved) {
		fchange = 0;
		error(Q);
	}
	unlink(tfname);
	exit(0);
}
/*
 * Delete lines ad1..ad2 from the line table by moving the entries that
 * follow them down, then set dot to ad1 (or to the new last line when ad1
 * is now past the end) and mark the buffer as changed.
 *
 * Nothing is copied when the range extends to the last line, so the slot
 * after dol, which may lie outside the allocated table, is never read.
 */
static void rdelete(unsigned int *ad1, unsigned int *ad2)
{
	unsigned int *a1, *a2, *a3;

	a1 = ad1;
	a2 = ad2+1;
	a3 = dol;
	dol -= a2 - a1;
	while (a2 <= a3)
		*a1++ = *a2++;
	a1 = ad1;
	if (a1 > dol)
		a1 = dol;
	dot = a1;
	fchange = 1;
}
/*
 * Delete every line whose table entry has the low (mark) bit set, in one
 * pass over the table. Used by global() for the g/.../d special case.
 * Returns without changes when no entry is marked.
 */
static void gdelete(void)
{
unsigned int *a1, *a2, *a3;

a3 = dol;
/* find the first marked entry; nothing to do if there is none */
for (a1=zero; (*a1&01)==0; a1++)
if (a1>=a3)
return;
/* compact: a1 is the write position, a2 the read position */
for (a2=a1+1; a2<=a3;) {
if (*a2&01) {
a2++;
dot = a1;
} else
*a1++ = *a2++;
}
dol = a1-1;
if (dot>dol)
dot = dol;
fchange = 1;
}
/*
 * Copy the line stored at temp-file address tl into linebuf, up to and
 * including its terminating NUL, fetching further blocks with getblock()
 * when the line crosses a block boundary. Returns linebuf.
 */
static unsigned char *GetLine(unsigned int tl)
{
	unsigned char *src, *dst;
	int room;

	dst = linebuf;
	src = getblock(tl, READ);
	room = nleft;
	/* round down to the start of the block so stepping by BLKSIZE/2
	 * addresses the following blocks */
	tl &= ~((BLKSIZE/2)-1);
	for (;;) {
		*dst = *src++;
		if (*dst++ == 0)
			break;
		if (--room == 0) {
			tl += BLKSIZE/2;
			src = getblock(tl, READ);
			room = nleft;
		}
	}
	return linebuf;
}
/*
 * Store the text in linebuf into the temporary file at address tline.
 *
 * Copying stops at the terminating NUL, or at the first newline, which is
 * stored as a NUL; in that case linebp is set to the text after the
 * newline so getsub() can deliver it as a further line. Marks the buffer
 * as changed, advances tline past the stored text and returns the address
 * at which the line was stored.
 */
static int putline(void)
{
unsigned char *bp, *lp;
int nl;
unsigned int tl;

fchange = 1;
lp = linebuf;
tl = tline;
bp = getblock(tl, WRITE);
nl = nleft;
/* round down to the block start so adding BLKSIZE/2 reaches the next block */
tl &= ~((BLKSIZE/2)-1);
while (*bp = *lp++) {
if (*bp++ == '\n') {
*--bp = 0;
linebp = lp;
break;
}
if (--nl == 0) {
bp = getblock(tl+=(BLKSIZE/2), WRITE);
nl = nleft;
}
}
nl = tline;
/* advance by half the bytes consumed, rounded to an even address
 * (getblock() doubles addresses to get byte offsets) */
tline += (((lp-linebuf)+03)>>1)&077776;
return(nl);
}
/*
 * Map temp-file address atl to a pointer inside an in-memory block buffer.
 *
 * The block number is atl/(BLKSIZE/2) and the byte offset within the block
 * is twice the address, masked to the block size and to a multiple of 4.
 * nleft is set to the number of bytes from the returned pointer to the end
 * of the block. iof is READ or WRITE: reads are served from ibuff, loading
 * the block from the temporary file when needed (after writing ibuff back
 * if it was modified); writes go to obuff, whose previous contents are
 * written out when a different block is requested. A block number of NBLK
 * or more is reported with error(T).
 */
static unsigned char * getblock(unsigned int atl, int iof)
{
int bno, off;

bno = (atl/(BLKSIZE/2));
off = (atl<<1) & (BLKSIZE-1) & ~03;
if (bno >= NBLK) {
lastc = '\n';
error(T);
}
nleft = BLKSIZE - off;
if (bno==iblock) {
/* remember that ibuff was handed out for writing */
ichanged |= iof;
return(ibuff+off);
}
if (bno==oblock)
return(obuff+off);
if (iof==READ) {
/* NOTE(review): iof is READ (0) here, so this blkio() call reads
 * iblock rather than writing the modified ibuff back - confirm
 * whether a write was intended. */
if (ichanged)
blkio(iblock, ibuff, iof);
ichanged = 0;
iblock = bno;
blkio(bno, ibuff, iof);
return(ibuff+off);
}
if (oblock>=0)
blkio(oblock, obuff, iof);
oblock = bno;
return(obuff+off);
}
/*
 * Transfer block number b between buf and the temporary file: a read when
 * iof is READ, a write otherwise. A transfer of anything other than a full
 * BLKSIZE bytes is reported with error(T).
 */
static void blkio(int b, unsigned char *buf, int iof)
{
	long nbytes;

	lseek(tfile, (long)b*BLKSIZE, 0);
	if (iof == READ)
		nbytes = read(tfile, buf, BLKSIZE);
	else
		nbytes = write(tfile, buf, BLKSIZE);
	if (nbytes != BLKSIZE)
		error(T);
}
/*
 * Reset the editor to an empty buffer: close and recreate the temporary
 * file, clear all marks and the substitute/undo state, invalidate both
 * block buffers, and set dot and dol to zero.
 */
static void init(void)
{
	int i;

	close(tfile);
	tline = 2;
	for (i = 0; i < 26; i++)
		names[i] = 0;
	subnewa = 0;
	anymarks = 0;
	iblock = -1;
	oblock = -1;
	ichanged = 0;
	/* truncate (or create) the file, then reopen it for read and write */
	close(creat(tfname, 0600));
	tfile = open(tfname, 2);
	dol = zero;
	dot = zero;
}
/*
 * Implement the g (k == 1) and v (k == 0) commands.
 *
 * Compiles the pattern, collects the command list into globuf (a backslash
 * before a newline continues the list; an empty list means "p"), marks
 * every line in addr1..addr2 whose match result equals k by setting the low
 * bit of its table entry, then runs the command list once per marked line
 * with dot set to that line. Nested use (globp already set) is an error.
 */
static void global(int k)
{
unsigned char *gp;
int c;
unsigned int *a1;
unsigned char globuf[GBSIZE];

if (globp)
error(Q);
setwide();
squeeze(dol>zero);
if ((c=getchr())=='\n')
error(Q);
compile(c);
gp = globuf;
while ((c = getchr()) != '\n') {
if (c==EOF)
error(Q);
if (c=='\\') {
/* keep the backslash unless it escapes a newline */
c = getchr();
if (c!='\n')
*gp++ = '\\';
}
*gp++ = c;
if (gp >= &globuf[GBSIZE-2])
error(Q);
}
if (gp == globuf)
*gp++ = 'p';
*gp++ = '\n';
*gp++ = 0;
/* clear old marks everywhere, then mark the selected lines */
for (a1=zero; a1<=dol; a1++) {
*a1 &= ~01;
if (a1>=addr1 && a1<=addr2 && execute(a1)==k)
*a1 |= 01;
}
/*
* Special case: g/.../d (avoid n^2 algorithm)
*/
if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') {
gdelete();
return;
}
for (a1=zero; a1<=dol; a1++) {
if (*a1 & 01) {
*a1 &= ~01;
dot = a1;
globp = globuf;
commands();
/* the commands may have rearranged the table: rescan from the start */
a1 = zero;
}
}
}
/*
 * Implement the j command: concatenate lines addr1..addr2 into one line
 * that replaces addr1, delete the others, and leave dot at addr1.
 * A result too long for genbuf is rejected with error(Q).
 */
static void join(void)
{
	unsigned char *out, *in;
	unsigned int *a;

	nonzero();
	out = genbuf;
	a = addr1;
	while (a <= addr2) {
		in = GetLine(*a);
		for (;;) {
			*out = *in++;
			if (*out == 0)
				break;
			if (out++ >= &genbuf[LBSIZE-2])
				error(Q);
		}
		a++;
	}
	/* move the joined text into linebuf, where putline() expects it */
	in = genbuf;
	out = linebuf;
	do {
		*out = *in++;
	} while (*out++ != 0);
	*addr1 = putline();
	if (addr1 < addr2)
		rdelete(addr1+1, addr2);
	dot = addr1;
}
/*
 * Implement the s command over lines addr1..addr2.
 *
 * An optional leading number n selects which match on a line is replaced;
 * the g suffix (reported by compsub()) replaces every match from that one
 * on. Each changed line is stored as a new line, marks that referred to the
 * old line are moved to the new one, and the old and new addresses are
 * remembered in subolda/subnewa for the u command. Text after a newline
 * in the replacement becomes additional lines. If no line was changed and
 * inglob is 0, error(Q) is called.
 */
static void substitute(int inglob)
{
int *mp, nl;
unsigned int *a1;
int gsubf;
int n;

n = getnum(); /* OK even if n==0 */
gsubf = compsub();
for (a1 = addr1; a1 <= addr2; a1++) {
if (execute(a1)){
unsigned *ozero;
int m = n;
do {
int span = loc2-loc1;
/* skip matches until the n-th one has been reached */
if (--m <= 0) {
dosub();
if (!gsubf)
break;
if (span==0) { /* null RE match */
if (*loc2=='\0')
break;
loc2++;
}
}
} while (execute((unsigned *)0));
if (m <= 0) {
inglob |= 01;
subnewa = putline();
*a1 &= ~01;
if (anymarks) {
for (mp = names; mp < &names[26]; mp++)
if (*mp == *a1)
*mp = subnewa;
}
subolda = *a1;
*a1 = subnewa;
/* append() may move the line table; rebase a1 and addr2 and
 * step over the lines it inserted */
ozero = zero;
nl = append(getsub, a1);
nl += zero-ozero;
a1 += nl;
addr2 += nl;
}
}
}
if (inglob==0)
error(Q);
}
/*
 * Parse the pattern and replacement of an s command.
 *
 * The first character is the delimiter (newline and blank are rejected).
 * The pattern is compiled with compile(); the replacement is stored in
 * rhsbuf with the high bit set on characters that were preceded by a
 * backslash. A newline ending the replacement (other than inside a global
 * command list that has more text) also requests printing via pflag.
 * Returns 1 when the g suffix is present, 0 otherwise.
 */
static int compsub(void)
{
int seof, c;
unsigned char *p;

if ((seof = getchr()) == '\n' || seof == ' ')
error(Q);
compile(seof);
p = rhsbuf;
for (;;) {
c = getchr();
/* mark escaped characters with the high bit */
if (c=='\\')
c = getchr() | 0200;
if (c=='\n') {
if (globp && globp[0]) /* last '\n' does not count */
c |= 0200;
else {
peekc = c;
pflag++;
break;
}
}
if (c==seof)
break;
*p++ = c;
if (p >= &rhsbuf[LBSIZE/2])
error(Q);
}
*p++ = 0;
if ((peekc = getchr()) == 'g') {
peekc = 0;
newline();
return(1);
}
newline();
return(0);
}
/*
 * Line source for append() after a substitution: deliver the text that
 * putline() left pending in linebp by moving it to the start of linebuf.
 * Returns 0 when a line was delivered, EOF when nothing is pending.
 */
static int getsub(void)
{
	unsigned char *src = linebp;
	unsigned char *dst = linebuf;

	if (src == 0)
		return EOF;
	/* copy forwards one byte at a time: the source may lie inside linebuf */
	do {
		*dst = *src++;
	} while (*dst++ != 0);
	linebp = 0;
	return 0;
}
/*
 * Replace the current match (loc1..loc2 in linebuf) with the replacement
 * text in rhsbuf.
 *
 * The new line is built in genbuf: the text before the match, the
 * replacement ('&' inserts the matched text; an escaped digit 1..nbra
 * inserts that group), then the text after the match. loc2 is moved to the
 * end of the inserted text and the result is copied back into linebuf.
 * A result too long for genbuf is rejected with error(Q).
 */
static void dosub(void)
{
unsigned char *lp, *sp, *rp;
int c;

lp = linebuf;
sp = genbuf;
rp = rhsbuf;
while (lp < loc1)
*sp++ = *lp++;
while (c = *rp++&0377) {
if (c=='&') {
sp = place(sp, loc1, loc2);
continue;
} else if (c&0200 && (c &= 0177) >='1' && c < nbra+'1') {
sp = place(sp, braslist[c-'1'], braelist[c-'1']);
continue;
}
/* ordinary character: drop the escape bit */
*sp++ = c&0177;
if (sp >= &genbuf[LBSIZE])
error(Q);
}
lp = loc2;
/* the next search resumes after the replacement, in linebuf coordinates */
loc2 = sp - genbuf + linebuf;
while (*sp++ = *lp++)
if (sp >= &genbuf[LBSIZE])
error(Q);
lp = linebuf;
sp = genbuf;
while (*lp++ = *sp++)
;
}
/*
 * Copy the bytes l1..l2 (l2 exclusive) to sp inside genbuf and return the
 * position after the last byte written. Reaching the end of genbuf is
 * reported with error(Q).
 */
static unsigned char * place(unsigned char *sp, unsigned char *l1,
	unsigned char *l2)
{
	unsigned char *limit = &genbuf[LBSIZE];

	for (; l1 < l2; l1++) {
		*sp++ = *l1;
		if (sp >= limit)
			error(Q);
	}
	return sp;
}
/*
 * Implement the m (cflag == 0) and t (cflag != 0) commands: move or copy
 * lines addr1..addr2 to after the destination address that follows the
 * command.
 *
 * For a copy, the lines are first appended at the end of the buffer and
 * that new range is then moved. The move itself rotates the affected part
 * of the line table with three reverse() calls. A destination inside the
 * source range is rejected with error(Q).
 */
static void move(int cflag)
{
unsigned int *adt, *ad1, *ad2;

nonzero();
if ((adt = address())==0) /* address() guarantees addr is in range */
error(Q);
newline();
if (cflag) {
unsigned int *ozero;
int delta;

ad1 = dol;
ozero = zero;
append(getcopy, ad1++);
ad2 = dol;
/* append() may have moved the line table: rebase saved pointers */
delta = zero - ozero;
ad1 += delta;
adt += delta;
} else {
ad2 = addr2;
/* clear the mark bit on the lines being moved */
for (ad1 = addr1; ad1 <= ad2;)
*ad1++ &= ~01;
ad1 = addr1;
}
ad2++;
if (adt<ad1) {
/* destination before the range */
dot = adt + (ad2-ad1);
if ((++adt)==ad1)
return;
reverse(adt, ad1);
reverse(ad1, ad2);
reverse(adt, ad2);
} else if (adt >= ad2) {
/* destination after the range */
dot = adt++;
reverse(ad1, ad2);
reverse(ad2, adt);
reverse(ad1, adt);
} else
error(Q);
fchange = 1;
}
/*
 * Reverse the order of the elements in the half-open range [a1, a2).
 *
 * An empty or single-element range is left untouched and no element
 * outside the range is ever read. Values are held in an unsigned
 * temporary so entries above INT_MAX are preserved exactly.
 */
static void reverse(unsigned int *a1, unsigned int *a2)
{
	unsigned int t;

	if (a1 >= a2)
		return;
	for (--a2; a1 < a2; ++a1, --a2) {
		t = *a2;
		*a2 = *a1;
		*a1 = t;
	}
}
/*
 * Line source for append() used by the t command: load the next line of
 * addr1..addr2 into linebuf and advance addr1. Returns 0 for a line and
 * EOF once the range is exhausted.
 */
static int getcopy(void)
{
	if (addr1 <= addr2) {
		GetLine(*addr1++);
		return 0;
	}
	return EOF;
}
/*
 * Compile the regular expression that follows in the command input, up to
 * the delimiter eof or a newline, into expbuf.
 *
 * An empty expression reuses the previously compiled one (an error if
 * there is none). The output is a sequence of the C* opcodes with their
 * operands, ending in CEOF; a '*' sets the STAR bit on the preceding
 * opcode. nbra counts the \( \) groups. Any syntax error, unbalanced group
 * or overflow of expbuf clears the compiled expression and calls error(Q).
 */
static void compile(int eof)
{
int c;
unsigned char *ep;
unsigned char *lastep;
unsigned char bracket[NBRA], *bracketp;
int cclcnt;

ep = expbuf;
bracketp = bracket;
if ((c = getchr()) == '\n') {
peekc = c;
c = eof;
}
if (c == eof) {
/* empty pattern: keep the previous expression */
if (*ep==0)
error(Q);
return;
}
nbra = 0;
if (c=='^') {
c = getchr();
*ep++ = CCIRC;
}
peekc = c;
lastep = 0;
for (;;) {
if (ep >= &expbuf[ESIZE])
goto cerror;
c = getchr();
if (c == '\n') {
/* a newline ends the pattern like the delimiter does */
peekc = c;
c = eof;
}
if (c==eof) {
if (bracketp != bracket)
goto cerror;
*ep++ = CEOF;
return;
}
/* lastep remembers the opcode a following '*' applies to */
if (c!='*')
lastep = ep;
switch (c) {

case '\\':
if ((c = getchr())=='(') {
if (nbra >= NBRA)
goto cerror;
*bracketp++ = nbra;
*ep++ = CBRA;
*ep++ = nbra++;
continue;
}
if (c == ')') {
if (bracketp <= bracket)
goto cerror;
*ep++ = CKET;
*ep++ = *--bracketp;
continue;
}
if (c>='1' && c<'1'+NBRA) {
*ep++ = CBACK;
*ep++ = c-'1';
continue;
}
*ep++ = CCHR;
if (c=='\n')
goto cerror;
*ep++ = c;
continue;

case '.':
*ep++ = CDOT;
continue;

case '\n':
goto cerror;

case '*':
/* '*' with nothing to repeat, or after a group marker, is literal */
if (lastep==0 || *lastep==CBRA || *lastep==CKET)
goto defchar;
*lastep |= STAR;
continue;

case '$':
/* '$' is special only at the very end of the pattern */
if ((peekc=getchr()) != eof && peekc!='\n')
goto defchar;
*ep++ = CDOL;
continue;

case '[':
*ep++ = CCL;
*ep++ = 0;
cclcnt = 1;
if ((c=getchr()) == '^') {
c = getchr();
ep[-2] = NCCL;
}
do {
if (c=='\n')
goto cerror;
if (c=='-' && ep[-1]!=0) {
if ((c=getchr())==']') {
/* trailing '-' is a literal member */
*ep++ = '-';
cclcnt++;
break;
}
/* expand the range by listing every member */
while (ep[-1]<c) {
*ep = ep[-1]+1;
ep++;
cclcnt++;
if (ep>=&expbuf[ESIZE])
goto cerror;
}
}
*ep++ = c;
cclcnt++;
if (ep >= &expbuf[ESIZE])
goto cerror;
} while ((c = getchr()) != ']');
/* store the member count (including the count byte itself) */
lastep[1] = cclcnt;
continue;

defchar:
default:
*ep++ = CCHR;
*ep++ = c;
}
}
cerror:
expbuf[0] = 0;
nbra = 0;
error(Q);
}
/*
 * Match the compiled expression in expbuf against a line.
 *
 * With a non-null addr the line at that table entry is loaded into linebuf
 * and searched from its start (line zero never matches). With a null addr
 * the search continues in the current linebuf from loc2, which is how
 * repeated matches on one line are found; an anchored (^) pattern cannot
 * match again. On success returns 1 with loc1/loc2 delimiting the match;
 * otherwise returns 0.
 */
static int execute(unsigned int *addr)
{
unsigned char *p1, *p2;
int c;

for (c=0; c<NBRA; c++) {
braslist[c] = 0;
braelist[c] = 0;
}
p2 = expbuf;
if (addr == (unsigned *)0) {
if (*p2==CCIRC)
return(0);
p1 = loc2;
} else if (addr==zero)
return(0);
else
p1 = GetLine(*addr);
if (*p2==CCIRC) {
/* anchored: only the current position can match */
loc1 = p1;
return(advance(p1, p2+1));
}
/* fast check for first character */
if (*p2==CCHR) {
c = p2[1];
do {
if (*p1!=c)
continue;
if (advance(p1, p2)) {
loc1 = p1;
return(1);
}
} while (*p1++);
return(0);
}
/* regular algorithm */
do {
if (advance(p1, p2)) {
loc1 = p1;
return(1);
}
} while (*p1++);
return(0);
}
/*
 * Try to match the compiled expression starting at ep against the text
 * starting at lp.
 *
 * Interprets the opcodes produced by compile(). Starred opcodes first
 * consume as much text as they can and then back off one step at a time,
 * recursing on the rest of the expression. Group boundaries are recorded
 * in braslist/braelist. Returns 1 on a match, with loc2 set to the end of
 * the matched text, and 0 otherwise. An unknown opcode, or a
 * back-reference to a group that has not been closed, calls error(Q).
 */
static int advance(unsigned char *lp,unsigned char *ep)
{
unsigned char *curlp;
int i;

for (;;) switch (*ep++) {

case CCHR:
if (*ep++ == *lp++)
continue;
return(0);

case CDOT:
if (*lp++)
continue;
return(0);

case CDOL:
if (*lp==0)
continue;
return(0);

case CEOF:
loc2 = lp;
return(1);

case CCL:
if (cclass(ep, *lp++, 1)) {
/* skip the class: its first byte is its own length */
ep += *ep;
continue;
}
return(0);

case NCCL:
if (cclass(ep, *lp++, 0)) {
ep += *ep;
continue;
}
return(0);

case CBRA:
braslist[*ep++] = lp;
continue;

case CKET:
braelist[*ep++] = lp;
continue;

case CBACK:
if (braelist[i = *ep++]==0)
error(Q);
if (backref(i, lp)) {
lp += braelist[i] - braslist[i];
continue;
}
return(0);

case CBACK|STAR:
if (braelist[i = *ep++] == 0)
error(Q);
curlp = lp;
/* take as many repetitions of the group as possible... */
while (backref(i, lp))
lp += braelist[i] - braslist[i];
/* ...then give them back one at a time */
while (lp >= curlp) {
if (advance(lp, ep))
return(1);
lp -= braelist[i] - braslist[i];
}
continue;

case CDOT|STAR:
curlp = lp;
while (*lp++)
;
goto star;

case CCHR|STAR:
curlp = lp;
while (*lp++ == *ep)
;
ep++;
goto star;

case CCL|STAR:
case NCCL|STAR:
curlp = lp;
while (cclass(ep, *lp++, ep[-1]==(CCL|STAR)))
;
ep += *ep;
goto star;

star:
/* lp is one past the first character that did not match; back off
 * towards curlp, trying the rest of the expression at each position */
do {
lp--;
if (advance(lp, ep))
return(1);
} while (lp > curlp);
return(0);

default:
error(Q);
}
}
/*
 * Test whether the text at lp repeats the text captured by group i
 * (braslist[i] up to braelist[i]). Returns 1 when it does, 0 at the first
 * differing byte.
 */
static int backref(int i, unsigned char *lp)
{
	unsigned char *bp = braslist[i];
	unsigned char *end = braelist[i];

	for (;;) {
		if (*bp++ != *lp++)
			return 0;
		if (bp >= end)
			return 1;
	}
}
/*
 * Test character c against a compiled character class.
 *
 * set points at the class: a count byte (which includes itself) followed
 * by the member characters. Returns af when c is a member and !af when it
 * is not, so af selects between [...] (1) and [^...] (0). The NUL
 * character never matches: the result is 0 regardless of af.
 */
static int cclass(unsigned char *set, int c, int af)
{
	int remaining;
	unsigned char *member;

	if (c == 0)
		return 0;
	remaining = *set;
	for (member = set + 1; --remaining != 0; member++) {
		if (*member == c)
			return af;
	}
	return !af;
}
/*
 * Print the value of count in decimal through putchr(), most significant
 * digit first. count is 0 afterwards.
 */
static void putd(void)
{
	int digits[3*sizeof(long)+1];	/* more than enough for any long */
	int n = 0;

	/* peel off digits least significant first... */
	do {
		digits[n++] = count%10;
		count /= 10;
	} while (count);
	/* ...and emit them in the opposite order */
	while (n > 0)
		putchr(digits[--n] + '0');
}
/* Output buffer used by putchr() and the position of the next free byte. */
static unsigned char line[70];
static unsigned char *linp = line;
/*
 * Append one character to the output buffer, flushing it with write() when
 * the character is a newline or the buffer holds 64 bytes or more. Output
 * goes to descriptor 2 when oflag is set and to descriptor 1 otherwise.
 *
 * In list mode (listf): a newline after a trailing blank is preceded by
 * "\n" written as two visible characters; long lines are folded with a
 * backslash-newline-tab once col passes 66; backspace, tab and backslash
 * are shown as \b, \t and \\; other control characters and DEL are shown
 * as a backslash and three octal digits.
 */
static void putchr(int ac)
{
unsigned char *lp;
int c;

lp = linp;
c = ac;
if (listf) {
if (c=='\n') {
if (linp!=line && linp[-1]==' ') {
*lp++ = '\\';
*lp++ = 'n';
}
} else {
if (col > (72-4-2)) {
col = 8;
*lp++ = '\\';
*lp++ = '\n';
*lp++ = '\t';
}
col++;
if (c=='\b' || c=='\t' || c=='\\') {
*lp++ = '\\';
if (c=='\b')
c = 'b';
else if (c=='\t')
c = 't';
col++;
} else if (c<' ' || c=='\177') {
*lp++ = '\\';
*lp++ = (c>>6) +'0';
*lp++ = ((c>>3)&07)+'0';
c = ( c &07)+'0';
col += 3;
}
}
}
*lp++ = c;
/* flush early enough that the up to seven bytes one call can add
 * always fit in the 70-byte buffer */
if(c == '\n' || lp >= &line[64]) {
linp = line;
write(oflag?2:1, line, lp-line);
return;
}
linp = lp;
}

Bonita Montero

unread,
Apr 22, 2021, 1:05:28 PM4/22/21
to

> Recently, somebody started a thread about how C wasn't a simple
> language. The excuse was a text editor project for a beginner.

It is simple, but as we can see from your code it's actually also
very ugly.

wij

unread,
Apr 22, 2021, 3:41:25 PM4/22/21
to
Never mind with such kind of people (this example is too complex for him).
When feeling is good, C++ is a super-set of C
When feeling is not so good, C++ is not C.

Juha Nieminen

unread,
Apr 23, 2021, 3:38:49 AM4/23/21
to
wij <wyn...@gmail.com> wrote:
> Never mind with such kind of people (this example is too complex for him).
> When feeling is good, C++ is a super-set of C
> When feeling is not so good, C++ is not C.

You had to quote 1600 lines of code in order to say that?

You do know that you can edit and shorten quotes, don't you?

Juha Nieminen

unread,
Apr 23, 2021, 3:55:26 AM4/23/21
to
In comp.lang.c++ jacobnavia <ja...@jacob.remcomp.fr> wrote:
> Attached is the source code for the 10th edition of Unix of the "ed"
> text editor, published by Brian Kernighan for his CS classes at
> Princeton in 2001.

Maybe it has significant historic value, but as a C program I don't think
it's that great.

It's quite hard to decipher (especially since it follows the typical
70's and 80's C style of using very short cryptic names everywhere),
but it appears to me that it has a fixed maximum line length of 4096.
I would guess you just can't edit lines longer than that.

(I also love how the program has one malloc() and no corresponding
free(), because why would it. It's not like it matters by the end
of the program...)

jacobnavia

unread,
Apr 23, 2021, 5:25:18 AM4/23/21
to
Le 23/04/2021 à 09:55, Juha Nieminen a écrit :
> (I also love how the program has one malloc() and no corresponding
> free(), because why would it. It's not like it matters by the end
> of the program...)

If you look a bit longer, maybe you find out why, but maybe not, depends
on how far you can look.

wij

unread,
Apr 23, 2021, 8:12:17 AM4/23/21
to
I do not really understand what 'quote' means here. So I reply
I can rewrite/translate ed.c as presented, using my own library easily and even
more powerfully and flexibly (file size would be larger and consume more resources).
My codes will not use 'stream stuff'(AT&T implicitly makes people believe it is the
'standard' everybody should follow), I use pure C-lib function calls, I follow the
standard C++ must follow as well.

If you rewrite ed.c (not yet a fully functional text editor) in pure C++,
what would it look like? What would your evaluation be?

E.g. Let alone signal and setjmp (and streaming I/O), I just pick one randomly.
I guess putchr('\n') might be translated to std::cout << std::endl. The point
here is std::endl. Could it be more elegantly and efficiently than
#define ENDL "\n" (C code)?
Maybe you would say you are talking memory allocation issues, but basically,
the main issue of your question is "No, C is not a simple language".

red floyd

unread,
Apr 23, 2021, 12:25:49 PM4/23/21
to
On 4/23/2021 5:12 AM, wij wrote:
> On Friday, 23 April 2021 at 15:38:49 UTC+8, Juha Nieminen wrote:
>> wij <wyn...@gmail.com> wrote:
>>> Never mind with such kind of people (this example is too complex for him).
>>> When feeling is good, C++ is a super-set of C
>>> When feeling is not so good, C++ is not C.
>> You had to quote 1600 lines of code in order to say that?
>>
>> You do know that you can edit and shorten quotes, don't you?
>
> I do not really understand what 'quote' means here.
[redacted]

What he means is, you didn't have to repost the entire C program to make
a four line comment on it. You could have edited it out, as I did here
with most of your comment

Otto J. Makela

unread,
Apr 23, 2021, 12:26:10 PM4/23/21
to
jacobnavia <ja...@jacob.remcomp.fr> wrote:

> Attached is the source code for the 10th edition of Unix of the "ed"
> text editor, published by Brian Kernighan for his CS classes at
> Princeton in 2001.

I like that it indeed does compile, but the fact that there are hardly
any comments, nor do the variable names really explain what is going on
hardly makes this a shining example of recommendable coding practices.

--
/* * * Otto J. Makela <o...@iki.fi> * * * * * * * * * */
/* Phone: +358 40 765 5772, ICBM: N 60 10' E 24 55' */
/* Mail: Mechelininkatu 26 B 27, FI-00100 Helsinki */
/* * * Computers Rule 01001111 01001011 * * * * * * */

Chris M. Thomasson

unread,
Apr 23, 2021, 3:45:43 PM4/23/21
to
for (;;)
{
struct node* = malloc(sizeof(*node));
}

Hey man, like, don't worry... The GC is there to save us! ;^o

Jorgen Grahn

unread,
Apr 23, 2021, 4:00:30 PM4/23/21
to
["Followup-To:" header set to comp.lang.c.]

On Fri, 2021-04-23, Juha Nieminen wrote:
> In comp.lang.c++ jacobnavia <ja...@jacob.remcomp.fr> wrote:
>> Attached is the source code for the 10th edition of Unix of the "ed"
>> text editor, published by Brian Kernighan for his CS classes at
>> Princeton in 2001.
>
> Maybe it has significant historic value, but as a C program I don't think
> it's that great.
>
> It's quite hard to decipher (especially since it follows the typical
> 70's and 80's C style of using very short cryptic names everywhere),

IMO, longer names without any other improvements, aren't an
improvement. The global 'int pflag' here, for example, would suck no
matter how you rename it. If it's changed to an enum, and wrapped and
documented in a struct EditorState[1], it doesn't need to be renamed.

(But I have a maths background, and like short names.)

/Jorgen

[1] I may misremember the code -- I only took a brief look.

--
// Jorgen Grahn <grahn@ Oo o. . .
\X/ snipabacken.se> O o .

jacobnavia

unread,
Apr 23, 2021, 4:29:20 PM4/23/21
to
The malloc allocates the working area of the editor. It is not freed
because it will be always needed; it is realloc'ed when needed, and
freed by the OS when the program terminates. This is obvious for anyone
reading C.

Chris M. Thomasson

unread,
Apr 23, 2021, 4:34:56 PM4/23/21
to
Oh wow. Don't tell me you are one of those people who think that free()
should not exist? I have dealt with them before.

Chris M. Thomasson

unread,
Apr 23, 2021, 4:37:15 PM4/23/21
to
Question from user: Why does my memory grow out of control?

Answer: Don't worry man.

wow. I have seen systems grind to a halt because of such things.

Chris M. Thomasson

unread,
Apr 23, 2021, 4:38:33 PM4/23/21
to
Have you ever had to work with non-paged memory before?

jacobnavia

unread,
Apr 23, 2021, 4:40:55 PM4/23/21
to
?????
Where did I say something like that?

Nowhere. You are just putting words in my mouth.

I explained to you why the code doesn't bother to free the memory since
it is used for the whole time that the editor is running. Freeing it
just before exiting the program makes no sense.

Chris M. Thomasson

unread,
Apr 23, 2021, 4:52:00 PM4/23/21
to
On 4/23/2021 1:40 PM, jacobnavia wrote:
> Le 23/04/2021 à 22:34, Chris M. Thomasson a écrit :
>> On 4/23/2021 1:29 PM, jacobnavia wrote:
>>> Le 23/04/2021 à 21:45, Chris M. Thomasson a écrit :
>>>> On 4/23/2021 2:25 AM, jacobnavia wrote:
>>>>> Le 23/04/2021 à 09:55, Juha Nieminen a écrit :
>>>>>> (I also love how the program has one malloc() and no corresponding
>>>>>> free(), because why would it. It's not like it matters by the end
>>>>>> of the program...)
>>>>>
>>>>> If you look a bit longer, maybe you find out why, but maybe not,
>>>>> depends on how far you can look.
>>>>
>>>> for (;;)
>>>> {
>>>>    struct node* = malloc(sizeof(*node));
>>>> }
>>>>
>>>> Hey man, like, don't worry... The GC is there to save us! ;^o
>>>
>>> The malloc allocates the working area of the editor. It is not freed
>>> because it will be always needed; it is realloc'ed when needed, and
>>> freed by the OS when the program terminates. This is abvious for
>>> anyone reading C.
>>>
>>
>> Oh wow. Don't tell me you are one of those people who think that
>> free() should not exist? I have dealt with them before.
>
> ?????
> Where did I said something like that?
>
> Nowhere. You are just putting words in my mouth.

I was worrying that you might be one of those types.


>
> I explained to you why the code doesn't bother to free the memorysince
> it is used for the whole time that the editor is running. Freeing it
> just before exiting the program makes no sense.
>

Why? Did you create the system it's running on?

Chris M. Thomasson

unread,
Apr 23, 2021, 4:55:37 PM4/23/21
to
In my world, every malloc shall have a corresponding free. Call me a
pedant if you want. Oh well.

James Kuyper

unread,
Apr 23, 2021, 4:59:23 PM4/23/21
to
On 4/23/21 8:12 AM, wij wrote:
> On Friday, 23 April 2021 at 15:38:49 UTC+8, Juha Nieminen wrote:
>> wij <wyn...@gmail.com> wrote:
>>> Never mind with such kind of people (this example is too complex for him).
>>> When feeling is good, C++ is a super-set of C
>>> When feeling is not so good, C++ is not C.
>> You had to quote 1600 lines of code in order to say that?
>>
>> You do know that you can edit and shorten quotes, don't you?
>
> I do not really understand what 'quote' means here.

When you send a message containing material not written by you (such as
part of the message that you're responding to) it's called a quotation,
or simply a quote.
The general rule you should follow on usenet when quoting material from
the message you're responding to is to quote as little as possible,
while providing other readers enough information so they can understand
the context of your response. The only thing you actually said about the
material you were responding to was that it was too complex. The most I
would have quoted from the previous message would have been the following:

On Friday, 23 April 2021 at 00:58:41 UTC+8, jacobnavia wrote:
> Recently, somebody started a thread about how C wasn't a simple
> language. The excuse was a text editor project for a beginner.
>
> Attached is the source code for the 10th edition of Unix of the "ed"
> text editor, published by Brian Kernighan for his CS classes at
> Princeton in 2001.
>
> Maybe C is not a simple language but one of its incredible strengths is
> its stability. This code is around 40 (yes FORTY) years old and you can
> compile it without any trouble today and it will work in any UNIX system.
>
> ----------------------------------------------------------------cut here
> /* This file contains the source for the 10th Edition Unix version of
> ed, which is
> 1700 lines long. It dates from about 1989, but is typical of Unix
> code from the
> mid 1970's: terse, tight, efficient, and largely uncommented
> This is a slightly modified version that compiles without any
> warnings under gcc in 2021

"1700 lines", and "terse, tight, efficient, and largely uncommented" is
all the context you really needed to establish.

jacobnavia

unread,
Apr 23, 2021, 5:02:20 PM4/23/21
to
Le 23/04/2021 à 22:51, Chris M. Thomasson a écrit :
> On 4/23/2021 1:40 PM, jacobnavia wrote:

>>
>> I explained to you why the code doesn't bother to free the memorysince
>> it is used for the whole time that the editor is running. Freeing it
>> just before exiting the program makes no sense.
>>
>
> Why? Did you create the system its running on?

The code is for the 10th edition of the UNIX system, and was around
probably since the start of UNIX.

Within the UNIX system, the OS frees all memory and resources from a
program that terminates. No, I did not create UNIX, but maybe you should
study it a bit. It is a great system.

macpro: jacobnavia ~/ man ed

[snip]

HISTORY
An ed command appeared in Version 1 AT&T UNIX.

Chris M. Thomasson

unread,
Apr 23, 2021, 5:13:51 PM4/23/21
to
On 4/23/2021 2:02 PM, jacobnavia wrote:
> Le 23/04/2021 à 22:51, Chris M. Thomasson a écrit :
>> On 4/23/2021 1:40 PM, jacobnavia wrote:
>
>>>
>>> I explained to you why the code doesn't bother to free the
>>> memorysince it is used for the whole time that the editor is running.
>>> Freeing it just before exiting the program makes no sense.
>>>
>>
>> Why? Did you create the system its running on?
>
> The code is for the 10th edition of the UNIX system, and was around
> probably since the start of UNIX.
>
> Within the UNIX system, the OS frees all memory and ressources from a
> program that terminates. No, I did not create UNIX, but maybe you should
> study it a bit. It is a great system.

I know exactly what you are talking about. However, please clean up
after yourself? Pretty please? UNIX is not the only system out there.
This is a bad habit to get into. Clean up, gosh darn it.

Chris M. Thomasson

unread,
Apr 23, 2021, 5:15:40 PM4/23/21
to
On 4/23/2021 2:02 PM, jacobnavia wrote:
Please, don't be one of those people who never use fclose either!

https://pubs.opengroup.org/onlinepubs/009695399/functions/fclose.html

Scott Lurndal

unread,
Apr 23, 2021, 6:26:00 PM4/23/21
to
He's talking specifically about the ed.c code from Unix V10 that
was recently posted.

The later version (Unixware 2.12) uses sbrk (thus never frees).

Scott Lurndal

unread,
Apr 23, 2021, 6:27:23 PM4/23/21
to
"Chris M. Thomasson" <chris.m.t...@gmail.com> writes:
Ironically, the program in question was "run on non-paged memory",
i.e. a PDP-11.

Chris M. Thomasson

unread,
Apr 23, 2021, 6:32:10 PM4/23/21
to
Oh, for some reason I thought he was talking about the general case.
Sorry for the misunderstanding. Still, I have created custom allocators
on top of mmap and such. Also, I would always munmap it and close the
mapped file. ;^)

Chris M. Thomasson

unread,
Apr 23, 2021, 6:34:58 PM4/23/21
to
The problems I had with non-paged memory was back when I was creating
server software back in WinNT 4.0. Every IOCP operation that's inflight
would use non-paged memory. So, using too much would foobar the system.

Chris M. Thomasson

unread,
Apr 23, 2021, 6:39:28 PM4/23/21
to
On 4/23/2021 2:13 PM, Chris M. Thomasson wrote:
> On 4/23/2021 2:02 PM, jacobnavia wrote:
>> Le 23/04/2021 à 22:51, Chris M. Thomasson a écrit :
>>> On 4/23/2021 1:40 PM, jacobnavia wrote:
>>
>>>>
>>>> I explained to you why the code doesn't bother to free the
>>>> memorysince it is used for the whole time that the editor is
>>>> running. Freeing it just before exiting the program makes no sense.
>>>>
>>>
>>> Why? Did you create the system its running on?
>>
>> The code is for the 10th edition of the UNIX system, and was around
>> probably since the start of UNIX.
>>
>> Within the UNIX system, the OS frees all memory and ressources from a
>> program that terminates. No, I did not create UNIX, but maybe you
>> should study it a bit. It is a great system.
>
> I know exactly what you are talking about. However, please clean up
> after yourself? Pretty please? UNIX is not the only system out there.
> This is a bad habit to get into. Clean up, gosh darn it.

Ack! I missed the part where you were explicitly referencing the ed code
in question. Sorry! Ouch.

Bart

unread,
Apr 23, 2021, 8:20:25 PM4/23/21
to
On 23/04/2021 22:13, Chris M. Thomasson wrote:
> On 4/23/2021 2:02 PM, jacobnavia wrote:
>> Le 23/04/2021 à 22:51, Chris M. Thomasson a écrit :
>>> On 4/23/2021 1:40 PM, jacobnavia wrote:
>>
>>>>
>>>> I explained to you why the code doesn't bother to free the
>>>> memorysince it is used for the whole time that the editor is
>>>> running. Freeing it just before exiting the program makes no sense.
>>>>
>>>
>>> Why? Did you create the system its running on?
>>
>> The code is for the 10th edition of the UNIX system, and was around
>> probably since the start of UNIX.
>>
>> Within the UNIX system, the OS frees all memory and ressources from a
>> program that terminates. No, I did not create UNIX, but maybe you
>> should study it a bit. It is a great system.
>
> I know exactly what you are talking about. However, please clean up
> after yourself? Pretty please? UNIX is not the only system out there.
> This is a bad habit to get into. Clean up, gosh darn it.

Which OSes don't free the resources of an application that's just
terminated?

What happens if the app crashes before it manages to free them?

What about resources such as screen window handles, bitmaps, dynamic
libraries,.... ?

How about the memory that the process itself takes up for its own
code, or its stack; how is an application supposed to free that?


Isn't this exactly what an OS is supposed to do?

Chris M. Thomasson

unread,
Apr 23, 2021, 8:43:24 PM4/23/21
to
On 4/23/2021 5:20 PM, Bart wrote:
> On 23/04/2021 22:13, Chris M. Thomasson wrote:
>> On 4/23/2021 2:02 PM, jacobnavia wrote:
>>> Le 23/04/2021 à 22:51, Chris M. Thomasson a écrit :
>>>> On 4/23/2021 1:40 PM, jacobnavia wrote:
>>>
>>>>>
>>>>> I explained to you why the code doesn't bother to free the
>>>>> memorysince it is used for the whole time that the editor is
>>>>> running. Freeing it just before exiting the program makes no sense.
>>>>>
>>>>
>>>> Why? Did you create the system its running on?
>>>
>>> The code is for the 10th edition of the UNIX system, and was around
>>> probably since the start of UNIX.
>>>
>>> Within the UNIX system, the OS frees all memory and ressources from a
>>> program that terminates. No, I did not create UNIX, but maybe you
>>> should study it a bit. It is a great system.
>>
>> I know exactly what you are talking about. However, please clean up
>> after yourself? Pretty please? UNIX is not the only system out there.
>> This is a bad habit to get into. Clean up, gosh darn it.
>
> Which OSes don't free the resources of an application that's just
> terminated?

I cannot remember if named Win32 mutex/semaphore/event/etc. are
automatically cleaned up... I think there could be an issue with SYSV
shared memory. Iirc, some device drivers. I need to do more research.

However, cleaning up after yourself is a good habit to get into. Afaict,
the OS cleans up to help prevent badly written code from bringing the
system down? A nit pick, but I can also think of leaking disk space wrt
a program creating a temporary file with a random name or something,
then forgetting to delete it. The OS is not going to automatically
delete the file. Forgetting to unlock a robust mutex can be interesting,
think of locking it as acquiring a resource, and unlocking as releasing
it. It can cause a robust mutex to go into an abandoned state. Try to
avoid that as much as possible. WAIT_ABANDONED or EOWNERDEAD should be
minimized.



>
> What happens if the app crashes before it manages to free them?
>
> What about resources such as screen window handles, bitmaps, dynamic
> libraries,.... ?
>
> How about the memory that that the process itself takes up for its own
> code, or its stack; how is an application supposed to free that?
>
>
> Isn't this exacly what an OS is supposed to do?

Depends on the system you are using. Well, it's better to clean up after
yourself.

wij

unread,
Apr 24, 2021, 12:43:39 AM4/24/21
to
I like the style you did. But too formal in "Conversation".
When looking back, many of my replies are somewhat indirect. But I would
like to keep this way for a moment.
Almost all UTM programs (executables) can be analyzed or viewed as a decision
graph (note that physical objects and more abstract things are similar).
What the so called high level language does are actually about decision and
grouping of those left non-decision parts. The desired, net result is to reduce
or hide those 'unusual low level' decision makings and non-decision parts from
programmers (or theorists).
The real implements of these two parts depend on problem domain.

As to the issue that C++ codes are simpler and easier. In basics I agree, but
probably not much different from other popular high level languages in
application, except the method.

Approaching the more general problem from the real world of software engineer market,
while people saying C++ is the choice for high performance language, such as gaming,
banking, AI, VR,..., I saw the need for C++ engineer shrinks very fast these years.
Most companies are trying to reduce cost by shrinking existing C++ codes to minimal.
Fortran, Cobol are still active and high paid for 'high performance' need.

To be more on topic, to say C++ is better over C, support from hardware is a must.

Ian Collins

unread,
Apr 24, 2021, 1:06:00 AM4/24/21
to
On 24/04/2021 16:43, wij wrote:
>
> As to the issue that C++ codes are simpler and easier. In basics I agree, but
> probably not much different from other popular high level languages in
> application, except the method.
>
> Approaching the more general problem from the real world of software engineer market,
> while people saying C++ is the choice for high performance language, such as gaming,
> banking, AI, VR,..., I saw the need for C++ engineer shrinks very fast these years.
> Most companies are trying to reduce cost by shrinking existing C++ codes to minimal.
> Fortran, Cobol are still active and high paid for 'high performance' need.

We have the opposite problem, our ever expanding core product is C++
with no viable alternatives. With our borders basically shut, we are
also finding it hard to get staff...

> To be more on topic, to say C++ is better over C, support from hardware is a must.

They are pretty much equal except at the extreme low end, that is
targets not supported by gcc.

--
Ian.

wij

unread,
Apr 24, 2021, 3:50:10 AM4/24/21
to
On Saturday, 24 April 2021 at 13:06:00 UTC+8, Ian Collins wrote:
> On 24/04/2021 16:43, wij wrote:
> >
> > As to the issue that C++ codes are simpler and easier. In basics I agree, but
> > probably not much different from other popular high level languages in
> > application, except the method.
> >
> > Approaching the more general problem from the real world of software engineer market,
> > while people saying C++ is the choice for high performance language, such as gaming,
> > banking, AI, VR,..., I saw the need for C++ engineer shrinks very fast these years.
> > Most companies are trying to reduce cost by shrinking existing C++ codes to minimal.
> > Fortran, Cobol are still active and high paid for 'high performance' need.
> We have the opposite problem, our ever expanding core product is C++
> with no viable alternatives. With our borders basically shut, we are
> also finding it hard to get staff...

Make me think of QT,..., How it survive is interesting.

I understand why most of companies in my country abandon C++, because investment
on hardware is cheaper and efficient than on software.

daniel...@gmail.com

unread,
Apr 24, 2021, 11:51:07 AM4/24/21
to
On Saturday, April 24, 2021 at 3:50:10 AM UTC-4, wyn...@gmail.com wrote:
> Make me think of QT,..., How it survive is interesting.
>
> I understand why most of companies in my country abandon C++, because investment
> on hardware is cheaper and efficient than on software.

In my field, capital markets, in the places I worked, C and C++ were dominant
in the mid to early nineties, in all of IT data processing, middleware and messaging
vendors, internal trading systems, and vendor trading systems. That started to change in
the late nineties with the advent of Java. It happened first in IT, where Java quickly
replaced C and C++ for internal data processing. Then the middleware and messaging
vendors such as IBM and TIBCO dropped their legacy C products and replaced them
with new products written in Java. This happened simultaneously with the requirement to
support XML and related technologies, networking, multi-threading and Unicode, all
of which were well supported in Java, and less so in C++. Then vendor derivative
trading systems written in Java such as Calypso began to replace the older systems
written in C++ such as Infinity. C++ survived in quant groups and vetting groups
into the 2000's, but began to feel competition from C# into the late 2000's. Then,
new vendors risk systems that involved many millions of portfolio valuations
appeared in C#, replacing a previous generation written in C and C++.

There were of course a lot of reasons for that, as I'm sure everybody knows. Some of
it had to do with working with data, third party C++ library API's were abysmal for
working with XML and other data forms, still are, and that won't change until C++ has
meta programming, which likely won't be until 2026. It was vastly easier for vendors to
distribute user extendable software in Java or C# than in C++. C++ lacked and still does
basic types such as big decimal and date. Java and C# were more productive to
work in than C++. Massively parallel execution could substitute to some extent
for fast execution in a single process. There were many reasons.

There were some gains for C++ in the last decade, particularly in high frequency
trading, where performance is paramount. But it became niche.

Daniel

Scott Lurndal

unread,
Apr 24, 2021, 12:33:37 PM4/24/21
to
Cutler "borrowed" most of the NT I/O and Memory subsystems from
VMS (the VAX operating system). Which had the same issues with
the non-paged pool.

wij

unread,
Apr 24, 2021, 1:41:52 PM4/24/21
to
Why bother (from the view point of a company)?
Just wait for another couple years, every thing will automatically be faster and cheaper,
while programmer is a very unpredictable factor, and might demand more pay.

I used to think neural networks were a dead end, but after the BP algorithm was found
everything changed, and changed fast. I just hope the same thing happens to C++.

wij

unread,
Apr 24, 2021, 1:56:09 PM4/24/21
to
On Friday, 23 April 2021 at 00:58:41 UTC+8, jacobnavia wrote:
> Recently, somebody started a thread about how C wasn't a simple
> language. The excuse was a text editor project for a beginner.
>
> Attached is the source code for the 10th edition of Unix of the "ed"
> text editor, published by Brian Kernighan for his CS classes at
> Princeton in 2001.
>
> Maybe C is not a simple language but one of its incredible strengths is
> its stability. This code is around 40 (yes FORTY) years old and you can
> compile it without any trouble today and it will work in any UNIX system.
>
> ----------------------------------------------------------------cut here
> /* This file contains the source for the 10th Edition Unix version of
> ed, which is
> 1700 lines long. It dates from about 1989, but is typical of Unix
> code from the
> mid 1970's: terse, tight, efficient, and largely uncommented
> This is a slightly modified version that compiles without any
> warnings under gcc in 2021
> See https://www.cs.princeton.edu/courses/archive/spring01/cs333/ed.c
> Compile with gcc -O2 -o ed -Wall -Wno-parentheses ed.c
> */
> #include <signal.h>
> #include <stdlib.h>
> #include <setjmp.h>
> #include <stdio.h>
> #include <unistd.h>
> #include <fcntl.h>
> #include <wait.h>
> #include <string.h>
> #define BLKSIZE 4096 /* make BLKSIZE and LBSIZE 512 for smaller machines */
> #define NBLK 2047
> #define FNSIZE 128
> #define LBSIZE 4096
> #define ESIZE 256
> #define GBSIZE 256
> #define NBRA 5
> #define KSIZE 9
> #define CBRA 1
> #define CCHR 2
> #define CDOT 4
> #define CCL 6
> #define NCCL 8
> #define CDOL 10
> #define CEOF 11
> #define CKET 12
> #define CBACK 14
> #define CCIRC 15
> #define STAR 01
> static unsigned char Q[] = "";
> static unsigned char T[] = "TMP";
> #define READ 0
> #define WRITE 1
> static int peekc;
> static int lastc;
> static unsigned char savedfile[FNSIZE];
> static unsigned char file[FNSIZE];
> static unsigned char linebuf[LBSIZE];
> static unsigned char rhsbuf[LBSIZE/2];
> static unsigned char expbuf[ESIZE+4];
> static int given;
> static unsigned int *addr1, *addr2;
> static unsigned int *dot, *dol, *zero;
> static unsigned char genbuf[LBSIZE];
> static long count;
> static unsigned char *nextip;
> static unsigned char *linebp;
> static int ninbuf;
> static int io;
> static int pflag;
> static int vflag = 1;
> static int oflag;
> static int listf;
> static int listn;
> static int col;
> static unsigned char *globp;
> static int tfile = -1;
> static int tline;
> static char tfname[50];
> static unsigned char *loc1;
> static unsigned char *loc2;
> static unsigned char ibuff[BLKSIZE];
> static int iblock = -1;
> static unsigned char obuff[BLKSIZE];
> static int oblock = -1;
> static int ichanged;
> static int nleft;
> static char WRERR[] = "WRITE ERROR";
> static int names[26];
> static int anymarks;
> static unsigned char *braslist[NBRA];
> static unsigned char *braelist[NBRA];
> static int nbra;
> static int subnewa;
> static int subolda;
> static int fchange;
> static int wrapp;
> static unsigned nlall = 128;
> static char tmpXXXXX[50] = "/tmp/eXXXXXX";
>
> static unsigned char *getblock(unsigned int atl, int iof);
> static unsigned char *GetLine(unsigned int tl);
> static unsigned char *place(unsigned char *sp,unsigned char *l1,unsigned
> char *l2);
> static void add(int i);
> static int advance(unsigned char *lp, unsigned char *ep);
> static int append(int (*f)(void), unsigned int *a);
> static int backref(int i,unsigned char *lp);
> static void blkio(int b, unsigned char *buf, int iof);
> static void callunix(void);
> static int cclass(unsigned char *set, int c, int af);
> static void commands(void);
> static void compile(int eof);
> static int compsub(void);
> static void dosub(void);
> static void error(unsigned char *s);
> static int execute(unsigned int *addr);
> static void exfile(void);
> static void filename(int comm);
> static void gdelete(void);
> static int getchr(void);
> static int getcopy(void);
> static int getfile(void);
> static int getnum(void);
> static int getsub(void);
> static int gettty(void);
> static int gety(void);
> static void global(int k);
> static void init(void);
> static unsigned int *address(void);
> static void join(void);
> static void move(int cflag);
> static void newline(void);
> static void nonzero(void);
> static void onhup(int n);
> static void onintr(int n);
> static void print(void);
> static void putchr(int ac);
> static void putd(void);
> static void putfile(void);
> static int putline(void);
> static void quit(int n);
> static void rdelete(unsigned int *ad1, unsigned int *ad2);
> static void reverse(unsigned int *a1, unsigned int *a2);
> static void setwide(void);
> static void setnoaddr(void);
> static void squeeze(int i);
> static void substitute(int inglob);
> static jmp_buf savej;
> typedef void (*SIG_TYP)(int);
> static SIG_TYP oldhup;
> static SIG_TYP oldquit;
> /* these two are not in ansi, but we need them */
> #define SIGHUP 1 /* hangup */
> #define SIGQUIT 3 /* quit (ASCII FS) */
>
> int main(int argc, char *argv[])
> {
> unsigned char *p1, *p2;
> SIG_TYP oldintr;
>
> oldquit = signal(SIGQUIT, SIG_IGN);
> oldhup = signal(SIGHUP, SIG_IGN);
> oldintr = signal(SIGINT, SIG_IGN);
> if (signal(SIGTERM, SIG_IGN) == SIG_DFL)
> signal(SIGTERM, quit);
> argv++;
> while (argc > 1 && **argv=='-') {
> switch((*argv)[1]) {
>
> case '\0':
> vflag = 0;
> break;
>
> case 'q':
> signal(SIGQUIT, SIG_DFL);
> vflag = 1;
> break;
>
> case 'o':
> oflag = 1;
> break;
> }
> argv++;
> argc--;
> }
> if (oflag) {
> p1 = (unsigned char *)"/dev/stdout";
> p2 = savedfile;
> while (*p2++ = *p1++)
> ;
> }
> if (argc>1) {
> p1 = (unsigned char *)*argv;
> p2 = savedfile;
> while (*p2++ = *p1++)
> if (p2 >= &savedfile[sizeof(savedfile)])
> p2--;
> globp = (unsigned char *)"r";
> }
> zero = (unsigned *)malloc(nlall*sizeof(unsigned));
> strcpy(tfname,tmpXXXXX);
> mkstemp(tfname);
>
> init();
> if (oldintr!=SIG_IGN)
> signal(SIGINT, onintr);
> if (oldhup!=SIG_IGN)
> signal(SIGHUP, onhup);
> setjmp(savej);
> commands();
> quit(0);
> return 0;
> }
> static void commands(void)
> {
> unsigned int *a1;
> int c;
> int temp;
> unsigned char lastsep;
>
> for (;;) {
> if (pflag) {
> pflag = 0;
> addr1 = addr2 = dot;
> print();
> }
> c = '\n';
> for (addr1 = 0;;) {
> lastsep = c;
> a1 = address();
> c = getchr();
> if (c!=',' && c!=';')
> break;
> if (lastsep==',')
> error(Q);
> if (a1==0) {
> a1 = zero+1;
> if (a1>dol)
> a1--;
> }
> addr1 = a1;
> if (c==';')
> dot = a1;
> }
> if (lastsep!='\n' && a1==0)
> a1 = dol;
> if ((addr2=a1)==0) {
> given = 0;
> addr2 = dot;
> }
> else
> given = 1;
> if (addr1==0)
> addr1 = addr2;
> switch(c) {
> case 'a':
> add(0);
> continue;
> case 'c':
> nonzero();
> newline();
> rdelete(addr1, addr2);
> append(gettty, addr1-1);
> continue;
> case 'd':
> nonzero();
> newline();
> rdelete(addr1, addr2);
> continue;
> case 'E':
> fchange = 0;
> c = 'e';
> case 'e':
> setnoaddr();
> if (vflag && fchange) {
> fchange = 0;
> error(Q);
> }
> filename(c);
> init();
> addr2 = zero;
> goto caseread;
> case 'f':
> setnoaddr();
> filename(c);
> puts((const char *)savedfile);
> continue;
> case 'g':
> global(1);
> continue;
> case 'i':
> add(-1);
> continue;
> case 'j':
> if (!given)
> addr2++;
> newline();
> join();
> continue;
> case 'k':
> nonzero();
> if ((c = getchr()) < 'a' || c > 'z')
> error(Q);
> newline();
> names[c-'a'] = *addr2 & ~01;
> anymarks |= 01;
> continue;
> case 'm':
> move(0);
> continue;
> case 'n':
> listn++;
> newline();
> print();
> continue;
> case '\n':
> if (a1==0) {
> a1 = dot+1;
> addr2 = a1;
> addr1 = a1;
> }
> if (lastsep==';')
> addr1 = a1;
> print();
> continue;
> case 'l':
> listf++;
> case 'p':
> case 'P':
> newline();
> print();
> continue;
> case 'Q':
> fchange = 0;
> case 'q':
> setnoaddr();
> newline();
> quit(0);
> case 'r':
> filename(c);
> caseread:
> if ((io = open((const char *)file, 0)) < 0) {
> lastc = '\n';
> error(file);
> }
> setwide();
> squeeze(0);
> ninbuf = 0;
> c = zero != dol;
> append(getfile, addr2);
> exfile();
> fchange = c;
> continue;
> case 's':
> nonzero();
> substitute(globp!=0);
> continue;
> case 't':
> move(1);
> continue;
> case 'u':
> nonzero();
> newline();
> if ((*addr2&~01) != subnewa)
> error(Q);
> *addr2 = subolda;
> dot = addr2;
> continue;
> case 'v':
> global(0);
> continue;
> case 'W':
> wrapp++;
> case 'w':
> setwide();
> squeeze(dol>zero);
> if ((temp = getchr()) != 'q' && temp != 'Q') {
> peekc = temp;
> temp = 0;
> }
> filename(c);
> if(!wrapp ||
> ((io = open((const char *)file,1)) == -1) ||
> ((lseek(io, 0L, 2)) == -1))
> if ((io = creat((const char *)file, 0666)) < 0)
> error(file);
> wrapp = 0;
> if (dol > zero)
> putfile();
> exfile();
> if (addr1<=zero+1 && addr2==dol)
> fchange = 0;
> if (temp == 'Q')
> fchange = 0;
> if (temp)
> quit(0);
> continue;
> case '=':
> setwide();
> squeeze(0);
> newline();
> count = addr2 - zero;
> putd();
> putchr('\n');
> continue;
> case '!':
> callunix();
> continue;
> case EOF:
> return;
> }
> error(Q);
> }
> }
> static void print(void)
> {
> unsigned int *a1;
>
> nonzero();
> a1 = addr1;
> do {
> if (listn) {
> count = a1-zero;
> putd();
> putchr('\t');
> }
> puts((const char *)GetLine(*a1++));
> } while (a1 <= addr2);
> dot = addr2;
> listf = 0;
> listn = 0;
> pflag = 0;
> }
> static unsigned int * address(void)
> {
> int sign;
> unsigned int *a, *b;
> int opcnt, nextopand;
> int c;
>
> nextopand = -1;
> sign = 1;
> opcnt = 0;
> a = dot;
> do {
> do c = getchr(); while (c==' ' || c=='\t');
> if ('0'<=c && c<='9') {
> peekc = c;
> if (!opcnt) a = zero;
> a += sign*getnum();
> } else switch (c) {
> case '$':
> a = dol;
> /* fall through */
> case '.':
> if (opcnt) error(Q);
> break;
> case '\'':
> c = getchr();
> if (opcnt || c<'a' || 'z'<c) error(Q);
> a = zero;
> do a++; while (a<=dol && names[c-'a']!=(*a&~01));
> break;
> case '?':
> sign = -sign;
> /* fall through */
> case '/':
> compile(c);
> b = a;
> for (;;) {
> a += sign;
> if (a<=zero) a = dol;
> if (a>dol) a = zero;
> if (execute(a)) break;
> if (a==b) error(Q);
> }
> break;
> default:
> if (nextopand == opcnt) {
> a += sign;
> if (a<zero || dol<a)
> continue; /* error(Q); */
> }
> if (c!='+' && c!='-' && c!='^') {
> peekc = c;
> if (opcnt==0) a = 0;
> return (a);
> }
> sign = 1;
> if (c!='+') sign = -sign;
> nextopand = ++opcnt;
> continue;
> }
> sign = 1;
> opcnt++;
> } while (zero<=a && a<=dol);
> error(Q);
> /*NOTREACHED*/
> return 0;
> }
> static int getnum(void)
> {
> int r, c;
>
> r = 0;
> while ((c=getchr())>='0' && c<='9')
> r = r*10 + c - '0';
> peekc = c;
> return (r);
> }
> static void setwide(void)
> {
> if (!given) {
> addr1 = zero + (dol>zero);
> addr2 = dol;
> }
> }
> static void setnoaddr(void)
> {
> if (given) error(Q);
> }
> static void nonzero(void)
> {
> squeeze(1);
> }
> static void squeeze(int i)
> {
> if (addr1<zero+i || addr2>dol || addr1>addr2)
> error(Q);
> }
> static void newline(void)
> {
> int c;
>
> if ((c = getchr()) == '\n' || c == EOF) return;
> if (c=='p' || c=='l' || c=='n') {
> pflag++;
> if (c=='l') listf++;
> else if (c=='n') listn++;
> if ((c=getchr())=='\n') return;
> }
> error(Q);
> }
> static void filename(int comm)
> {
> unsigned char *p1, *p2;
> int c;
>
> count = 0;
> c = getchr();
> if (c=='\n' || c==EOF) {
> p1 = savedfile;
> if (*p1==0 && comm!='f') error(Q);
> p2 = file;
> while (*p2++ = *p1++)
> ;
> return;
> }
> if (c!=' ')
> error(Q);
> while ((c = getchr()) == ' ')
> ;
> if (c=='\n') error(Q);
> p1 = file;
> do {
> if (p1 >= &file[sizeof(file)-1] || c==' ' || c==EOF) error(Q);
> *p1++ = c;
> } while ((c = getchr()) != '\n');
> *p1++ = 0;
> if (savedfile[0]==0 || comm=='e' || comm=='f') {
> p1 = savedfile;
> p2 = file;
> while (*p1++ = *p2++)
> ;
> }
> }
> static void exfile(void)
> {
> close(io);
> io = -1;
> if (vflag) {
> putd();
> putchr('\n');
> }
> }
> static void onintr(int n)
> {
> signal(SIGINT, onintr);
> putchr('\n');
> lastc = '\n';
> error(Q);
> }
> static void onhup(int n)
> {
> signal(SIGINT, SIG_IGN);
> signal(SIGHUP, SIG_IGN);
> if (dol > zero) {
> addr1 = zero+1;
> addr2 = dol;
> io = creat("ed.hup", 0600);
> if (io > 0)
> putfile();
> }
> fchange = 0;
> quit(0);
> }
> static void error(unsigned char *s)
> {
> int c;
>
> wrapp = 0;
> listf = 0;
> listn = 0;
> putchr('?');
> puts((const char *)s);
> count = 0;
> lseek(0, (long)0, 2);
> pflag = 0;
> if (globp)
> lastc = '\n';
> globp = 0;
> peekc = lastc;
> if(lastc)
> while ((c = getchr()) != '\n' && c != EOF)
> ;
> if (io > 0) {
> close(io);
> io = -1;
> }
> longjmp(savej, 1);
> }
> static int getchr(void)
> {
> char c;
> if (lastc=peekc) {
> peekc = 0;
> return(lastc);
> }
> if (globp) {
> if ((lastc = *globp++) != 0)
> return(lastc);
> globp = 0;
> return(EOF);
> }
> if (read(0, &c, 1) <= 0)
> return(lastc = EOF);
> lastc = c&0177;
> return(lastc);
> }
> static int gettty(void)
> {
> int rc;
>
> if (rc = gety())
> return(rc);
> if (linebuf[0]=='.' && linebuf[1]==0)
> return(EOF);
> return(0);
> }
> static int gety(void)
> {
> int c;
> unsigned char *gf;
> unsigned char *p;
>
> p = linebuf;
> gf = globp;
> while ((c = getchr()) != '\n') {
> if (c==EOF) {
> if (gf)
> peekc = c;
> return(c);
> }
> if ((c &= 0177) == 0)
> continue;
> *p++ = c;
> if (p >= &linebuf[LBSIZE-2])
> error(Q);
> }
>
> *p++ = 0;
> return(0);
> }
> static int getfile(void)
> {
> int c;
> unsigned char *lp, *fp;
>
> lp = linebuf;
> fp = nextip;
> do {
> if (--ninbuf < 0) {
> if ((ninbuf = read(io, genbuf, LBSIZE)-1) < 0)
> if (lp>linebuf) {
> puts("'\\n' appended");
> *genbuf = '\n';
> }
> else return(EOF);
> fp = genbuf;
> while(fp < &genbuf[ninbuf]) {
> if (*fp++ & 0200)
> break;
> }
> fp = genbuf;
> }
> c = *fp++;
> if (c=='\0')
> continue;
> if (c&0200 || lp >= &linebuf[LBSIZE]) {
> lastc = '\n';
> error(Q);
> }
> *lp++ = c;
> count++;
> } while (c != '\n');
> *--lp = 0;
> nextip = fp;
> return(0);
> }
> static void putfile(void)
> {
> unsigned int *a1;
> int n;
> unsigned char *fp, *lp;
> int nib;
>
> nib = BLKSIZE;
> fp = genbuf;
> a1 = addr1;
> do {
> lp = GetLine(*a1++);
> for (;;) {
> if (--nib < 0) {
> n = fp-genbuf;
> if(write(io, genbuf, n) != n) {
> puts((const char *)WRERR);
> error(Q);
> }
> nib = BLKSIZE-1;
> fp = genbuf;
> }
> count++;
> if ((*fp++ = *lp++) == 0) {
> fp[-1] = '\n';
> break;
> }
> }
> } while (a1 <= addr2);
> n = fp-genbuf;
> if(write(io, genbuf, n) != n) {
> puts((const char *)WRERR);
> error(Q);
> }
> }
> static int append(int (*f)(void), unsigned int *a)
> {
> unsigned int *a1, *a2, *rdot;
> int nline, tl;
>
> nline = 0;
> dot = a;
> while ((*f)() == 0) {
> if ((dol-zero)+1 >= nlall) {
> unsigned *ozero = zero;
>
> nlall += 1024;
> if ((zero = (unsigned *)realloc((unsigned char *)zero,
> nlall*sizeof(unsigned)))==NULL) {
> error((unsigned char *)"MEM?");
> onhup(0);
> }
> dot += zero - ozero;
> dol += zero - ozero;
> }
> tl = putline();
> nline++;
> a1 = ++dol;
> a2 = a1+1;
> rdot = ++dot;
> while (a1 > rdot)
> *--a2 = *--a1;
> *rdot = tl;
> }
> return(nline);
> }
> static void add(int i)
> {
> if (i && (given || dol>zero)) {
> addr1--;
> addr2--;
> }
> squeeze(0);
> newline();
> append(gettty, addr2);
> }
> static void callunix(void)
> {
> SIG_TYP savint;
> int pid, rpid;
> int retcode;
>
> setnoaddr();
> if ((pid = fork()) == 0) {
> signal(SIGHUP, oldhup);
> signal(SIGQUIT, oldquit);
> execl("/bin/sh", "sh", "-t", NULL);
> exit(0100);
> }
> savint = signal(SIGINT, SIG_IGN);
> while ((rpid = wait(&retcode)) != pid && rpid != -1)
> ;
> signal(SIGINT, savint);
> if (vflag) {
> puts("!");
> }
> }
> static void quit(int n)
> {
> if (vflag && fchange && dol!=zero) {
> fchange = 0;
> error(Q);
> }
> unlink(tfname);
> exit(0);
> }
> static void rdelete(unsigned int *ad1, unsigned int *ad2)
> {
> unsigned int *a1, *a2, *a3;
>
> a1 = ad1;
> a2 = ad2+1;
> a3 = dol;
> dol -= a2 - a1;
> do {
> *a1++ = *a2++;
> } while (a2 <= a3);
> a1 = ad1;
> if (a1 > dol)
> a1 = dol;
> dot = a1;
> fchange = 1;
> }
> static void gdelete(void)
> {
> unsigned int *a1, *a2, *a3;
>
> a3 = dol;
> for (a1=zero; (*a1&01)==0; a1++)
> if (a1>=a3)
> return;
> for (a2=a1+1; a2<=a3;) {
> if (*a2&01) {
> a2++;
> dot = a1;
> } else
> *a1++ = *a2++;
> }
> dol = a1-1;
> if (dot>dol)
> dot = dol;
> fchange = 1;
> }
> static unsigned char *GetLine(unsigned int tl)
> {
> unsigned char *bp, *lp;
> int nl;
>
> lp = linebuf;
> bp = getblock(tl, READ);
> nl = nleft;
> tl &= ~((BLKSIZE/2)-1);
> while (*lp++ = *bp++)
> if (--nl == 0) {
> bp = getblock(tl+=(BLKSIZE/2), READ);
> nl = nleft;
> }
> return(linebuf);
> }
> static int putline(void)
> {
> unsigned char *bp, *lp;
> int nl;
> unsigned int tl;
>
> fchange = 1;
> lp = linebuf;
> tl = tline;
> bp = getblock(tl, WRITE);
> nl = nleft;
> tl &= ~((BLKSIZE/2)-1);
> while (*bp = *lp++) {
> if (*bp++ == '\n') {
> *--bp = 0;
> linebp = lp;
> break;
> }
> if (--nl == 0) {
> bp = getblock(tl+=(BLKSIZE/2), WRITE);
> nl = nleft;
> }
> }
> nl = tline;
> tline += (((lp-linebuf)+03)>>1)&077776;
> return(nl);
> }
> static unsigned char * getblock(unsigned int atl, int iof)
> {
> int bno, off;
>
> bno = (atl/(BLKSIZE/2));
> off = (atl<<1) & (BLKSIZE-1) & ~03;
> if (bno >= NBLK) {
> lastc = '\n';
> error(T);
> }
> nleft = BLKSIZE - off;
> if (bno==iblock) {
> ichanged |= iof;
> return(ibuff+off);
> }
> if (bno==oblock)
> return(obuff+off);
> if (iof==READ) {
> if (ichanged)
> blkio(iblock, ibuff, iof);
> ichanged = 0;
> iblock = bno;
> blkio(bno, ibuff, iof);
> return(ibuff+off);
> }
> if (oblock>=0)
> blkio(oblock, obuff, iof);
> oblock = bno;
> return(obuff+off);
> }
> static void blkio(int b, unsigned char *buf, int iof)
> {
> lseek(tfile, (long)b*BLKSIZE, 0);
> if (iof == READ) {
> if (read(tfile, buf, BLKSIZE) != BLKSIZE) {
> error(T);
> }
> }
> else {
> if (write(tfile, buf, BLKSIZE) != BLKSIZE) {
> error(T);
> }
> }
> }
> static void init(void)
> {
> int *markp;
>
> close(tfile);
> tline = 2;
> for (markp = names; markp < &names[26]; )
> *markp++ = 0;
> subnewa = 0;
> anymarks = 0;
> iblock = -1;
> oblock = -1;
> ichanged = 0;
> close(creat(tfname, 0600));
> tfile = open(tfname, 2);
> dot = dol = zero;
> }
> static void global(int k)
> {
> unsigned char *gp;
> int c;
> unsigned int *a1;
> unsigned char globuf[GBSIZE];
>
> if (globp)
> error(Q);
> setwide();
> squeeze(dol>zero);
> if ((c=getchr())=='\n')
> error(Q);
> compile(c);
> gp = globuf;
> while ((c = getchr()) != '\n') {
> if (c==EOF)
> error(Q);
> if (c=='\\') {
> c = getchr();
> if (c!='\n')
> *gp++ = '\\';
> }
> *gp++ = c;
> if (gp >= &globuf[GBSIZE-2])
> error(Q);
> }
> if (gp == globuf)
> *gp++ = 'p';
> *gp++ = '\n';
> *gp++ = 0;
> for (a1=zero; a1<=dol; a1++) {
> *a1 &= ~01;
> if (a1>=addr1 && a1<=addr2 && execute(a1)==k)
> *a1 |= 01;
> }
> /*
> * Special case: g/.../d (avoid n^2 algorithm)
> */
> if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') {
> gdelete();
> return;
> }
> for (a1=zero; a1<=dol; a1++) {
> if (*a1 & 01) {
> *a1 &= ~01;
> dot = a1;
> globp = globuf;
> commands();
> a1 = zero;
> }
> }
> }
> static void join(void)
> {
> unsigned char *gp, *lp;
> unsigned int *a1;
>
> nonzero();
> gp = genbuf;
> for (a1=addr1; a1<=addr2; a1++) {
> lp = GetLine(*a1);
> while (*gp = *lp++)
> if (gp++ >= &genbuf[LBSIZE-2])
> error(Q);
> }
> lp = linebuf;
> gp = genbuf;
> while (*lp++ = *gp++)
> ;
> *addr1 = putline();
> if (addr1<addr2)
> rdelete(addr1+1, addr2);
> dot = addr1;
> }
> static void substitute(int inglob)
> {
> int *mp, nl;
> unsigned int *a1;
> int gsubf;
> int n;
>
> n = getnum(); /* OK even if n==0 */
> gsubf = compsub();
> for (a1 = addr1; a1 <= addr2; a1++) {
> if (execute(a1)){
> unsigned *ozero;
> int m = n;
> do {
> int span = loc2-loc1;
> if (--m <= 0) {
> dosub();
> if (!gsubf)
> break;
> if (span==0) { /* null RE match */
> if (*loc2=='\0')
> break;
> loc2++;
> }
> }
> } while (execute((unsigned *)0));
> if (m <= 0) {
> inglob |= 01;
> subnewa = putline();
> *a1 &= ~01;
> if (anymarks) {
> for (mp = names; mp < &names[26]; mp++)
> if (*mp == *a1)
> *mp = subnewa;
> }
> subolda = *a1;
> *a1 = subnewa;
> ozero = zero;
> nl = append(getsub, a1);
> nl += zero-ozero;
> a1 += nl;
> addr2 += nl;
> }
> }
> }
> if (inglob==0)
> error(Q);
> }
> static int compsub(void)
> {
> int seof, c;
> unsigned char *p;
>
> if ((seof = getchr()) == '\n' || seof == ' ')
> error(Q);
> compile(seof);
> p = rhsbuf;
> for (;;) {
> c = getchr();
> if (c=='\\')
> c = getchr() | 0200;
> if (c=='\n') {
> if (globp && globp[0]) /* last '\n' does not count */
> c |= 0200;
> else {
> peekc = c;
> pflag++;
> break;
> }
> }
> if (c==seof)
> break;
> *p++ = c;
> if (p >= &rhsbuf[LBSIZE/2])
> error(Q);
> }
> *p++ = 0;
> if ((peekc = getchr()) == 'g') {
> peekc = 0;
> newline();
> return(1);
> }
> newline();
> return(0);
> }
> static int getsub(void)
> {
> unsigned char *p1, *p2;
>
> p1 = linebuf;
> if ((p2 = linebp) == 0)
> return(EOF);
> while (*p1++ = *p2++)
> ;
> linebp = 0;
> return(0);
> }
> static void dosub(void)
> {
> unsigned char *lp, *sp, *rp;
> int c;
>
> lp = linebuf;
> sp = genbuf;
> rp = rhsbuf;
> while (lp < loc1)
> *sp++ = *lp++;
> while (c = *rp++&0377) {
> if (c=='&') {
> sp = place(sp, loc1, loc2);
> continue;
> } else if (c&0200 && (c &= 0177) >='1' && c < nbra+'1') {
> sp = place(sp, braslist[c-'1'], braelist[c-'1']);
> continue;
> }
> *sp++ = c&0177;
> if (sp >= &genbuf[LBSIZE])
> error(Q);
> }
> lp = loc2;
> loc2 = sp - genbuf + linebuf;
> while (*sp++ = *lp++)
> if (sp >= &genbuf[LBSIZE])
> error(Q);
> lp = linebuf;
> sp = genbuf;
> while (*lp++ = *sp++)
> ;
> }
> static unsigned char * place(unsigned char *sp, unsigned char
> *l1,unsigned char *l2)
> {
>
> while (l1 < l2) {
> *sp++ = *l1++;
> if (sp >= &genbuf[LBSIZE])
> error(Q);
> }
> return(sp);
> }
> static void move(int cflag)
> {
> unsigned int *adt, *ad1, *ad2;
>
> nonzero();
> if ((adt = address())==0) /* address() guarantees addr is in range */
> error(Q);
> newline();
> if (cflag) {
> unsigned int *ozero;
> int delta;
>
> ad1 = dol;
> ozero = zero;
> append(getcopy, ad1++);
> ad2 = dol;
> delta = zero - ozero;
> ad1 += delta;
> adt += delta;
> } else {
> ad2 = addr2;
> for (ad1 = addr1; ad1 <= ad2;)
> *ad1++ &= ~01;
> ad1 = addr1;
> }
> ad2++;
> if (adt<ad1) {
> dot = adt + (ad2-ad1);
> if ((++adt)==ad1)
> return;
> reverse(adt, ad1);
> reverse(ad1, ad2);
> reverse(adt, ad2);
> } else if (adt >= ad2) {
> dot = adt++;
> reverse(ad1, ad2);
> reverse(ad2, adt);
> reverse(ad1, adt);
> } else
> error(Q);
> fchange = 1;
> }
> static void reverse(unsigned int *a1, unsigned int *a2)
> {
> int t;
>
> for (;;) {
> t = *--a2;
> if (a2 <= a1)
> return;
> *a2 = *a1;
> *a1++ = t;
> }
> }
> static int getcopy(void)
> {
> if (addr1 > addr2)
> return(EOF);
> GetLine(*addr1++);
> return(0);
> }
> static void compile(int eof)
> {
> int c;
> unsigned char *ep;
> unsigned char *lastep;
> unsigned char bracket[NBRA], *bracketp;
> int cclcnt;
>
> ep = expbuf;
> bracketp = bracket;
> if ((c = getchr()) == '\n') {
> peekc = c;
> c = eof;
> }
> if (c == eof) {
> if (*ep==0)
> error(Q);
> return;
> }
> nbra = 0;
> if (c=='^') {
> c = getchr();
> *ep++ = CCIRC;
> }
> peekc = c;
> lastep = 0;
> for (;;) {
> if (ep >= &expbuf[ESIZE])
> goto cerror;
> c = getchr();
> if (c == '\n') {
> peekc = c;
> c = eof;
> }
> if (c==eof) {
> if (bracketp != bracket)
> goto cerror;
> *ep++ = CEOF;
> return;
> }
> if (c!='*')
> lastep = ep;
> switch (c) {
>
> case '\\':
> if ((c = getchr())=='(') {
> if (nbra >= NBRA)
> goto cerror;
> *bracketp++ = nbra;
> *ep++ = CBRA;
> *ep++ = nbra++;
> continue;
> }
> if (c == ')') {
> if (bracketp <= bracket)
> goto cerror;
> *ep++ = CKET;
> *ep++ = *--bracketp;
> continue;
> }
> if (c>='1' && c<'1'+NBRA) {
> *ep++ = CBACK;
> *ep++ = c-'1';
> continue;
> }
> *ep++ = CCHR;
> if (c=='\n')
> goto cerror;
> *ep++ = c;
> continue;
>
> case '.':
> *ep++ = CDOT;
> continue;
>
> case '\n':
> goto cerror;
>
> case '*':
> if (lastep==0 || *lastep==CBRA || *lastep==CKET)
> goto defchar;
> *lastep |= STAR;
> continue;
>
> case '$':
> if ((peekc=getchr()) != eof && peekc!='\n')
> goto defchar;
> *ep++ = CDOL;
> continue;
>
> case '[':
> *ep++ = CCL;
> *ep++ = 0;
> cclcnt = 1;
> if ((c=getchr()) == '^') {
> c = getchr();
> ep[-2] = NCCL;
> }
> do {
> if (c=='\n')
> goto cerror;
> if (c=='-' && ep[-1]!=0) {
> if ((c=getchr())==']') {
> *ep++ = '-';
> cclcnt++;
> break;
> }
> while (ep[-1]<c) {
> *ep = ep[-1]+1;
> ep++;
> cclcnt++;
> if (ep>=&expbuf[ESIZE])
> goto cerror;
> }
> }
> *ep++ = c;
> cclcnt++;
> if (ep >= &expbuf[ESIZE])
> goto cerror;
> } while ((c = getchr()) != ']');
> lastep[1] = cclcnt;
> continue;
>
> defchar:
> default:
> *ep++ = CCHR;
> *ep++ = c;
> }
> }
> cerror:
> expbuf[0] = 0;
> nbra = 0;
> error(Q);
> }
> static int execute(unsigned int *addr)
> {
> unsigned char *p1, *p2;
> int c;
>
> for (c=0; c<NBRA; c++) {
> braslist[c] = 0;
> braelist[c] = 0;
> }
> p2 = expbuf;
> if (addr == (unsigned *)0) {
> if (*p2==CCIRC)
> return(0);
> p1 = loc2;
> } else if (addr==zero)
> return(0);
> else
> p1 = GetLine(*addr);
> if (*p2==CCIRC) {
> loc1 = p1;
> return(advance(p1, p2+1));
> }
> /* fast check for first character */
> if (*p2==CCHR) {
> c = p2[1];
> do {
> if (*p1!=c)
> continue;
> if (advance(p1, p2)) {
> loc1 = p1;
> return(1);
> }
> } while (*p1++);
> return(0);
> }
> /* regular algorithm */
> do {
> if (advance(p1, p2)) {
> loc1 = p1;
> return(1);
> }
> } while (*p1++);
> return(0);
> }
> static int advance(unsigned char *lp,unsigned char *ep)
> {
> unsigned char *curlp;
> int i;
>
> for (;;) switch (*ep++) {
>
> case CCHR:
> if (*ep++ == *lp++)
> continue;
> return(0);
>
> case CDOT:
> if (*lp++)
> continue;
> return(0);
>
> case CDOL:
> if (*lp==0)
> continue;
> return(0);
>
> case CEOF:
> loc2 = lp;
> return(1);
>
> case CCL:
> if (cclass(ep, *lp++, 1)) {
> ep += *ep;
> continue;
> }
> return(0);
>
> case NCCL:
> if (cclass(ep, *lp++, 0)) {
> ep += *ep;
> continue;
> }
> return(0);
>
> case CBRA:
> braslist[*ep++] = lp;
> continue;
>
> case CKET:
> braelist[*ep++] = lp;
> continue;
>
> case CBACK:
> if (braelist[i = *ep++]==0)
> error(Q);
> if (backref(i, lp)) {
> lp += braelist[i] - braslist[i];
> continue;
> }
> return(0);
>
> case CBACK|STAR:
> if (braelist[i = *ep++] == 0)
> error(Q);
> curlp = lp;
> while (backref(i, lp))
> lp += braelist[i] - braslist[i];
> while (lp >= curlp) {
> if (advance(lp, ep))
> return(1);
> lp -= braelist[i] - braslist[i];
> }
> continue;
>
> case CDOT|STAR:
> curlp = lp;
> while (*lp++)
> ;
> goto star;
>
> case CCHR|STAR:
> curlp = lp;
> while (*lp++ == *ep)
> ;
> ep++;
> goto star;
>
> case CCL|STAR:
> case NCCL|STAR:
> curlp = lp;
> while (cclass(ep, *lp++, ep[-1]==(CCL|STAR)))
> ;
> ep += *ep;
> goto star;
>
> star:
> do {
> lp--;
> if (advance(lp, ep))
> return(1);
> } while (lp > curlp);
> return(0);
>
> default:
> error(Q);
> }
> }
> static int backref(int i,unsigned char *lp)
> {
> unsigned char *bp;
>
> bp = braslist[i];
> while (*bp++ == *lp++)
> if (bp >= braelist[i])
> return(1);
> return(0);
> }
> static int cclass(unsigned char *set, int c, int af)
> {
> int n;
>
> if (c==0)
> return(0);
> n = *set++;
> while (--n)
> if (*set++ == c)
> return(af);
> return(!af);
> }
> static void putd(void)
> {
> int r;
>
> r = count%10;
> count /= 10;
> if (count)
> putd();
> putchr(r + '0');
> }
> static unsigned char line[70];
> static unsigned char *linp = line;
> static void putchr(int ac)
> {
> unsigned char *lp;
> int c;
>
> lp = linp;
> c = ac;
> if (listf) {
> if (c=='\n') {
> if (linp!=line && linp[-1]==' ') {
> *lp++ = '\\';
> *lp++ = 'n';
> }
> } else {
> if (col > (72-4-2)) {
> col = 8;
> *lp++ = '\\';
> *lp++ = '\n';
> *lp++ = '\t';
> }
> col++;
> if (c=='\b' || c=='\t' || c=='\\') {
> *lp++ = '\\';
> if (c=='\b')
> c = 'b';
> else if (c=='\t')
> c = 't';
> col++;
> } else if (c<' ' || c=='\177') {
> *lp++ = '\\';
> *lp++ = (c>>6) +'0';
> *lp++ = ((c>>3)&07)+'0';
> c = ( c &07)+'0';
> col += 3;
> }
> }
> }
> *lp++ = c;
> if(c == '\n' || lp >= &line[64]) {
> linp = line;
> write(oflag?2:1, line, lp-line);
> return;
> }
> linp = lp;
> }

I just looked into the code more deeply; it is quite tricky and old-fashioned.
Not good for beginners, except for several recursive methods.

Keith Thompson

unread,
Apr 24, 2021, 5:21:34 PM4/24/21
to
wij <wyn...@gmail.com> writes:
> On Friday, 23 April 2021 at 00:58:41 UTC+8, jacobnavia wrote:
>> Recently, somebody started a thread about how C wasn't a simple
>> language. The excuse was a text editor project for a beginner.
>>
>> Attached is the source code for the 10th edition of Unix of the "ed"
>> text editor, published by Brian Kernighan for his CS classes at
>> Princeton in 2001.
>>
[1596 lines deleted]
>
> I just looked into the code deeper, quite tricky and old fassion.
> No good for beginners, except several recurrsive method.

Again, *please* don't quote the entire article when you post a followup.
Just quote enough so that readers can understand the context. Nobody
reading your followup wants to read that entire block of text -- so
don't include it in your post.

--
Keith Thompson (The_Other_Keith) Keith.S.T...@gmail.com
Working, but not speaking, for Philips Healthcare
void Void(void) { Void(); } /* The recursive call of the void */

Kent Dickey

unread,
Apr 25, 2021, 1:05:12 AM4/25/21
to
In article <s5vd9u$1um1$1...@gioia.aioe.org>,
Chris M. Thomasson <chris.m.t...@gmail.com> wrote:
>On 4/23/2021 2:02 PM, jacobnavia wrote:
>> Le 23/04/2021 à 22:51, Chris M. Thomasson a écrit :
>>> On 4/23/2021 1:40 PM, jacobnavia wrote:
>>>> I explained to you why the code doesn't bother to free the
>>>> memorysince it is used for the whole time that the editor is running.
>>>> Freeing it just before exiting the program makes no sense.

[ snip]

>I know exactly what you are talking about. However, please clean up
>after yourself? Pretty please? UNIX is not the only system out there.
>This is a bad habit to get into. Clean up, gosh darn it.

First: Programs do need to erase any temp files and free any shared
resources.

I wouldn't have really cared about matching malloc()/free() as an issue, but
I have a problem with a program whose attempt to free() all of its memory when
quitting is itself causing a problem.

1) It allocates shared memory segments. If they already exist, an error is
printed which does not in any way explain the problem.
2) It does millions of malloc()'s (or their equivalent, like many programs now,
it's written in at least 4 languages).
3) I try to exit the program.
4) While walking its crazy internal structures, simply to call free(), it
finds an inconsistency and immediately exits.
5) It forgets to free the shared memory segments since that would be done
after the consistency check.

If you try to re-run the program now, it may fail due to the shared memory
segments still existing.

The problem is at no point does the code really care about these internal
consistency checks--except when it wants to exit. And it's deemed not a
real problem since the program does exit (with an error code).

If the code simply freed the shared memory segments and did exit(0), and
didn't try to balance malloc() and free(), it would just work.

There's a great deal of complexity unwinding complex malloc() chains, and
why bother debugging that mess? Yes, memory leaks are bad, but allocations
that will never be freed are not a problem. Every OS (even trivial ones)
can handle programs not calling free() when the program exits.

Kent

Chris M. Thomasson

unread,
Apr 25, 2021, 1:30:12 AM4/25/21
to
Actually, I am quite fond of region allocators, where there is no need
to balance between allocating and flushing a region. I wrote one a while
back:

https://groups.google.com/g/comp.lang.c/c/7oaJFWKVCTw/m/sSWYU9BUS_QJ

https://pastebin.com/raw/f37a23918

I used it in various ways, even partitioning it to allow for "partial"
region resets. This is using a nasty hack, to get alignment. But, shit
happens. ;^o

Layering a malloc/free on top of a region allocator can sometimes allow
for the free function to be a no-op.


> There's a great deal of complexity unwinding complex malloc() chains, and
> why bother debugging that mess? Yes, memory leaks are bad, but allocations
> that will never be freed are not a problem. Every OS (even trivial ones)
> can handle programs not calling free() when the program exits.

Still, if you are running any type of "event" loop, memory leaks can be
a problem. I had to debug a horrible one where a subtle race-condition
caused memory leaks over time. The horrible nature of it was the race
condition would only trip at basically random times.

Speaking of random, for fun, check out the random number generator thing
I wrote that is based on race conditions:

https://groups.google.com/g/comp.lang.c++/c/7u_rLgQe86k/m/XiqELOEECAAJ

lol. ;^)

Chris M. Thomasson

unread,
Apr 25, 2021, 1:59:41 AM4/25/21
to
Yeah. It was scary, in a sense. The problem reared its ugly head when too
many IOCP requests were in flight at the same time. The non-paged pool
could get nuked. I definitely had timeout logic to abort a certain class
of connections during times of stress. If a connection is allowed to
persist with no activity forever, it is still there, using up non-paged
memory. So, I would abort them based on timeouts, kind of an "it's been
ten minutes, are you still there?" type of thing. I would cancel all of
the timeout-pending connections when the server went into a certain
"panic" mode.

Way back, I even created a version that spawned clients, and forced them
to make a shitload of connections, and transfer huge files back and
forth, like ping pong. For each established connection the server would
store the current number of connections, along with other system data,
into a file. The system was allowed to crash. Reboot, and the file was
read... System crash at this many connections. So, this gave me a bit of
an insight into how many connections the system could handle before it
eats shit.

Chris M. Thomasson

unread,
Apr 25, 2021, 2:06:01 AM4/25/21
to
On 4/24/2021 10:04 PM, Kent Dickey wrote:
Have you ever messed around with Reaps?

https://people.cs.umass.edu/~emery/pubs/berger-oopsla2002.pdf

Paavo Helde

unread,
Apr 25, 2021, 3:57:23 AM4/25/21
to
We have no way to ensure your diagnosis is correct. The program is
clearly buggy, trying to ignore those bugs won't make the bugs
disappear, it only hides them. In my world, a hidden bug is much worse
than a surfacing bug, as it might reappear at any moment in another place.

Based on my experience, I would also say that errors on shutdown are
much more likely caused by bugs in joining of threads or bugs in
shutting down independent components, NOT by freeing dynamically
allocated memory, which is a relatively simple task. Avoiding memory
freeing will most likely not fix those bugs, so one still cannot be
really sure that all the important cleanup like releasing shared memory
is done properly.

Technically, if the program is unable or unwilling to join all its
threads properly on shutdown, the correct way to terminate is to call
_exit(), not exit(). The latter will still try to release static data
structures, which may cause havoc with the still running rampant threads.

Chris M. Thomasson

unread,
Apr 25, 2021, 4:18:11 AM4/25/21
to
This is one reason why I always feared detached threads, in a sense... ;^o

Paavo Helde

unread,
Apr 25, 2021, 4:28:48 AM4/25/21
to
And rightly so!

Öö Tiib

unread,
Apr 25, 2021, 9:19:34 AM4/25/21
to
On Saturday, 24 April 2021 at 08:06:00 UTC+3, Ian Collins wrote:
> On 24/04/2021 16:43, wij wrote:
> >
> > As to the issue that C++ codes are simpler and easier. In basics I agree, but
> > probably not much different from other popular high level languages in
> > application, except the method.
> >
> > Approaching the more general problem from the real world of software engineer market,
> > while people saying C++ is the choice for high performance language, such as gaming,
> > banking, AI, VR,..., I saw the need for C++ engineer shrinks very fast these years.
> > Most companies are trying to reduce cost by shrinking existing C++ codes to minimal.
> > Fortran, Cobol are still active and high paid for 'high performance' need.

I do not see that point ... What companies want to reduce C++ by using more COBOL,
FORTRAN, Ada, D or C? Can anyone point at such companies? Any cite?

> We have the opposite problem, our ever expanding core product is C++
> with no viable alternatives. With our borders basically shut, we are
> also finding it hard to get staff...

I also experience the same even with borders being open. Wherever there are some real
sensors / actuators into real world to develop there are only those few alternatives
and next to nothing can be done with that Python or JavaScript. So how to reduce
C++ code there?

Current main issue is that people want remote work when there is pandemic but for
systems expensive to transport, consisting of number of optional components or
expensive peripherals that have part of control manual it is quite painful to set up.
So the developers have to be physically present at least part of the time. Budgets
available and used are absurd but still the products lag behind because of lack of C++
developers. Writing parts in C can only relieve it very slightly as availability of potent
developers of C is even worse or those are often very same persons fluent in both
but preferring C++.

> > To be more on topic, to say C++ is better over C, support from hardware is a must.
> They are pretty much equal except at the extreme low end, that is
> targets not supported by gcc.

Yes, and where we have targets supported by gcc there it does not matter at all
that gcc adds technically Fortran, Ada and D as alternatives as companies do
not want to move to those.

wij

unread,
Apr 25, 2021, 1:24:07 PM4/25/21
to
On Sunday, 25 April 2021 at 21:19:34 UTC+8, Öö Tiib wrote:
> On Saturday, 24 April 2021 at 08:06:00 UTC+3, Ian Collins wrote:
> > On 24/04/2021 16:43, wij wrote:
> > >
> > > As to the issue that C++ codes are simpler and easier. In basics I agree, but
> > > probably not much different from other popular high level languages in
> > > application, except the method.
> > >
> > > Approaching the more general problem from the real world of software engineer market,
> > > while people saying C++ is the choice for high performance language, such as gaming,
> > > banking, AI, VR,..., I saw the need for C++ engineer shrinks very fast these years.
> > > Most companies are trying to reduce cost by shrinking existing C++ codes to minimal.
> > > Fortran, Cobol are still active and high paid for 'high performance' need.
> I do not see that point ... What companies want to reduce C++ by using more COBOL,
> FORTRAN, Ada, D or C? Can anyone point at such companies? Any cite?

That saying is based on my own investigation of local job market several years ago.
And, for the question of this Conversation thread, I checked the software engineer
need of the market in my country again for just about 10 pages (no company wants
a C++ programmer).
I do not remember exactly which company, the one that I remember requires
FORTRAN/C skill is a company or institute for high speed computation (clustered,
large computers). Surely, COBOL is from some banking company, you can also
assume they also need 'high performance'.
Probably a little surprise to me is gaming companies seemingly began to focus
on using 'engines'.
I myself have some career with gaming and security surveillance.., firmware and
hardware. From the experience of hardware development, cost-reduction is a
tough issue. Basically, in my experience, truly real tech. innovation means
lower cost, and vice versa.

Q: ...By using *more* COBOL, FORTRAN, Ada, D or C?
A: The reason to using more FORTRAN and C is reasonable as it occurred.
I can only imagine what such a company really want is mathematical skill and
do not want more other restrictions and introducing unnecessary complications.
As to using more COBOL codes, I do not really know, but you can come up with
some reasonable scenario.
As to Ada or D, none in my country I know requires such skill.

James Kuyper

unread,
Apr 25, 2021, 3:02:18 PM4/25/21
to
25.04.2021 11:18 Chris M. Thomasson kirjutas:
...
> This is one reason why I always feared detached threads, in a sense... ;^o

I couldn't help thinking how weird that statement would sound outside of
a computer programming context. I imagined someone shying away in terror
when someone else used scissors to cut off a piece of thread. :-)

Öö Tiib

unread,
Apr 25, 2021, 3:46:52 PM4/25/21
to
On Sunday, 25 April 2021 at 20:24:07 UTC+3, wyn...@gmail.com wrote:
> On Sunday, 25 April 2021 at 21:19:34 UTC+8, Öö Tiib wrote:
> > On Saturday, 24 April 2021 at 08:06:00 UTC+3, Ian Collins wrote:
> > > On 24/04/2021 16:43, wij wrote:
> > > >
> > > > As to the issue that C++ codes are simpler and easier. In basics I agree, but
> > > > probably not much different from other popular high level languages in
> > > > application, except the method.
> > > >
> > > > Approaching the more general problem from the real world of software engineer market,
> > > > while people saying C++ is the choice for high performance language, such as gaming,
> > > > banking, AI, VR,..., I saw the need for C++ engineer shrinks very fast these years.
> > > > Most companies are trying to reduce cost by shrinking existing C++ codes to minimal.
> > > > Fortran, Cobol are still active and high paid for 'high performance' need.
> > I do not see that point ... What companies want to reduce C++ by using more COBOL,
> > FORTRAN, Ada, D or C? Can anyone point at such companies? Any cite?
> That saying is based on my own investigation of local job market several years ago.
> And, for the question of this Conversation thread, I checked the software engineer
> need of the market in my country again for just about 10 pages (no company wants
> C++ programer).

Huh? Few look at those ads that all places are full of. These do not indicate who
wants to reduce C++ in any manner. <https://stackoverflow.com/jobs/companies?q=c%2B%2B>
321 results.

Job market is nontransparent. Our recruiters periodically ask every single C++-capable
individual whom we want in our country personally if they are interested in new work or
when it is student then even first work. Also they check globally who wants to come into
our little and relatively cold country. They have helped to form migration papers from
places like Russia, Belarus, Argentina, Ecuador and Philippines. Sure the list of people
whom we do not want is a lot longer. There is no point in announcing anything, as it is
more likely that someone whom we don't want will apply. It feels unrelated to
what I was asking.

> I do not remember exactly which company, the one that I remember requires
> FORTRAN/C skill is a company or institute for high speed computation (clustered,
> large computers). Surely, COBOL is from some banking company, you can also
> assume they also need 'high performance'.
> Probably a little surprise to me is gaming companies seemingly began to focus
> on using 'engines'.
> I myself have some career with gaming and security surveillance.., firmware and
> hardware. From the experience of hardware development, cost-reduction is a
> tough issue. Basically or in such experiences, truly real tech. innovation means
> lower cost, vise versa.
>
> Q: ...By using *more* COBOL, FORTRAN, Ada, D or C?
> A: The reason to using more FORTRAN and C is reasonable as it occurred.
> I can only imagine what such a company really want is mathematical skill and
> do not want more other restrictions and introducing unnecessary complications.
> As to using more COBOL codes, I do not really know, but you can come up with
> some reasonable scenario.
> As to Ada or D, none in my country I know requires such skill.

New projects can be started in any language.
Where gcc is compiler there these start in C++ far more frequently than in
FORTRAN or C. But that wasn't even a question as I was particularly asking who
wants to migrate/translate/switch from C++ product/code base to any of those
now, and why, as I know of absolutely no such cases.

wij

unread,
Apr 25, 2021, 11:34:52 PM4/25/21
to
> > Q: ...By using *more* COBOL, FORTRAN, Ada, D or C?
> > A: The reason to using more FORTRAN and C is reasonable as it occurred.
> > I can only imagine what such a company really want is mathematical skill and
> > do not want more other restrictions and introducing unnecessary complications.
> > As to using more COBOL codes, I do not really know, but you can come up with
> > some reasonable scenario.
> > As to Ada or D, none in my country I know requires such skill.
> New projects can be started in any language.
> Where gcc is compiler there these start in C++ far more frequently than in
> FORTRAN or C. But that wasn't even a question as I was particularly asking who
> wants to migrate/translate/switch from C++ product/code base to any of those
> now and why as I know outright totally none cases.

Who? Who can say that specifically except themselves?

For many companies/application users, a program is just a program; the
language it is written in is irrelevant. The products of many companies in my country
are physical things. They hire programmers because the functions needed
change frequently across the various production phases/lines. In their view, it is a
question of changing programs, not programming languages. Thus, the hired
programmers have to adapt, and probably end up with script/mixed languages.

Qt is the thing I know better because I had a commercial license (Qt3; I just use
several basic elements), and they send me email regularly. That Qt itself is in C++
probably will not change, but their products seem to be changing from pure C++
code to QML and a tool set. In the end, I fear the C++ core part may shrink accordingly.
Who? You ask them.

Juha Nieminen

unread,
Apr 26, 2021, 1:22:39 AM4/26/21
to
In comp.lang.c++ jacobnavia <ja...@jacob.remcomp.fr> wrote:
> The malloc allocates the working area of the editor. It is not freed
> because it will be always needed; it is realloc'ed when needed, and
> freed by the OS when the program terminates. This is abvious for anyone
> reading C.

The main point is that malloc()'ing something and not free()'ing it is
bad practice. Not only does it teach you bad habits, it teaches other
people reading the code this same bad habit. This is especially
egregious in code that's being used for didactic purposes out there,
for thousands and thousands of students to see.

This potentially (quite potentially) will lead to people learning the
bad habit and then not bothering to free() what they malloc() in
situations where it actually becomes detrimental. In loops where the
amount of leaked memory will start increasing and increasing indefinitely.
When the code is complicated enough, and there are hundreds and hundreds
of such non-freed mallocs all over the place, in complex code, it becomes
a nightmare to fix.

On that note, I don't think the term "freed by the OS" should be used
in this context. In fact, I would say that anybody who makes that claim
doesn't actually understand how malloc() and free() work, and what their
relationship is with the OS (at least in most operating systems).

It makes it sound like every time you call malloc(), it will make an OS
call, which will make the OS allocate a block of memory of that size
(which wasn't allocated for the program before), and that the OS itself
keeps track of these possibly thousands and thousands, sometimes millions
of tiny allocations (think of a linked list, or a binary tree, where each
node is allocated individually), and once the program terminates, the OS
will "garbage-collect" all these tiny individual allocations and free
them again for other programs to use. In other words, as if the OS called
free() for you, for each malloc() you did (and didn't explicitly free),
when the program ends.

More particularly, the claim makes it sound like if the OS didn't
"garbage-collect" all the malloc()s you did, which you didn't free()
yourself, they would be permanently allocated, never to be reusable by
any other program.

Of course that's not how it works. In this sense the claim is incorrect.

Juha Nieminen

unread,
Apr 26, 2021, 1:29:43 AM4/26/21
to
In comp.lang.c++ Jorgen Grahn <grahn...@snipabacken.se> wrote:
> ["Followup-To:" header set to comp.lang.c.]
>
> On Fri, 2021-04-23, Juha Nieminen wrote:
>> In comp.lang.c++ jacobnavia <ja...@jacob.remcomp.fr> wrote:
>>> Attached is the source code for the 10th edition of Unix of the "ed"
>>> text editor, published by Brian Kernighan for his CS classes at
>>> Princeton in 2001.
>>
>> Maybe it has significant historic value, but as a C program I don't think
>> it's that great.
>>
>> It's quite hard to decipher (especially since it follows the typical
>> 70's and 80's C style of using very short cryptic names everywhere),
>
> IMO, longer names without any other improvements, aren't an
> improvement. The global 'int pflag' here, for example, would suck no
> matter how you rename it. If it's changed to an enum, and wrapped and
> documented in a struct EditorState[1], it doesn't need to be renamed.

I don't agree. Code becomes more readable when cryptic and non-descript
names are changed to names that make the code more self-documenting,
even without any other changes.

As an example, suppose you see a line of code like this:

convert(a, b);

Well, that doesn't say much. We have absolutely no way of knowing what
it does. But suppose the function were renamed to:

convert_string_to_wstring(a, b);

Ah, now we are talking. Now it actually says what it's doing.
There's of course still the minor problem that we don't know which
parameter is the string, and which one is the wstring. But we can also
rename the variables:

convert_string_to_wstring(dest, src);

And suddenly, a line of code that told us absolutely nothing, now
tells us everything.

(The above example is actually based on actual production code I have seen.)

jacobnavia

unread,
Apr 26, 2021, 4:00:46 AM4/26/21
to
Le 26/04/2021 à 07:22, Juha Nieminen a écrit :
> In comp.lang.c++ jacobnavia <ja...@jacob.remcomp.fr> wrote:
>> The malloc allocates the working area of the editor. It is not freed
>> because it will be always needed; it is realloc'ed when needed, and
>> freed by the OS when the program terminates. This is abvious for anyone
>> reading C.
>
> The main point is that malloc()'ing something and not free()'ing it is
> bad practice. Not only does it teach you bad habits, it teaches other
> people reading the code this same bad habit. This is especially
> egregious in code that's being used for didactic purposes out there,
> for thousands and thousands of students to see.
>
1) The code was presented as a test text for a GREP assignment. The
assignment was to rewrite GREP and test it with the ed.c text file.

Since you spoke of a simple editor project in C, I showed you one
written 50 years ago of just 1700 lines. It compiles and runs 50 years
later. Please show me a complex program in C++ from 2000 that compiles
unmodified and runs today.

2) In many applications, the information gathered and stored using
malloc will be used until the exit of the program: in a compiler, for
instance, or in a text editor. A good strategy is to malloc and never
free, since all the memory is used until the program exits. This
simplifies and accelerates the program.

> This potentially (quite potentially) will lead to people learning the
> bad habit and then not bothering to free() what they malloc() in
> situations where it actually becomes detrimental.

Maybe, maybe not. I suppose that people who want to program are able to
see when to use malloc and free, and when to use just malloc, never freeing
and letting the OS free everything at exit.

> In loops where the
> amount of leaked memory will start increasing and increasing indefinitely.
> When the code is complicated enough, and there are hundreds and hundreds
> of such non-freed mallocs all over the place, in complex code, it becomes
> a nightmare to fix.
>

This has a simple fix (that I have used a lot):
1) Replace all malloc calls by calls to Mymalloc
2) Write a simple function Mymalloc that allocates from a heap
3) Free the heap when the program terminates. This way, a program that
never freed anything can be included into a larger project that needs
the free().


> On that note, I don't think the term "freed by the OS" should be used
> in this context. In fact, I would say that anybody who makes that claim
> doesn't actually understand how malloc() and free() work, and what their
> relationship is with the OS (at least in most operating systems).
>

I think that you are the one that doesn't understand. Let's see:

> It makes it sound like every time you call malloc(), it will make an OS
> call, which will make the OS allocate a block of memory of that size
> (which wasn't allocated for the program before),


This is exactly what happens. The OS allocates a block of memory that
malloc/free manages. All mallocs use some API for calling the OS.

> and that the OS itself
> keeps track of these possibly thousands and thousands, sometimes millions
> of tiny allocations (think of a linked list, or a binary tree, where each
> node is allocated individually), and once the program terminates, the OS
> will "garbage-collect" all these tiny individual allocations and free
> them again for other programs to use. In other words, as if the OS called
> free() for you, for each malloc() you did (and didn't explicitly free),
> when the program ends.

The OS frees the heap of the application (conceptually).
>
> More particularly, the claim makes it sound like if the OS didn't
> "garbage-collect" all the malloc()s you did, which you didn't free()
> yourself, they would be permanently allocated, never to be reusable by
> any other program.
>

????

Chris M. Thomasson

unread,
Apr 26, 2021, 4:26:12 AM4/26/21
to
No man, don't cut the cord, join it... ;^)

Big time! :^D

Chris M. Thomasson

unread,
Apr 26, 2021, 4:26:55 AM4/26/21
to
On 4/25/2021 12:02 PM, James Kuyper wrote:
AHAHAHA!

Juha Nieminen

unread,
Apr 26, 2021, 5:58:05 AM4/26/21
to
In comp.lang.c++ jacobnavia <ja...@jacob.remcomp.fr> wrote:
> Since you spoke of a simple editor project in C, I showed you one
> written 50 years ago of just 1700 lines.

Not comparable because my entire point was about how laborious and
error-prone it is to handle *dynamically allocated* (and length-changing)
strings. The program you posted uses a static array for the lines,
with a fixed maximum length. Not really comparable.

> It compiles and runs 50 years
> later. Please show me a complex program in C++ from 2000 that compiles
> unmodified and runs today.

Can you show me a C++ program from 2000 that doesn't?

> 2) In many applications, the information gathered and stored using
> malloc will be used until the exit of the program: in a compiler, for
> instance, or in a text editor. A good strategy is to malloc and never
> free, since all the memory is used until the program exits. This
> simplifies and accelerates the program.

For starters, allocating some kind of memory buffer that gets reused in
the program for many things doesn't stop you from freeing it at the end.

Secondly, that's not really an option for a text editor where you have
to load a text file into RAM and allow the user to edit its lines (by eg.
adding content to them). You can't do this with one single allocation.
(You can read the file into RAM with one single allocation, but you can't
then support adding stuff to it without reallocation. Moreover, if it's
a text editor you probably want to allocate each line individually to
allow them to be grown efficiently, without having to realloc the entire
thing.)

>> In loops where the
>> amount of leaked memory will start increasing and increasing indefinitely.
>> When the code is complicated enough, and there are hundreds and hundreds
>> of such non-freed mallocs all over the place, in complex code, it becomes
>> a nightmare to fix.
>>
>
> This has a simple fix (that I have used a lot):
> 1) Replace all malloc calls by calls to Mymalloc
> 2) Write a simple function Mymalloc that allocates from a heap
> 3) Free the heap when the program terminates. This way, a program that
> never freed anything can be included into a larger project that needs
> the free().

That doesn't make any difference. If you are leaking memory in a loop,
your program will start consuming more and more RAM until it runs
out of it. It makes no difference whether you use your own allocator
for this or not, if you never free the memory blocks that you don't
need anymore.

>> It makes it sound like every time you call malloc(), it will make an OS
>> call, which will make the OS allocate a block of memory of that size
>> (which wasn't allocated for the program before),
>
> This is exactly what happens. The OS allocates a block of memory that
> malloc/free manages. All mallocs use some API for calling the OS.

No, it's not. malloc() and free() are (typically) handled by the C
runtime library. This library will make a system call to ask for more
heap space only if needed. If a malloc() can be done within the current
heap, it won't ask the OS for more.

The only thing that the OS sees is the heap expansion requests (and
possibly heap shrinking requests). It doesn't see the individual
mallocs and frees. It doesn't know nor care which parts of the heap
are "unused" and which are "in use" by the program internally.

> and that the OS itself
>> keeps track of these possibly thousands and thousands, sometimes millions
>> of tiny allocations (think of a linked list, or a binary tree, where each
>> node is allocated individually), and once the program terminates, the OS
>> will "garbage-collect" all these tiny individual allocations and free
>> them again for other programs to use. In other words, as if the OS called
>> free() for you, for each malloc() you did (and didn't explicitly free),
>> when the program ends.
>
> The OS frees the heap of the application (conceptually).

That's what I said. It doesn't care about any "mallocs" or "frees" of
the program. It just sees a heap, and doesn't care what the program
does with it. When the program ends, the OS reclaims that heap.

Robert Latest

unread,
Apr 26, 2021, 6:41:30 AM4/26/21
to
["Followup-To:" header set to comp.lang.c.]
Juha Nieminen wrote:
> Not comparable because my entire point was about how laborious and
> error-prone it is to handle *dynamically allocated* (and length-changing)
> strings.
All coding is laborious and error prone if you're both lazy and sloppy.

--
robert

jacobnavia

unread,
Apr 26, 2021, 7:20:59 AM4/26/21
to
Le 26/04/2021 à 11:57, Juha Nieminen a écrit :
> Can you show me a C++ program from 2000 that doesn't?

Breaking changes in C++ 2011

1) the introduction of explicit operator bool() in the standard library,
replacing old instances of operator void*().

2) #define u8 "abc"
const char *s = u8"def"; // Previously "abcdef", now "def"

3) #define _x "there"
"hello"_x // now a user-defined-string-literal. Previously, expanded _x

4) New keywords: alignas, alignof, char16_t, char32_t, constexpr,
decltype, noexcept, nullptr, static_assert, and thread_local

5) Certain integer literals larger than can be represented by long could
change from an unsigned integer type to signed long long.

6) Valid C++ 2003 code that uses integer division rounds the result
toward 0 or toward negative infinity, whereas C++0x always rounds the
result toward 0.

7) struct A { private: A(); };
struct B : A { };
int main() { sizeof B(); /* valid in C++03, invalid in C++0x */ }
Such sizeof tricks have been used by some SFINAE, and needs to be
changed now :)

8) banning of narrowing conversions during aggregate initialization:

int a = {1.0; }; // error

9) Implicit move breaks C++ 2003 code. The long explanation why is to be
found at:
http://web.archive.org/web/20140110035813/http://cpp-next.com/archive/2010/10/implicit-move-must-go/

10) I remember that in an older version of C++ template arguments were
checked, and had to be defined, at the point of expansion; the new
version of the language requires that they be defined at the point of
definition. Or something like that. I earned two weeks of consulting
updating a huge codebase. Actually I love C++ :-)

And I will stop here... You apparently have no idea how C++ has changed
over the years, with each new version incompatible in some respect with
the older one.

Only in C can you still use the code of 50 years ago.

Juha Nieminen

unread,
Apr 26, 2021, 9:17:50 AM4/26/21
to
In comp.lang.c++ jacobnavia <ja...@jacob.remcomp.fr> wrote:
> Le 26/04/2021 à 11:57, Juha Nieminen a écrit :
>> Can you show me a C++ program from 2000 that doesn't?
>
> Breaking changes in C++ 2011

I think all the major C++ compilers support C++98 mode.

And it's not like C hasn't changed either. This compiles as C89 but not
as C99:

#include <stdio.h>

int main()
{
int restrict = 1, inline = 2;
printf("%i %i\n", restrict, inline);
return 0;
}

Bonita Montero

unread,
Apr 26, 2021, 12:04:19 PM4/26/21
to
Use C++ - much less code, and more maintainable and more readable code
if you use it properly. Something like malloc(), for example, is really
disgusting when compared to using a vector<>.

Guillaume

unread,
Apr 26, 2021, 12:49:02 PM4/26/21
to
Le 26/04/2021 à 07:22, Juha Nieminen a écrit :
> In comp.lang.c++ jacobnavia <ja...@jacob.remcomp.fr> wrote:
>> The malloc allocates the working area of the editor. It is not freed
>> because it will be always needed; it is realloc'ed when needed, and
>> freed by the OS when the program terminates. This is abvious for anyone
>> reading C.
>
> The main point is that malloc()'ing something and not free()'ing it is
> bad practice. Not only does it teach you bad habits, it teaches other
> people reading the code this same bad habit.
Whereas I personally agree with this, I reckon there are different ways
of seeing it, some in favor of actually doing exactly what they did.

If you assume that all malloc'ed memory will be reclaimed when the
program terminates, then not explicitly freeing it in this case (where
you would free it just before the program terminates anyway) actually
makes sense and shows you know what you're doing. One can even argue
that useless statements are just code pollution.

Now regarding the above assumption: while it's reasonable on any
"decent" OS, including the one(s) the authors were targeting at the
time they wrote this piece of code, I could not find any explicit
mention of this in the C standard regarding memory allocation (unless
of course I missed it), which means the hosted environment is not
required to reclaim allocated memory upon program termination. It's
implementation-defined, thus the quoted program is, by definition,
non-portable. Which is one good reason to claim it's bad practice indeed.

As to another reason why I would not do this: code reusability. I tend
to write code so that it can be reused. The way it's implemented in the
quoted program, it's just not reusable if you wanted to make a "text
editor" component out of it and reuse it in a completely different program.

jacobnavia

unread,
Apr 26, 2021, 1:53:59 PM4/26/21
to
Yes, that is why I did NOT even mention the same problems that C++ has
(auto keyword, and MANY others, much more than C)

James Kuyper

unread,
Apr 26, 2021, 1:54:46 PM4/26/21
to
On 4/26/21 1:22 AM, Juha Nieminen wrote:
...
> The main point is that malloc()'ing something and not free()'ing it is
> bad practice. Not only does it teach you bad habits, it teaches other
> people reading the code this same bad habit. This is especially
> egregious in code that's being used for didactic purposes out there,
> for thousands and thousands of students to see.

There's a fundamental problem with arguments of that type: if the person
you're arguing with doesn't already accept that it is a bad practice,
he's not going to have any problems with teaching others to adopt it. If
he does accept that it's a bad practice, he'll stop using it for that
reason alone, and will then automatically become a good example for
others to follow. So bringing up the effects on other people doesn't
actually do anything useful for your argument.

David Brown

unread,
Apr 26, 2021, 2:34:25 PM4/26/21
to
Yes, you did - your number 4 item was a list of new keywords whose only
backwards compatibility issue is if they happen to be used as
identifiers. (There are more than in C99 - no arguments there. I'd not
expect significantly more conflicts, however.)

There are a few cases where it might be reasonable to think /real/ code
written for C++03 would no longer be valid (or have the same effect)
when compiled as C++11 or newer. But they would be rare. Listing silly
things like defining "u8" as a macro of a string and using it for string
concatenation (without a space) is just petty. It is not a realistic
concern, nor an example of what Juha asked for - it is just FUD, and a
sign of desperation. You prefer C to C++ - that's fine, everyone has
their preferences. But if you don't have real reasons, there's no need
to invent unrealistic reasons.


Ben Bacarisse

unread,
Apr 26, 2021, 4:37:28 PM4/26/21
to
jacobnavia <ja...@jacob.remcomp.fr> writes:

> Since you spoke of a simple editor project in C, I showed you one
> written 50 years ago of just 1700 lines. It compiles and runs 50 years
> later. Please show me a complex program in C++ from 2000 that compiles
> unmodified and runs today.

The posted code can't be 50 years old. ed is 50 years old, but the
original was not written in C (there was no C in 1971).

The version posted uses void and function prototypes so it dates from
the time C was first being standardised. A comment says it has been
slightly modified. It would be nice to know if that was necessary to
get it to compile, and if so, what the modification was.

--
Ben.

jacobnavia

unread,
Apr 26, 2021, 4:48:27 PM4/26/21
to
Le 26/04/2021 à 22:37, Ben Bacarisse a écrit :
> jacobnavia <ja...@jacob.remcomp.fr> writes:
>
>> Since you spoke of a simple editor project in C, I showed you one
>> written 50 years ago of just 1700 lines. It compiles and runs 50 years
>> later. Please show me a complex program in C++ from 2000 that compiles
>> unmodified and runs today.
>
> The posted code can't be 50 years old. ed is 50 years old, but the
> original was not written in C (there was no C in 1971).
>

OK, granted

> The version posted uses void and function prototypes so it dates from
> the time C was first being standardised. A comment says it has been
> slightly modified. It would be nice to know if that was necessary to
> get it to compile, and if so, what the modification was.
>

I modified it to get rid of gcc warnings, by changing char * into
unsigned char *. Not a big deal. You can download the original from the
URL I included.

jacobnavia

unread,
Apr 26, 2021, 4:49:18 PM4/26/21
to
Sure sure, I am desperate. But you ignored the problem with templates...
and many others I mentioned

Ben Bacarisse

unread,
Apr 26, 2021, 5:04:23 PM4/26/21
to
jacobnavia <ja...@jacob.remcomp.fr> writes:

> 2) #define u8 "abc"
> const char *s = u8"def"; // Previously "abcdef", now "def"

C has the same breaking change, but, just as with C++, you can usually
ask the compiler to use an older standard.

> Only in C you can still use the code of 50 years ago.

There was no C code 50 years ago. This is a bit of a quibble since
there was C code we'd just about recognise in about 1973 or 4, but some
of it would fool even gcc today. The biggest change was the great =<op>
to <op>= switch. This silently changed the meaning of a lot of code.

--
Ben.

Scott Lurndal

unread,
Apr 26, 2021, 5:53:27 PM4/26/21
to
Ben Bacarisse <ben.u...@bsb.me.uk> writes:
>jacobnavia <ja...@jacob.remcomp.fr> writes:
>
>> Since you spoke of a simple editor project in C, I showed you one
>> written 50 years ago of just 1700 lines. It compiles and runs 50 years
>> later. Please show me a complex program in C++ from 2000 that compiles
>> unmodified and runs today.
>
>The posted code can't be 50 years old. ed is 50 years old, but the
>original was not written in C (there was no C in 1971).

ed first appeared in V2. ed2.s and ed3.s

https://minnie.tuhs.org/cgi-bin/utree.pl?file=V2/cmd/ed2.s

ed.c first appeared in V6 (1975).

https://minnie.tuhs.org/cgi-bin/utree.pl?file=V6/usr/source/s1/ed.c

Eli the Bearded

unread,
Apr 26, 2021, 6:19:48 PM4/26/21
to
In comp.lang.c, Ben Bacarisse <ben.u...@bsb.me.uk> wrote:
> There was no C code 50 years ago. This is a bit of a quibble since
> there was C code we'd just about recognise in about 1973 or 4, but some
> of it would fool even gcc today. The biggest change was the great =<op>
> to <op>= switch. This silently changed the meaning of a lot of code.

Maybe five years ago I tried to compile some C code I found buried
in a tar file on a dusty ftp server[*]. The READ_ME implied it was last
touched in June of 1980; the filename agreed with the year.

Things I found tricky about the code:

1. It predated printf being called that. I think it was just using
print(). And that used "-lS" in the Makefile.

2. Prototypes? Who needs em? ("#include <stdio.h>"? What, why?)

3. No need to declare ints, eg:
main(argc, argv)
char **argv;
{

4. Structs are implicitly unions:
struct plot {
char p_x;
char p_y;
} p_plot[LIM_PLOTS];

/* ... */

struct {
int p_xy; /* used to set/transfer entire plot */
};

5. Implicit in above: seems like there is the expectation that
sizeof(int) == 2.


Items 1 to 3 made things difficult. Items 4 and 5 meant actual code
audits were needed.

[*] I last saw it here, but that 404's today:
http://mirror.cc.vt.edu/pub2/Ancient_Unix/Applications/Shoppa_Tapes/usenix_80_delaware.tar.gz

Elijah
------
the .c files were easier to make sense of than the .o files included

Keith Thompson

unread,
Apr 26, 2021, 7:08:04 PM4/26/21
to
Eli the Bearded <*@eli.users.panix.com> writes:
> In comp.lang.c, Ben Bacarisse <ben.u...@bsb.me.uk> wrote:
>> There was no C code 50 years ago. This is a bit of a quibble since
>> there was C code we'd just about recognise in about 1973 or 4, but some
>> of it would fool even gcc today. The biggest change was the great =<op>
>> to <op>= switch. This silently changed the meaning of a lot of code.
>
> Maybe five years ago I tried to compile some C code I found in buried
> in a tar file on a dusty ftp server[*]. The READ_ME implied it was last
> touched in June of 1980, the filename agreed with the year.
>
> Things I found tricky about the code:
>
> 1. It predated printf being called that. I think it was just using
> print(). And that used "-lS" in the Makefile.

I believe printf is older than C. A form of it existed by that name in
B, and it's used (with an implementation presented) in the 1974 and 1975
editions of the C manual. But a particular program might have chosen to
use something else.

> 2. Prototypes? Who needs em? ("#include <stdio.h>"? What, why?)

Prototypes still aren't mandatory, though calling printf without a
visible declaration has been a constraint violation since 1999 (and
undefined behavior in C89/C90, but very likely to work).

> 3. No need to declare ints, eg:
> main(argc, argv)
> char **argv;
> {

Valid in C90. Add "int" to main and argc and it's still valid (but
obsolescent).

> 4. Structs are implicitly unions:
> struct plot {
> char p_x;
> char p_y;
> } p_plot[LIM_PLOTS];
>
> /* ... */
>
> struct {
> int p_xy; /* used to set/transfer entire plot */
> };

A struct member name implied a type and offset, and was not tied to
the struct type. That's why a lot of member names have unique prefixes,
like tm_sec and friends. I think C89/C90 changed that.

> 5. Implicit in above: seems like there is the expectation that
> sizeof(int) == 2.

There were implementations with sizeof(int)!=2 as early as the late
1980s, but there's never been any shortage of non-portable code.

> Items 1 to 3 made things difficult. Items 4 and 5 meant actual code
> audits were needed.
>
> [*] I last saw it here, but that 404's today:
> http://mirror.cc.vt.edu/pub2/Ancient_Unix/Applications/Shoppa_Tapes/usenix_80_delaware.tar.gz
>
> Elijah
> ------
> the .c files were easier to make sense of than the .o files included

--
Keith Thompson (The_Other_Keith) Keith.S.T...@gmail.com
Working, but not speaking, for Philips Healthcare
void Void(void) { Void(); } /* The recursive call of the void */

Joe Pfeiffer

unread,
Apr 26, 2021, 7:09:17 PM4/26/21
to
Eli the Bearded <*@eli.users.panix.com> writes:

> In comp.lang.c, Ben Bacarisse <ben.u...@bsb.me.uk> wrote:
>> There was no C code 50 years ago. This is a bit of a quibble since
>> there was C code we'd just about recognise in about 1973 or 4, but some
>> of it would fool even gcc today. The biggest change was the great =<op>
>> to <op>= switch. This silently changed the meaning of a lot of code.
>
> Maybe five years ago I tried to compile some C code I found in buried
> in a tar file on a dusty ftp server[*]. The READ_ME implied it was last
> touched in June of 1980, the filename agreed with the year.
>
> Things I found tricky about the code:
>
> 1. It predated printf being called that. I think it was just using
> print(). And that used "-lS" in the Makefile.

It was called printf() at least as long ago as Kernighan's "Programming
in C - A Tutorial", which dates from 1974. So the print() you came
across was some other function.

> 2. Prototypes? Who needs em? ("#include <stdio.h>"? What, why?)
>
> 3. No need to declare ints, eg:
> main(argc, argv)
> char **argv;
> {
>
> 4. Structs are implicitly unions:
> struct plot {
> char p_x;
> char p_y;
> } p_plot[LIM_PLOTS];
>
> /* ... */
>
> struct {
> int p_xy; /* used to set/transfer entire plot */
> };

*Lord* that one bit me hard early on. Having all struct members
use a single namespace is a decision I still don't understand (I don't
care how limited the machines running the compiler were).

Keith Thompson

unread,
Apr 26, 2021, 7:58:09 PM4/26/21
to
Joe Pfeiffer <pfei...@cs.nmsu.edu> writes:
> Eli the Bearded <*@eli.users.panix.com> writes:
[...]
>> 4. Structs are implicitly unions:
>> struct plot {
>> char p_x;
>> char p_y;
>> } p_plot[LIM_PLOTS];
>>
>> /* ... */
>>
>> struct {
>> int p_xy; /* used to set/transfer entire plot */
>> };
>
> *Lord* that one bit me hard early on. Having all struct members
> use a single namespace is a decision I still don't understand (I don't
> care how limited the machines running the compiler were).

As of the 1975 C manual, a "prefix.identifier" or "prefix->identifier"
expression *assumed* that the LHS was of the correct type. For "->",
it wasn't even required to be a pointer; it could be a pointer,
character, or integer.

K&R1 (1978) imposed the requirement for the LHS to be of the correct
struct or union type for "." or a pointer to the correct struct or union
type for "->".

If you ran into that problem, it must have been some very early code
and/or a very old compiler.

I've used a compiler (VAXC) that knew about the modern "+=" compound
assignment operators, but also accepted the older "=+" forms -- and
preferred them in ambiguous cases. That was in the late 1990s. That's
also a change that was made between 1975 and K&R1 in 1978. Fortunately,
though VAXC was available, we mostly used the more modern DECC.

[...]

Ben Bacarisse

unread,
Apr 26, 2021, 8:35:07 PM4/26/21
to
sc...@slp53.sl.home (Scott Lurndal) writes:

> Ben Bacarisse <ben.u...@bsb.me.uk> writes:
>>jacobnavia <ja...@jacob.remcomp.fr> writes:
>>
>>> Since you spoke of a simple editor project in C, I showed you one
>>> written 50 years ago of just 1700 lines. It compiles and runs 50 years
>>> later. Please show me a complex program in C++ from 2000 that compiles
>>> unmodified and runs today.
>>
>>The posted code can't be 50 years old. ed is 50 years old, but the
>>original was not written in C (there was no C in 1971).
>
> ed first appeared in V2. ed2.s and ed3.s

Oh. I found a man page dated 1971 in V1 Unix.

> https://minnie.tuhs.org/cgi-bin/utree.pl?file=V2/cmd/ed2.s
>
> ed.c first appeared in V6 (1975).

Yup. ed.c can't predate C!

> https://minnie.tuhs.org/cgi-bin/utree.pl?file=V6/usr/source/s1/ed.c

--
Ben.

Kaz Kylheku

unread,
Apr 26, 2021, 8:47:06 PM4/26/21
to
On 2021-04-26, Eli the Bearded <*@eli.users.panix.com> wrote:
> In comp.lang.c, Ben Bacarisse <ben.u...@bsb.me.uk> wrote:
>> There was no C code 50 years ago. This is a bit of a quibble since
>> there was C code we'd just about recognise in about 1973 or 4, but some
>> of it would fool even gcc today. The biggest change was the great =<op>
>> to <op>= switch. This silently changed the meaning of a lot of code.
>
> Maybe five years ago I tried to compile some C code I found in buried
> in a tar file on a dusty ftp server[*]. The READ_ME implied it was last
> touched in June of 1980, the filename agreed with the year.
>
> Things I found tricky about the code:
>
> 1. It predated printf being called that. I think it was just using
> print(). And that used "-lS" in the Makefile.
>
> 2. Prototypes? Who needs em? ("#include <stdio.h>"? What, why?)

Version 7 Unix in 1979 had <stdio.h>, and fprintf and printf functions,
wrappers around an assembly language routine taking the stream as a
parameter. In version 5 (1974), printf was still a dedicated assembly
routine. It had the "f" in the name.

> 3. No need to declare ints, eg:
> main(argc, argv)
> char **argv;
> {
>
> 4. Structs are implicitly unions:
> struct plot {
> char p_x;
> char p_y;
> } p_plot[LIM_PLOTS];
>
> /* ... */
>
> struct {
> int p_xy; /* used to set/transfer entire plot */
> };

That was a feature of early C. Basically, no type checking. If you
wrote

ptr->member

then it would look up member in a global dictionary of *all* structure
members that have been declared in the translation unit, and not in the
*ptr type! And then it would just generate the machine code to access
memory at that member's offset relative to the pointer.

This is part of the reason why Unix structure members have prefixes,
just like the convention you see in the code you found above.

E.g. "struct stat" has "st_mtime", "st_size" and so on.

That naming convention persists to this day.

Another funny fact is that some early compilers used a static buffer for
returning structs instead of the stack. Oops, not safe w.r.t. signals or
threading.

> 5. Implicit in above: seems like there is the expectation that
> sizeof(int) == 2.

Newly written code today has assumptions like this. Tons of code written
for 16-bit systems (PCs with MS-DOS, for instance) was riddled with
sizeof(int) == 2 == 16 bits assumptions.

--
TXR Programming Language: http://nongnu.org/txr
Cygnal: Cygwin Native Application Library: http://kylheku.com/cygnal

Joe Pfeiffer

unread,
Apr 26, 2021, 11:43:06 PM4/26/21
to
Keith Thompson <Keith.S.T...@gmail.com> writes:
> Joe Pfeiffer <pfei...@cs.nmsu.edu> writes:
>>
>> *Lord* that one bit me hard early on. Having all struct members
>> use a single namespace is a decision I still don't understand (I don't
>> care how limited the machines running the compiler were).
>
> As of the 1975 C manual, a "prefix.identifier" or "prefix->identifier"
> expression *assumed* that the LHS was of the correct type. For "->",
> it wasn't even required to be a pointer; it could be a pointer,
> character, or integer.
>
> K&R1 (1978) imposed the requirement for the LHS to be of the correct
> struct or union type for "." or a pointer to the correct struct or union
> type for "->".
>
> If you ran into that problem, it must have been some very early code
> and/or a very old compiler.

It wasn't an old compiler... at the time... it was roughly 1977 or so.

Juha Nieminen

unread,
Apr 27, 2021, 12:52:07 AM4/27/21
to
In comp.lang.c++ Eli the Bearded <*@eli.users.panix.com> wrote:
> 5. Implicit in above: seems like there is the expectation that
> sizeof(int) == 2.

That's actually a good point, which I didn't myself think of earlier.

Several arguments have been made in this thread that C is very "portable",
and that C code written in the 1970's still compiles and works just fine
(well, at least if you tell gcc/clang to use the C89 standard, hopefully).

However, the undetermined size of basic types (especially before
standardization) makes it more likely for C programs, especially ones
written back then, to be non-portable. Sure, even back when K&R first
"soft-standardized" the C language you shouldn't have assumed a certain
size for any basic type.

(I don't know if sizeof(char) has been guaranteed to be 1 ever since K&R,
but even then, and to this day, you can't really trust that it's actually
one 8-bit byte, only that the sizes of all other types are multiples of it.)

There were no fancy uint32_t and other such type aliases back then
(and not even in C89), so there wasn't really a sure way to have a basic
type of a particular size. (You can check if a basic type is of a given
size with #if, and try several of them to see if one of them is of
the desired size, and produce an #error if none of them are, but that's
as far as you could go. In fact, even today that's technically as far
as you can go, even with the possibly existing standard typedefs.)

For most code it's enough for basic types to have a minimum size, but this
isn't always the case, and it's easy to write code that assumes a particular
size for such a type and breaks if it actually isn't of that size.

I suppose the conclusion is that maybe C code written in the 70's does
compile and work today... but only if it was properly written. If it
wasn't properly written, it's perfectly possible it won't work today,
in a different architecture than it was originally written for
(for example because it assumes the wrong size for 'int'.)

David Brown

unread,
Apr 27, 2021, 4:59:44 AM4/27/21
to
I didn't ignore them, I just didn't mention them. There /are/ backwards
incompatible changes between C++03 and C++11 (and later versions). Some
of these will be in language or library features that might well occur
in real code. It would be perfectly reasonable to point out these
differences - especially if you can give examples or references to real
cases.

What is /unreasonable/ and shows desperation (perhaps people moving to
C++ is bad for your business) is listing every little point you can
think of, regardless of realism. It makes a mockery of your /real/ points.

Scott Lurndal

unread,
Apr 27, 2021, 10:29:05 AM4/27/21
to
Eli the Bearded <*@eli.users.panix.com> writes:
>In comp.lang.c, Ben Bacarisse <ben.u...@bsb.me.uk> wrote:
>> There was no C code 50 years ago. This is a bit of a quibble since
>> there was C code we'd just about recognise in about 1973 or 4, but some
>> of it would fool even gcc today. The biggest change was the great =<op>
>> to <op>= switch. This silently changed the meaning of a lot of code.
>
>Maybe five years ago I tried to compile some C code I found in buried
>in a tar file on a dusty ftp server[*]. The READ_ME implied it was last
>touched in June of 1980, the filename agreed with the year.
>
>Things I found tricky about the code:
>
>1. It predated printf being called that. I think it was just using
> print(). And that used "-lS" in the Makefile.

printf was printf from day one. Sounds like 'print' was provided
by an implementation library that wasn't included with the tar file you resurrected.

>
>2. Prototypes? Who needs em? ("#include <stdio.h>"? What, why?)

Prototypes were not used at the time, argument types were
'flexible', the programmer was expected to do the right thing.

>
>3. No need to declare ints, eg:
> main(argc, argv)
> char **argv;
> {
>
>4. Structs are implicitly unions:
> struct plot {
> char p_x;
> char p_y;
> } p_plot[LIM_PLOTS];

Now this isn't a union. p_x and p_y occupy unique memory
locations.

But member names (MoS - Member of Structure) were top-level
symbol table names, which meant any member from any struct could be
used with any other struct, so:
>
> /* ... */
>
> struct {
> int p_xy; /* used to set/transfer entire plot */
> };

p_xy has offset zero from the start of the structure; when software
uses p_xy (e.g. structpointer->p_xy) it has offset zero which allows
16-bit accesses to the two 8-bit fields in the p_plot struct.

>
>5. Implicit in above: seems like there is the expectation that
> sizeof(int) == 2.

Yes, for that particular application, it was assumed that
sizeof(int) == 2 * sizeof(char). Which was the case on the
PDP-11.


Juha Nieminen

unread,
Apr 27, 2021, 11:11:49 AM4/27/21
to
In comp.lang.c++ Scott Lurndal <sc...@slp53.sl.home> wrote:
>>2. Prototypes? Who needs em? ("#include <stdio.h>"? What, why?)
>
> Prototypes were not used at the time, argument types were
> 'flexible', the programmer was expected to do the right thing.

I love the example given in the original 1978 The C Programming
Language, which demonstrates how non-int-returning standard
library functions ought to be used:

FILE *fopen(), *in;
in = fopen("name", "r");

Apparently back in those days <stdio.h> couldn't be expected
to declare the fopen function.

Joe Pfeiffer

unread,
Apr 27, 2021, 12:03:47 PM4/27/21
to
Juha Nieminen <nos...@thanks.invalid> writes:

> However, the undetermined size of basic types (especially before
> standardization) makes it more likely for C programs, especially ones
> written back then, to be non-portable. Sure, even back when K&R first
> "soft-standardized" the C language you shouldn't have assumed a certain
> size for any basic type.

Everybody "knew" longs were 32 bits, shorts and ints were both 16 bits,
chars were 8 bits and we coded under that assumption. Lots and lots of
code broke when we moved from 16 bit to 32 bit machines and the size of
an int changed as a result.

<snip>

> I suppose the conclusion is that maybe C code written in the 70's does
> compile and work today... but only if it was properly written. If it
> wasn't properly written, it's perfectly possible it won't work today,
> in a different architecture than it was originally written for
> (for example because it assumes the wrong size for 'int'.)

Correct (as those of us who are old enough learned in the 80s!).

Keith Thompson

unread,
Apr 27, 2021, 12:20:22 PM4/27/21
to
Joe Pfeiffer <pfei...@cs.nmsu.edu> writes:
> Juha Nieminen <nos...@thanks.invalid> writes:
>> However, the undetermined size of basic types (especially before
>> standardization) makes it more likely for C programs, especially ones
>> written back then, to be non-portable. Sure, even back when K&R first
>> "soft-standardized" the C language you shouldn't have assumed a certain
>> size for any basic type.
>
> Everybody "knew" longs were 32 bits, shorts and ints were both 16 bits,
> chars were 8 bits and we coded under that assumption. Lots and lots of
> code broke when we moved from 16 bit to 32 bit machines and the size of
> an int changed as a result.

Everybody who programmed on PDP-11s knew that. There were other
configurations at least as early as K&R1, 1978. But most programmers at
the time probably wouldn't have worked on more than one system.

> <snip>
>
>> I suppose the conclusion is that maybe C code written in the 70's does
>> compile and work today... but only if it was properly written. If it
>> wasn't properly written, it's perfectly possible it won't work today,
>> in a different architecture than it was originally written for
>> (for example because it assumes the wrong size for 'int'.)
>
> Correct (as those of us who are old enough learned in the 80s!).
>

jacobnavia

unread,
Apr 27, 2021, 12:31:32 PM4/27/21
to
Le 25/04/2021 à 15:19, Öö Tiib a écrit :
> I do not see that point ... What companies want to reduce C++ by using more COBOL,
> FORTRAN, Ada, D or C? Can anyone point at such companies? Any cite?

Look. C is the most popular programming language, according to the TIOBE
index

https://www.tiobe.com/tiobe-index/

SOME companies must be using it, don't you think so?

C++ comes 4th.

Scott Lurndal

unread,
Apr 27, 2021, 12:34:35 PM4/27/21
to
Joe Pfeiffer <pfei...@cs.nmsu.edu> writes:
>Juha Nieminen <nos...@thanks.invalid> writes:
>
>> However, the undetermined size of basic types (especially before
>> standardization) makes it more likely for C programs, especially ones
>> written back then, to be non-portable. Sure, even back when K&R first
>> "soft-standardized" the C language you shouldn't have assumed a certain
>> size for any basic type.
>
>Everybody "knew" longs were 32 bits, shorts and ints were both 16 bits,
>chars were 8 bits and we coded under that assumption. Lots and lots of
>code broke when we moved from 16 bit to 32 bit machines and the size of
>an int changed as a result.
>

And, unfortunately, the types often were not abstracted behind
a typedef. The process identifier, group identifier and user
identifiers were all 16-bit as well. This made for a painful
conversion to 32-bit PID/GID/UID in SVR4 (accompanied by abstract
types pid_t, gid_t, uid_t which, when used correctly, mean that
applications only needed a simple recompile to handle a change in width).

Joe Pfeiffer

unread,
Apr 27, 2021, 12:55:53 PM4/27/21
to
There were no typedefs back then (at least I don't remember them that
far back, and they don't appear in either Ritchie's reference manual or
Kernighan's tutorial).

Kaz Kylheku

unread,
Apr 27, 2021, 1:11:42 PM4/27/21
to
On 2021-04-27, Scott Lurndal <sc...@slp53.sl.home> wrote:
> Joe Pfeiffer <pfei...@cs.nmsu.edu> writes:
>>Juha Nieminen <nos...@thanks.invalid> writes:
>>
>>> However, the undetermined size of basic types (especially before
>>> standardization) makes it more likely for C programs, especially ones
>>> written back then, to be non-portable. Sure, even back when K&R first
>>> "soft-standardized" the C language you shouldn't have assumed a certain
>>> size for any basic type.
>>
>>Everybody "knew" longs were 32 bits, shorts and ints were both 16 bits,
>>chars were 8 bits and we coded under that assumption. Lots and lots of
>>code broke when we moved from 16 bit to 32 bit machines and the size of
>>an int changed as a result.
>>
>
> And, unfortunately, the types often were not abstracted behind
> a typedef.

Equally unfortunately, types were often abstracted behind bad typedefs.

:)

Kaz Kylheku

unread,
Apr 27, 2021, 1:13:40 PM4/27/21
to
On 2021-04-27, Joe Pfeiffer <pfei...@cs.nmsu.edu> wrote:
> sc...@slp53.sl.home (Scott Lurndal) writes:
>> And, unfortunately, the types often were not abstracted behind
>> a typedef. The process identifer, group identifier and user
>> identifers were all 16-bit as well. This made for a painful
>> conversion to 32-bit PID/GID/UID in SVR4 (accompanied by abstract
>> types pid_t, gid_t, uid_t which, when used correctly, mean that
>> applications only needed a simple recompile to handle a change in width).
>
> There were no typedefs back then (at least I don't remember them that
> far back, and they don't appear in either Ritchie's reference manual nor
> Kernighan's tutorial).

The preprocessor was used for type definitions; that's why FILE is capitalized.

#define FILE struct _iobuf

or something like that. The Indian Hill naming (typedefs are all caps)
that spilled into MS Windows is probably inspired by that.

Lew Pitcher

unread,
Apr 27, 2021, 1:29:05 PM4/27/21
to
FWIW, my copy of K&R (Copyright 1978) has a whole section (Chapter 6.9) on typedefs.
The "C Programming Language - Reference Manual" (by Dennis M. Ritchie) published as
Appendix A of that edition of K&R also includes a section (8.8 Typedef) on typedef,
along with the BNF for it in section 18.2 ("Declarations").


--
Lew Pitcher
"In Skills, We Trust"

Scott Lurndal

unread,
Apr 27, 2021, 3:27:23 PM4/27/21
to
At the time of the SVR4 conversion in 1989, typedef was part of the
language, and pid_t, gid_t, and uid_t were used subsequently for
those purposes respectively. In addition, all APIs were modified
to use those abstract types both in SVR4 and via the SVID into
POSIX.

Keith Thompson

unread,
Apr 27, 2021, 4:33:24 PM4/27/21
to
https://www.bell-labs.com/usr/dmr/www/cman.pdf (1975) doesn't mention typedef.
K&R1 (1978) does. That's as far as I've been able to narrow it down.

David Brown

unread,
Apr 28, 2021, 3:01:34 AM4/28/21
to
The TIOBE index is well-known for being no more than a vague indication
of the popularity of a language - it is certainly not the ranking some
people would believe.

But let us assume that C is significantly more used than C++ - it is not
an unreasonable assumption, regardless of TIOBE.

The question asked was "what companies want to reduce C++ by using more
C?" It was not "what companies use C?".

In my experience, companies and developers sometimes move from C to C++.
They rarely move back. But if you know differently, tell us.

Manfred

unread,
Apr 28, 2021, 8:54:19 AM4/28/21
to
Agreed that the TIOBE index should be taken with a grain of salt
(possibly even with a few pounds of it)
However, the /trend/ shows that C has been rising recently, unlike C++ -
while in the long run C++ shows a more significant decrease, vs C being
pretty much stable.
If this were a /usage/ index it would tell something about your point -
the fact still is that TIOBE is a popularity index, it measures how much
people /talk/ about some programming language, which is hardly coupled
with any actual professional usage, IMHO (I'm no expert in
marketing/social research).

Öö Tiib

unread,
Apr 28, 2021, 12:57:11 PM4/28/21
to
On Monday, 26 April 2021 at 06:34:52 UTC+3, wyn...@gmail.com wrote:
> Öö Tiib wrote:
> > But that wasn't even a question as I was particularly asking who
> > wants to migrate/translate/switch from C++ product/code base to any of those
> > now and why as I know outright totally none cases.
> Who? Who can say that specifically except themselves?

I specifically asked for a quote or citation from a concrete "themselves", as technology
companies say this kind of thing (like their trends in choice of technology stack)
about themselves quite often. If it is something noteworthy like moving to C
or Fortran then I expect a similar amount of noise to what people make about
themselves (and their kids) being vegetarians. It is because everybody wants
to talk about themselves and to find and cooperate with others who share
their attitude.

Juha Nieminen

unread,
Apr 29, 2021, 12:47:38 AM4/29/21
to
Manfred <non...@add.invalid> wrote:
> Agreed that the TIOBE index should be taken with a grain of salt

Isn't it mainly based on how much people ask questions in forums about
the language in question?

If that's so, then maybe the index shouldn't be interpreted as how
popular the language is, but how many questions it raises when people
try to use it.

Paavo Helde

unread,
Apr 29, 2021, 4:43:15 AM4/29/21
to
Some of my recent programming related google searches:

man recv
man send
man setsockopt
man wcsncmp
man fileno
man ftello64
man fabs
man scanf
std::deque
c++ inline variable

So I can easily see how someone might mistakenly consider me a C programmer.

One of my frequent questions about C functions is what the correct
include header is, these are sometimes quite chaotic, but one must get
them correct when e.g. moving around some existing code.

wij

unread,
Apr 29, 2021, 1:56:42 PM4/29/21
to
Many 'larger' companies in my area are production companies.
If C++ is listed in the job requirements, it is often associated with database,
web, HMI, AI, motor, vehicle, ..., for hardware verification/simulation/analysis.
Companies like these would rarely announce changes to their tools or
internal affairs intentionally.

E.g. Garmin (because the job requirement is in English, and is typical form)
wants C/C++ people.

1. Good understanding in C/C++ programming.
2. Fitness GPS product design – Running Wearables, cycling products and wellness products.
3. Good English communication ability to communicate with foreign engineers.

Will be a plus if have experience about:
1. Computer architecture and ARM Compiler.
2. Have experience about embedded system or RTOS.
3. Knowledge about how to use logic analyzer and Jtag ICE.
4. Other OS related knowledge.
----------------------------
In this case, a normal C++ applicant would have a headache with the ARM compiler, RTOS, ICE,
and hardware things. And "C/C++" is a general term.
I cannot think of any reason why a company like this would say anything too
specific about their production tools.

Christian Gollwitzer

unread,
Apr 29, 2021, 5:04:37 PM4/29/21
to
Am 29.04.21 um 10:42 schrieb Paavo Helde:
> 29.04.2021 07:47 Juha Nieminen kirjutas:
>> Manfred <non...@add.invalid> wrote:
>>> Agreed that the TIOBE index should be taken with a grain of salt
>>
>> Isn't it mainly based on how much people ask questions in forums about
>> the language in question?
>>
>> If that's so, then maybe the index shouldn't be interpreted as how
>> popular the language is, but how many questions it raises when people
>> try to use it.
>
> Some of my recent programming related google searches:
>
> man recv
> man send
> man setsockopt
> man wcsncmp
> man fileno
> man ftello64
> man fabs
> man scanf
> std::deque
> c++ inline variable
>
> So I can easily see how someone might mistakenly consider me a C
> programmer.

This is not how TIOBE works. They run searches on Google and other
search engines with the name of the programming language and count the hits:
https://www.tiobe.com/tiobe-index/programming-languages-definition/

If you run a google search yourself for scanf, that doesn't influence
TIOBE at all, not even if you post a question about scanf to
stackoverflow, unless you (or someone else) also mentions "C" or "C++"
on the same page.

So yes, it is true, the more questions a language raises, the more
pages you'll find referencing it, but also the more people use it, the
more questions there will be.... it is definitely not a tool to run a
competition between the second and third place - that is rather
arbitrary. But you can be sure that a language on the 25th place
(Prolog) is used less than one from the 3rd place (Python) - though in
this realm, you'll find languages like Bash at the 42nd place. Bash is
definitely a language without which the world would be a different
place, yet people don't write that many LOC in it and therefore pose
fewer questions - hence, TIOBE place doesn't equal importance.

Concerning C and C++, the situation is especially bad, because many
search engines cannot really distinguish between those two, and users
don't always use the correct term either - some refer to C++ simply as C
(as opposed to Python, e.g.). Therefore, TIOBE tells practically nothing
about whether C or C++ is more commonly used.

A similar ranking with maybe more sound methodology is this:
https://madnight.github.io/githut/#/pull_requests/2021/1

It's counting the number of pull requests on Github. I'm inclined to
believe this more than TIOBE overall. But that is only my gut feeling.

Christian
0 new messages