Structure clash - problem statement & supporting code

Alf P. Steinbach

unread,

Nov 9, 2015, 9:58:05 AM11/9/15

to

From somewhere on the net:

-------------------------------------------------------------------------
Peter Naur's "Telegram Problem"

1 "Write a program that takes a number w, then accepts lines of text
and outputs lines of text, where the output lines have as many words as
possible but are never longer than w characters. Words may not be split,
but you may assume that no single word is too long for a line."
2 Originally described in 197x by Peter Naur, 2005 Turing Award Winner.
3 This is essentially word-wrapping within a paragraph of a text
editor. "As such, it is a classic ObjectOriented problem, used as an
example in the Design Patterns Book."
4 It is surprisingly difficult to specify correctly, and to design a
correct program in Java or C++.
5 Do a requirement analysis. E.g.: What assumptions must we make so
that this problem is doable?
-------------------------------------------------------------------------

I haven't the foggiest idea about 5, and I don't see why 4, and 3 is
pretty baffling to me, and regarding 2 I think I first saw this problem
in a COBOL course where it was used to illustrate Jackson Structured
Programming, but, I've coded up three C++ solutions which I'll post, I
think in separate threads, so that you sharks can rip them to pieces. :)

All three solutions use the following support code, coded up for the
exercise, which I KNOW has its problems, and may not even be correct.

So ;-)

<file "cppx.hpp">
#pragma once

#include <assert.h> // assert
#include <condition_variable> // std::condition_variable
#include <deque> // std::deque
#include <iostream>
#include <mutex> // std::mutex, std::unique_lock
#include <stddef.h> // ptrdiff_t
#include <stdexcept> // std::runtime_error
#include <string> // std::string, std::wstring,
std::getline
#include <utility> // std::move
#include <vector> // std::vector

namespace cppx {
using std::condition_variable;
using std::deque;
using std::move;
using std::mutex;
using std::runtime_error;
using std::string;
using std::unique_lock;
using std::vector;
using std::wcin;
using std::wistream;
using std::wstring;

using Byte_string = string;
using Size = ptrdiff_t;
using String = wstring;

auto hopefully( bool const condition )
-> bool
{ return condition; }

auto fail( Byte_string const& s )
-> bool
{ throw runtime_error( s ); }

template< class C >
auto n_items( C const& c )
-> Size
{ return c.size(); }

template< class Item >
class Boxed
{
private:
vector<Item> vec_;

public:
auto is_empty() const
-> bool
{ return n_items( vec_ ) == 0; }

explicit
operator bool() const { return !is_empty(); }

auto item()
-> Item&
{ return vec_.at( 0 ); }

auto item() const
-> Item const&
{ return vec_.at( 0 ); }

auto operator=( Boxed&& other )
-> Boxed&
{
vec_ = move( other.vec_ );
return *this;
}

static // Named creation for readability.
auto empty() -> Boxed { return {}; }

Boxed(){}

explicit
Boxed( Item o )
{
vec_.push_back( move( o ) );
}

Boxed( Boxed&& other )
: vec_( move( other.vec_ ) )
{}
};

auto input_line( wistream& stream = wcin )
-> Boxed<String>
{
String line;
if( getline( stream, line ) )
{
return Boxed<String>( move( line ) );
}
return Boxed<String>::empty();
}

class Non_copyable
{
private:
Non_copyable( Non_copyable const& ) = delete;
auto operator=( Non_copyable const& ) -> Non_copyable& = delete;
public:
auto operator=( Non_copyable&& ) -> Non_copyable& = default;
Non_copyable() = default;
Non_copyable( Non_copyable&& ) = default;
};

class Non_movable
{
private:
Non_movable( Non_movable&& ) = delete;
auto operator=( Non_movable&& ) -> Non_movable = delete;
public:
auto operator=( Non_movable const& ) -> Non_movable& = default;
Non_movable() = default;
Non_movable( Non_movable const& ) = default;
};

struct ConditionVariable
: public Non_copyable
, public Non_movable
{
private:
mutex mx_;
condition_variable cv_;
int n_signals_;
int n_waits_;

public:
void signal()
{
unique_lock<mutex> lock( mx_ );
++n_signals_;
if( n_waits_ > 0 )
{
cv_.notify_one();
}
}

void wait()
{
unique_lock<mutex> lock( mx_ );
if( n_signals_ == 0 )
{
++n_waits_;
cv_.wait( lock, [&]() -> bool { return n_signals_ > 0; } );
--n_waits_;
}
--n_signals_;
}

ConditionVariable( int const initial_signals_count = 0 )
: n_signals_( initial_signals_count )
, n_waits_( 0 )
{}
};

// Move-construction of Item must be non-throwing.
template< class Item, Size capacity = 1 >
class Pipe
{
private:
deque<Item> q_;
mutex q_mx_;
ConditionVariable q_room_;
ConditionVariable q_items_;

public:
void put( Item item )
{
q_room_.wait();
{
unique_lock<mutex> lock( q_mx_ );
q_.push_back( move( item ) );
assert( n_items( q_ ) <= capacity );
}
q_items_.signal();
}

auto get()
-> Item
{
q_items_.wait();
{
unique_lock<mutex> lock( q_mx_ );
assert( n_items( q_ ) > 0 );
Item result( move( q_.front() ) );
q_.pop_front();
lock.unlock(); // Unclean.

q_room_.signal();
return result;
}
}

Pipe()
: q_room_( capacity )
, q_items_()
{
// q_.reserve( capacity ); // There is no such
operation :(
}
};

} // namespace cppx
</file>

Scott Lurndal

unread,

Nov 9, 2015, 11:12:56 AM11/9/15

to

"Alf P. Steinbach" <alf.p.stein...@gmail.com> writes:
> From somewhere on the net:
>
>-------------------------------------------------------------------------
>Peter Naur's "Telegram Problem"
>
>1 "Write a program that takes a number w, then accepts lines of text
>and outputs lines of text, where the output lines have as many words as
>possible but are never longer than w characters. Words may not be split,
>but you may assume that no single word is too long for a line."

#!/bin/ksh
cat - | fmt -w ${1} -g 97

Alf P. Steinbach

unread,

Nov 9, 2015, 11:07:31 PM11/9/15

to

On 11/9/2015 3:57 PM, Alf P. Steinbach wrote:
> [snip]

>
> auto hopefully( bool const condition )
> -> bool
> { return condition; }
>

This and other functions should be `inline`. I just noticed. This
resulted from writing the code in an implementation file first, and
moving it wholesale to a header when I decided to do more variations of
the problem.

Cheers,

- Alf

Öö Tiib

unread,

Nov 10, 2015, 2:31:26 AM11/10/15

to

It can be that you have already elaborated purpose of that 'hopefully'
and 'fail' in text that I haven't read. If so then please post a
link.

Otherwise ... my impression is that these are made for indicating error
handling syntax. That syntax:

if ( !condition )
throw runtime_error( "condition not met" );

Replaced with that syntax:

hopefully( condition )
|| fail( "condition not met" );

I can't see a benefit. Typical project-specific generic error handling
doesn't use neither syntax. Most often I have seen something like
'R_VERIFY(condition,text);' where 'R_VERIFY' is some project-specific
macro name.

Juha Nieminen

unread,

Nov 10, 2015, 4:38:48 AM11/10/15

to

Alf P. Steinbach <alf.p.stein...@gmail.com> wrote:
> using std::condition_variable;
> using std::deque;
> using std::move;
> using std::mutex;
> using std::runtime_error;
> using std::string;
> using std::unique_lock;
> using std::vector;
> using std::wcin;
> using std::wistream;
> using std::wstring;

Are you serious?

Just use those prefixes. It's not that bad. On the contrary, they will make
your code more readable (for a very similar reason as why code coloring
does.)

--- news://freenews.netfront.net/ - complaints: ne...@netfront.net ---

Alf P. Steinbach

unread,

Nov 10, 2015, 6:40:10 AM11/10/15

to

On 11/10/2015 8:30 AM, Öö Tiib wrote:
> [snip]

> That syntax:
>
> if ( !condition )
> throw runtime_error( "condition not met" );
>
> Replaced with that syntax:
>
> hopefully( condition )
> || fail( "condition not met" );
>
> I can't see a benefit.

I like it. :)

Well, first, when the function call whose result should be tested,
produces a convertible-to-boolean value, one avoids using an
if-condition with side effects or introducing a logically redundant
variable to hold the function result.

bool const success = foo();
if( not success )
{
throw runtime_error( "foo: failed" );
}

or

if( not foo() )
{
throw runtime_error( "foo: failed" );
}

versus just

foo()
|| fail( "foo: failed" );

Second, it makes failure generation stand out visually. Even more so
when one adopts a convention of using "and", "or" and "not" for ordinary
boolean expressions that produce not ignored values.

Third, with just a little more machinery it makes it easy to pick up the
function result value and apply any condition to it, or embed in the
failure message, whatever.

So, the "hopefully" function is simply in support of "fail", to make it
possible and practical to use "fail" also where the function is not
producing a convertible-to-boolean, and more in general, to use "fail"
in all kinds of situations without introducing more advanced machinery.

I.e. a single convention (I like single conventions).

> Typical project-specific generic error handling
> doesn't use neither syntax. Most often I have seen something like
> 'R_VERIFY(condition,text);' where 'R_VERIFY' is some project-specific
> macro name.

Uhm, proliferation of macros = evil, IMHO :)

But I think a /single macro/ to pick up filename, function name and line
number can be a good idea.

Such a macro can be used with any scheme, given proper support, and then
few if any will wonder exactly what it does, for it will be known and
familiar to all programmers on the project. I believe. Uh, hope! :)

Cheers,

- Alf

Alf P. Steinbach

unread,

Nov 10, 2015, 6:58:55 AM11/10/15

to

On 11/10/2015 11:56 AM, Stefan Ram wrote:
> "Alf P. Steinbach" <alf.p.stein...@gmail.com> writes:

>> 4 It is surprisingly difficult to specify correctly, and to design a
>> correct program in Java or C++.
>

> For an algorithm by Knuth load
>
> ftp.cs.stanford.edu/tex/tex/tex.web
>
> (via HTTP) and then search for
>
> Breaking paragraphs into lines

Thanks for the link. ITYM "via FTP", but anyway. Is this part of Knuth's
self-documented code initiative?

It's doesn't look very clear to me, but then, TeX never did look clear
to me! :)

Anyway I don't share the lecturer's opinion about the difficulty of the
problem (by modern standards it's pretty basic); I just quoted it.

* * *

I think the task is interesting for a very different reason, namely that
there are at least 3 very different approaches to it, corresponding to
what one thinks is the natural top level goal:

* process a bunch of input lines, or
* generate a bunch of output lines, or
* looking at it as a data flow problem.

The latter can be expressed with OO techniques but is more naturally
expressed as parallel processing, either with threads or with
coroutines. I chose threads since the C++ standard library, very
unfortunately, doesn't support coroutines. I believe a coroutine
solution would be much simpler since one avoids the synchronization.

Cheers,

- Alf

Öö Tiib

unread,

Nov 10, 2015, 12:21:08 PM11/10/15

to

On Tuesday, 10 November 2015 13:40:10 UTC+2, Alf P. Steinbach wrote:
> On 11/10/2015 8:30 AM, Öö Tiib wrote:
> > [snip]
> > That syntax:
> >
> > if ( !condition )
> > throw runtime_error( "condition not met" );
> >
> > Replaced with that syntax:
> >
> > hopefully( condition )
> > || fail( "condition not met" );
> >
> > I can't see a benefit.
>
> I like it. :)

That I ... already assumed. :)

>
> Well, first, when the function call whose result should be tested,
> produces a convertible-to-boolean value, one avoids using an
> if-condition with side effects or introducing a logically redundant
> variable to hold the function result.
>
> bool const success = foo();
> if( not success )
> {
> throw runtime_error( "foo: failed" );
> }
>
> or
>
> if( not foo() )
> {
> throw runtime_error( "foo: failed" );
> }
>
> versus just
>
> foo()
> || fail( "foo: failed" );

IOW, it can be used as more terse compared to explicit 'throw'. That is
always good reason.

>
> Second, it makes failure generation stand out visually. Even more so
> when one adopts a convention of using "and", "or" and "not" for ordinary
> boolean expressions that produce not ignored values.

Yes, it stands out from 'if'-s.

>
> Third, with just a little more machinery it makes it easy to pick up the
> function result value and apply any condition to it, or embed in the
> failure message, whatever.

Doesn't such a machinery result with loss of terseness?

>
> So, the "hopefully" function is simply in support of "fail", to make it
> possible and practical to use "fail" also where the function is not
> producing a convertible-to-boolean, and more in general, to use "fail"
> in all kinds of situations without introducing more advanced machinery.
>
> I.e. a single convention (I like single conventions).

That again is good reason.

>
> > Typical project-specific generic error handling
> > doesn't use neither syntax. Most often I have seen something like
> > 'R_VERIFY(condition,text);' where 'R_VERIFY' is some project-specific
> > macro name.
>
> Uhm, proliferation of macros = evil, IMHO :)
>
> But I think a /single macro/ to pick up filename, function name and line
> number can be a good idea.

Yes, such meta-information is often desired in failures and diagnostic traces
and it isn't available to functions. Some of meta-information can be added
by replacing 'fail( text )' with 'FAIL( text )' that gathers it.

Macro with two parameters covers also all your reasons. Macro can be single
convention. It is relatively terse. It is idiomatically capitalized so it also stands
out in C++ where we generally avoid macros. Benefit of macro is that some
projects may want also that 'condition' stringized. I'm unsure about various
sorts of other "machinery" (fit well into macro).

>
> Such a macro can be used with any scheme, given proper support, and then
> few if any will wonder exactly what it does, for it will be known and
> familiar to all programmers on the project. I believe. Uh, hope! :)

What failure generation does may change during project or depend on target
platform or build reason (unit test/debug/release). I need to dig around in
codebases and think about it a bit but I hope that I see your point now, thank
you.

Christian Gollwitzer

unread,

Nov 10, 2015, 1:10:17 PM11/10/15

to

Am 09.11.15 um 15:57 schrieb Alf P. Steinbach:

> From somewhere on the net:
>
> -------------------------------------------------------------------------
> Peter Naur's "Telegram Problem"
>
> 1 "Write a program that takes a number w, then accepts lines of text
> and outputs lines of text, where the output lines have as many words as
> possible but are never longer than w characters. Words may not be split,
> but you may assume that no single word is too long for a line."
> 2 Originally described in 197x by Peter Naur, 2005 Turing Award Winner.
> 3 This is essentially word-wrapping within a paragraph of a text
> editor. "As such, it is a classic ObjectOriented problem, used as an
> example in the Design Patterns Book."

Design Patterns? WTF?

> 4 It is surprisingly difficult to specify correctly, and to design a
> correct program in Java or C++.

???

> All three solutions use the following support code, coded up for the
> exercise, which I KNOW has its problems, and may not even be correct.
>
> So ;-)
>
> <file "cppx.hpp">

> [...]

What an utter complicated mess. Even if C++ is not as good at string
processing as, say, awk, perl or Tcl, I cannot believe that you need
coroutines or threads (WTF? Mutexes???) for such a simple problem. What
is wrong with the following solution, implemented as a Unix filter:

Apfelkiste:Tests chris$ cat wordwrap.cpp
#include <iostream>
#include <string>

int main() {
const int wraplength=80;

std::string outputline;
while (true) {
std::string word;
std::cin >> word;

if (!std::cin) { break; }

if (outputline.length()+1+word.length() > wraplength) {
std::cout<<outputline<<std::endl;
outputline="";
}

if (outputline == "") {
outputline = word;
} else {
outputline = outputline + " " + word;
}
}

std::cout<<outputline<<std::endl;

return 0;
}

Apfelkiste:Tests chris$ g++ wordwrap.cpp
Apfelkiste:Tests chris$ ./a.out < wordwrap.cpp
#include <iostream> #include <string> int main() { const int wraplength=80;
std::string outputline; while (true) { std::string word; std::cin >>
word; if
(!std::cin) { break; } if (outputline.length()+1+word.length() >
wraplength) {
std::cout<<outputline<<std::endl; outputline=""; } if (outputline == "") {
outputline = word; } else { outputline = outputline + " " + word; } }
std::cout<<outputline<<std::endl; return 0; }
Apfelkiste:Tests chris$

Christian

Alf P. Steinbach

unread,

Nov 10, 2015, 5:00:10 PM11/10/15

to

On 11/10/2015 7:09 PM, Christian Gollwitzer wrote:
> Am 09.11.15 um 15:57 schrieb Alf P. Steinbach:
>> From somewhere on the net:
>>
>> -------------------------------------------------------------------------
>> Peter Naur's "Telegram Problem"
>>
>> 1 "Write a program that takes a number w, then accepts lines of text
>> and outputs lines of text, where the output lines have as many words as
>> possible but are never longer than w characters. Words may not be split,
>> but you may assume that no single word is too long for a line."
>> 2 Originally described in 197x by Peter Naur, 2005 Turing Award
>> Winner.
>> 3 This is essentially word-wrapping within a paragraph of a text
>> editor. "As such, it is a classic ObjectOriented problem, used as an
>> example in the Design Patterns Book."
>
> Design Patterns? WTF?
>
>
>> 4 It is surprisingly difficult to specify correctly, and to design a
>> correct program in Java or C++.
>
> ???
>
>> All three solutions use the following support code, coded up for the
>> exercise, which I KNOW has its problems, and may not even be correct.
>>
>> So ;-)
>>
>> <file "cppx.hpp">
>> [...]
>
> What an utter complicated mess.

It would be nice if you could supply some of your reasons for landing on
this conclusion.

Most of that header is very simple stuff as I see it.

The "Pipe" class is possibly more complicated than strictly necessary.

> Even if C++ is not as good at string
> processing as, say, awk, perl or Tcl, I cannot believe that you need
> coroutines or threads (WTF? Mutexes???) for such a simple problem.

You are correct sir. Two of the three posted solutions did not use that.

> What is wrong with the following solution, implemented as a Unix filter:

Not so much if it only targets Unix-land, with an assumption of ASCII
encoding.

For Windows you'd better use wide strings and wide streams. This has to
do with recognition of whitespace as such. Unicode has some 25
characters designated as whitespace. I don't know how well the standard
library supports that, but it can't support it with SBCS encoding.

Or, one may simply roll that problem up in one's definition of "word". ;-)

For an empty input this code outputs a blank line, but also that's
ultimately a matter of specification.

Also it has a hardcoded max line length instead of the problem
statement's "takes a number w", which is not a matter of specification,
but is trivial to fix.

Apparently the above code strives for shortness, leveraging the
word-splitting ability of the input stream, and in general the C++
standard library.

I didn't cover that angle because it was not so interesting for what I
was investigating.

But I think the following is shorter, while clear, don't you agree?

#include <iostream>
#include <string>
using namespace std;

auto main( int, char** args ) -> int
{
int const max = stoi( args[1] );
wstring word, separator, line;
while( wcin >> word ) {
if( int(line.length() + 1 + word.length()) > max ) {
wcout << line << endl;
line.clear();
}
line += (line.length()? L" " : L"") + word;
}
wcout << line << endl;
}

In case you'd think there a missing "return": nope.

Cheers & hth.,

- Alf (dang, should really have posted short code also, thanks!)

cda...@gmail.com

unread,

Nov 10, 2015, 11:59:52 PM11/10/15

to

I think you need to lay off the ganja when coding.

Alf P. Steinbach

unread,

Nov 11, 2015, 1:49:00 AM11/11/15

to

On 11/11/2015 5:59 AM, cda...@gmail.com wrote:
> I think you need to lay off the ganja when coding.

Usenet (this is Usenet) is a distributed service, with no guaranteed
delivery and where articles can be received out of order.

Therefore it's important that you QUOTE what you're referring to.

For those who don't see that context, Chad is referring to this code:

#include <iostream>
#include <string>
using namespace std;

auto main( int, char** args ) -> int
{
int const max = stoi( args[1] );
wstring word, separator, line;
while( wcin >> word ) {
if( int(line.length() + 1 + word.length()) > max ) {
wcout << line << endl;
line.clear();
}
line += (line.length()? L" " : L"") + word;
}
wcout << line << endl;
}

So, Chad, can you elaborate on why you think this is coded under the
influence of marihuana? What in particular strikes you as odd? Or what
is it that appears difficult to understand?

Apparently you're a novice C++ programmer, having started some years ago
with Python and Ruby. Remember that C++ is not just a different language
than Python or Ruby, but is a different /kind/ of language. We can help
you learn, in fact it's what this group is all about, but to do that we
need to know more than just that you find some code odd.

Ian Collins

unread,

Nov 11, 2015, 2:22:15 AM11/11/15

to

Alf P. Steinbach wrote:

> Apparently the above code strives for shortness, leveraging the
> word-splitting ability of the input stream, and in general the C++
> standard library.
>
> I didn't cover that angle because it was not so interesting for what I
> was investigating.
>
> But I think the following is shorter, while clear, don't you agree?
>
> #include <iostream>
> #include <string>
> using namespace std;
>
> auto main( int, char** args ) -> int
> {
> int const max = stoi( args[1] );

For on who likes to (mis)use auto, there's a lost opportunity...

--
Ian Collins

Alf P. Steinbach

unread,

Nov 11, 2015, 2:54:34 AM11/11/15

to

If the type had been specified explicitly in the initializer, then I
would have used `auto` in order to have a single uniform convention for
that and in order to minimize redundancy, the DRY principle. E.g.

auto const p = reinterpret_cast<Foobar*>( zzz );

Likewise if the specific type had been irrelevant, e.g.

for( auto it = c.begin(); it != c.end(); ++it ) { ... }

I try to use `auto` for all function declarations, but I discovered that
in some cases current compilers don't accept that unless one uses a
logically redundant type naming. E.g.

std::function< auto()->int > f( foo );

may not necessarily compile with g++ -std=c++14 or Visual C++ 2015 or
later, then requiring an accommodate-the-silly-compiler rewrite like

using Int_func = auto() -> int;
std::function< Int_func > f( foo );

and then one may feel that that's so absurd & verbose that one writes

std::function<int()> f( foo );

Cheers!,

- Alf

David Brown

unread,

Nov 11, 2015, 4:19:38 AM11/11/15

to

On 10/11/15 12:58, Alf P. Steinbach wrote:
> On 11/10/2015 11:56 AM, Stefan Ram wrote:
>> "Alf P. Steinbach" <alf.p.stein...@gmail.com> writes:
>>> 4 It is surprisingly difficult to specify correctly, and to design a
>>> correct program in Java or C++.
>>
>> For an algorithm by Knuth load
>>
>> ftp.cs.stanford.edu/tex/tex/tex.web
>>
>> (via HTTP) and then search for
>>
>> Breaking paragraphs into lines
>
> Thanks for the link. ITYM "via FTP", but anyway. Is this part of Knuth's
> self-documented code initiative?
>
> It's doesn't look very clear to me, but then, TeX never did look clear
> to me! :)
>

One issue here is that TeX's paragraph algorithm is a great deal more
sophisticated than the simple one described in this problem. It covers
hyphenation, stretchable spaces (with different degrees of stretchiness
for different types of space - the space between letters has very little
stretchiness, while the the space after a period has lots), configurable
parameters, good/bad break hints, and so on.

Juha Nieminen

unread,

Nov 11, 2015, 4:35:48 AM11/11/15

to

Christian Gollwitzer <auri...@gmx.de> wrote:
> What an utter complicated mess. Even if C++ is not as good at string
> processing as, say, awk, perl or Tcl, I cannot believe that you need
> coroutines or threads (WTF? Mutexes???) for such a simple problem.

Clearly you haven't been programming enterprise solutions enough.

Rosario19

unread,

Nov 11, 2015, 2:20:58 PM11/11/15

to

this is my try. i'm not so sicure it is right...

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>

#define u32 unsigned
#define i32 int
#define u8 unsigned char

#define R return
#define P printf
#define F for
#define GG(a,b) if(a)goto b
#define G goto

int main(int c, char** a)
{u8 *buf;
u32 w, i, j, iWrd;
i32 k, r, v;

if( c>1&&a[1]&&isdigit((u8) a[1][0]) )
w=atoi(a[1]);
else w=80;
if(w<1||w>0xFFF0)
{P("Argument number out of range:\n");
l0: P("Usage: thisProg number <filein >fileout\n");
P("Usage: thisProg <filein >fileout\n Number=80\n");
l1: R 1; // wrong
}
buf=(u8*) malloc(w+8);
if(buf==0){P("Memory insufficient\n"); G l0;}
F(i=0, iWrd=0; ; )
{k=getchar();
if(k==-1||!(v=isalpha(k))) // EOF or ERROR or not alpha
{if(iWrd){//Svuota buffer word;
if( (iWrd+i)> w ) // aaaa==4
{P("\n"); i=0;}
F(j=0; j<iWrd; ++j)
{r=putchar(buf[j]); ++i;
GG(r==-1||i>w, l1);
}
iWrd=0;
}
}
GG(k==-1,l3); // exit ok
if(v){// scrivi nel buffer
buf[iWrd++]=(u8)k;
GG(iWrd>w, l1);
}
else {// scrivi nel output
if(i>=w)
{if(k!='\n') P("\n");
i=0;
}
// \13\10=\n conta 0
r=putchar(k);
if(k!=13) ++i; // il 13 non conta
if(k=='\n') i=0;

GG(r==-1, l1);
}
}
l3:
if(ferror(stdin)||ferror(stdout)) G l1;
else R 0; //ok
}

Rosario19

unread,

Nov 11, 2015, 2:49:10 PM11/11/15

to

On Wed, 11 Nov 2015 20:20:44 +0100, Rosario19 wrote:

> F(j=0; j<iWrd; ++j)
> {r=putchar(buf[j]); ++i;
> GG(r==-1||i>w, l1);

pheraps it is better r==EOF and not r==-1
but here i remember EOF is -1

Rosario19

unread,

Nov 11, 2015, 3:20:37 PM11/11/15

to

On Wed, 11 Nov 2015 20:48:58 +0100, Rosario19 wrote:
>> F(j=0; j<iWrd; ++j)
>> {r=putchar(buf[j]); ++i;
>> GG(r==-1||i>w, l1);
>
>pheraps it is better r==EOF and not r==-1
>but here i remember EOF is -1

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>

#define u32 unsigned
#define i32 int
#define u8 unsigned char

#define R return
#define P printf
#define F for
#define GG(a,b) if(a)goto b
#define G goto

int main(int c, char** a)

{u8 *buf=0;

u32 w, i, j, iWrd;
i32 k, r, v;

if( c>1&&a[1]&&isdigit((u8) a[1][0]) )
w=atoi(a[1]);
else w=80;
if(w<1||w>0xFFF0)
{P("Argument number out of range:\n");
l0: P("Usage: thisProg number <filein >fileout\n");
P("Usage: thisProg <filein >fileout\n Number=80\n");

l1: free(buf);

R 1; // wrong
}
buf=(u8*) malloc(w+8);
if(buf==0){P("Memory insufficient\n"); G l0;}
F(i=0, iWrd=0; ; )
{k=getchar();

if(k==EOF||!(v=isalpha(k))) // EOF or ERROR or not alpha

{if(iWrd){//Svuota buffer word;
if( (iWrd+i)> w ) // aaaa==4
{P("\n"); i=0;}

F(j=0; j<iWrd; ++j)
{r=putchar(buf[j]); ++i;

GG(r==EOF||i>w, l1);
}
iWrd=0;
}
}
GG(k==EOF,l3); // exit ok

if(v){// scrivi nel buffer
buf[iWrd++]=(u8)k;
GG(iWrd>w, l1);
}
else {// scrivi nel output
if(i>=w)
{if(k!='\n') P("\n");
i=0;
}
// \13\10=\n conta 0
r=putchar(k);
if(k!=13) ++i; // il 13 non conta
if(k=='\n') i=0;

GG(r==EOF, l1);
}
}
l3:
GG( ferror(stdin)||ferror(stdout), l1);
free(buf);
R 0; //ok
}

Rosario19

unread,

Nov 11, 2015, 3:22:45 PM11/11/15

to

On Wed, 11 Nov 2015 21:20:22 +0100, Rosario19 wrote:

>#define GG(a,b) if(a)goto b
>#define G goto

only one macro for goto is not sufficient...

Rosario19

unread,

Nov 11, 2015, 4:00:10 PM11/11/15

to

your programs are all
+ wrong than my
because variable in input have not a limit...

for example "w" in a computer with finite resource
can not to be in a range 1..+oo

Christian Gollwitzer

unread,

Nov 11, 2015, 4:54:32 PM11/11/15

to

Am 10.11.15 um 22:59 schrieb Alf P. Steinbach:

> On 11/10/2015 7:09 PM, Christian Gollwitzer wrote:
>> Am 09.11.15 um 15:57 schrieb Alf P. Steinbach:

>>> <file "cppx.hpp">
>>> [...]
>>
>> What an utter complicated mess.
>
> It would be nice if you could supply some of your reasons for landing on
> this conclusion.

I am sorry for badmouthing your solution, which is perfectly fine (I
asssume; didn't have the motivation to work through it in detail). Only
that it solves a different task, that was not apparent to me from the
problem description. I can see lots of applications for pipelines in
programming. After looking up the telegram problem, I could understand
why you would solve it using your own implementation of pipes. This is
the problem with homework-style problems: there is often a much simpler
solution, which leaves aside the task which the problem is meant to
demonstrate. Implemening pipes in a langugage with coroutines is indeed
a very simple thing, in fact Python uses generators for almost all for
loops today. Ignoring the "use pipes" requirement, I am still failing to
see how this is surprisingly difficult.

I am not even convinced that the streaming is part of the specification.
Loading all of the file into memory and working from there, which leaves
aside all the buffering issues, would also be a correct solution,
wouldn't it?

>> What is wrong with the following solution, implemented as a Unix filter:
>
> Not so much if it only targets Unix-land, with an assumption of ASCII
> encoding.
> For Windows you'd better use wide strings and wide streams. This has to
> do with recognition of whitespace as such. Unicode has some 25
> characters designated as whitespace. I don't know how well the standard
> library supports that, but it can't support it with SBCS encoding.
>
> Or, one may simply roll that problem up in one's definition of "word". ;-)

That applies to your code also: you use operator >> to tokenize the
input. AFAIK there is no way to influence operator >> how it should
treat whitespace. In unicode this is a non-trivial problem, since there
exist non-breaking spaces and spaces with zero-width, direction-changer
and similar complicated stuff.

> For an empty input this code outputs a blank line, but also that's
> ultimately a matter of specification.

Correct. This is underspecified.

> Also it has a hardcoded max line length instead of the problem
> statement's "takes a number w", which is not a matter of specification,
> but is trivial to fix.
>

Yes, but your fix introduces many bugs.

> int const max = stoi( args[1] );

What happens if you pass negative number? What happens if you pass text?
What happens if you pass more than two arguments or no argument?
In the latter case I think you are lucky that the code does not invoke
UB. argv[argc] contains a single NULL byte, i.e. an empty string IIRC.

> Apparently the above code strives for shortness, leveraging the
> word-splitting ability of the input stream, and in general the C++
> standard library.
>
> I didn't cover that angle because it was not so interesting for what I
> was investigating.
>
> But I think the following is shorter, while clear, don't you agree?

This code is almost the same as mine;

> #include <iostream>
> #include <string>
> using namespace std;
>
> auto main( int, char** args ) -> int
> {
> int const max = stoi( args[1] );

see above
> wstring word, separator, line;
separator is not used, is it?

> while( wcin >> word ) {
> if( int(line.length() + 1 + word.length()) > max ) {
> wcout << line << endl;
> line.clear();
> }
> line += (line.length()? L" " : L"") + word;

I don't like treating integers as boolean values; I would rather write
line.length() > 0, assuming that you did not do that just for brevity?

> }
> wcout << line << endl;
> }
>
> In case you'd think there a missing "return": nope.

I don't understand why you can leave it off; again I hope you haven't
the false impression that I was code-golfing. I believe that this
solution, at least to the problem as stated, is easier to understand
than any of your pipeline versions.

Christian

Christian Gollwitzer

unread,

Nov 11, 2015, 4:59:56 PM11/11/15

to

Am 11.11.15 um 10:35 schrieb Juha Nieminen:

> Christian Gollwitzer <auri...@gmx.de> wrote:
>> What an utter complicated mess. Even if C++ is not as good at string
>> processing as, say, awk, perl or Tcl, I cannot believe that you need
>> coroutines or threads (WTF? Mutexes???) for such a simple problem.
>
> Clearly you haven't been programming enterprise solutions enough.
>

You are correct. I never did that.

http://www.ariel.com.au/jokes/The_Evolution_of_a_Programmer.html

Christian

asetof...@gmail.com

unread,

Nov 11, 2015, 5:00:48 PM11/11/15

to

Your solution has no tab as result
in output even if there could
be in input

asetof...@gmail.com

unread,

Nov 11, 2015, 5:11:38 PM11/11/15

to

Your Program is wrong
because words are alphanumeric+symbols+other
delimited only by spaces

but in my country words are
alphabetic only

Ian Collins

unread,

Nov 11, 2015, 5:19:34 PM11/11/15

to

asetof...@gmail.com wrote:
> Your Program is wrong

What program?

If you are going to continue to post, do it properly.

--
Ian Collins

Alf P. Steinbach

unread,

Nov 12, 2015, 3:07:51 AM11/12/15

to

On 11/11/2015 10:54 PM, Christian Gollwitzer wrote:
>
> [snip]

> After looking up the telegram problem, I could understand
> why you would solve it using your own implementation of pipes.

Well that's a difference between Q&A sites like SO, and discussion
groups like clc++: in the Q&A sites (like SO) there's usually LESS than
meets the eye, while in the discussion groups (like clc++) there's often
MORE than meets the eye.

Or at least it's so in the periods when the groups work as intended, and
for clc++ it was so originally.

Historically clc++ has had two long periods of extreme waywardness. The
first such period almost turned the group into a Windows programming
group, resulting in the creation of the moderated clc++m. In the second
such period, which apparently is just over, the group was dominated by
non-technical people; only Odin knows where they came from.

> [snip] Implemening pipes in a langugage with coroutines is indeed

> a very simple thing, in fact Python uses generators for almost all for
> loops today.

Well, Python probably has some coroutine facility among its included
batteries, but the generators are, as far as I know, all based on
/continuations/. The difference is that a continuation function yields
only in code within that function, so that it only needs a single stack
frame of state, and can be manually "inverted" (by the compiler) to be
expressed as a member function of an object with that state, plus just a
little more. In contrast, a coroutine can yield also within a function
that it calls, or anywhere, which means that it requires its own
full-blown stack: it's more heavy-weight, and a bit more general.

> Ignoring the "use pipes" requirement, I am still failing to
> see how this is surprisingly difficult.

Yes, as I mentioned in the original posting, I also fail to see any
difficulty. So then that's two. I think the lecturer blindly passed on
an evaluation from the 1950's or something.

> I am not even convinced that the streaming is part of the specification.
> Loading all of the file into memory and working from there, which leaves
> aside all the buffering issues, would also be a correct solution,
> wouldn't it?

That would probably be necessary for the more sophisticated approach of
trying to optimize some global measure of nice line-justification.

One problem with such an approach is that in a worst case editing can
cause rippling both ways, up and down, trough megabytes of text.

Two inter-related and more pressing problems: global justification can
be /unpredictable/ for the user, and it can be so complex that it
attracts bugs. For example, using Microsoft Word I sometimes struggle to
get it to adopt a sensible formatting of a paragraph. Sometimes I have
to insert a manual line break, which is very undesirable because it's
invisible in normal editing and then can wreak havoc with later edits,
especially automated ones such as global search and replace. And
regarding bugs, Word has a tendency to not handle page breaks correctly.
Again that requires manual formatting intervention. :(

> [snip]

> That applies to your code also: you use operator >> to tokenize the
> input. AFAIK there is no way to influence operator >> how it should
> treat whitespace. In unicode this is a non-trivial problem, since there
> exist non-breaking spaces and spaces with zero-width, direction-changer
> and similar complicated stuff.

Yeah. But for sure, using Unicode is necessary to do it at all. And in
Windows that means using wide strings and streams (with proper setup).

> [snip]

> but your fix introduces many bugs.
>
>> int const max = stoi( args[1] );
>
> What happens if you pass negative number? What happens if you pass text?
> What happens if you pass more than two arguments or no argument?
> In the latter case I think you are lucky that the code does not invoke UB.
> argv[argc] contains a single NULL byte, i.e. an empty string IIRC.

The code does indeed provoke UB if the program is run without an
argument. Such an invocation is a breach of /contract/. When the
contract is breached, anything can happen. This is not a bug. It's
ordinary good design, according to the principles of (1) clear contracts
and (2) not using time on functionality that may never be needed (Knuth
described how a very elaborate piece of code to handle a special case,
lay dormant for several years, and failed on first call).

The code expresses the contract very neatly. I chose to express just the
minimal possible contract compatible with the problem requirements.

There are several possible more elaborate contracts, e.g. guaranteed
exception for breach, but IMO this kind of usability is irrelevant.

> [snip]

> This code is almost the same as mine;

Well, your code, which was almost identical to my earlier function
"words_to_lines" in the flow based solution, was pretty close to
reasonably shortest -- it just needed a little pruning for the goal of
shortness.

>> #include <iostream>
>> #include <string>
>> using namespace std;
>>
>> auto main( int, char** args ) -> int
>> {
>> int const max = stoi( args[1] );
> see above
>> wstring word, separator, line;
> separator is not used, is it?
>> while( wcin >> word ) {
>> if( int(line.length() + 1 + word.length()) > max ) {
>> wcout << line << endl;
>> line.clear();
>> }
>> line += (line.length()? L" " : L"") + word;
>
> I don't like treating integers as boolean values; I would rather write
> line.length() > 0, assuming that you did not do that just for brevity?

Heh, you caught me. :)

>> }
>> wcout << line << endl;
>> }
>>
>> In case you'd think there a missing "return": nope.
>
> I don't understand why you can leave it off

"main" is a very special function and among other special properties, it
has a default function result of 0 in both C (since C99, its
§5.1.2.2.3/1, but not in earlier C89/C90) and C++ (since the first
standard C++98, its §3.6.1/5).

> I believe that this
> solution, at least to the problem as stated, is easier to understand
> than any of your pipeline versions.

Oh, there was just 1 pipeline version.

And as mentioned, that's the version that's almost identical to your
code, or vice versa. ;-)

That's because it expresses the same fundamental idea of treating the
problem as a flow of words between a word extractor and a line composer.
The word flow is the main focus in the code. In contrast, the other two
solutions I posted focused on input lines, and output lines.

Rosario19

unread,

Nov 14, 2015, 6:21:07 AM11/14/15

to

On Wed, 11 Nov 2015 20:20:44 +0100, Rosario19 <R...@invalid.invalid>
wrote:

lines of w lenght
words max lenght w-1, if word lenght >= w-1 break the word in 2 pieces
using one "-" in between

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>

#define u32 unsigned
#define i32 int
#define u8 unsigned char

#define R return
#define P printf
#define F for
#define GG(a,b) if(a)goto b
#define G goto

i32 WrdLetter(i32 a){R isalpha(a);}

// Programma per rompere le linee secondo argomento

int main(int c, char** a)

{u8 *buf=0;

u32 w, i, j, iWrd;
i32 k, r, v;

if( c>1&&a[1]&&isdigit((u8) a[1][0]) )
w=atoi(a[1]);
else w=80;
if(w<1||w>0xFFF0)
{P("Argument number out of range:\n");
l0: P("Usage: thisProg number <filein >fileout\n");
P("Usage: thisProg <filein >fileout\n Number=80\n");

l1: free(buf);

R 1; // wrong
}
buf=(u8*) malloc(w+8);
if(buf==0){P("Memory insufficient\n"); G l0;}
F(i=0, iWrd=0; ; )
{k=getchar();

if(k==EOF||!(v=WrdLetter(k))) // EOF or ERROR or not alpha
{if(iWrd){//Svuota buffer word;
svuotaBuf: if( (iWrd+i)>w ) // aaaa==4

{P("\n"); i=0;}
F(j=0; j<iWrd; ++j)
{r=putchar(buf[j]); ++i;

GG(r==EOF, l1);
if(i>=w-1&&v)
{P("-\n"); i=0;}
}
iWrd=0;
}
}
GG(k==EOF,l3); // exit ok

if(v){// scrivi nel buffer

GG(iWrd>=w-1, svuotaBuf);
buf[iWrd++]=(u8)k;

}
else {// scrivi nel output
if(i>=w)
{if(k!='\n') P("\n");
i=0;
}
// \13\10=\n conta 0
r=putchar(k);
if(k!=13) ++i; // il 13 non conta
if(k=='\n') i=0;

Rosario19

unread,

Nov 14, 2015, 6:23:15 AM11/14/15

to

On Sat, 14 Nov 2015 12:20:41 +0100, Rosario19 <R...@invalid.invalid>
wrote:

>lines of w lenght
>words max lenght w-1, if word lenght >= w-1 break the word in 2 pieces

^^^^^

>using one "-" in between

if word lenght > w-1 break etc

Rosario19

unread,

Nov 14, 2015, 6:38:14 AM11/14/15

to

On Sat, 14 Nov 2015 12:20:41 +0100, Rosario19 wrote:

> if(i>=w)
> {if(k!='\n') P("\n");
> i=0;

pheraps better
if(i>=w)
{if(k!='\n'&&k!=13) P("\n");
i=0;

Rosario19

unread,

Nov 22, 2015, 3:42:05 AM11/22/15

to

r=putchar(k);
++i;

Melzzzzz

unread,

Nov 22, 2015, 3:57:13 AM11/22/15

to

On Sun, 22 Nov 2015 09:41:43 +0100
Rosario19 <R...@invalid.invalid> wrote:

> #include <stdio.h>
> #include <stdlib.h>
> #include <ctype.h>
>
> #define u32 unsigned
> #define i32 int
> #define u8 unsigned char
>
> #define R return
> #define P printf
> #define F for
> #define GG(a,b) if(a)goto b
> #define G goto

./..,

What an ugly piece of crap !

Rosario19

unread,

Nov 22, 2015, 3:59:31 AM11/22/15

to

it is the crap that run ok

Vir Campestris

unread,

Nov 22, 2015, 4:16:26 PM11/22/15

to

It's still ugly. And it doesn't look like C++.

Andy