Change GNUmakefile default to -O3?

51 views
Skip to first unread message

Jeffrey Walton

unread,
Jul 3, 2015, 7:39:47 AM7/3/15
to cryptop...@googlegroups.com
The following changes the GNUmakefile to use -O3 by default, except for Cygwin. IS_DARWIN was added for consistency.

Any comments or objections?

*********

diff --git a/GNUmakefile b/GNUmakefile
index 3defa68..3a364ba 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -1,5 +1,5 @@
-CXXFLAGS = -DNDEBUG -g -O2
-# -O3 fails to link on Cygwin GCC version 4.5.3
+CXXFLAGS = -DNDEBUG -g2
+OPTIMIZE = -O3
 # -fPIC is supported, and enabled by default for x86_64.
 # CXXFLAGS += -fPIC
 # the following options reduce code size, but breaks link or makes link very slow on some systems
@@ -13,23 +13,33 @@ EGREP = egrep
 UNAME = $(shell uname)
 IS_X86 = $(shell uname -m | $(EGREP) -c "i.86|x86|i86|amd64")
 IS_X86_64 = $(shell uname -m | $(EGREP) -c "_64|d64")
+IS_DARWIN = $(shell uname -s | $(EGREP) -i -c "darwin")
 IS_SUN_CC = $(shell $(CXX) -V 2>&1 | $(EGREP) -c "CC: Sun")
 IS_LINUX = $(shell $(CXX) -dumpmachine 2>&1 | $(EGREP) -i -c "linux")
 IS_MINGW = $(shell $(CXX) -dumpmachine 2>&1 | $(EGREP) -i -c "mingw")
 IS_CYGWIN = $(shell $(CXX) -dumpmachine 2>&1 | $(EGREP) -i -c "cygwin")
 CLANG_COMPILER = $(shell $(CXX) --version 2>&1 | $(EGREP) -i -c "clang")
+GCC45_COMPILER = $(shell $(CXX) -v 2>&1 | $(EGREP) -c "^gcc version (4\.5)")
 
 # Default prefix for make install
 ifeq ($(PREFIX),)
 PREFIX = /usr
 endif
 
-# For some reason CXX is gcc on cygwin 1.1.4
+# Cygwin work arounds
 ifneq ($(IS_CYGWIN),0)
+# For some reason CXX is gcc on Cygwin 1.1.4
 ifeq ($(CXX),gcc)
 CXX = g++
 endif
+# -O3 fails to link on Cygwin GCC version 4.5.3
+ifneq ($(GCC45_COMPILER),0)
+OPTIMIZE = -O2
+endif
 endif
+# End Cygwin work arounds
+
+CXXFLAGS += $(OPTIMIZE)
 
 ifeq ($(IS_X86),1)
 
@@ -40,7 +50,7 @@ GAS210_OR_LATER = $(shell $(CXX) -xc -c /dev/null -Wa,-v -o/dev/null 2>&1 | $(EG
 GAS217_OR_LATER = $(shell $(CXX) -xc -c /dev/null -Wa,-v -o/dev/null 2>&1 | $(EGREP) -c "GNU assembler version (2\.1[7-9]|2\.[2-9]|[3-9])")
 GAS219_OR_LATER = $(shell $(CXX) -xc -c /dev/null -Wa,-v -o/dev/null 2>&1 | $(EGREP) -c "GNU assembler version (2\.19|2\.[2-9]|[3-9])")
 
-#Enable PIC for x86_64 targets
+# Enable PIC for x86_64 targets
 ifneq ($(IS_X86_64),0)
 # But don't enable it on Cygwin x86_64
 ifeq ($(IS_CYGWIN),0)
@@ -49,7 +59,7 @@ endif
 endif
 
 ifneq ($(GCC42_OR_LATER),0)
-ifeq ($(UNAME),Darwin)
+ifneq ($(IS_DARWIN),0)
 CXXFLAGS += -arch x86_64 -arch i386
 else
 CXXFLAGS += -march=native
@@ -99,7 +109,7 @@ M32OR64 = -m64
 endif
 endif
 
-ifeq ($(UNAME),Darwin)
+ifneq ($(IS_DARWIN),0)
 AR = libtool
 ARFLAGS = -static -o
 CXX = c++

Jeffrey Walton

unread,
Jul 3, 2015, 8:29:09 AM7/3/15
to cryptop...@googlegroups.com
Here's an update to this patch. It only changes Cygwin if it detects -O3.

It also allows a user to override the optimization on the command line using the ?= operator (https://www.gnu.org/software/make/manual/html_node/Setting.html#Setting). For example:

   make

Uses -O3, but:

   make OPTIMIZE=-Og

Uses -Og.

diff --git a/GNUmakefile b/GNUmakefile
index 3defa68..da04503 100644

--- a/GNUmakefile
+++ b/GNUmakefile
@@ -1,5 +1,5 @@
-CXXFLAGS = -DNDEBUG -g -O2
-# -O3 fails to link on Cygwin GCC version 4.5.3
+CXXFLAGS = -DNDEBUG -g2
+OPTIMIZE ?= -O3

 # -fPIC is supported, and enabled by default for x86_64.
 # CXXFLAGS += -fPIC
 # the following options reduce code size, but breaks link or makes link very slow on some systems
@@ -13,23 +13,38 @@ EGREP = egrep
+ifeq ($(findstring -O3,$(OPTIMIZE)), -O3)
+OPTIMIZE = -O2
+endif
+endif
+

 endif
+# End Cygwin work arounds
+
+CXXFLAGS += $(OPTIMIZE)
 
 ifeq ($(IS_X86),1)
 
@@ -40,7 +55,7 @@ GAS210_OR_LATER = $(shell $(CXX) -xc -c /dev/null -Wa,-v -o/dev/null 2>&1 | $(EG

 GAS217_OR_LATER = $(shell $(CXX) -xc -c /dev/null -Wa,-v -o/dev/null 2>&1 | $(EGREP) -c "GNU assembler version (2\.1[7-9]|2\.[2-9]|[3-9])")
 GAS219_OR_LATER = $(shell $(CXX) -xc -c /dev/null -Wa,-v -o/dev/null 2>&1 | $(EGREP) -c "GNU assembler version (2\.19|2\.[2-9]|[3-9])")
 
-#Enable PIC for x86_64 targets
+# Enable PIC for x86_64 targets
 ifneq ($(IS_X86_64),0)
 # But don't enable it on Cygwin x86_64
 ifeq ($(IS_CYGWIN),0)
@@ -49,7 +64,7 @@ endif

 endif
 
 ifneq ($(GCC42_OR_LATER),0)
-ifeq ($(UNAME),Darwin)
+ifneq ($(IS_DARWIN),0)
 CXXFLAGS += -arch x86_64 -arch i386
 else
 CXXFLAGS += -march=native
@@ -99,7 +114,7 @@ M32OR64 = -m64

 endif
 endif
 
-ifeq ($(UNAME),Darwin)
+ifneq ($(IS_DARWIN),0)
 AR = libtool
 ARFLAGS = -static -o
 CXX = c++

Jeffrey Walton

unread,
Jul 3, 2015, 10:45:40 AM7/3/15
to cryptop...@googlegroups.com
And yet another change after more testing... It appears Cygwin's GCC still has problems in 4.9.

**********
diff --git a/GNUmakefile b/GNUmakefile
index 3defa68..a579e3e 100644

--- a/GNUmakefile
+++ b/GNUmakefile
@@ -1,5 +1,5 @@
-CXXFLAGS = -DNDEBUG -g -O2
-# -O3 fails to link on Cygwin GCC version 4.5.3
+CXXFLAGS = -DNDEBUG -g2
+OPTIMIZE ?= -O3

 # -fPIC is supported, and enabled by default for x86_64.
 # CXXFLAGS += -fPIC
 # the following options reduce code size, but breaks link or makes link very slow on some systems
@@ -13,6 +13,7 @@ EGREP = egrep

 UNAME = $(shell uname)
 IS_X86 = $(shell uname -m | $(EGREP) -c "i.86|x86|i86|amd64")
 IS_X86_64 = $(shell uname -m | $(EGREP) -c "_64|d64")
+IS_DARWIN = $(shell uname -s | $(EGREP) -i -c "darwin")
 IS_SUN_CC = $(shell $(CXX) -V 2>&1 | $(EGREP) -c "CC: Sun")
 IS_LINUX = $(shell $(CXX) -dumpmachine 2>&1 | $(EGREP) -i -c "linux")
 IS_MINGW = $(shell $(CXX) -dumpmachine 2>&1 | $(EGREP) -i -c "mingw")
@@ -24,13 +25,25 @@ ifeq ($(PREFIX),)

 PREFIX = /usr
 endif
 
-# For some reason CXX is gcc on cygwin 1.1.4
+# Cygwin work arounds
 ifneq ($(IS_CYGWIN),0)
+
+# For some reason CXX is gcc on Cygwin 1.1.4
 ifeq ($(CXX),gcc)
 CXX = g++
 endif
+
+# -O3 fails to link with GCC 4.5.3
+# -O3 causes a core dump with GCC 4.9
+ifeq ($(findstring -O3,$(OPTIMIZE)),-O3)
+OPTIMIZE = -O2
 endif
 
+endif

+# End Cygwin work arounds
+
+CXXFLAGS += $(OPTIMIZE)
+
 ifeq ($(IS_X86),1)
 
 GCC42_OR_LATER = $(shell $(CXX) -v 2>&1 | $(EGREP) -c "^gcc version (4.[2-9]|[5-9])")
@@ -40,7 +53,7 @@ GAS210_OR_LATER = $(shell $(CXX) -xc -c /dev/null -Wa,-v -o/dev/null 2>&1 | $(EG

 GAS217_OR_LATER = $(shell $(CXX) -xc -c /dev/null -Wa,-v -o/dev/null 2>&1 | $(EGREP) -c "GNU assembler version (2\.1[7-9]|2\.[2-9]|[3-9])")
 GAS219_OR_LATER = $(shell $(CXX) -xc -c /dev/null -Wa,-v -o/dev/null 2>&1 | $(EGREP) -c "GNU assembler version (2\.19|2\.[2-9]|[3-9])")
 
-#Enable PIC for x86_64 targets
+# Enable PIC for x86_64 targets
 ifneq ($(IS_X86_64),0)
 # But don't enable it on Cygwin x86_64
 ifeq ($(IS_CYGWIN),0)
@@ -49,7 +62,7 @@ endif

 endif
 
 ifneq ($(GCC42_OR_LATER),0)
-ifeq ($(UNAME),Darwin)
+ifneq ($(IS_DARWIN),0)
 CXXFLAGS += -arch x86_64 -arch i386
 else
 CXXFLAGS += -march=native
@@ -99,10 +112,10 @@ M32OR64 = -m64

 endif
 endif
 
-ifeq ($(UNAME),Darwin)
+ifneq ($(IS_DARWIN),0)
 AR = libtool
 ARFLAGS = -static -o
-CXX = c++
+CXX ?= c++
 IS_GCC2 = $(shell $(CXX) -v 2>&1 | $(EGREP) -c gcc-932)
 ifeq ($(IS_GCC2),1)
 CXXFLAGS += -fno-coalesce-templates -fno-coalesce-static-vtables



On Friday, July 3, 2015 at 7:39:47 AM UTC-4, Jeffrey Walton wrote:

Jeffrey Walton

unread,
Jul 6, 2015, 6:39:18 AM7/6/15
to cryptop...@googlegroups.com
If there are no objections, I am going to commit this today.

Mobile Mouse

unread,
Jul 6, 2015, 9:35:06 AM7/6/15
to Jeffrey Walton, cryptop...@googlegroups.com
FWIW, I've had problems with GCC above 4.8.* on Mac OS X - everything builds fine, but the cryptest.exe dies with SEGV. Same with 4.9.*, same with 5.1. So I'm building with clang, which doesn't seem to exhibit such problems. 

On the thread subject - I think it's OK. Whoever for whatever reasons needs lower optimization level, can accomplish it with vi. :-)

Sent from my iPad
--
--
You received this message because you are subscribed to the "Crypto++ Users" Google Group.
To unsubscribe, send an email to cryptopp-user...@googlegroups.com.
More information about Crypto++ and this group is available at http://www.cryptopp.com.
---
You received this message because you are subscribed to the Google Groups "Crypto++ Users" group.
To unsubscribe from this group and stop receiving emails from it, send an email to cryptopp-user...@googlegroups.com.
For more options, visit https://groups.google.com/d/optout.

Jeffrey Walton

unread,
Jul 6, 2015, 10:12:15 AM7/6/15
to Mobile Mouse, cryptop...@googlegroups.com
On Mon, Jul 6, 2015 at 9:35 AM, Mobile Mouse <mous...@gmail.com> wrote:
> FWIW, I've had problems with GCC above 4.8.* on Mac OS X - everything builds
> fine, but the cryptest.exe dies with SEGV. Same with 4.9.*, same with 5.1.
> So I'm building with clang, which doesn't seem to exhibit such problems.

OK, so it sounds like I need to do more testing.

Where did you get your GCC for OS X? MacPorts or Brew? Or did you build
it yourself? (I'm suspicious of GCC on OS X because Apple abandoned it.
Even when it was being developed, they were not actively supplying
patches upstream).

I have not been able to duplicate the SEGV on Debian, Ubuntu, Mint,
Oracle or Fedora. And I have i386 and x86_64 test systems set up for
them just for testing compilers.

I was able to duplicate it on Cygwin. But they don't have a bug
reporter, so I think there's little hope it will be fixed. I think it's
easier just to keep them at -O2 until they fix their processes.

> On the thread subject - I think it's OK. Whoever for whatever reasons needs
> lower optimization level, can accomplish it with vi. :-)

It's even easier than vi: `make OPTIMIZE=-Ox` will work just fine.
That's the benefit of using GNU Make or any non-anemic Make (Posix
Make is pathetically anemic).

Maybe breaking out the OPTIMIZE and setting it to -O2 is all we need.
(Though it may not be apparent, this will help with the Crypto++
engineering process when CI is cut-in)

Jeff

> On Jul 6, 2015, at 06:39, Jeffrey Walton <nolo...@gmail.com> wrote:
>
> If there are no objections, I am going to commit this today.
>
> On Friday, July 3, 2015 at 10:45:40 AM UTC-4, Jeffrey Walton wrote:
>>
>> And yet another change after more testing... It appears Cygwin's GCC still
>> has problems in 4.9.
>>
>> ...

Mobile Mouse

unread,
Jul 6, 2015, 10:25:00 AM7/6/15
to nolo...@gmail.com, cryptop...@googlegroups.com
Macports, doing only x64 build (haven't even tried Universal or i386 alone, and not planning to).

Sent from my iPad

Jeffrey Walton

unread,
Jul 6, 2015, 11:00:37 AM7/6/15
to cryptop...@googlegroups.com, nolo...@gmail.com


On Monday, July 6, 2015 at 10:25:00 AM UTC-4, Mouse wrote:
Macports, doing only x64 build (haven't even tried Universal or i386 alone, and not planning to).
Forgive my ignorance... What's the secret sauce for Macports:

    $ sudo port install gcc g++
    Error: Port gcc not found

I would have expected Google to return a bunch of relevant hits for "Macports install g++", but it does not (or I cannot see the forest for the trees).

I also tried searching for "g++" on their package page (https://www.macports.org/ports.php?by=name&substr=g%2B%2B), but I got 0 results.
 
Jeff

Jeffrey Walton

unread,
Jul 12, 2015, 10:26:21 PM7/12/15
to cryptop...@googlegroups.com, nolo...@gmail.com


On Monday, July 6, 2015 at 9:35:06 AM UTC-4, Mouse wrote:
FWIW, I've had problems with GCC above 4.8.* on Mac OS X - everything builds fine, but the cryptest.exe dies with SEGV. Same with 4.9.*, same with 5.1. So I'm building with clang, which doesn't seem to exhibit such problems.

Here's the proposed fix (for now). I'm investigating it further.

Any suggestions (besides wrapping it in a guard for GCC 4.8 and 4.9)?

$ cat misc.diff
diff --git a/misc.cpp b/misc.cpp
index 3c2c2a5..a32ab41 100644
--- a/misc.cpp
+++ b/misc.cpp
@@ -14,6 +14,11 @@
 
 NAMESPACE_BEGIN(CryptoPP)
 
+// GCC 4.9 generates code using vmovdqa instructions under -O3, which cause a segfault due to 128-bit word alignment requirements.
+// Refer http://stackoverflow.com/q/31373765 for more details.
+#pragma GCC push_options
+#pragma GCC optimize ("-O2")
+
 void xorbuf(byte *buf, const byte *mask, size_t count)
 {
     size_t i;
@@ -44,6 +49,8 @@ void xorbuf(byte *buf, const byte *mask, size_t count)
         buf[i] ^= mask[i];
 }
 
+#pragma GCC pop_options
+
 void xorbuf(byte *output, const byte *input, const byte *mask, size_t count)
 {
     size_t i;
 

Mobile Mouse

unread,
Jul 12, 2015, 11:42:02 PM7/12/15
to Jeffrey Walton, cryptop...@googlegroups.com
Your patch works. Compiled with GCC-5.1 (macports version, in turn built by Xcode-6.4 on Yosemite), "cryptest.exe v" now succeeds.


Jeffrey Walton

unread,
Jul 12, 2015, 11:49:54 PM7/12/15
to Mobile Mouse, Crypto++ Users List
On Sun, Jul 12, 2015 at 11:41 PM, Mobile Mouse <mous...@gmail.com> wrote:
> Your patch works. Compiled with GCC-5.1 (macports version, in turn built by
> Xcode-6.4 on Yosemite), "cryptest.exe v" now succeeds.
>

Yeah, at -O3, GCC uses SSE instructions and vectorizes the XOR. The
vectorization includes vmovdqu, vinsertf128, vmovdqa, and vxorps.

The problem appears to be, vmovdqa has a 128-bit alignment
requirement, but the code only provides a 64-bit alignment guarantee.

I'm researching how we can make it play well with GCC now and moving forward.

*****

void xorbuf(byte *buf, const byte *mask, size_t count)
{
size_t i;

if (IsAligned<word32>(buf) && IsAligned<word32>(mask))
{
if (!CRYPTOPP_BOOL_SLOW_WORD64 && IsAligned<word64>(buf) &&
IsAligned<word64>(mask))
{
for (i=0; i<count/8; i++)
((word64*)buf)[i] ^= ((word64*)mask)[i];
count -= 8*i;
if (!count)
return;
buf += 8*i;
mask += 8*i;
}

for (i=0; i<count/4; i++)
((word32*)buf)[i] ^= ((word32*)mask)[i];
count -= 4*i;
if (!count)
return;
buf += 4*i;
mask += 4*i;
}

for (i=0; i<count; i++)

Jeffrey Walton

unread,
Jul 13, 2015, 1:35:54 AM7/13/15
to cryptop...@googlegroups.com, nolo...@gmail.com


On Sunday, July 12, 2015 at 11:49:54 PM UTC-4, Jeffrey Walton wrote:
On Sun, Jul 12, 2015 at 11:41 PM, Mobile Mouse wrote:
> Your patch works. Compiled with GCC-5.1 (macports version, in turn built by
> Xcode-6.4 on Yosemite), "cryptest.exe v" now succeeds.
>

Yeah, at -O3, GCC uses SSE instructions and vectorizes the XOR. The
vectorization includes vmovdqu, vinsertf128, vmovdqa, and vxorps.

The problem appears to be, vmovdqa has a 128-bit alignment
requirement, but the code only provides a 64-bit alignment guarantee.

I'm researching how we can make it play well with GCC now and moving forward.

This is a somewhat unsettling sign... Under Clang, I'm getting the following from the Undefined Behavior sanitizer.

The word64 issues refer to:


    ((word64*)buf)[i] ^= ((word64*)mask)[i];

The word32 issues refer to:


    ((word32*)buf)[i] ^= ((word32*)mask)[i];

********

.misc.cpp:31:26: runtime error: load of misaligned address 0x7ffcc0a2ba5f for type 'word64' (aka 'unsigned long long'), which requires 8 byte alignment
0x7ffcc0a2ba5f: note: pointer points here
 2b a8 e1 08 54  68 65 20 71 75 69 63 6b  20 62 72 6f 77 6e 20 66  6f 78 20 6a 75 6d 70 73  20 6f 76
             ^
.misc.cpp:40:25: runtime error: load of misaligned address 0x7ffcc0a2ba7d for type 'word32' (aka 'unsigned int'), which requires 4 byte alignment
0x7ffcc0a2ba7d: note: pointer points here
 61 7a 79 20 64 6f 67  2e 20 6c 61 7a 79 20 64  6f 67 00 00 00 00 00 00  00 00 00 00 00 00 00 00  00
             ^
...misc.cpp:31:5: runtime error: store to misaligned address 0x000003d5dc3a for type 'word64' (aka 'unsigned long long'), which requires 8 byte alignment
0x000003d5dc3a: note: pointer points here
 6e 6f  69 6a 00 00 00 00 00 00  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  00 00 00 00 00 00
              ^
misc.cpp:40:4: runtime error: store to misaligned address 0x000003d5dc5a for type 'word32' (aka 'unsigned int'), which requires 4 byte alignment
0x000003d5dc5a: note: pointer points here
 72 73  6d 6e 00 00 00 00 00 00  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  00 00 00 00 00 00

 
Reply all
Reply to author
Forward
0 new messages