Originally I was going to do this all in C++, but since Google Groups no longer allows posting to groups with a symbol in their name, and since I can't find a free Usenet NNTP server that allows posting, I'm gonna do this all in C. I'll keep the code as close to the ANSI/ISO Standard as I can except where I need a POSIX function for something.
Let's say we have a program that works 1% of the time. For this thought experiment, I'll choose a program that produces the SHA256 hashsum of the string passed to it as a command line argument.
So let's say that the program in question segfaults 33% of time, gives an incorrect hashsum 66% of the time, and gives the correct answer 1% of the time.
When the program gives an incorrect answer, the incorrect answer is extremely random. Since a SHA256 hashum is 256-Bit, and since we consider 128-Bit UUID's impossible to duplicate, it's safe to assume that this program will never give the same incorrect answer twice.
So I've taken the original non-buggy code for the SHA256 program, and I've renamed "main" to "real_main". Then I've written my own new 'main' as follows:
#include <assert.h> /* assert */
#include <time.h> /* time, timespec, clock_gettime */
#include <stdlib.h> /* rand, RAND_MAX */
/* Returns an integer in the range [0, n).
*
* Uses rand(), and so is affected-by/affects the same seed.
*/
int randint(int const n)
{
if ( (n - 1) == RAND_MAX )
{
return rand();
}
else
{
// Supporting larger values for n would requires an even more
// elaborate implementation that combines multiple calls to rand()
assert(n <= RAND_MAX);
// Chop off all of the values that would cause skew...
int end = RAND_MAX / n; // truncate skew
assert(end > 0);
end *= n;
// ... and ignore results from rand() that fall above that limit.
// (Worst case the loop condition should succeed 50% of the time,
// so we can expect to bail out of this loop pretty quickly.)
int r;
while ( (r = rand()) >= end );
return r % n;
}
}
int Get_Tick(void) /* Similar to GetTickCount in MS-Windows */
{
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return ts.tv_nsec;
}
int main(int const argc, char **const argv)
{
/* 1% of the time the correct answer is given
33% of the time the program segfaults
66% of the time the program gives the wrong answer */
unsigned i;
srand( time(NULL) ^ Get_Tick() );
int const num = randint(100);
if ( 0 == num ) return real_main(argc,argv);
if ( num < 33 ) *(int*)0 = 88; /* SEGFAULT */
unsigned const ints_per_digest = 32u / sizeof(int);
int digest[ints_per_digest];
for (i = 0u; i != ints_per_digest; ++i)
{
digest[i] = rand();
}
char unsigned const *const p = (char unsigned*)&digest;
for (i = 0u; i != 32u; ++i)
{
printf("%02x", p[i]);
}
printf("\n");
}
So we compile this C program to a binary file, and then we discard the source code for it. So now we somehow have to turn this 1% successful program into a 99.9999% successful program even though we only have the binary for it. Also let's say that we can't disassemble the machine code because the CPU is proprietary (and the instruction set is heavily obfuscated).
So here's what I'm thinking, in a few steps:
(1) Write a second helper program that invokes the SHA256 program
(2) If the SHA256 program segfaults then discard the output and run it again
(3) If the SHA256 program returns a hex string of a 32-Byte hashsum then save the hashsum in cache. Run the program several more times until the same hashsum is outputted a second time -- then take this to be the correct hashsum.
I haven't started coding the second helper program yet, but I've finished the buggy version of the SHA256 program.
Anyone got an thoughts on this?
Here's the buggy SHA256 program:
#include <stdio.h> /* printf */
#include <stdlib.h> /* size_t, malloc, free */
#include <string.h> /* strlen */
#include <limits.h> /* CHAR_BIT, UINT_MAX, ULONG_MAX */
/* Compiler for 16-Bit DOS hasn't got stdint.h */
#if CHAR_BIT == 8
typedef char unsigned u8;
#else
# error "Please tell me about your computer museum"
#endif
#if UINT_MAX == 4294967295UL
typedef unsigned u32;
#elif ULONG_MAX == 4294967295UL
typedef long unsigned u32;
#else
# error "Cannot find a 32-Bit integer type"
#endif
#define CHUNK_SIZE 64
#define TOTAL_LEN_LEN 8
/*
* ABOUT bool: this file does not use bool in order to be as pre-C99 compatible as possible.
*/
/*
* Comments from pseudo-code at
https://en.wikipedia.org/wiki/SHA-2 are reproduced here.
* When useful for clarification, portions of the pseudo-code are reproduced here too.
*/
/*
* Initialize array of round constants:
* (first 32 bits of the fractional parts of the cube roots of the first 64 primes 2..311):
*/
static const u32 k[] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
struct buffer_state {
const u8 * p;
size_t len;
size_t total_len;
int single_one_delivered; /* bool */
int total_len_delivered; /* bool */
};
static u32 right_rot(u32 value, unsigned int count)
{
/*
* Defined behaviour in standard C for all count where 0 < count < 32,
* which is what we need here.
*/
return value >> count | value << (32 - count);
}
static void init_buf_state(struct buffer_state * state, const void * input, size_t len)
{
state->p = (u8 const *)input;
state->len = len;
state->total_len = len;
state->single_one_delivered = 0;
state->total_len_delivered = 0;
}
/* Return value: bool */
static int calc_chunk(u8 chunk[CHUNK_SIZE], struct buffer_state * state)
{
size_t space_in_chunk;
if (state->total_len_delivered) {
return 0;
}
if (state->len >= CHUNK_SIZE) {
memcpy(chunk, state->p, CHUNK_SIZE);
state->p += CHUNK_SIZE;
state->len -= CHUNK_SIZE;
return 1;
}
memcpy(chunk, state->p, state->len);
chunk += state->len;
space_in_chunk = CHUNK_SIZE - state->len;
state->p += state->len;
state->len = 0;
/* If we are here, space_in_chunk is one at minimum. */
if (!state->single_one_delivered) {
*chunk++ = 0x80;
space_in_chunk -= 1;
state->single_one_delivered = 1;
}
/*
* Now:
* - either there is enough space left for the total length, and we can conclude,
* - or there is too little space left, and we have to pad the rest of this chunk with zeroes.
* In the latter case, we will conclude at the next invokation of this function.
*/
if (space_in_chunk >= TOTAL_LEN_LEN) {
const size_t left = space_in_chunk - TOTAL_LEN_LEN;
size_t len = state->total_len;
int i;
memset(chunk, 0x00, left);
chunk += left;
/* Storing of len * 8 as a big endian 64-bit without overflow. */
chunk[7] = (u8) (len << 3);
len >>= 5;
for (i = 6; i >= 0; i--) {
chunk[i] = (u8) len;
len >>= 8;
}
state->total_len_delivered = 1;
} else {
memset(chunk, 0x00, space_in_chunk);
}
return 1;
}
/*
* Limitations:
* - Since input is a pointer in RAM, the data to hash should be in RAM, which could be a problem
* for large data sizes.
* - SHA algorithms theoretically operate on bit strings. However, this implementation has no support
* for bit string lengths that are not multiples of eight, and it really operates on arrays of bytes.
* In particular, the len parameter is a number of bytes.
*/
static u32 const g_h[] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 };
void calc_sha_256(u8 hash[32], const void * input, size_t len)
{
/*
* Note 1: All integers (expect indexes) are 32-bit unsigned integers and addition is calculated modulo 2^32.
* Note 2: For each round, there is one round constant k[i] and one entry in the message schedule array w[i], 0 = i = 63
* Note 3: The compression function uses 8 working variables, a through h
* Note 4: Big-endian convention is used when expressing the constants in this pseudocode,
* and when parsing message block data from bytes to words, for example,
* the first word of the input message "abc" after padding is 0x61626380
*/
/*
* Initialize hash values:
* (first 32 bits of the fractional parts of the square roots of the first 8 primes 2..19):
*/
u32 h[8];
unsigned i, j;
/* 512-bit chunks is what we will operate on. */
u8 chunk[64];
struct buffer_state state;
memcpy(h, g_h, sizeof h);
init_buf_state(&state, input, len);
while (calc_chunk(chunk, &state)) {
u32 ah[8];
const u8 *p = chunk;
/* Initialize working variables to current hash value: */
for (i = 0; i < 8; i++)
ah[i] = h[i];
/* Compression function main loop: */
for (i = 0; i < 4; i++) {
/*
* The w-array is really w[64], but since we only need
* 16 of them at a time, we save stack by calculating
* 16 at a time.
*
* This optimization was not there initially and the
* rest of the comments about w[64] are kept in their
* initial state.
*/
/*
* create a 64-entry message schedule array w[0..63] of 32-bit words
* (The initial values in w[0..63] don't matter, so many implementations zero them here)
* copy chunk into first 16 words w[0..15] of the message schedule array
*/
u32 w[16];
for (j = 0; j < 16; j++) {
u32 s1, ch, temp1, s0, maj, temp2;
if (i == 0) {
w[j] = (u32) p[0] << 24 | (u32) p[1] << 16 |
(u32) p[2] << 8 | (u32) p[3];
p += 4;
} else {
/* Extend the first 16 words into the remaining 48 words w[16..63] of the message schedule array: */
const u32 s0 = right_rot(w[(j + 1) & 0xf], 7) ^ right_rot(w[(j + 1) & 0xf], 18) ^ (w[(j + 1) & 0xf] >> 3);
const u32 s1 = right_rot(w[(j + 14) & 0xf], 17) ^ right_rot(w[(j + 14) & 0xf], 19) ^ (w[(j + 14) & 0xf] >> 10);
w[j] = w[j] + s0 + w[(j + 9) & 0xf] + s1;
}
s1 = right_rot(ah[4], 6) ^ right_rot(ah[4], 11) ^ right_rot(ah[4], 25);
ch = (ah[4] & ah[5]) ^ (~ah[4] & ah[6]);
temp1 = ah[7] + s1 + ch + k[i << 4 | j] + w[j];
s0 = right_rot(ah[0], 2) ^ right_rot(ah[0], 13) ^ right_rot(ah[0], 22);
maj = (ah[0] & ah[1]) ^ (ah[0] & ah[2]) ^ (ah[1] & ah[2]);
temp2 = s0 + maj;
ah[7] = ah[6];
ah[6] = ah[5];
ah[5] = ah[4];
ah[4] = ah[3] + temp1;
ah[3] = ah[2];
ah[2] = ah[1];
ah[1] = ah[0];
ah[0] = temp1 + temp2;
}
}
/* Add the compressed chunk to the current hash value: */
for (i = 0; i < 8; i++)
h[i] += ah[i];
}
/* Produce the final hash value (big-endian): */
for (i = 0, j = 0; i < 8; i++)
{
hash[j++] = (u8) (h[i] >> 24);
hash[j++] = (u8) (h[i] >> 16);
hash[j++] = (u8) (h[i] >> 8);
hash[j++] = (u8) h[i];
}
}
int real_main(int const argc, char **const argv)
{
char *full_str_to_process = 0;
size_t total_input_len = 0u;
unsigned i;
u8 digest[32u];
if ( argc < 2 ) return 1;
for (i = 1u; i != argc; ++i) total_input_len += strlen(argv[i]);
total_input_len += argc - 2; /* For a space between words */
/* In next line:
Add +1 for redundant space at end (which will be deleted)
Add +1 for terminating null char
*/
full_str_to_process = malloc(total_input_len + 1u + 1u);
if ( !full_str_to_process ) return 1;
*full_str_to_process = '\0';
for (i = 1u; i != argc; ++i)
{
strcat(full_str_to_process, argv[i]);
strcat(full_str_to_process, " ");
}
full_str_to_process[total_input_len] = '\0';
calc_sha_256(digest, full_str_to_process, total_input_len);
for (i = 0u; i != 32u; ++i)
printf("%02x", digest[i]);
printf("\n");
return 0;
}
#include <assert.h> /* assert */
#include <time.h> /* time clock*/
#include <stdlib.h> /* rand, RAND_MAX */
/* Returns an integer in the range [0, n).
*
* Uses rand(), and so is affected-by/affects the same seed.
*/
int randint(int const n)
{
if ( (n - 1) == RAND_MAX )
{
return rand();
}
else
{
// Supporting larger values for n would requires an even more
// elaborate implementation that combines multiple calls to rand()
assert(n <= RAND_MAX);
// Chop off all of the values that would cause skew...
int end = RAND_MAX / n; // truncate skew
assert(end > 0);
end *= n;
// ... and ignore results from rand() that fall above that limit.
// (Worst case the loop condition should succeed 50% of the time,
// so we can expect to bail out of this loop pretty quickly.)
int r;
while ( (r = rand()) >= end );
return r % n;
}
}
int Get_Tick(void) /* Similar to GetTickCount in MS-Windows */
{
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return ts.tv_nsec;
}
int main(int const argc, char **const argv)
{
/* 1% of the time the correct answer is given
33% of the time the program segfaults
66% of the time the program gives the wrong answer */
unsigned i;
srand( time(NULL) ^ Get_Tick() );
int const num = randint(100);
if ( 0 == num ) return real_main(argc,argv);
if ( num < 33 ) *(int*)0 = 88; /* SEGFAULT */
unsigned const ints_per_digest = 32u / sizeof(int);
int digest[ints_per_digest];
for (i = 0u; i != ints_per_digest; ++i)
{
digest[i] = rand();
}
char unsigned const *const p = (char unsigned*)&digest;
for (i = 0u; i != 32u; ++i)
{
printf("%02x", p[i]);
}
printf("\n");
}