
[RFC][PATCH] Make cryptoapi non-optional?


Matt Mackall

Aug 9, 2003, 3:50:07 AM
The attached (lightly tested) patch gets rid of the SHA in the
/dev/random code and replaces it with cryptoapi, leaving us with just
one SHA implementation. It also updates syncookies. This code is
already at about 125% of baseline throughput, and can probably reach
250% with some tweaking of cryptoapi's redundant padding (in case
anyone else cares about being able to get 120Mb/s of cryptographically
strong random data).

The potentially controversial part is that the random driver is
currently non-optional and this patch would make cryptoapi
non-optional as well. I plan to cryptoapi-ify the outstanding
MD5 instance as well.

diff -urN -X dontdiff orig/drivers/char/random.c work/drivers/char/random.c
--- orig/drivers/char/random.c 2003-08-08 11:14:15.000000000 -0500
+++ work/drivers/char/random.c 2003-08-08 16:40:30.000000000 -0500
@@ -249,11 +249,13 @@
#include <linux/genhd.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
+#include <linux/crypto.h>

#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/irq.h>
#include <asm/io.h>
+#include <asm/scatterlist.h>

/*
* Configuration information
@@ -772,122 +774,6 @@
add_timer_randomness(disk->random, 0x100+MKDEV(disk->major, disk->first_minor));
}

-/******************************************************************
- *
- * Hash function definition
- *
- *******************************************************************/
-
-/*
- * This chunk of code defines a function
- * void SHATransform(__u32 digest[HASH_BUFFER_SIZE + HASH_EXTRA_SIZE],
- * __u32 const data[16])
- *
- * The function hashes the input data to produce a digest in the first
- * HASH_BUFFER_SIZE words of the digest[] array, and uses HASH_EXTRA_SIZE
- * more words for internal purposes. (This buffer is exported so the
- * caller can wipe it once rather than this code doing it each call,
- * and tacking it onto the end of the digest[] array is the quick and
- * dirty way of doing it.)
- *
- * For /dev/random purposes, the length of the data being hashed is
- * fixed in length, so appending a bit count in the usual way is not
- * cryptographically necessary.
- */
-
-#define HASH_BUFFER_SIZE 5
-#define HASH_EXTRA_SIZE 80
-
-/*
- * SHA transform algorithm, taken from code written by Peter Gutmann,
- * and placed in the public domain.
- */
-
-/* The SHA f()-functions. */
-
-#define f1(x,y,z) ( z ^ (x & (y^z)) ) /* Rounds 0-19: x ? y : z */
-#define f2(x,y,z) (x ^ y ^ z) /* Rounds 20-39: XOR */
-#define f3(x,y,z) ( (x & y) + (z & (x ^ y)) ) /* Rounds 40-59: majority */
-#define f4(x,y,z) (x ^ y ^ z) /* Rounds 60-79: XOR */
-
-/* The SHA Mysterious Constants */
-
-#define K1 0x5A827999L /* Rounds 0-19: sqrt(2) * 2^30 */
-#define K2 0x6ED9EBA1L /* Rounds 20-39: sqrt(3) * 2^30 */
-#define K3 0x8F1BBCDCL /* Rounds 40-59: sqrt(5) * 2^30 */
-#define K4 0xCA62C1D6L /* Rounds 60-79: sqrt(10) * 2^30 */
-
-#define ROTL(n,X) ( ( ( X ) << n ) | ( ( X ) >> ( 32 - n ) ) )
-
-#define subRound(a, b, c, d, e, f, k, data) \
- ( e += ROTL( 5, a ) + f( b, c, d ) + k + data, b = ROTL( 30, b ) )
-
-
-static void SHATransform(__u32 digest[85], __u32 const data[16])
-{
- __u32 A, B, C, D, E; /* Local vars */
- __u32 TEMP;
- int i;
-#define W (digest + HASH_BUFFER_SIZE) /* Expanded data array */
-
- /*
- * Do the preliminary expansion of 16 to 80 words. Doing it
- * out-of-line like this is faster than doing it in-line on
- * register-starved machines like the x86, and not really any
- * slower on real processors.
- */
- memcpy(W, data, 16*sizeof(__u32));
- for (i = 0; i < 64; i++) {
- TEMP = W[i] ^ W[i+2] ^ W[i+8] ^ W[i+13];
- W[i+16] = ROTL(1, TEMP);
- }
-
- /* Set up first buffer and local data buffer */
- A = digest[ 0 ];
- B = digest[ 1 ];
- C = digest[ 2 ];
- D = digest[ 3 ];
- E = digest[ 4 ];
-
- /* Heavy mangling, in 4 sub-rounds of 20 iterations each. */
- for (i = 0; i < 80; i++) {
- if (i < 40) {
- if (i < 20)
- TEMP = f1(B, C, D) + K1;
- else
- TEMP = f2(B, C, D) + K2;
- } else {
- if (i < 60)
- TEMP = f3(B, C, D) + K3;
- else
- TEMP = f4(B, C, D) + K4;
- }
- TEMP += ROTL(5, A) + E + W[i];
- E = D; D = C; C = ROTL(30, B); B = A; A = TEMP;
- }
-
- /* Build message digest */
- digest[ 0 ] += A;
- digest[ 1 ] += B;
- digest[ 2 ] += C;
- digest[ 3 ] += D;
- digest[ 4 ] += E;
-
- /* W is wiped by the caller */
-#undef W
-}
-
-#undef ROTL
-#undef f1
-#undef f2
-#undef f3
-#undef f4
-#undef K1
-#undef K2
-#undef K3
-#undef K4
-#undef subRound
-
/*********************************************************************
*
* Entropy extraction routines
@@ -896,8 +782,6 @@

#define EXTRACT_ENTROPY_USER 1
#define EXTRACT_ENTROPY_LIMIT 2
-#define TMP_BUF_SIZE (HASH_BUFFER_SIZE + HASH_EXTRA_SIZE)
-#define SEC_XFER_SIZE (TMP_BUF_SIZE*4)

static ssize_t extract_entropy(struct entropy_store *r, void * buf,
size_t nbytes, int flags);
@@ -909,7 +793,7 @@
*/
static void reseed_pool(struct entropy_store *r, int margin, int wanted)
{
- __u32 tmp[TMP_BUF_SIZE];
+ __u32 tmp[32]; /* 256 bits */
int bytes;

DEBUG_ENT("reseed %s wants %d bits (margin %d)\n",
@@ -944,14 +828,11 @@
static ssize_t extract_entropy(struct entropy_store *r, void * buf,
size_t nbytes, int flags)
{
- ssize_t ret, i;
- __u32 tmp[TMP_BUF_SIZE];
- __u32 x;
+ ssize_t ret, i, x;
unsigned long cpuflags;
-
- /* Redundant, but just in case... */
- if (r->entropy_count > r->poolinfo->POOLBITS)
- r->entropy_count = r->poolinfo->POOLBITS;
+ struct crypto_tfm *tfm;
+ struct scatterlist sg[1];
+ __u32 hash[5]; /* 160 bits */

/* Hold lock while accounting */
spin_lock_irqsave(&r->lock, cpuflags);
@@ -975,6 +856,9 @@
spin_unlock_irqrestore(&r->lock, cpuflags);

ret = 0;
+
+ tfm = crypto_alloc_tfm("sha1", 0);
+
while (nbytes) {
/*
* Check if we need to break out or reschedule....
@@ -987,19 +871,10 @@
}

DEBUG_ENT("extract sleep (%d bytes left)\n", nbytes);
-
schedule();
-
DEBUG_ENT("extract wake\n");
}

- /* Hash the pool to get the output */
- tmp[0] = 0x67452301;
- tmp[1] = 0xefcdab89;
- tmp[2] = 0x98badcfe;
- tmp[3] = 0x10325476;
- tmp[4] = 0xc3d2e1f0;
-
/*
* As we hash the pool, we mix intermediate values of
* the hash back into the pool. This eliminates
@@ -1008,40 +883,40 @@
* attempts to find previous ouputs), unless the hash
* function can be inverted.
*/
+
for (i = 0, x = 0; i < r->poolinfo->poolwords; i += 16, x+=2) {
- SHATransform(tmp, r->pool+i);
- add_entropy_words(r, &tmp[x%HASH_BUFFER_SIZE], 1);
+ sg[0].page = virt_to_page(r->pool+i);
+ sg[0].offset = ((long)(r->pool+i) & ~PAGE_MASK);
+ sg[0].length = 64;
+ crypto_digest_digest(tfm, sg, 1, (char *)hash);
+ add_entropy_words(r, &hash[x%20], 1);
}

/*
* In case the hash function has some recognizable
* output pattern, we fold it in half.
*/
- for (i = 0; i < HASH_BUFFER_SIZE/2; i++)
- tmp[i] ^= tmp[i + (HASH_BUFFER_SIZE+1)/2];
-#if HASH_BUFFER_SIZE & 1 /* There's a middle word to deal with */
- x = tmp[HASH_BUFFER_SIZE/2];
- x ^= (x >> 16); /* Fold it in half */
- ((__u16 *)tmp)[HASH_BUFFER_SIZE-1] = (__u16)x;
-#endif
+ hash[0] ^= hash[3];
+ hash[1] ^= hash[4];

/* Copy data to destination buffer */
- i = min(nbytes, HASH_BUFFER_SIZE*sizeof(__u32)/2);
+ i = min(nbytes, sizeof(hash)/2);
if (flags & EXTRACT_ENTROPY_USER) {
- i -= copy_to_user(buf, (__u8 const *)tmp, i);
+ i -= copy_to_user(buf, (__u8 const *)hash, i);
if (!i) {
ret = -EFAULT;
break;
}
} else
- memcpy(buf, (__u8 const *)tmp, i);
+ memcpy(buf, (__u8 const *)hash, i);
nbytes -= i;
buf += i;
ret += i;
}

/* Wipe data just returned from memory */
- memset(tmp, 0, sizeof(tmp));
+ memset(hash, 0, sizeof(hash));
+ crypto_free_tfm(tfm);

return ret;
}
@@ -1160,15 +1035,10 @@
static ssize_t
random_read(struct file * file, char * buf, size_t nbytes, loff_t *ppos)
{
- ssize_t n, retval = 0, count = 0;
+ ssize_t n, retval = 0, count = 0;

- if (nbytes == 0)
- return 0;
-
while (nbytes > 0) {
- n = nbytes;
- if (n > SEC_XFER_SIZE)
- n = SEC_XFER_SIZE;
+ n = min_t(size_t, nbytes, BLOCKING_POOL_SIZE/8);

/* We can take all the entropy in the input pool */
reseed_pool(blocking_pool, 0, n);
@@ -1846,13 +1716,16 @@
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)

static int syncookie_init;
-static __u32 syncookie_secret[2][16-3+HASH_BUFFER_SIZE];
+static __u32 syncookie_secret[2][16-3];

__u32 secure_tcp_syn_cookie(__u32 saddr, __u32 daddr, __u16 sport,
__u16 dport, __u32 sseq, __u32 count, __u32 data)
{
- __u32 tmp[16 + HASH_BUFFER_SIZE + HASH_EXTRA_SIZE];
- __u32 seq;
+ __u32 tmp[16]; /* 512 bits */
+ __u32 hash[5]; /* 160 bits */
+ __u32 seq;
+ struct crypto_tfm *tfm;
+ struct scatterlist sg[1];

/*
* Pick two random secrets the first time we need a cookie.
@@ -1873,22 +1746,27 @@
* MSS into the second hash value.
*/

- memcpy(tmp+3, syncookie_secret[0], sizeof(syncookie_secret[0]));
+ sg[0].page = virt_to_page(tmp);
+ sg[0].offset = ((long) tmp & ~PAGE_MASK);
+ sg[0].length = sizeof(tmp);
+
tmp[0]=saddr;
tmp[1]=daddr;
tmp[2]=(sport << 16) + dport;
- SHATransform(tmp+16, tmp);
- seq = tmp[17] + sseq + (count << COOKIEBITS);
+
+ memcpy(tmp+3, syncookie_secret[0], sizeof(syncookie_secret[0]));
+ tfm = crypto_alloc_tfm("sha1", 0);
+ crypto_digest_digest(tfm, sg, 1, (char *)hash);
+
+ seq = hash[1] + sseq + (count << COOKIEBITS);

memcpy(tmp+3, syncookie_secret[1], sizeof(syncookie_secret[1]));
- tmp[0]=saddr;
- tmp[1]=daddr;
- tmp[2]=(sport << 16) + dport;
tmp[3] = count; /* minute counter */
- SHATransform(tmp+16, tmp);
+ crypto_digest_digest(tfm, sg, 1, (char *)hash);
+ crypto_free_tfm(tfm);

/* Add in the second hash and the data */
- return seq + ((tmp[17] + data) & COOKIEMASK);
+ return seq + ((hash[1] + data) & COOKIEMASK);
}

/*
@@ -1903,19 +1781,29 @@
__u32 check_tcp_syn_cookie(__u32 cookie, __u32 saddr, __u32 daddr, __u16 sport,
__u16 dport, __u32 sseq, __u32 count, __u32 maxdiff)
{
- __u32 tmp[16 + HASH_BUFFER_SIZE + HASH_EXTRA_SIZE];
- __u32 diff;
+ __u32 tmp[16]; /* 512 bits */
+ __u32 hash[5]; /* 160 bits */
+ __u32 diff;
+ struct crypto_tfm *tfm;
+ struct scatterlist sg[1];

if (syncookie_init == 0)
return (__u32)-1; /* Well, duh! */

- /* Strip away the layers from the cookie */
- memcpy(tmp+3, syncookie_secret[0], sizeof(syncookie_secret[0]));
+ sg[0].page = virt_to_page(tmp);
+ sg[0].offset = ((long) tmp & ~PAGE_MASK);
+ sg[0].length = sizeof(tmp);
+
tmp[0]=saddr;
tmp[1]=daddr;
tmp[2]=(sport << 16) + dport;
- SHATransform(tmp+16, tmp);
- cookie -= tmp[17] + sseq;
+
+ /* Strip away the layers from the cookie */
+ memcpy(tmp+3, syncookie_secret[0], sizeof(syncookie_secret[0]));
+ tfm = crypto_alloc_tfm("sha1", 0);
+ crypto_digest_digest(tfm, sg, 1, (char *)hash);
+
+ cookie -= hash[1] + sseq;
/* Cookie is now reduced to (count * 2^24) ^ (hash % 2^24) */

diff = (count - (cookie >> COOKIEBITS)) & ((__u32)-1 >> COOKIEBITS);
@@ -1923,13 +1811,11 @@
return (__u32)-1;

memcpy(tmp+3, syncookie_secret[1], sizeof(syncookie_secret[1]));
- tmp[0] = saddr;
- tmp[1] = daddr;
- tmp[2] = (sport << 16) + dport;
tmp[3] = count - diff; /* minute counter */
- SHATransform(tmp+16, tmp);
+ crypto_digest_digest(tfm, sg, 1, (char *)hash);
+ crypto_free_tfm(tfm);

- return (cookie - tmp[17]) & COOKIEMASK; /* Leaving the data behind */
+ return (cookie - hash[1]) & COOKIEMASK; /* Leaving the data behind */
}
#endif

--
Matt Mackall : http://www.selenic.com : of or relating to the moon

David S. Miller

Aug 9, 2003, 4:20:05 AM
On Sat, 9 Aug 2003 02:44:59 -0500
Matt Mackall <m...@selenic.com> wrote:

> The attached (lightly tested) patch gets rid of the SHA in the
> /dev/random code and replaces it with cryptoapi, leaving us with just
> one SHA implementation.

...


> __u32 secure_tcp_syn_cookie(__u32 saddr, __u32 daddr, __u16 sport,
> __u16 dport, __u32 sseq, __u32 count, __u32 data)

...


> + tfm = crypto_alloc_tfm("sha1", 0);

secure_tcp_syn_cookie() is called from software interrupt
context, therefore it may not sleep.

So you cannot call crypto_alloc_tfm() here, which can sleep.

Not to mention that calling crypto_alloc_tfm() for every TCP
connection creation is absurdly expensive.

Same thing in check_tcp_syn_cookie().

Also, same problem exists with extract_entropy() which also must be
callable from software interrupt context, and thus the
crypto_alloc_tfm() alloc calls you added there are illegal too.
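
(For illustration only -- the usual pattern is to do the sleeping
allocation once, in process context, and keep the tfm around. A rough
sketch, names hypothetical, not taken from the patch:)

static struct crypto_tfm *sha_tfm;

static int __init my_sha_init(void)
{
	/* process context: crypto_alloc_tfm() may sleep here, which is fine */
	sha_tfm = crypto_alloc_tfm("sha1", 0);
	return sha_tfm ? 0 : -ENOMEM;
}

/* softirq context: uses the preallocated tfm, never allocates or sleeps */
static void my_sha_hash(struct scatterlist *sg, char *out)
{
	crypto_digest_digest(sha_tfm, sg, 1, out);
}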

This patch needs tons of work.

Matt Mackall

Aug 9, 2003, 10:20:04 AM
On Sat, Aug 09, 2003 at 01:04:18AM -0700, David S. Miller wrote:
> On Sat, 9 Aug 2003 02:44:59 -0500
> Matt Mackall <m...@selenic.com> wrote:
>
> > The attached (lightly tested) patch gets rid of the SHA in the
> > /dev/random code and replaces it with cryptoapi, leaving us with just
> > one SHA implementation.
> ...
> > __u32 secure_tcp_syn_cookie(__u32 saddr, __u32 daddr, __u16 sport,
> > __u16 dport, __u32 sseq, __u32 count, __u32 data)
> ...
> > + tfm = crypto_alloc_tfm("sha1", 0);
>

> This patch needs tons of work.

Yes, it's completely bogus. It also needs tons of error-checking, etc.
All of which is a big waste of time if the answer to "is making
cryptoapi mandatory ok?" is no. So before embarking on the hard part,
I thought I'd ask the hard question.

--
Matt Mackall : http://www.selenic.com : of or relating to the moon

Matt Mackall

Aug 9, 2003, 10:40:09 AM
On Sat, Aug 09, 2003 at 02:44:59AM -0500, Matt Mackall wrote:
> The attached (lightly tested) patch gets rid of the SHA in the
> /dev/random code and replaces it with cryptoapi, leaving us with just
> one SHA implementation. It also updates syncookies. This code is
> already at about 125% of baseline throughput, and can probably reach
> 250% with some tweaking of cryptoapi's redundant padding (in case
> anyone else cares about being able to get 120Mb/s of cryptographically
> strong random data).
>
> The potentially controversial part is that the random driver is
> currently non-optional and this patch would make cryptoapi
> non-optional as well. I plan to cryptoapi-ify the outstanding
> MD5 instance as well.

Some details I left out at 3am:

- this code is just a proof of concept
- the random number generator is non-optional because it's used by
various things from filesystems to networking
- the cryptoapi layer is itself quite thin (the .o is about 2.8k
stripped on my box)

An alternative approach is to subvert cryptoapi by pulling the core
transform out of the SHA1 module and putting it in the core with hooks
so /dev/random can get at it directly. Downsides are subverting the
api and losing pluggability. Upsides are avoiding api overhead and
potential sleeping.

Jamie Lokier

Aug 9, 2003, 1:20:10 PM
Matt Mackall wrote:
> This code is already at about 125% of baseline throughput, and can
> probably reach 250% with some tweaking of cryptoapi's redundant
> padding (in case anyone else cares about being able to get 120Mb/s
> of cryptographically strong random data).

Why would the cryptoapi version be faster, I wondered? So I had a look.
No conclusions, just observations.

- random.c defines a constant, SHA_CODE_SIZE. This selects between
4 implementations, from smaller to (presumably) faster by
progressively unrolling more and more.

This is set to SHA_CODE_SIZE == 0, for smallest code.

In crypto/sha1.c, the code is roughly equivalent to SHA_CODE_SIZE == 3,
random.c's _largest_ implementation.

So you seem to be replacing a small version with a large version,
albeit faster.

- One of the optimisations in crypto/sha1.c is:

/* blk0() and blk() perform the initial expand. */
/* I got the idea of expanding during the round function from SSLeay */

Yet, random.c has the opposite:

/*
 * Do the preliminary expansion of 16 to 80 words. Doing it
 * out-of-line like this is faster than doing it in-line on
 * register-starved machines like the x86, and not really any
 * slower on real processors.
 */

I wonder if the random.c method is faster on x86?

- sha1_transform() in crypto/sha1.c ends with this misleading
comment. Was the author trying to prevent data leakage? If so,
he failed:

/* Wipe variables */
a = b = c = d = e = 0;
memset (block32, 0x00, sizeof block32);

The second line will definitely be optimised away. The third
could be, although GCC doesn't.
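
(If the wipe is really wanted, writing the zeros through a volatile
pointer would keep the compiler honest -- a hypothetical helper, not
something crypto/sha1.c actually has:)

static void secure_wipe(void *p, size_t n)
{
	volatile unsigned char *v = p;

	/* volatile stores can't legally be optimised away */
	while (n--)
		*v++ = 0;
}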

Enjoy,
-- Jamie

Matt Mackall

Aug 9, 2003, 1:40:10 PM
On Sat, Aug 09, 2003 at 06:13:18PM +0100, Jamie Lokier wrote:
> Matt Mackall wrote:
> > This code is already at about 125% of baseline throughput, and can
> > probably reach 250% with some tweaking of cryptoapi's redundant
> > padding (in case anyone else cares about being able to get 120Mb/s
> > of cryptographically strong random data).
>
> Why would the cryptoapi version be faster, I wondered? So I had a look.
> No conclusions, just observations.
>
> - random.c defines a constant, SHA_CODE_SIZE. This selects between
> 4 implementations, from smaller to (presumably) faster by
> progressively unrolling more and more.
>
> This is set to SHA_CODE_SIZE == 0, for smallest code.
>
> In crypto/sha1.c, the code is roughly equivalent to SHA_CODE_SIZE == 3,
> random.c's _largest_ implementation.
>
> So you seem to be replacing a small version with a large version,
> albeit faster.

Yes. I've got another patch to include a non-unrolled version of SHA
in cryptolib but that depends on some config magic I haven't thought
out. Another thing I failed to mention at 3am is that most of the
speed increase actually comes from the following patch which wasn't in
baseline and isn't cryptoapi-specific. So I expect the cryptoapi
version is about 30% faster once you amortize the initialization
stuff.

>>>>
Remove folding step and double throughput.

If there was a recognizable output pattern, simply folding would make
things worse. To see why, assume bits x and y are correlated, but are
otherwise random. Then x^y has a bias toward zero, meaning less entropy
than just x alone. (For instance, if x == y three times out of four,
then x^y is zero three times out of four, while x by itself can still
be perfectly unbiased.)

Best case, this is the moral equivalent of running every bit through
SHA four times rather than twice. Twice should be more than enough.

diff -urN -X dontdiff orig/drivers/char/random.c work/drivers/char/random.c

--- orig/drivers/char/random.c 2003-08-09 10:25:46.000000000 -0500
+++ work/drivers/char/random.c 2003-08-09 10:31:27.000000000 -0500
@@ -1013,20 +1013,8 @@
add_entropy_words(r, &tmp[x%HASH_BUFFER_SIZE], 1);
}

- /*
- * In case the hash function has some recognizable
- * output pattern, we fold it in half.
- */
- for (i = 0; i < HASH_BUFFER_SIZE/2; i++)
- tmp[i] ^= tmp[i + (HASH_BUFFER_SIZE+1)/2];
-#if HASH_BUFFER_SIZE & 1 /* There's a middle word to deal with */
- x = tmp[HASH_BUFFER_SIZE/2];
- x ^= (x >> 16); /* Fold it in half */
- ((__u16 *)tmp)[HASH_BUFFER_SIZE-1] = (__u16)x;
-#endif
-
/* Copy data to destination buffer */
- i = min(nbytes, HASH_BUFFER_SIZE*sizeof(__u32)/2);
+ i = min(nbytes, HASH_BUFFER_SIZE*sizeof(__u32));
if (flags & EXTRACT_ENTROPY_USER) {
i -= copy_to_user(buf, (__u8 const *)tmp, i);
if (!i) {

>>>>

> - One of the optimisations in crypto/sha1.c is:
>
> /* blk0() and blk() perform the initial expand. */
> /* I got the idea of expanding during the round function from SSLeay */
>
> Yet, random.c has the opposite:
>
> /*
> * Do the preliminary expansion of 16 to 80 words. Doing it
> * out-of-line like this is faster than doing it in-line on
> * register-starved machines like the x86, and not really any
> * slower on real processors.
> */
>
> I wonder if the random.c method is faster on x86?

They're probably equal on modern machines. The random code is from 486 days.

> - sha1_transform() in crypto/sha1.c ends with this misleading
> comment. Was the author trying to prevent data leakage? If so,
> he failed:
>
> /* Wipe variables */
> a = b = c = d = e = 0;
> memset (block32, 0x00, sizeof block32);
>
> The second line will definitely be optimised away. The third
> could be, although GCC doesn't.

Preventing data leakage is the intent, though I'm not sure it's worth
the trouble. If your attacker can leverage this sort of data leakage,
you probably have more important stuff to worry about.

--
Matt Mackall : http://www.selenic.com : of or relating to the moon

David S. Miller

Aug 9, 2003, 1:50:10 PM
On Sat, 9 Aug 2003 09:05:42 -0500
Matt Mackall <m...@selenic.com> wrote:

> All of which is a big waste of time if the answer to "is making
> cryptoapi mandatory ok?" is no. So before embarking on the hard part,
> I thought I'd ask the hard question.

I'm personally OK with it, and in fact I talked about this with James
(converting random.c over to the crypto API and the implications)
early on while we were first working on the crypto kernel bits.

But I fear some embedded folks might bark. Especially if the
resulting code size is significantly larger.

We could make it a config option CONFIG_RANDOM_CRYPTOAPI.

All of this analysis nearly requires a working implementation so
someone can do a code-size and performance comparison between
the two cases. I know this is what you're trying to avoid, having
to code up what might be just thrown away :(

Matt Mackall

Aug 9, 2003, 4:00:11 PM
On Sat, Aug 09, 2003 at 10:39:10AM -0700, David S. Miller wrote:
> On Sat, 9 Aug 2003 09:05:42 -0500
> Matt Mackall <m...@selenic.com> wrote:
>
> > All of which is a big waste of time if the answer to "is making
> > cryptoapi mandatory ok?" is no. So before embarking on the hard part,
> > I thought I'd ask the hard question.
>
> I'm personally OK with it, and in fact I talked about this with James
> (converting random.c over to the crypto API and the implications)
> early on while we were first working on the crypto kernel bits.
>
> But I fear some embedded folks might bark. Especially if the
> resulting code size is significantly larger.

An alternate approach is:

- pull the minimum parts of SHA (and perhaps MD5) out of their
respective cryptoapi modules into the core
- call that code directly from random, bypassing cryptoapi (and
avoiding all the dynamic allocation and potential sleeping stuff)

Combined with a patch that does non-unrolled SHA, this should be about
the same as the present code.

> We could make it a config option CONFIG_RANDOM_CRYPTOAPI.

As the primary benefit here is elimination of duplicate code, I think
that's a non-starter.



> All of this analysis nearly requires a working implementation so
> someone can do a code-size and performance comparison between
> the two cases. I know this is what you're trying to avoid, having
> to code up what might be just thrown away :(

Ok, can I export some more cryptoapi primitives?

__crypto_lookup_alg(const char *name);
__crypto_alg_tfm_size(const char *name);
__crypto_setup_tfm(struct crypto_alg *alg, char *buf, int size, int flags);
__crypto_exit_ops(...)
__crypto_alg_put(...);

This would let me do alg lookup in my init and avoid the dynamic
allocation sleeping and performance hits.

Also, I posted to cryptoapi-devel that I need a way to disable the
unconditional padding on the hash functions.

--
Matt Mackall : http://www.selenic.com : of or relating to the moon

David S. Miller

Aug 9, 2003, 4:30:11 PM
On Sat, 9 Aug 2003 14:46:27 -0500
Matt Mackall <m...@selenic.com> wrote:

> On Sat, Aug 09, 2003 at 10:39:10AM -0700, David S. Miller wrote:
> > All of this analysis nearly requires a working implementation so
> > someone can do a code-size and performance comparison between
> > the two cases. I know this is what you're trying to avoid, having
> > to code up what might be just thrown away :(
>
> Ok, can I export some more cryptoapi primitives?
>
> __crypto_lookup_alg(const char *name);
> __crypto_alg_tfm_size(const char *name);
> __crypto_setup_tfm(struct crypto_alg *alg, char *buf, int size, int flags);
> __crypto_exit_ops(...)
> __crypto_alg_put(...);

No, this eliminates the whole point of all the abstractions.
Please just use the APIs properly.

> This would let me do alg lookup in my init and avoid the dynamic
> allocation sleeping and performance hits.

How about allocating the tfm(s) (one per cpu on SMP) at module
init time? That's how I would implement this.

Please try to implement something using the existing APIs.

It's not the size of the cryptoapi core that I'm worried about
(that's only like 1K or 2K on 32-bit systems), it's the SHA1/MD*
code which is a bit larger. This might all cancel out once the
SHA1/MD* code is deleted from random.o, and your implementation
will prove/disprove this, and then we can move on to perf analysis.

> Also, I posted to cryptoapi-devel that I need a way to disable the
> unconditional padding on the hash functions.

James, comments?

Robert Love

Aug 9, 2003, 10:10:07 PM
On Sat, 2003-08-09 at 07:33, Matt Mackall wrote:

> - the random number generator is non-optional because it's used by
> various things from filesystems to networking

What if you kept the crypto API optional, made random.c a config option,
and made that depend on the crypto API? Then -- and this is the
non-obvious part -- implement a super lame replacement for
get_random_bytes() [what I assume the various parts of the kernel are
using] for when CONFIG_RANDOM is not set?

You can do a simple PRNG in <10 lines of C. Have the kernel seed it on
boot with xtime or something else lame.
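
Strictly for illustration, something of this sort -- a bare linear
congruential generator, nowhere near cryptographic strength, names
made up:

static u32 lame_state = 1;

void lame_seed(u32 seed)	/* e.g. from xtime at boot */
{
	lame_state = seed ? seed : 1;
}

u32 lame_random(void)
{
	/* 32-bit LCG step, constants from Numerical Recipes */
	lame_state = lame_state * 1664525 + 1013904223;
	return lame_state;
}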

Robert Love

Matt Mackall

Aug 9, 2003, 11:20:04 PM
On Sat, Aug 09, 2003 at 07:07:27PM -0700, Robert Love wrote:
> On Sat, 2003-08-09 at 07:33, Matt Mackall wrote:
>
> > - the random number generator is non-optional because it's used by
> > various things from filesystems to networking
>
> What if you kept the crypto API optional, made random.c a config option,
> and made that depend on the crypto API? Then -- and this is the
> non-obvious part -- implement a super lame replacement for
> get_random_bytes() [what I assume the various parts of the kernel are
> using] for when CONFIG_RANDOM is not set?
>
> You can do a simple PRNG in <10 lines of C. Have the kernel seed it on
> boot with xtime or something else lame.

Eek, let's not go there. Most of the users of get_random_bytes()
depend on it being strong enough for cryptographic purposes. And we've
long guaranteed the existence of a cryptographically strong
/dev/urandom. If userspace could no longer rely on it being there,
people would be forced to go back to poorly reinventing it in
userspace - not good. Having random.c be nonoptional is a good note to
embedded folks that they should think long and hard before ripping it
out.

--
Matt Mackall : http://www.selenic.com : of or relating to the moon

Robert Love

Aug 10, 2003, 12:10:06 AM
On Sat, 2003-08-09 at 20:49, David S. Miller wrote:

> I definitely agree, removing the integrity of random.c is not
> an option. Even things inside the kernel itself rely upon
> get_random_bytes() for anti-DoS measures.

OK, fair enough. I liked the idea because it let things stay optional,
but also gave us no excuse not to merge Matt's changes.

I would have no problem requiring cryptoapi, but what if it increases in
size? Requiring a large (and definitely oft-used for many people)
feature isn't size, either.

Robert Love

David S. Miller

Aug 10, 2003, 12:10:04 AM
On Sat, 9 Aug 2003 22:14:18 -0500
Matt Mackall <m...@selenic.com> wrote:

> On Sat, Aug 09, 2003 at 07:07:27PM -0700, Robert Love wrote:
> > On Sat, 2003-08-09 at 07:33, Matt Mackall wrote:
> > What if you kept the crypto API optional, made random.c a config option,
> > and made that depend on the crypto API?

...


> Eek, let's not go there.

I definitely agree, removing the integrity of random.c is not
an option. Even things inside the kernel itself rely upon
get_random_bytes() for anti-DoS measures.

Robert Love

unread,
Aug 10, 2003, 12:10:07 AM8/10/03
to
On Sat, 2003-08-09 at 21:01, Robert Love wrote:

> Requiring a large (and definitely oft-used for many people)
> feature isn't size, either.

Eh, s/size/cool/

Matt Mackall

Aug 10, 2003, 4:20:06 AM
On Sat, Aug 09, 2003 at 01:17:15PM -0700, David S. Miller wrote:
> On Sat, 9 Aug 2003 14:46:27 -0500
> Matt Mackall <m...@selenic.com> wrote:
>
> > On Sat, Aug 09, 2003 at 10:39:10AM -0700, David S. Miller wrote:
> > > All of this analysis nearly requires a working implementation so
> > > someone can do a code-size and performance comparison between
> > > the two cases. I know this is what you're trying to avoid, having
> > > to code up what might be just thrown away :(
> >
> > Ok, can I export some more cryptoapi primitives?

[snip]

> How about allocating the tfm(s) (one per cpu on SMP) at module
> init time? That's how I would implement this.

Ok, done.

- percpu tfms
- move random to initcall so that it gets initialized after cryptoapi
- fix resched check in crypto_yield
- appears to work

...though I don't like that this approach ends up having to do the
hashing between get_cpu/put_cpu with preemption disabled while the
original code was fully reentrant.

diff -urN -X dontdiff orig/crypto/internal.h work/crypto/internal.h
--- orig/crypto/internal.h 2003-07-13 22:29:11.000000000 -0500
+++ work/crypto/internal.h 2003-08-10 02:43:41.000000000 -0500
@@ -37,7 +37,7 @@

static inline void crypto_yield(struct crypto_tfm *tfm)
{
- if (!in_softirq())
+ if (!in_atomic())
cond_resched();
}

diff -urN -X dontdiff orig/drivers/char/mem.c work/drivers/char/mem.c
--- orig/drivers/char/mem.c 2003-07-13 22:34:32.000000000 -0500
+++ work/drivers/char/mem.c 2003-08-10 02:11:30.000000000 -0500
@@ -680,7 +680,6 @@
S_IFCHR | devlist[i].mode, devlist[i].name);
}

- rand_initialize();
#if defined (CONFIG_FB)
fbmem_init();
#endif


diff -urN -X dontdiff orig/drivers/char/random.c work/drivers/char/random.c
--- orig/drivers/char/random.c 2003-08-09 22:28:50.000000000 -0500
+++ work/drivers/char/random.c 2003-08-10 02:13:51.000000000 -0500
@@ -249,11 +249,16 @@
#include <linux/genhd.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
+#include <linux/crypto.h>
+#include <linux/cpu.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>

#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/irq.h>
#include <asm/io.h>
+#include <asm/scatterlist.h>

/*
* Configuration information

@@ -360,7 +365,7 @@
* modulo the generator polymnomial. Now, for random primitive polynomials,
* this is a universal class of hash functions, meaning that the chance
* of a collision is limited by the attacker's knowledge of the generator
- * polynomail, so if it is chosen at random, an attacker can never force
+ * polynomial, so if it is chosen at random, an attacker can never force
* a collision. Here, we use a fixed polynomial, but we *can* assume that
* ###--> it is unknown to the processes generating the input entropy. <-###
* Because of this important property, this is a good, collision-resistant
@@ -374,6 +379,8 @@
static DECLARE_WAIT_QUEUE_HEAD(random_read_wait);
static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);

+static DEFINE_PER_CPU(struct crypto_tfm *, sha_tfm);
+
/*
* Forward procedure declarations
*/
@@ -773,122 +780,6 @@

@@ -897,8 +788,6 @@
#define EXTRACT_ENTROPY_USER 1
#define EXTRACT_ENTROPY_LIMIT 2
-#define TMP_BUF_SIZE (HASH_BUFFER_SIZE + HASH_EXTRA_SIZE)
-#define SEC_XFER_SIZE (TMP_BUF_SIZE*4)

static ssize_t extract_entropy(struct entropy_store *r, void * buf,
size_t nbytes, int flags);

@@ -910,7 +799,7 @@
*/
static void reseed_pool(struct entropy_store *r, int margin, int wanted)
{
- __u32 tmp[TMP_BUF_SIZE];
+ __u32 tmp[32]; /* 256 bits */
int bytes;

DEBUG_ENT("reseed %s wants %d bits (margin %d)\n",

@@ -945,14 +834,11 @@
static ssize_t extract_entropy(struct entropy_store *r, void * buf,
size_t nbytes, int flags)
{
- ssize_t ret, i;
- __u32 tmp[TMP_BUF_SIZE];
- __u32 x;
+ ssize_t ret, i, x;
unsigned long cpuflags;
-
- /* Redundant, but just in case... */
- if (r->entropy_count > r->poolinfo->POOLBITS)
- r->entropy_count = r->poolinfo->POOLBITS;
+ struct crypto_tfm *tfm;
+ struct scatterlist sg[1];
+ __u32 hash[5]; /* 160 bits */

/* Hold lock while accounting */
spin_lock_irqsave(&r->lock, cpuflags);

@@ -976,6 +862,7 @@
spin_unlock_irqrestore(&r->lock, cpuflags);

ret = 0;
+
while (nbytes) {
/*
* Check if we need to break out or reschedule....

@@ -988,19 +875,10 @@
}

DEBUG_ENT("extract sleep (%d bytes left)\n", nbytes);
-
schedule();
-
DEBUG_ENT("extract wake\n");
}

- /* Hash the pool to get the output */
- tmp[0] = 0x67452301;
- tmp[1] = 0xefcdab89;
- tmp[2] = 0x98badcfe;
- tmp[3] = 0x10325476;
- tmp[4] = 0xc3d2e1f0;
-
/*
* As we hash the pool, we mix intermediate values of
* the hash back into the pool. This eliminates

@@ -1009,29 +887,34 @@
* attempts to find previous ouputs), unless the hash
* function can be inverted.
*/
+
for (i = 0, x = 0; i < r->poolinfo->poolwords; i += 16, x+=2) {
- SHATransform(tmp, r->pool+i);
- add_entropy_words(r, &tmp[x%HASH_BUFFER_SIZE], 1);
+ sg[0].page = virt_to_page(r->pool+i);
+ sg[0].offset = ((long)(r->pool+i) & ~PAGE_MASK);
+ sg[0].length = 64;
+ tfm = get_cpu_var(sha_tfm);
+ crypto_digest_digest(tfm, sg, 1, (char *)hash);
+ put_cpu_var(sha_tfm);
+ add_entropy_words(r, &hash[x%20], 1);
}

/* Copy data to destination buffer */
- i = min(nbytes, HASH_BUFFER_SIZE*sizeof(__u32));
+ i = min(nbytes, sizeof(hash));
if (flags & EXTRACT_ENTROPY_USER) {
- i -= copy_to_user(buf, (__u8 const *)tmp, i);
+ i -= copy_to_user(buf, (__u8 const *)hash, i);
if (!i) {
ret = -EFAULT;
break;
}
} else
- memcpy(buf, (__u8 const *)tmp, i);
+ memcpy(buf, (__u8 const *)hash, i);
nbytes -= i;
buf += i;
ret += i;
}

/* Wipe data just returned from memory */
- memset(tmp, 0, sizeof(tmp));
-
+ memset(hash, 0, sizeof(hash));
return ret;
}

@@ -1089,7 +972,25 @@
}
}

-void __init rand_initialize(void)
+static int random_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ if (action == CPU_UP_PREPARE)
+ {
+ per_cpu(sha_tfm, (long)hcpu) = crypto_alloc_tfm("sha1", 0);
+
+ printk(KERN_NOTICE "random cpu %ld sha_tfm = %p\n",
+ (long)hcpu, per_cpu(sha_tfm, (long)hcpu));
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block random_nb = {
+ .notifier_call = random_cpu_notify,
+};
+
+static int __init init_random(void)
{
int i;

@@ -1099,10 +1000,14 @@
"nonblocking");

if(!(input_pool && blocking_pool && nonblocking_pool))
- return;
+ return -ENOMEM;

- if(batch_entropy_init(BATCH_ENTROPY_SIZE))
- return;
+ if((i=batch_entropy_init(BATCH_ENTROPY_SIZE)) != 0)
+ return i;
+
+ random_cpu_notify(&random_nb, (unsigned long)CPU_UP_PREPARE,
+ (void *)(long)smp_processor_id());
+ register_cpu_notifier(&random_nb);

init_std_data(input_pool);
sysctl_init_random(input_pool);
@@ -1111,8 +1016,12 @@
irq_timer_state[i] = NULL;
memset(&keyboard_timer_state, 0, sizeof(struct timer_rand_state));
memset(&mouse_timer_state, 0, sizeof(struct timer_rand_state));
+
+ return 0;
}

+__initcall(init_random);
+
void rand_initialize_irq(int irq)
{
struct timer_rand_state *state;
@@ -1149,15 +1058,10 @@
static ssize_t
random_read(struct file * file, char * buf, size_t nbytes, loff_t *ppos)
{
- ssize_t n, retval = 0, count = 0;
+ ssize_t n, retval = 0, count = 0;

- if (nbytes == 0)
- return 0;
-
while (nbytes > 0) {
- n = nbytes;
- if (n > SEC_XFER_SIZE)
- n = SEC_XFER_SIZE;
+ n = min_t(size_t, nbytes, BLOCKING_POOL_SIZE/8);

/* We can take all the entropy in the input pool */
reseed_pool(blocking_pool, 0, n);

@@ -1835,13 +1739,16 @@
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)

static int syncookie_init;
-static __u32 syncookie_secret[2][16-3+HASH_BUFFER_SIZE];
+static __u32 syncookie_secret[2][16-3];

__u32 secure_tcp_syn_cookie(__u32 saddr, __u32 daddr, __u16 sport,
__u16 dport, __u32 sseq, __u32 count, __u32 data)
{
- __u32 tmp[16 + HASH_BUFFER_SIZE + HASH_EXTRA_SIZE];
- __u32 seq;
+ __u32 tmp[16]; /* 512 bits */
+ __u32 hash[5]; /* 160 bits */
+ __u32 seq;
+ struct crypto_tfm *tfm;
+ struct scatterlist sg[1];

/*
* Pick two random secrets the first time we need a cookie.

@@ -1862,22 +1769,27 @@
* MSS into the second hash value.
*/

- memcpy(tmp+3, syncookie_secret[0], sizeof(syncookie_secret[0]));
+ sg[0].page = virt_to_page(tmp);
+ sg[0].offset = ((long) tmp & ~PAGE_MASK);
+ sg[0].length = sizeof(tmp);
+
tmp[0]=saddr;
tmp[1]=daddr;
tmp[2]=(sport << 16) + dport;
- SHATransform(tmp+16, tmp);
- seq = tmp[17] + sseq + (count << COOKIEBITS);
+
+ memcpy(tmp+3, syncookie_secret[0], sizeof(syncookie_secret[0]));
+ tfm = get_cpu_var(sha_tfm);
+ crypto_digest_digest(tfm, sg, 1, (char *)hash);
+
+ seq = hash[1] + sseq + (count << COOKIEBITS);

memcpy(tmp+3, syncookie_secret[1], sizeof(syncookie_secret[1]));
- tmp[0]=saddr;
- tmp[1]=daddr;
- tmp[2]=(sport << 16) + dport;
tmp[3] = count; /* minute counter */
- SHATransform(tmp+16, tmp);
+ crypto_digest_digest(tfm, sg, 1, (char *)hash);
+ put_cpu_var(sha_tfm);

/* Add in the second hash and the data */
- return seq + ((tmp[17] + data) & COOKIEMASK);
+ return seq + ((hash[1] + data) & COOKIEMASK);
}

/*

@@ -1892,19 +1804,30 @@

+ tfm = get_cpu_var(sha_tfm);
+ crypto_digest_digest(tfm, sg, 1, (char *)hash);
+ put_cpu_var(sha_tfm);
+
+ cookie -= hash[1] + sseq;
/* Cookie is now reduced to (count * 2^24) ^ (hash % 2^24) */

diff = (count - (cookie >> COOKIEBITS)) & ((__u32)-1 >> COOKIEBITS);

@@ -1912,13 +1835,12 @@
return (__u32)-1;

memcpy(tmp+3, syncookie_secret[1], sizeof(syncookie_secret[1]));
- tmp[0] = saddr;
- tmp[1] = daddr;
- tmp[2] = (sport << 16) + dport;
tmp[3] = count - diff; /* minute counter */
- SHATransform(tmp+16, tmp);
+ tfm = get_cpu_var(sha_tfm);
+ crypto_digest_digest(tfm, sg, 1, (char *)hash);
+ put_cpu_var(sha_tfm);

- return (cookie - tmp[17]) & COOKIEMASK; /* Leaving the data behind */
+ return (cookie - hash[1]) & COOKIEMASK; /* Leaving the data behind */
}
#endif

--
Matt Mackall : http://www.selenic.com : of or relating to the moon

YOSHIFUJI Hideaki / 吉藤英明

Aug 10, 2003, 4:40:04 AM
In article <20030810081...@waste.org> (at Sun, 10 Aug 2003 03:15:29 -0500), Matt Mackall <m...@selenic.com> says:

> + sg[0].page = virt_to_page(tmp);
> + sg[0].offset = ((long) tmp & ~PAGE_MASK);
> + sg[0].length = sizeof(tmp);

BTW, we have ((long) ptr & ~PAGE_MASK) spread all over; it seems ugly.
Why don't we have virt_to_offset(ptr) or something like this?
#define virt_to_offset(ptr) ((unsigned long) (ptr) & ~PAGE_MASK)
Is this a bad idea?
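
(Hypothetical usage, to show the intent -- the scatterlist setup from
the patch above would then read:)

sg[0].page = virt_to_page(tmp);
sg[0].offset = virt_to_offset(tmp);
sg[0].length = sizeof(tmp);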

--
Hideaki YOSHIFUJI @ USAGI Project <yosh...@linux-ipv6.org>
GPG FP: 9022 65EB 1ECF 3AD1 0BDF 80D8 4807 F894 E062 0EEA

David S. Miller

Aug 10, 2003, 4:40:08 AM
On Sun, 10 Aug 2003 17:32:15 +0900 (JST)
YOSHIFUJI Hideaki / 吉藤英明 <yosh...@linux-ipv6.org> wrote:

> BTW, we have ((long) ptr & ~PAGE_MASK) spread all over; it seems ugly.
> Why don't we have virt_to_offset(ptr) or something like this?
> #define virt_to_offset(ptr) ((unsigned long) (ptr) & ~PAGE_MASK)
> Is this a bad idea?

With some name like "virt_to_pageoff()" it sounds like a great
idea.

Matt Mackall

Aug 10, 2003, 5:10:12 AM
On Sun, Aug 10, 2003 at 01:30:41AM -0700, David S. Miller wrote:
> On Sun, 10 Aug 2003 17:32:15 +0900 (JST)
> YOSHIFUJI Hideaki / 吉藤英明 <yosh...@linux-ipv6.org> wrote:
>
> > BTW, we have ((long) ptr & ~PAGE_MASK) spread all over; it seems ugly.
> > Why don't we have virt_to_offset(ptr) or something like this?
> > #define virt_to_offset(ptr) ((unsigned long) (ptr) & ~PAGE_MASK)
> > Is this a bad idea?
>
> With some name like "virt_to_pageoff()" it sounds like a great
> idea.

Had the same thought. Nowhere good to stick it, so I put it in mm.h.

diff -urN -X dontdiff orig/include/linux/mm.h work/include/linux/mm.h
--- orig/include/linux/mm.h 2003-08-10 03:16:39.000000000 -0500
+++ work/include/linux/mm.h 2003-08-10 04:03:25.000000000 -0500
@@ -400,6 +400,8 @@
#define VM_FAULT_MINOR 1
#define VM_FAULT_MAJOR 2

+#define virt_to_pageoff(p) ((unsigned long)(p) & ~PAGE_MASK)
+
extern void show_free_areas(void);

struct page *shmem_nopage(struct vm_area_struct * vma,



--
Matt Mackall : http://www.selenic.com : of or relating to the moon

YOSHIFUJI Hideaki / 吉藤英明

Aug 10, 2003, 5:10:11 AM
In article <20030810013041...@redhat.com> (at Sun, 10 Aug 2003 01:30:41 -0700), "David S. Miller" <da...@redhat.com> says:

> > BTW, we have ((long) ptr & ~PAGE_MASK) spread all over; it seems ugly.
> > Why don't we have virt_to_offset(ptr) or something like this?
> > #define virt_to_offset(ptr) ((unsigned long) (ptr) & ~PAGE_MASK)
> > Is this a bad idea?
>
> With some name like "virt_to_pageoff()" it sounds like a great
> idea.

Okay. How about this?
(I'm going to do the actual conversion soon.)

Index: linux-2.6/include/asm-alpha/mmzone.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/asm-alpha/mmzone.h,v
retrieving revision 1.9
diff -u -r1.9 mmzone.h
--- linux-2.6/include/asm-alpha/mmzone.h 3 Jul 2003 05:47:43 -0000 1.9
+++ linux-2.6/include/asm-alpha/mmzone.h 10 Aug 2003 07:30:53 -0000
@@ -77,6 +77,7 @@
NODE_DATA(kvaddr_to_nid(kaddr))->valid_addr_bitmap)

#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define virt_to_pageoff(kaddr) ((unsigned long)(kaddr) & ~PAGE_MASK)

#define VALID_PAGE(page) (((page) - mem_map) < max_mapnr)

Index: linux-2.6/include/asm-alpha/page.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/asm-alpha/page.h,v
retrieving revision 1.12
diff -u -r1.12 page.h
--- linux-2.6/include/asm-alpha/page.h 13 Jan 2003 23:24:04 -0000 1.12
+++ linux-2.6/include/asm-alpha/page.h 10 Aug 2003 07:30:53 -0000
@@ -90,6 +90,7 @@
#define pfn_to_page(pfn) (mem_map + (pfn))
#define page_to_pfn(page) ((unsigned long)((page) - mem_map))
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define virt_to_pageoff(kaddr) ((unsigned long)(kaddr) & ~PAGE_MASK)

#define pfn_valid(pfn) ((pfn) < max_mapnr)
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
Index: linux-2.6/include/asm-arm26/memory.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/asm-arm26/memory.h,v
retrieving revision 1.2
diff -u -r1.2 memory.h
--- linux-2.6/include/asm-arm26/memory.h 8 Jun 2003 16:21:42 -0000 1.2
+++ linux-2.6/include/asm-arm26/memory.h 10 Aug 2003 07:30:53 -0000
@@ -86,6 +86,7 @@
#define pfn_valid(pfn) ((pfn) >= PHYS_PFN_OFFSET && (pfn) < (PHYS_PFN_OFFSET + max_mapnr))

#define virt_to_page(kaddr) (pfn_to_page(__pa(kaddr) >> PAGE_SHIFT))
+#define virt_to_pageoff(kaddr) ((unsigned long)(kaddr) & ~PAGE_MASK)
#define virt_addr_valid(kaddr) ((int)(kaddr) >= PAGE_OFFSET && (int)(kaddr) < (unsigned long)high_memory)

/*
Index: linux-2.6/include/asm-cris/page.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/asm-cris/page.h,v
retrieving revision 1.7
diff -u -r1.7 page.h
--- linux-2.6/include/asm-cris/page.h 10 Jul 2003 17:33:07 -0000 1.7
+++ linux-2.6/include/asm-cris/page.h 10 Aug 2003 07:30:53 -0000
@@ -50,6 +50,7 @@
*/

#define virt_to_page(kaddr) (mem_map + (((unsigned long)(kaddr) - PAGE_OFFSET) >> PAGE_SHIFT))
+#define virt_to_pageoff(kaddr) ((unsigned long)(kaddr) & ~PAGE_MASK)
#define VALID_PAGE(page) (((page) - mem_map) < max_mapnr)
#define virt_addr_valid(kaddr) pfn_valid((kaddr) >> PAGE_SHIFT)

Index: linux-2.6/include/asm-h8300/page.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/asm-h8300/page.h,v
retrieving revision 1.2
diff -u -r1.2 page.h
--- linux-2.6/include/asm-h8300/page.h 17 Apr 2003 23:27:57 -0000 1.2
+++ linux-2.6/include/asm-h8300/page.h 10 Aug 2003 07:30:53 -0000
@@ -84,7 +84,7 @@

#define MAP_NR(addr) (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT)
#define virt_to_page(addr) (mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT))
-#define virt_to_page(addr) (mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT))
+#define virt_to_pageoff(kaddr) ((unsigned long)(kaddr) & ~PAGE_MASK)
#define page_to_virt(page) ((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)
#define VALID_PAGE(page) ((page - mem_map) < max_mapnr)

Index: linux-2.6/include/asm-i386/page.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/asm-i386/page.h,v
retrieving revision 1.25
diff -u -r1.25 page.h
--- linux-2.6/include/asm-i386/page.h 31 Mar 2003 22:29:20 -0000 1.25
+++ linux-2.6/include/asm-i386/page.h 10 Aug 2003 07:30:53 -0000
@@ -133,6 +133,7 @@
#define pfn_valid(pfn) ((pfn) < max_mapnr)
#endif /* !CONFIG_DISCONTIGMEM */
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define virt_to_pageoff(kaddr) ((unsigned long)(kaddr) & ~PAGE_MASK)

#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)

Index: linux-2.6/include/asm-ia64/mmzone.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/asm-ia64/mmzone.h,v
retrieving revision 1.5
diff -u -r1.5 mmzone.h
--- linux-2.6/include/asm-ia64/mmzone.h 6 Mar 2003 21:34:32 -0000 1.5
+++ linux-2.6/include/asm-ia64/mmzone.h 10 Aug 2003 07:30:53 -0000
@@ -63,6 +63,7 @@
(VALID_MEM_KADDR(_kvtp)) \
? BANK_MEM_MAP_BASE(_kvtp) + BANK_MAP_NR(_kvtp) \
: NULL;})
+#define virt_to_pageoff(kaddr) ((unsigned long)(kaddr) & ~PAGE_MASK)

/*
* Given a page struct entry, return the physical address that the page struct represents.
Index: linux-2.6/include/asm-ia64/page.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/asm-ia64/page.h,v
retrieving revision 1.18
diff -u -r1.18 page.h
--- linux-2.6/include/asm-ia64/page.h 21 Jun 2003 16:19:17 -0000 1.18
+++ linux-2.6/include/asm-ia64/page.h 10 Aug 2003 07:30:53 -0000
@@ -101,6 +101,7 @@
# define pfn_valid(pfn) ((pfn) < max_mapnr)
# endif
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define virt_to_pageoff(kaddr) ((unsigned long)(kaddr) & ~PAGE_MASK)
#define page_to_pfn(page) ((unsigned long) (page - mem_map))
#define pfn_to_page(pfn) (mem_map + (pfn))
#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
Index: linux-2.6/include/asm-m68k/page.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/asm-m68k/page.h,v
retrieving revision 1.11
diff -u -r1.11 page.h
--- linux-2.6/include/asm-m68k/page.h 13 Apr 2003 09:55:02 -0000 1.11
+++ linux-2.6/include/asm-m68k/page.h 10 Aug 2003 07:30:53 -0000
@@ -179,6 +179,7 @@
#define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT)

#define virt_to_page(kaddr) (mem_map + (((unsigned long)(kaddr)-PAGE_OFFSET) >> PAGE_SHIFT))
+#define virt_to_pageoff(kaddr) ((unsigned long)(kaddr) & ~PAGE_MASK)
#define page_to_virt(page) ((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)

#define pfn_to_page(pfn) virt_to_page(pfn_to_virt(pfn))
Index: linux-2.6/include/asm-m68knommu/page.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/asm-m68knommu/page.h,v
retrieving revision 1.4
diff -u -r1.4 page.h
--- linux-2.6/include/asm-m68knommu/page.h 7 Jul 2003 00:20:14 -0000 1.4
+++ linux-2.6/include/asm-m68knommu/page.h 10 Aug 2003 07:30:53 -0000
@@ -85,6 +85,7 @@
#define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT)

#define virt_to_page(addr) (mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT))
+#define virt_to_pageoff(kaddr) ((unsigned long)(kaddr) & ~PAGE_MASK)
#define page_to_virt(page) ((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)
#define VALID_PAGE(page) ((page - mem_map) < max_mapnr)

Index: linux-2.6/include/asm-mips/mmzone.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/asm-mips/mmzone.h,v
retrieving revision 1.2
diff -u -r1.2 mmzone.h
--- linux-2.6/include/asm-mips/mmzone.h 1 Aug 2003 05:40:55 -0000 1.2
+++ linux-2.6/include/asm-mips/mmzone.h 10 Aug 2003 07:30:54 -0000
@@ -84,6 +84,7 @@
((((page)-(page)->zone->zone_mem_map) + (page)->zone->zone_start_pfn) \
<< PAGE_SHIFT)
#define virt_to_page(kaddr) pfn_to_page(MIPS64_NR(kaddr))
+#define virt_to_pageoff(kaddr) ((unsigned long)(kaddr) & ~PAGE_MASK)

#define pfn_valid(pfn) ((pfn) < max_mapnr)
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
Index: linux-2.6/include/asm-mips/page.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/asm-mips/page.h,v
retrieving revision 1.7
diff -u -r1.7 page.h
--- linux-2.6/include/asm-mips/page.h 1 Aug 2003 05:40:55 -0000 1.7
+++ linux-2.6/include/asm-mips/page.h 10 Aug 2003 07:30:54 -0000
@@ -115,6 +115,7 @@
#define pfn_to_page(pfn) (mem_map + (pfn))
#define page_to_pfn(page) ((unsigned long)((page) - mem_map))
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define virt_to_pageoff(kaddr) ((unsigned long)(kaddr) & ~PAGE_MASK)

#define pfn_valid(pfn) ((pfn) < max_mapnr)
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
Index: linux-2.6/include/asm-parisc/mmzone.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/asm-parisc/mmzone.h,v
retrieving revision 1.2
diff -u -r1.2 mmzone.h
--- linux-2.6/include/asm-parisc/mmzone.h 30 Oct 2002 23:00:20 -0000 1.2
+++ linux-2.6/include/asm-parisc/mmzone.h 10 Aug 2003 07:30:54 -0000
@@ -24,6 +24,7 @@
+ ((paddr) >> PAGE_SHIFT))

#define virt_to_page(kvaddr) phys_to_page(__pa(kvaddr))
+#define virt_to_pageoff(kaddr) ((unsigned long)(kaddr) & ~PAGE_MASK)

/* This is kind of bogus, need to investigate performance of doing it right */
#define VALID_PAGE(page) ((page - mem_map) < max_mapnr)
Index: linux-2.6/include/asm-parisc/page.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/asm-parisc/page.h,v
retrieving revision 1.5
diff -u -r1.5 page.h
--- linux-2.6/include/asm-parisc/page.h 13 Jan 2003 23:24:04 -0000 1.5
+++ linux-2.6/include/asm-parisc/page.h 10 Aug 2003 07:30:54 -0000
@@ -106,6 +106,7 @@

#ifndef CONFIG_DISCONTIGMEM
#define virt_to_page(kaddr) (mem_map + (__pa(kaddr) >> PAGE_SHIFT))
+#define virt_to_pageoff(kaddr) ((unsigned long)(kaddr) & ~PAGE_MASK)
#define VALID_PAGE(page) ((page - mem_map) < max_mapnr)
#endif /* !CONFIG_DISCONTIGMEM */

Index: linux-2.6/include/asm-ppc/page.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/asm-ppc/page.h,v
retrieving revision 1.14
diff -u -r1.14 page.h
--- linux-2.6/include/asm-ppc/page.h 26 Feb 2003 03:12:12 -0000 1.14
+++ linux-2.6/include/asm-ppc/page.h 10 Aug 2003 07:30:54 -0000
@@ -123,6 +123,7 @@
#define pfn_to_page(pfn) (mem_map + ((pfn) - PPC_PGSTART))
#define page_to_pfn(page) ((unsigned long)((page) - mem_map) + PPC_PGSTART)
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define virt_to_pageoff(kaddr) ((unsigned long)(kaddr) & ~PAGE_MASK)

#define pfn_valid(pfn) (((pfn) - PPC_PGSTART) < max_mapnr)
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
Index: linux-2.6/include/asm-ppc64/page.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/asm-ppc64/page.h,v
retrieving revision 1.14
diff -u -r1.14 page.h
--- linux-2.6/include/asm-ppc64/page.h 5 May 2003 00:57:02 -0000 1.14
+++ linux-2.6/include/asm-ppc64/page.h 10 Aug 2003 07:30:54 -0000
@@ -198,6 +198,7 @@
#endif

#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define virt_to_pageoff(kaddr) ((unsigned long)(kaddr) & ~PAGE_MASK)

#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)

Index: linux-2.6/include/asm-s390/page.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/asm-s390/page.h,v
retrieving revision 1.10
diff -u -r1.10 page.h
--- linux-2.6/include/asm-s390/page.h 14 Apr 2003 18:27:30 -0000 1.10
+++ linux-2.6/include/asm-s390/page.h 10 Aug 2003 07:30:54 -0000
@@ -174,6 +174,7 @@
#define pfn_to_page(pfn) (mem_map + (pfn))
#define page_to_pfn(page) ((unsigned long)((page) - mem_map))
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define virt_to_pageoff(kaddr) ((unsigned long)(kaddr) & ~PAGE_MASK)

#define pfn_valid(pfn) ((pfn) < max_mapnr)
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
Index: linux-2.6/include/asm-sh/page.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/asm-sh/page.h,v
retrieving revision 1.5
diff -u -r1.5 page.h
--- linux-2.6/include/asm-sh/page.h 2 Jul 2003 05:34:18 -0000 1.5
+++ linux-2.6/include/asm-sh/page.h 10 Aug 2003 07:30:54 -0000
@@ -94,6 +94,7 @@
#define pfn_to_page(pfn) (mem_map + (pfn) - PFN_START)
#define page_to_pfn(page) ((unsigned long)((page) - mem_map) + PFN_START)
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define virt_to_pageoff(kaddr) ((unsigned long)(kaddr) & ~PAGE_MASK)
#define pfn_valid(pfn) (((pfn) - PFN_START) < max_mapnr)
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)

Index: linux-2.6/include/asm-sparc/page.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/asm-sparc/page.h,v
retrieving revision 1.10
diff -u -r1.10 page.h
--- linux-2.6/include/asm-sparc/page.h 3 May 2003 17:28:36 -0000 1.10
+++ linux-2.6/include/asm-sparc/page.h 10 Aug 2003 07:30:54 -0000
@@ -165,6 +165,7 @@
#define pfn_to_page(pfn) (mem_map + (pfn))
#define page_to_pfn(page) ((unsigned long)((page) - mem_map))
#define virt_to_page(kaddr) (mem_map + (__pa(kaddr) >> PAGE_SHIFT))
+#define virt_to_pageoff(kaddr) ((unsigned long)(kaddr) & ~PAGE_MASK)
#define pfn_valid(pfn) ((pfn) < max_mapnr)
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)

Index: linux-2.6/include/asm-sparc64/page.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/asm-sparc64/page.h,v
retrieving revision 1.13
diff -u -r1.13 page.h
--- linux-2.6/include/asm-sparc64/page.h 13 Jan 2003 23:24:04 -0000 1.13
+++ linux-2.6/include/asm-sparc64/page.h 10 Aug 2003 07:30:54 -0000
@@ -123,6 +123,7 @@
#define pfn_to_page(pfn) (mem_map + ((pfn)-(pfn_base)))
#define page_to_pfn(page) ((unsigned long)(((page) - mem_map) + pfn_base))
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr)>>PAGE_SHIFT)
+#define virt_to_pageoff(kaddr) ((unsigned long)(kaddr) & ~PAGE_MASK)

#define pfn_valid(pfn) (((pfn)-(pfn_base)) < max_mapnr)
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
Index: linux-2.6/include/asm-um/page.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/asm-um/page.h,v
retrieving revision 1.5
diff -u -r1.5 page.h
--- linux-2.6/include/asm-um/page.h 6 Feb 2003 04:13:38 -0000 1.5
+++ linux-2.6/include/asm-um/page.h 10 Aug 2003 07:30:54 -0000
@@ -36,6 +36,7 @@
extern struct page *phys_to_page(unsigned long phys);

#define virt_to_page(v) (phys_to_page(__pa(v)))
+#define virt_to_pageoff(kaddr) ((unsigned long)(kaddr) & ~PAGE_MASK)

extern struct page *page_mem_map(struct page *page);

Index: linux-2.6/include/asm-v850/page.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/asm-v850/page.h,v
retrieving revision 1.4
diff -u -r1.4 page.h
--- linux-2.6/include/asm-v850/page.h 13 Apr 2003 09:55:02 -0000 1.4
+++ linux-2.6/include/asm-v850/page.h 10 Aug 2003 07:30:54 -0000
@@ -127,6 +127,7 @@
#define MAP_NR(kaddr) \
(((unsigned long)(kaddr) - PAGE_OFFSET) >> PAGE_SHIFT)
#define virt_to_page(kaddr) (mem_map + MAP_NR (kaddr))
+#define virt_to_pageoff(kaddr) ((unsigned long)(kaddr) & ~PAGE_MASK)
#define page_to_virt(page) \
((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)

Index: linux-2.6/include/asm-x86_64/page.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/asm-x86_64/page.h,v
retrieving revision 1.12
diff -u -r1.12 page.h
--- linux-2.6/include/asm-x86_64/page.h 21 Jun 2003 16:18:33 -0000 1.12
+++ linux-2.6/include/asm-x86_64/page.h 10 Aug 2003 07:30:54 -0000
@@ -122,6 +122,7 @@
#endif

#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define virt_to_pageoff(kaddr) ((unsigned long)(kaddr) & ~PAGE_MASK)
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)

--
Hideaki YOSHIFUJI @ USAGI Project <yosh...@linux-ipv6.org>
GPG FP: 9022 65EB 1ECF 3AD1 0BDF 80D8 4807 F894 E062 0EEA

David S. Miller
Aug 10, 2003, 5:20:08 AM
On Sun, 10 Aug 2003 04:05:56 -0500
Matt Mackall <m...@selenic.com> wrote:

> > With some name like "virt_to_pageoff()" it sounds like a great
> > idea.
>
> Had the same thought. Nowhere good to stick it, so I put it in mm.

That looks fine to me.

Yoshifuji, feel free to do your conversions, and use this
linux/mm.h placement of the virt_to_pageoff() macro instead
of having to put it into every asm header.

YOSHIFUJI Hideaki / 吉藤英明
Aug 10, 2003, 7:10:07 AM
[2/9] convert cryptoapi to virt_to_pageoff().

Index: linux-2.6/crypto/hmac.c
===================================================================
RCS file: /home/cvs/linux-2.5/crypto/hmac.c,v
retrieving revision 1.3
diff -u -r1.3 hmac.c
--- linux-2.6/crypto/hmac.c 19 Nov 2002 03:02:31 -0000 1.3
+++ linux-2.6/crypto/hmac.c 10 Aug 2003 08:40:52 -0000
@@ -26,7 +26,7 @@
struct scatterlist tmp;

tmp.page = virt_to_page(key);
- tmp.offset = ((long)key & ~PAGE_MASK);
+ tmp.offset = virt_to_pageoff(key);
tmp.length = keylen;
crypto_digest_digest(tfm, &tmp, 1, key);

@@ -71,7 +71,7 @@
ipad[i] ^= 0x36;

tmp.page = virt_to_page(ipad);
- tmp.offset = ((long)ipad & ~PAGE_MASK);
+ tmp.offset = virt_to_pageoff(ipad);
tmp.length = crypto_tfm_alg_blocksize(tfm);

crypto_digest_init(tfm);
@@ -105,14 +105,14 @@
opad[i] ^= 0x5c;

tmp.page = virt_to_page(opad);
- tmp.offset = ((long)opad & ~PAGE_MASK);
+ tmp.offset = virt_to_pageoff(opad);
tmp.length = crypto_tfm_alg_blocksize(tfm);

crypto_digest_init(tfm);
crypto_digest_update(tfm, &tmp, 1);

tmp.page = virt_to_page(out);
- tmp.offset = ((long)out & ~PAGE_MASK);
+ tmp.offset = virt_to_pageoff(out);
tmp.length = crypto_tfm_alg_digestsize(tfm);

crypto_digest_update(tfm, &tmp, 1);
Index: linux-2.6/crypto/tcrypt.c
===================================================================
RCS file: /home/cvs/linux-2.5/crypto/tcrypt.c,v
retrieving revision 1.13
diff -u -r1.13 tcrypt.c
--- linux-2.6/crypto/tcrypt.c 1 Aug 2003 22:02:12 -0000 1.13
+++ linux-2.6/crypto/tcrypt.c 10 Aug 2003 08:40:52 -0000
@@ -96,7 +96,7 @@

p = md5_tv[i].plaintext;
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = strlen(md5_tv[i].plaintext);

crypto_digest_init(tfm);
@@ -119,12 +119,12 @@

p = &xbuf[IDX1];
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = 13;

p = &xbuf[IDX2];
sg[1].page = virt_to_page(p);
- sg[1].offset = ((long) p & ~PAGE_MASK);
+ sg[1].offset = virt_to_pageoff(p);
sg[1].length = 13;

memset(result, 0, sizeof (result));
@@ -173,7 +173,7 @@

p = hmac_md5_tv[i].plaintext;
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = strlen(hmac_md5_tv[i].plaintext);

klen = strlen(hmac_md5_tv[i].key);
@@ -195,12 +195,12 @@

p = &xbuf[IDX1];
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = 16;

p = &xbuf[IDX2];
sg[1].page = virt_to_page(p);
- sg[1].offset = ((long) p & ~PAGE_MASK);
+ sg[1].offset = virt_to_pageoff(p);
sg[1].length = 12;

memset(result, 0, sizeof (result));
@@ -250,7 +250,7 @@

p = hmac_sha1_tv[i].plaintext;
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = strlen(hmac_sha1_tv[i].plaintext);

klen = strlen(hmac_sha1_tv[i].key);
@@ -274,12 +274,12 @@

p = &xbuf[IDX1];
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = 16;

p = &xbuf[IDX2];
sg[1].page = virt_to_page(p);
- sg[1].offset = ((long) p & ~PAGE_MASK);
+ sg[1].offset = virt_to_pageoff(p);
sg[1].length = 12;

memset(result, 0, sizeof (result));
@@ -329,7 +329,7 @@

p = hmac_sha256_tv[i].plaintext;
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = strlen(hmac_sha256_tv[i].plaintext);

klen = strlen(hmac_sha256_tv[i].key);
@@ -383,7 +383,7 @@

p = md4_tv[i].plaintext;
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = strlen(md4_tv[i].plaintext);

crypto_digest_digest(tfm, sg, 1, result);
@@ -433,7 +433,7 @@

p = sha1_tv[i].plaintext;
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = strlen(sha1_tv[i].plaintext);

crypto_digest_init(tfm);
@@ -456,12 +456,12 @@

p = &xbuf[IDX1];
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = 28;

p = &xbuf[IDX2];
sg[1].page = virt_to_page(p);
- sg[1].offset = ((long) p & ~PAGE_MASK);
+ sg[1].offset = virt_to_pageoff(p);
sg[1].length = 28;

memset(result, 0, sizeof (result));
@@ -508,7 +508,7 @@

p = sha256_tv[i].plaintext;
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = strlen(sha256_tv[i].plaintext);

crypto_digest_init(tfm);
@@ -531,12 +531,12 @@

p = &xbuf[IDX1];
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = 28;

p = &xbuf[IDX2];
sg[1].page = virt_to_page(p);
- sg[1].offset = ((long) p & ~PAGE_MASK);
+ sg[1].offset = virt_to_pageoff(p);
sg[1].length = 28;

memset(result, 0, sizeof (result));
@@ -584,7 +584,7 @@

p = sha384_tv[i].plaintext;
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = strlen(sha384_tv[i].plaintext);

crypto_digest_init(tfm);
@@ -636,7 +636,7 @@

p = sha512_tv[i].plaintext;
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = strlen(sha512_tv[i].plaintext);

crypto_digest_init(tfm);
@@ -701,7 +701,7 @@

p = des_tv[i].plaintext;
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = len;
ret = crypto_cipher_encrypt(tfm, sg, sg, len);
if (ret) {
@@ -738,12 +738,12 @@

p = &xbuf[IDX1];
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = 8;

p = &xbuf[IDX2];
sg[1].page = virt_to_page(p);
- sg[1].offset = ((long) p & ~PAGE_MASK);
+ sg[1].offset = virt_to_pageoff(p);
sg[1].length = 8;

ret = crypto_cipher_encrypt(tfm, sg, sg, 16);
@@ -801,17 +801,17 @@

p = &xbuf[IDX3];
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = 14;

p = &xbuf[IDX4];
sg[1].page = virt_to_page(p);
- sg[1].offset = ((long) p & ~PAGE_MASK);
+ sg[1].offset = virt_to_pageoff(p);
sg[1].length = 10;

p = &xbuf[IDX5];
sg[2].page = virt_to_page(p);
- sg[2].offset = ((long) p & ~PAGE_MASK);
+ sg[2].offset = virt_to_pageoff(p);
sg[2].length = 8;

ret = crypto_cipher_encrypt(tfm, sg, sg, 32);
@@ -872,22 +872,22 @@

p = &xbuf[IDX3];
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = 2;

p = &xbuf[IDX4];
sg[1].page = virt_to_page(p);
- sg[1].offset = ((long) p & ~PAGE_MASK);
+ sg[1].offset = virt_to_pageoff(p);
sg[1].length = 1;

p = &xbuf[IDX5];
sg[2].page = virt_to_page(p);
- sg[2].offset = ((long) p & ~PAGE_MASK);
+ sg[2].offset = virt_to_pageoff(p);
sg[2].length = 3;

p = &xbuf[IDX6];
sg[3].page = virt_to_page(p);
- sg[3].offset = ((long) p & ~PAGE_MASK);
+ sg[3].offset = virt_to_pageoff(p);
sg[3].length = 18;

ret = crypto_cipher_encrypt(tfm, sg, sg, 24);
@@ -956,27 +956,27 @@

p = &xbuf[IDX3];
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = 2;

p = &xbuf[IDX4];
sg[1].page = virt_to_page(p);
- sg[1].offset = ((long) p & ~PAGE_MASK);
+ sg[1].offset = virt_to_pageoff(p);
sg[1].length = 2;

p = &xbuf[IDX5];
sg[2].page = virt_to_page(p);
- sg[2].offset = ((long) p & ~PAGE_MASK);
+ sg[2].offset = virt_to_pageoff(p);
sg[2].length = 2;

p = &xbuf[IDX6];
sg[3].page = virt_to_page(p);
- sg[3].offset = ((long) p & ~PAGE_MASK);
+ sg[3].offset = virt_to_pageoff(p);
sg[3].length = 2;

p = &xbuf[IDX7];
sg[4].page = virt_to_page(p);
- sg[4].offset = ((long) p & ~PAGE_MASK);
+ sg[4].offset = virt_to_pageoff(p);
sg[4].length = 8;

ret = crypto_cipher_encrypt(tfm, sg, sg, 16);
@@ -1040,42 +1040,42 @@

p = &xbuf[IDX1];
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = 1;

p = &xbuf[IDX2];
sg[1].page = virt_to_page(p);
- sg[1].offset = ((long) p & ~PAGE_MASK);
+ sg[1].offset = virt_to_pageoff(p);
sg[1].length = 1;

p = &xbuf[IDX3];
sg[2].page = virt_to_page(p);
- sg[2].offset = ((long) p & ~PAGE_MASK);
+ sg[2].offset = virt_to_pageoff(p);
sg[2].length = 1;

p = &xbuf[IDX4];
sg[3].page = virt_to_page(p);
- sg[3].offset = ((long) p & ~PAGE_MASK);
+ sg[3].offset = virt_to_pageoff(p);
sg[3].length = 1;

p = &xbuf[IDX5];
sg[4].page = virt_to_page(p);
- sg[4].offset = ((long) p & ~PAGE_MASK);
+ sg[4].offset = virt_to_pageoff(p);
sg[4].length = 1;

p = &xbuf[IDX6];
sg[5].page = virt_to_page(p);
- sg[5].offset = ((long) p & ~PAGE_MASK);
+ sg[5].offset = virt_to_pageoff(p);
sg[5].length = 1;

p = &xbuf[IDX7];
sg[6].page = virt_to_page(p);
- sg[6].offset = ((long) p & ~PAGE_MASK);
+ sg[6].offset = virt_to_pageoff(p);
sg[6].length = 1;

p = &xbuf[IDX8];
sg[7].page = virt_to_page(p);
- sg[7].offset = ((long) p & ~PAGE_MASK);
+ sg[7].offset = virt_to_pageoff(p);
sg[7].length = 1;

ret = crypto_cipher_encrypt(tfm, sg, sg, 8);
@@ -1117,7 +1117,7 @@

p = des_tv[i].plaintext;
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = len;

ret = crypto_cipher_decrypt(tfm, sg, sg, sg[0].length);
@@ -1155,12 +1155,12 @@

p = &xbuf[IDX1];
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = 8;

p = &xbuf[IDX2];
sg[1].page = virt_to_page(p);
- sg[1].offset = ((long) p & ~PAGE_MASK);
+ sg[1].offset = virt_to_pageoff(p);
sg[1].length = 8;

ret = crypto_cipher_decrypt(tfm, sg, sg, 16);
@@ -1207,17 +1207,17 @@

p = &xbuf[IDX1];
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = 3;

p = &xbuf[IDX2];
sg[1].page = virt_to_page(p);
- sg[1].offset = ((long) p & ~PAGE_MASK);
+ sg[1].offset = virt_to_pageoff(p);
sg[1].length = 12;

p = &xbuf[IDX3];
sg[2].page = virt_to_page(p);
- sg[2].offset = ((long) p & ~PAGE_MASK);
+ sg[2].offset = virt_to_pageoff(p);
sg[2].length = 1;

ret = crypto_cipher_decrypt(tfm, sg, sg, 16);
@@ -1284,7 +1284,7 @@
p = des_tv[i].plaintext;

sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = len;

crypto_cipher_set_iv(tfm, des_tv[i].iv,
@@ -1339,12 +1339,12 @@

p = &xbuf[IDX1];
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = 13;

p = &xbuf[IDX2];
sg[1].page = virt_to_page(p);
- sg[1].offset = ((long) p & ~PAGE_MASK);
+ sg[1].offset = virt_to_pageoff(p);
sg[1].length = 11;

crypto_cipher_set_iv(tfm, des_tv[i].iv, crypto_tfm_alg_ivsize(tfm));
@@ -1392,7 +1392,7 @@
p = des_tv[i].plaintext;

sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = len;

crypto_cipher_set_iv(tfm, des_tv[i].iv,
@@ -1440,12 +1440,12 @@

p = &xbuf[IDX1];
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = 4;

p = &xbuf[IDX2];
sg[1].page = virt_to_page(p);
- sg[1].offset = ((long) p & ~PAGE_MASK);
+ sg[1].offset = virt_to_pageoff(p);
sg[1].length = 4;

crypto_cipher_set_iv(tfm, des_tv[i].iv, crypto_tfm_alg_ivsize(tfm));
@@ -1516,7 +1516,7 @@

p = des_tv[i].plaintext;
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = len;
ret = crypto_cipher_encrypt(tfm, sg, sg, len);
if (ret) {
@@ -1559,7 +1559,7 @@

p = des_tv[i].plaintext;
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = len;
ret = crypto_cipher_decrypt(tfm, sg, sg, len);
if (ret) {
@@ -1622,7 +1622,7 @@

p = bf_tv[i].plaintext;
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = bf_tv[i].plen;
ret = crypto_cipher_encrypt(tfm, sg, sg, sg[0].length);
if (ret) {
@@ -1664,7 +1664,7 @@

p = bf_tv[i].plaintext;
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = bf_tv[i].plen;
ret = crypto_cipher_decrypt(tfm, sg, sg, sg[0].length);
if (ret) {
@@ -1713,7 +1713,7 @@
p = bf_tv[i].plaintext;

sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = bf_tv[i].plen;

crypto_cipher_set_iv(tfm, bf_tv[i].iv,
@@ -1758,7 +1758,7 @@
p = bf_tv[i].plaintext;

sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = bf_tv[i].plen;

crypto_cipher_set_iv(tfm, bf_tv[i].iv,
@@ -1827,7 +1827,7 @@

p = tf_tv[i].plaintext;
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = tf_tv[i].plen;
ret = crypto_cipher_encrypt(tfm, sg, sg, sg[0].length);
if (ret) {
@@ -1869,7 +1869,7 @@

p = tf_tv[i].plaintext;
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = tf_tv[i].plen;
ret = crypto_cipher_decrypt(tfm, sg, sg, sg[0].length);
if (ret) {
@@ -1918,7 +1918,7 @@
p = tf_tv[i].plaintext;

sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = tf_tv[i].plen;

crypto_cipher_set_iv(tfm, tf_tv[i].iv,
@@ -1964,7 +1964,7 @@
p = tf_tv[i].plaintext;

sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = tf_tv[i].plen;

crypto_cipher_set_iv(tfm, tf_tv[i].iv,
@@ -2028,7 +2028,7 @@

p = serp_tv[i].plaintext;
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = sizeof(serp_tv[i].plaintext);
ret = crypto_cipher_encrypt(tfm, sg, sg, sg[0].length);
if (ret) {
@@ -2068,7 +2068,7 @@

p = serp_tv[i].plaintext;
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = sizeof(serp_tv[i].plaintext);
ret = crypto_cipher_decrypt(tfm, sg, sg, sg[0].length);
if (ret) {
@@ -2131,7 +2131,7 @@

p = aes_tv[i].plaintext;
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = aes_tv[i].plen;
ret = crypto_cipher_encrypt(tfm, sg, sg, sg[0].length);
if (ret) {
@@ -2173,7 +2173,7 @@

p = aes_tv[i].plaintext;
sg[0].page = virt_to_page(p);
- sg[0].offset = ((long) p & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(p);
sg[0].length = aes_tv[i].plen;
ret = crypto_cipher_decrypt(tfm, sg, sg, sg[0].length);
if (ret) {

--
Hideaki YOSHIFUJI @ USAGI Project <yosh...@linux-ipv6.org>
GPG FP: 9022 65EB 1ECF 3AD1 0BDF 80D8 4807 F894 E062 0EEA

YOSHIFUJI Hideaki / 吉藤英明
Aug 10, 2003, 7:10:07 AM
In article <20030810020444...@redhat.com> (at Sun, 10 Aug 2003 02:04:44 -0700), "David S. Miller" <da...@redhat.com> says:

> Yoshifuji, feel free to do your conversions, and use this
> linux/mm.h placement of the virt_to_pageoff() macro instead
> of having to put it into every asm header.

[1/9] introduce virt_to_pageoff().

Index: linux-2.6/Documentation/DMA-mapping.txt
===================================================================
RCS file: /home/cvs/linux-2.5/Documentation/DMA-mapping.txt,v
retrieving revision 1.17
diff -u -r1.17 DMA-mapping.txt
--- linux-2.6/Documentation/DMA-mapping.txt 1 Aug 2003 19:02:34 -0000 1.17
+++ linux-2.6/Documentation/DMA-mapping.txt 10 Aug 2003 08:40:50 -0000
@@ -689,7 +689,7 @@
and offset using something like this:

struct page *page = virt_to_page(ptr);
- unsigned long offset = ((unsigned long)ptr & ~PAGE_MASK);
+ unsigned long offset = virt_to_pageoff(ptr);

Here are the interfaces:

Index: linux-2.6/include/linux/mm.h
===================================================================
RCS file: /home/cvs/linux-2.5/include/linux/mm.h,v
retrieving revision 1.125
diff -u -r1.125 mm.h
--- linux-2.6/include/linux/mm.h 1 Aug 2003 17:02:32 -0000 1.125
+++ linux-2.6/include/linux/mm.h 10 Aug 2003 08:40:55 -0000
@@ -400,6 +400,8 @@
#define VM_FAULT_MINOR 1
#define VM_FAULT_MAJOR 2

+#define virt_to_pageoff(p) ((unsigned long)(p) & ~PAGE_MASK)
+
extern void show_free_areas(void);

struct page *shmem_nopage(struct vm_area_struct * vma,
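
For reference, this is the before/after shape that every call-site
patch in this series applies (the buffer and length names here are
illustrative, not taken from any one driver):

	/* before: open-coded offset computation */
	sg.page   = virt_to_page(buf);
	sg.offset = ((unsigned long) buf & ~PAGE_MASK);
	sg.length = len;

	/* after: the new helper from linux/mm.h */
	sg.page   = virt_to_page(buf);
	sg.offset = virt_to_pageoff(buf);
	sg.length = len;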

--
Hideaki YOSHIFUJI @ USAGI Project <yosh...@linux-ipv6.org>
GPG FP: 9022 65EB 1ECF 3AD1 0BDF 80D8 4807 F894 E062 0EEA

YOSHIFUJI Hideaki / 吉藤英明
Aug 10, 2003, 7:10:12 AM
[3/9] convert net to virt_to_pageoff().

Index: linux-2.6/net/ipv6/addrconf.c
===================================================================
RCS file: /home/cvs/linux-2.5/net/ipv6/addrconf.c,v
retrieving revision 1.48
diff -u -r1.48 addrconf.c
--- linux-2.6/net/ipv6/addrconf.c 25 Jul 2003 23:58:59 -0000 1.48
+++ linux-2.6/net/ipv6/addrconf.c 10 Aug 2003 08:40:55 -0000
@@ -1110,10 +1110,10 @@
struct scatterlist sg[2];

sg[0].page = virt_to_page(idev->entropy);
- sg[0].offset = ((long) idev->entropy & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(idev->entropy);
sg[0].length = 8;

sg[1].page = virt_to_page(eui64);
- sg[1].offset = ((long) eui64 & ~PAGE_MASK);
+ sg[1].offset = virt_to_pageoff(eui64);
sg[1].length = 8;

dev = idev->dev;
Index: linux-2.6/net/sunrpc/auth_gss/gss_krb5_crypto.c
===================================================================
RCS file: /home/cvs/linux-2.5/net/sunrpc/auth_gss/gss_krb5_crypto.c,v
retrieving revision 1.3
diff -u -r1.3 gss_krb5_crypto.c
--- linux-2.6/net/sunrpc/auth_gss/gss_krb5_crypto.c 4 Feb 2003 17:55:46 -0000 1.3
+++ linux-2.6/net/sunrpc/auth_gss/gss_krb5_crypto.c 10 Aug 2003 08:40:55 -0000
@@ -75,7 +75,7 @@

memcpy(out, in, length);
sg[0].page = virt_to_page(out);
- sg[0].offset = ((long)out & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(out);
sg[0].length = length;

ret = crypto_cipher_encrypt(tfm, sg, sg, length);
@@ -114,7 +114,7 @@

memcpy(out, in, length);
sg[0].page = virt_to_page(out);
- sg[0].offset = ((long)out & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(out);
sg[0].length = length;

ret = crypto_cipher_decrypt(tfm, sg, sg, length);
@@ -151,7 +151,7 @@
goto out_free_tfm;
}
sg[0].page = virt_to_page(input->data);
- sg[0].offset = ((long)input->data & ~PAGE_MASK);
+ sg[0].offset = virt_to_pageoff(input->data);
sg[0].length = input->len;

crypto_digest_init(tfm);

--
Hideaki YOSHIFUJI @ USAGI Project <yosh...@linux-ipv6.org>
GPG FP: 9022 65EB 1ECF 3AD1 0BDF 80D8 4807 F894 E062 0EEA

YOSHIFUJI Hideaki / 吉藤英明
Aug 10, 2003, 7:20:06 AM
[5/9] convert drivers/ide to virt_to_pageoff().

Index: linux-2.6/drivers/ide/ide-dma.c
===================================================================
RCS file: /home/cvs/linux-2.5/drivers/ide/ide-dma.c,v
retrieving revision 1.60
diff -u -r1.60 ide-dma.c
--- linux-2.6/drivers/ide/ide-dma.c 7 Aug 2003 07:35:12 -0000 1.60
+++ linux-2.6/drivers/ide/ide-dma.c 10 Aug 2003 08:40:52 -0000
@@ -255,7 +255,7 @@
#endif
memset(&sg[nents], 0, sizeof(*sg));
sg[nents].page = virt_to_page(virt_addr);
- sg[nents].offset = (unsigned long) virt_addr & ~PAGE_MASK;
+ sg[nents].offset = virt_to_pageoff(virt_addr);
sg[nents].length = 128 * SECTOR_SIZE;
nents++;
virt_addr = virt_addr + (128 * SECTOR_SIZE);
@@ -263,7 +263,7 @@
}
memset(&sg[nents], 0, sizeof(*sg));
sg[nents].page = virt_to_page(virt_addr);
- sg[nents].offset = (unsigned long) virt_addr & ~PAGE_MASK;
+ sg[nents].offset = virt_to_pageoff(virt_addr);
sg[nents].length = sector_count * SECTOR_SIZE;
nents++;

Index: linux-2.6/drivers/ide/arm/icside.c
===================================================================
RCS file: /home/cvs/linux-2.5/drivers/ide/arm/icside.c,v
retrieving revision 1.8
diff -u -r1.8 icside.c
--- linux-2.6/drivers/ide/arm/icside.c 19 May 2003 17:48:30 -0000 1.8
+++ linux-2.6/drivers/ide/arm/icside.c 10 Aug 2003 08:40:52 -0000
@@ -233,7 +233,7 @@

memset(sg, 0, sizeof(*sg));
sg->page = virt_to_page(rq->buffer);
- sg->offset = ((unsigned long)rq->buffer) & ~PAGE_MASK;
+ sg->offset = virt_to_pageoff(rq->buffer);
sg->length = rq->nr_sectors * SECTOR_SIZE;
nents = 1;
} else {
Index: linux-2.6/drivers/ide/ppc/pmac.c
===================================================================
RCS file: /home/cvs/linux-2.5/drivers/ide/ppc/pmac.c,v
retrieving revision 1.13
diff -u -r1.13 pmac.c
--- linux-2.6/drivers/ide/ppc/pmac.c 6 Jul 2003 19:33:43 -0000 1.13
+++ linux-2.6/drivers/ide/ppc/pmac.c 10 Aug 2003 08:40:52 -0000
@@ -971,7 +971,7 @@
if (sector_count > 127) {
memset(&sg[nents], 0, sizeof(*sg));
sg[nents].page = virt_to_page(virt_addr);
- sg[nents].offset = (unsigned long) virt_addr & ~PAGE_MASK;
+ sg[nents].offset = virt_to_pageoff(virt_addr);
sg[nents].length = 127 * SECTOR_SIZE;
nents++;
virt_addr = virt_addr + (127 * SECTOR_SIZE);
@@ -979,7 +979,7 @@
}
memset(&sg[nents], 0, sizeof(*sg));
sg[nents].page = virt_to_page(virt_addr);
- sg[nents].offset = (unsigned long) virt_addr & ~PAGE_MASK;
+ sg[nents].offset = virt_to_pageoff(virt_addr);
sg[nents].length = sector_count * SECTOR_SIZE;
nents++;

--
Hideaki YOSHIFUJI @ USAGI Project <yosh...@linux-ipv6.org>
GPG FP: 9022 65EB 1ECF 3AD1 0BDF 80D8 4807 F894 E062 0EEA

YOSHIFUJI Hideaki / 吉藤英明
Aug 10, 2003, 7:20:05 AM
[4/9] convert drivers/block to virt_to_pageoff().

Index: linux-2.6/drivers/block/cryptoloop.c
===================================================================
RCS file: /home/cvs/linux-2.5/drivers/block/cryptoloop.c,v
retrieving revision 1.3
diff -u -r1.3 cryptoloop.c
--- linux-2.6/drivers/block/cryptoloop.c 17 Jul 2003 17:43:53 -0000 1.3
+++ linux-2.6/drivers/block/cryptoloop.c 10 Aug 2003 08:40:52 -0000
@@ -112,11 +112,11 @@
iv[0] = cpu_to_le32(IV & 0xffffffff);

sg_in.page = virt_to_page(in);
- sg_in.offset = (unsigned long)in & ~PAGE_MASK;
+ sg_in.offset = virt_to_pageoff(in);
sg_in.length = sz;

sg_out.page = virt_to_page(out);
- sg_out.offset = (unsigned long)out & ~PAGE_MASK;
+ sg_out.offset = virt_to_pageoff(out);
sg_out.length = sz;

encdecfunc(tfm, &sg_out, &sg_in, sz, (u8 *)iv);

--
Hideaki YOSHIFUJI @ USAGI Project <yosh...@linux-ipv6.org>
GPG FP: 9022 65EB 1ECF 3AD1 0BDF 80D8 4807 F894 E062 0EEA

YOSHIFUJI Hideaki / 吉藤英明
Aug 10, 2003, 7:20:08 AM
[7/9] convert drivers/scsi to virt_to_pageoff().

Index: linux-2.6/drivers/scsi/3w-xxxx.c
===================================================================
RCS file: /home/cvs/linux-2.5/drivers/scsi/3w-xxxx.c,v
retrieving revision 1.34
diff -u -r1.34 3w-xxxx.c
--- linux-2.6/drivers/scsi/3w-xxxx.c 17 Jul 2003 17:43:53 -0000 1.34
+++ linux-2.6/drivers/scsi/3w-xxxx.c 10 Aug 2003 09:30:32 -0000
@@ -2112,7 +2112,7 @@
if (cmd->request_bufflen == 0)
return 0;

- mapping = pci_map_page(pdev, virt_to_page(cmd->request_buffer), ((unsigned long)cmd->request_buffer & ~PAGE_MASK), cmd->request_bufflen, dma_dir);
+ mapping = pci_map_page(pdev, virt_to_page(cmd->request_buffer), virt_to_pageoff(cmd->request_buffer), cmd->request_bufflen, dma_dir);

if (mapping == 0) {
printk(KERN_WARNING "3w-xxxx: tw_map_scsi_single_data(): pci_map_page() failed.\n");
Index: linux-2.6/drivers/scsi/ide-scsi.c
===================================================================
RCS file: /home/cvs/linux-2.5/drivers/scsi/ide-scsi.c,v
retrieving revision 1.80
diff -u -r1.80 ide-scsi.c
--- linux-2.6/drivers/scsi/ide-scsi.c 18 Jul 2003 20:29:54 -0000 1.80
+++ linux-2.6/drivers/scsi/ide-scsi.c 10 Aug 2003 09:30:32 -0000
@@ -761,8 +761,8 @@
printk ("ide-scsi: %s: building DMA table for a single buffer (%dkB)\n", drive->name, pc->request_transfer >> 10);
#endif /* IDESCSI_DEBUG_LOG */
bh->bi_io_vec[0].bv_page = virt_to_page(pc->scsi_cmd->request_buffer);
+ bh->bi_io_vec[0].bv_offset = virt_to_pageoff(pc->scsi_cmd->request_buffer);
bh->bi_io_vec[0].bv_len = pc->request_transfer;
- bh->bi_io_vec[0].bv_offset = (unsigned long) pc->scsi_cmd->request_buffer & ~PAGE_MASK;
bh->bi_size = pc->request_transfer;
}
return first_bh;
Index: linux-2.6/drivers/scsi/megaraid.c
===================================================================
RCS file: /home/cvs/linux-2.5/drivers/scsi/megaraid.c,v
retrieving revision 1.43
diff -u -r1.43 megaraid.c
--- linux-2.6/drivers/scsi/megaraid.c 17 Jul 2003 17:43:53 -0000 1.43
+++ linux-2.6/drivers/scsi/megaraid.c 10 Aug 2003 09:30:32 -0000
@@ -2275,8 +2275,7 @@
if( !cmd->use_sg ) {

page = virt_to_page(cmd->request_buffer);
-
- offset = ((unsigned long)cmd->request_buffer & ~PAGE_MASK);
+ offset = virt_to_pageoff(cmd->request_buffer);

scb->dma_h_bulkdata = pci_map_page(adapter->dev,
page, offset,
Index: linux-2.6/drivers/scsi/qlogicfc.c
===================================================================
RCS file: /home/cvs/linux-2.5/drivers/scsi/qlogicfc.c,v
retrieving revision 1.34
diff -u -r1.34 qlogicfc.c
--- linux-2.6/drivers/scsi/qlogicfc.c 17 Jul 2003 17:43:53 -0000 1.34
+++ linux-2.6/drivers/scsi/qlogicfc.c 10 Aug 2003 09:30:32 -0000
@@ -1283,8 +1283,7 @@
}
} else if (Cmnd->request_bufflen && Cmnd->sc_data_direction != PCI_DMA_NONE) {
struct page *page = virt_to_page(Cmnd->request_buffer);
- unsigned long offset = ((unsigned long)Cmnd->request_buffer &
- ~PAGE_MASK);
+ unsigned long offset = virt_to_pageoff(Cmnd->request_buffer);
dma_addr_t busaddr = pci_map_page(hostdata->pci_dev,
page, offset,
Cmnd->request_bufflen,
@@ -1927,8 +1926,7 @@
*/
busaddr = pci_map_page(hostdata->pci_dev,
virt_to_page(&hostdata->control_block),
- ((unsigned long) &hostdata->control_block &
- ~PAGE_MASK),
+ virt_to_pageoff(&hostdata->control_block),
sizeof(hostdata->control_block),
PCI_DMA_BIDIRECTIONAL);

Index: linux-2.6/drivers/scsi/sg.c
===================================================================
RCS file: /home/cvs/linux-2.5/drivers/scsi/sg.c,v
retrieving revision 1.54
diff -u -r1.54 sg.c
--- linux-2.6/drivers/scsi/sg.c 17 Jul 2003 17:43:53 -0000 1.54
+++ linux-2.6/drivers/scsi/sg.c 10 Aug 2003 09:30:33 -0000
@@ -1813,7 +1813,7 @@
break;
}
sclp->page = virt_to_page(p);
- sclp->offset = (unsigned long) p & ~PAGE_MASK;
+ sclp->offset = virt_to_pageoff(p);
sclp->length = ret_sz;

SCSI_LOG_TIMEOUT(5, printk("sg_build_build: k=%d, a=0x%p, len=%d\n",
Index: linux-2.6/drivers/scsi/sym53c8xx.c
===================================================================
RCS file: /home/cvs/linux-2.5/drivers/scsi/sym53c8xx.c,v
retrieving revision 1.39
diff -u -r1.39 sym53c8xx.c
--- linux-2.6/drivers/scsi/sym53c8xx.c 18 Jul 2003 16:56:15 -0000 1.39
+++ linux-2.6/drivers/scsi/sym53c8xx.c 10 Aug 2003 09:30:33 -0000
@@ -1162,8 +1162,7 @@

mapping = pci_map_page(pdev,
virt_to_page(cmd->request_buffer),
- ((unsigned long)cmd->request_buffer &
- ~PAGE_MASK),
+ virt_to_pageoff(cmd->request_buffer),
cmd->request_bufflen, dma_dir);
__data_mapped(cmd) = 1;
__data_mapping(cmd) = mapping;
Index: linux-2.6/drivers/scsi/arm/scsi.h
===================================================================
RCS file: /home/cvs/linux-2.5/drivers/scsi/arm/scsi.h,v
retrieving revision 1.2
diff -u -r1.2 scsi.h
--- linux-2.6/drivers/scsi/arm/scsi.h 19 May 2003 17:48:30 -0000 1.2
+++ linux-2.6/drivers/scsi/arm/scsi.h 10 Aug 2003 09:30:33 -0000
@@ -23,7 +23,7 @@
BUG_ON(bufs + 1 > max);

sg->page = virt_to_page(SCp->ptr);
- sg->offset = ((unsigned int)SCp->ptr) & ~PAGE_MASK;
+ sg->offset = virt_to_pageoff(SCp->ptr);
sg->length = SCp->this_residual;

if (bufs)

--
Hideaki YOSHIFUJI @ USAGI Project <yosh...@linux-ipv6.org>
GPG FP: 9022 65EB 1ECF 3AD1 0BDF 80D8 4807 F894 E062 0EEA

YOSHIFUJI Hideaki / 吉藤英明
Aug 10, 2003, 7:20:06 AM
[6/9] convert drivers/net to virt_to_pageoff().

Index: linux-2.6/drivers/net/acenic.c
===================================================================
RCS file: /home/cvs/linux-2.5/drivers/net/acenic.c,v
retrieving revision 1.36
diff -u -r1.36 acenic.c
--- linux-2.6/drivers/net/acenic.c 1 Aug 2003 22:07:01 -0000 1.36
+++ linux-2.6/drivers/net/acenic.c 10 Aug 2003 08:40:52 -0000
@@ -1960,7 +1960,7 @@
*/
skb_reserve(skb, 2 + 16);
mapping = pci_map_page(ap->pdev, virt_to_page(skb->data),
- ((unsigned long)skb->data & ~PAGE_MASK),
+ virt_to_pageoff(skb->data),
ACE_STD_BUFSIZE - (2 + 16),
PCI_DMA_FROMDEVICE);
ap->skb->rx_std_skbuff[idx].skb = skb;
@@ -2026,7 +2026,7 @@
*/
skb_reserve(skb, 2 + 16);
mapping = pci_map_page(ap->pdev, virt_to_page(skb->data),
- ((unsigned long)skb->data & ~PAGE_MASK),
+ virt_to_pageoff(skb->data),
ACE_MINI_BUFSIZE - (2 + 16),
PCI_DMA_FROMDEVICE);
ap->skb->rx_mini_skbuff[idx].skb = skb;
@@ -2087,7 +2087,7 @@
*/
skb_reserve(skb, 2 + 16);
mapping = pci_map_page(ap->pdev, virt_to_page(skb->data),
- ((unsigned long)skb->data & ~PAGE_MASK),
+ virt_to_pageoff(skb->data),
ACE_JUMBO_BUFSIZE - (2 + 16),
PCI_DMA_FROMDEVICE);
ap->skb->rx_jumbo_skbuff[idx].skb = skb;
@@ -2743,7 +2743,7 @@
struct tx_ring_info *info;

mapping = pci_map_page(ap->pdev, virt_to_page(skb->data),
- ((unsigned long) skb->data & ~PAGE_MASK),
+ virt_to_pageoff(skb->data),
skb->len, PCI_DMA_TODEVICE);

info = ap->skb->tx_skbuff + idx;
Index: linux-2.6/drivers/net/sungem.c
===================================================================
RCS file: /home/cvs/linux-2.5/drivers/net/sungem.c,v
retrieving revision 1.40
diff -u -r1.40 sungem.c
--- linux-2.6/drivers/net/sungem.c 3 Aug 2003 18:34:10 -0000 1.40
+++ linux-2.6/drivers/net/sungem.c 10 Aug 2003 08:40:53 -0000
@@ -725,8 +725,7 @@
skb_put(new_skb, (ETH_FRAME_LEN + RX_OFFSET));
rxd->buffer = cpu_to_le64(pci_map_page(gp->pdev,
virt_to_page(new_skb->data),
- ((unsigned long) new_skb->data &
- ~PAGE_MASK),
+ virt_to_pageoff(new_skb->data),
RX_BUF_ALLOC_SIZE(gp),
PCI_DMA_FROMDEVICE));
skb_reserve(new_skb, RX_OFFSET);
@@ -873,8 +872,7 @@
len = skb->len;
mapping = pci_map_page(gp->pdev,
virt_to_page(skb->data),
- ((unsigned long) skb->data &
- ~PAGE_MASK),
+ virt_to_pageoff(skb->data),
len, PCI_DMA_TODEVICE);
ctrl |= TXDCTRL_SOF | TXDCTRL_EOF | len;
if (gem_intme(entry))
@@ -898,7 +896,7 @@
*/
first_len = skb_headlen(skb);
first_mapping = pci_map_page(gp->pdev, virt_to_page(skb->data),
- ((unsigned long) skb->data & ~PAGE_MASK),
+ virt_to_pageoff(skb->data),
first_len, PCI_DMA_TODEVICE);
entry = NEXT_TX(entry);

@@ -1464,8 +1462,7 @@
skb_put(skb, (ETH_FRAME_LEN + RX_OFFSET));
dma_addr = pci_map_page(gp->pdev,
virt_to_page(skb->data),
- ((unsigned long) skb->data &
- ~PAGE_MASK),
+ virt_to_pageoff(skb->data),
RX_BUF_ALLOC_SIZE(gp),
PCI_DMA_FROMDEVICE);
rxd->buffer = cpu_to_le64(dma_addr);
Index: linux-2.6/drivers/net/sk98lin/skge.c
===================================================================
RCS file: /home/cvs/linux-2.5/drivers/net/sk98lin/skge.c,v
retrieving revision 1.24
diff -u -r1.24 skge.c
--- linux-2.6/drivers/net/sk98lin/skge.c 1 Aug 2003 19:02:34 -0000 1.24
+++ linux-2.6/drivers/net/sk98lin/skge.c 10 Aug 2003 08:40:53 -0000
@@ -2142,7 +2142,7 @@
*/
PhysAddr = (SK_U64) pci_map_page(pAC->PciDev,
virt_to_page(pMessage->data),
- ((unsigned long) pMessage->data & ~PAGE_MASK),
+ virt_to_pageoff(pMessage->data),
pMessage->len,
PCI_DMA_TODEVICE);
pTxd->VDataLow = (SK_U32) (PhysAddr & 0xffffffff);
@@ -2259,7 +2259,7 @@
*/
PhysAddr = (SK_U64) pci_map_page(pAC->PciDev,
virt_to_page(pMessage->data),
- ((unsigned long) pMessage->data & ~PAGE_MASK),
+ virt_to_pageoff(pMessage->data),
skb_headlen(pMessage),
PCI_DMA_TODEVICE);

@@ -2518,8 +2518,7 @@
Length = pAC->RxBufSize;
PhysAddr = (SK_U64) pci_map_page(pAC->PciDev,
virt_to_page(pMsgBlock->data),
- ((unsigned long) pMsgBlock->data &
- ~PAGE_MASK),
+ virt_to_pageoff(pMsgBlock->data),
pAC->RxBufSize - 2,
PCI_DMA_FROMDEVICE);

--
Hideaki YOSHIFUJI @ USAGI Project <yosh...@linux-ipv6.org>
GPG FP: 9022 65EB 1ECF 3AD1 0BDF 80D8 4807 F894 E062 0EEA

YOSHIFUJI Hideaki / 吉藤英明
Aug 10, 2003, 7:20:08 AM
[9/9] convert fs/jbd to virt_to_pageoff().

Index: linux-2.6/fs/jbd/journal.c
===================================================================
RCS file: /home/cvs/linux-2.5/fs/jbd/journal.c,v
retrieving revision 1.62
diff -u -r1.62 journal.c
--- linux-2.6/fs/jbd/journal.c 11 Jul 2003 07:04:11 -0000 1.62
+++ linux-2.6/fs/jbd/journal.c 10 Aug 2003 08:40:54 -0000
@@ -278,9 +278,6 @@
* Bit 1 set == buffer copy-out performed (kfree the data after IO)
*/

-static inline unsigned long virt_to_offset(void *p)
-{return ((unsigned long) p) & ~PAGE_MASK;}
-
int journal_write_metadata_buffer(transaction_t *transaction,
struct journal_head *jh_in,
struct journal_head **jh_out,
@@ -318,10 +315,10 @@
if (jh_in->b_frozen_data) {
done_copy_out = 1;
new_page = virt_to_page(jh_in->b_frozen_data);
- new_offset = virt_to_offset(jh_in->b_frozen_data);
+ new_offset = virt_to_pageoff(jh_in->b_frozen_data);
} else {
new_page = jh2bh(jh_in)->b_page;
- new_offset = virt_to_offset(jh2bh(jh_in)->b_data);
+ new_offset = virt_to_pageoff(jh2bh(jh_in)->b_data);
}

mapped_data = kmap_atomic(new_page, KM_USER0);
@@ -358,7 +355,7 @@
address kmapped so that we can clear the escaped
magic number below. */
new_page = virt_to_page(tmp);
- new_offset = virt_to_offset(tmp);
+ new_offset = virt_to_pageoff(tmp);
done_copy_out = 1;
}

--
Hideaki YOSHIFUJI @ USAGI Project <yosh...@linux-ipv6.org>
GPG FP: 9022 65EB 1ECF 3AD1 0BDF 80D8 4807 F894 E062 0EEA

YOSHIFUJI Hideaki / 吉藤英明
Aug 10, 2003, 7:20:10 AM
[8/9] convert drivers/usb to virt_to_pageoff().

Index: linux-2.6/drivers/usb/misc/usbtest.c
===================================================================
RCS file: /home/cvs/linux-2.5/drivers/usb/misc/usbtest.c,v
retrieving revision 1.19
diff -u -r1.19 usbtest.c
--- linux-2.6/drivers/usb/misc/usbtest.c 1 Aug 2003 18:12:47 -0000 1.19
+++ linux-2.6/drivers/usb/misc/usbtest.c 10 Aug 2003 08:40:53 -0000
@@ -271,7 +271,7 @@

/* kmalloc pages are always physically contiguous! */
sg [i].page = virt_to_page (buf);
- sg [i].offset = ((unsigned) buf) & ~PAGE_MASK;
+ sg [i].offset = virt_to_pageoff (buf);
sg [i].length = size;

if (vary) {
Index: linux-2.6/drivers/usb/storage/sddr09.c
===================================================================
RCS file: /home/cvs/linux-2.5/drivers/usb/storage/sddr09.c,v
retrieving revision 1.23
diff -u -r1.23 sddr09.c
--- linux-2.6/drivers/usb/storage/sddr09.c 17 Jul 2003 22:58:33 -0000 1.23
+++ linux-2.6/drivers/usb/storage/sddr09.c 10 Aug 2003 08:40:54 -0000
@@ -1127,7 +1127,7 @@
char *vaddr = kmalloc(alloc_req, GFP_NOIO);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,3)
sg[i].page = virt_to_page(vaddr);
- sg[i].offset = ((unsigned long)vaddr & ~PAGE_MASK);
+ sg[i].offset = virt_to_pageoff(vaddr);
#else
sg[i].address = vaddr;
#endif

--
Hideaki YOSHIFUJI @ USAGI Project <yosh...@linux-ipv6.org>
GPG FP: 9022 65EB 1ECF 3AD1 0BDF 80D8 4807 F894 E062 0EEA

Christoph Hellwig
Aug 10, 2003, 7:40:07 AM
On Sun, Aug 10, 2003 at 08:10:09PM +0900, YOSHIFUJI Hideaki / 吉藤英明 wrote:
> [7/9] convert drivers/scsi to virt_to_pageoff().
>
> Index: linux-2.6/drivers/scsi/3w-xxxx.c
> ===================================================================
> RCS file: /home/cvs/linux-2.5/drivers/scsi/3w-xxxx.c,v
> retrieving revision 1.34
> diff -u -r1.34 3w-xxxx.c
> --- linux-2.6/drivers/scsi/3w-xxxx.c 17 Jul 2003 17:43:53 -0000 1.34
> +++ linux-2.6/drivers/scsi/3w-xxxx.c 10 Aug 2003 09:30:32 -0000
> @@ -2112,7 +2112,7 @@
> if (cmd->request_bufflen == 0)
> return 0;
>
> - mapping = pci_map_page(pdev, virt_to_page(cmd->request_buffer), ((unsigned long)cmd->request_buffer & ~PAGE_MASK), cmd->request_bufflen, dma_dir);
> + mapping = pci_map_page(pdev, virt_to_page(cmd->request_buffer), virt_to_pageoff(cmd->request_buffer), cmd->request_bufflen, dma_dir);

You probably want to use pci_map_single here instead..

> --- linux-2.6/drivers/scsi/megaraid.c 17 Jul 2003 17:43:53 -0000 1.43
> +++ linux-2.6/drivers/scsi/megaraid.c 10 Aug 2003 09:30:32 -0000
> @@ -2275,8 +2275,7 @@
> if( !cmd->use_sg ) {
>
> page = virt_to_page(cmd->request_buffer);
> -
> - offset = ((unsigned long)cmd->request_buffer & ~PAGE_MASK);
> + offset = virt_to_pageoff(cmd->request_buffer);

Ditto.

> --- linux-2.6/drivers/scsi/qlogicfc.c 17 Jul 2003 17:43:53 -0000 1.34
> +++ linux-2.6/drivers/scsi/qlogicfc.c 10 Aug 2003 09:30:32 -0000
> @@ -1283,8 +1283,7 @@
> }
> } else if (Cmnd->request_bufflen && Cmnd->sc_data_direction != PCI_DMA_NONE) {
> struct page *page = virt_to_page(Cmnd->request_buffer);
> - unsigned long offset = ((unsigned long)Cmnd->request_buffer &
> - ~PAGE_MASK);
> + unsigned long offset = virt_to_pageoff(Cmnd->request_buffer);
> dma_addr_t busaddr = pci_map_page(hostdata->pci_dev,
> page, offset,
> Cmnd->request_bufflen,

Ditto.

> @@ -1927,8 +1926,7 @@
> */
> busaddr = pci_map_page(hostdata->pci_dev,
> virt_to_page(&hostdata->control_block),
> - ((unsigned long) &hostdata->control_block &
> - ~PAGE_MASK),
> + virt_to_pageoff(&hostdata->control_block),
> sizeof(hostdata->control_block),
> PCI_DMA_BIDIRECTIONAL);

Ditto.

> --- linux-2.6/drivers/scsi/sym53c8xx.c 18 Jul 2003 16:56:15 -0000 1.39
> +++ linux-2.6/drivers/scsi/sym53c8xx.c 10 Aug 2003 09:30:33 -0000
> @@ -1162,8 +1162,7 @@
>
> mapping = pci_map_page(pdev,
> virt_to_page(cmd->request_buffer),
> - ((unsigned long)cmd->request_buffer &
> - ~PAGE_MASK),
> + virt_to_pageoff(cmd->request_buffer),

Ditto.

> --- linux-2.6/drivers/scsi/arm/scsi.h 19 May 2003 17:48:30 -0000 1.2
> +++ linux-2.6/drivers/scsi/arm/scsi.h 10 Aug 2003 09:30:33 -0000
> @@ -23,7 +23,7 @@
> BUG_ON(bufs + 1 > max);
>
> sg->page = virt_to_page(SCp->ptr);
> - sg->offset = ((unsigned int)SCp->ptr) & ~PAGE_MASK;
> + sg->offset = virt_to_pageoff(SCp->ptr);
> sg->length = SCp->this_residual;

Ditto.
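
For the non-highmem, single-buffer cases above, a minimal sketch of
the pci_map_single() form suggested at the top of this message
(variable names follow the quoted 3w-xxxx.c hunk):

	/* pci_map_single() takes the kernel virtual address directly,
	 * so the virt_to_page()/virt_to_pageoff() pair drops out: */
	mapping = pci_map_single(pdev, cmd->request_buffer,
				 cmd->request_bufflen, dma_dir);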

David S. Miller
Aug 10, 2003, 8:00:13 AM
On Sun, 10 Aug 2003 12:31:48 +0100
Christoph Hellwig <h...@infradead.org> wrote:

> You probably want to use pci_map_single here instead..

I don't think it's wise to mix two changes at once. Let's get
the straightforward "obvious" shorthand change in, then we can
add your enhancement.

YOSHIFUJI Hideaki / 吉藤英明
Aug 10, 2003, 8:10:06 AM
In article <20030810045121...@redhat.com> (at Sun, 10 Aug 2003 04:51:21 -0700), "David S. Miller" <da...@redhat.com> says:

> On Sun, 10 Aug 2003 12:31:48 +0100
> Christoph Hellwig <h...@infradead.org> wrote:
>
> > You probably want to use pci_map_single here instead..
>
> I don't think it's wise to mix two changes at once. Let's get
> the straightforward "obvious" shorthand change in, then we can
> add your enhancement.

Agreed.

BTW, drivers/scsi/3w-xxxx.c says:

1.02.00.029 - Add missing pci_free_consistent() in tw_allocate_memory().
Replace pci_map_single() with pci_map_page() for highmem.
Check for tw_setfeature() failure.

Have the problems with pci_map_single() and highmem already gone away?

--
Hideaki YOSHIFUJI @ USAGI Project <yosh...@linux-ipv6.org>
GPG FP: 9022 65EB 1ECF 3AD1 0BDF 80D8 4807 F894 E062 0EEA

James Morris
Aug 10, 2003, 9:30:09 AM
On Sat, 9 Aug 2003, Matt Mackall wrote:

> out. Another thing I failed to mention at 3am is that most of the
> speed increase actually comes from the following patch which wasn't in
> baseline and isn't cryptoapi-specific. So I expect the cryptoapi
> version is about 30% faster once you amortize the initialization
> stuff.
>
> >>>>
> Remove folding step and double throughput.

Why did you remove this?


- James
--
James Morris
<jmo...@intercode.com.au>

Russell King
Aug 10, 2003, 10:00:09 AM
On Sun, Aug 10, 2003 at 12:31:48PM +0100, Christoph Hellwig wrote:
> > --- linux-2.6/drivers/scsi/arm/scsi.h 19 May 2003 17:48:30 -0000 1.2
> > +++ linux-2.6/drivers/scsi/arm/scsi.h 10 Aug 2003 09:30:33 -0000
> > @@ -23,7 +23,7 @@
> > BUG_ON(bufs + 1 > max);
> >
> > sg->page = virt_to_page(SCp->ptr);
> > - sg->offset = ((unsigned int)SCp->ptr) & ~PAGE_MASK;
> > + sg->offset = virt_to_pageoff(SCp->ptr);
> > sg->length = SCp->this_residual;
>
> Ditto.

No. DMA mapping is handled later.

--
Russell King (r...@arm.linux.org.uk) The developer of ARM Linux
http://www.arm.linux.org.uk/personal/aboutme.html

Russell King
Aug 10, 2003, 10:00:10 AM
On Sun, Aug 10, 2003 at 12:31:48PM +0100, Christoph Hellwig wrote:
> > --- linux-2.6/drivers/scsi/arm/scsi.h 19 May 2003 17:48:30 -0000 1.2
> > +++ linux-2.6/drivers/scsi/arm/scsi.h 10 Aug 2003 09:30:33 -0000
> > @@ -23,7 +23,7 @@
> > BUG_ON(bufs + 1 > max);
> >
> > sg->page = virt_to_page(SCp->ptr);
> > - sg->offset = ((unsigned int)SCp->ptr) & ~PAGE_MASK;
> > + sg->offset = virt_to_pageoff(SCp->ptr);
> > sg->length = SCp->this_residual;
>
> Ditto.

Actually, I'd rather see Scsi_Pointer gain page + offset (or even better
a single sg element) and get rid of these conversions.
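
A sketch of that idea (the two added fields are assumptions, not an
actual patch; ptr and this_residual exist today):

	struct scsi_pointer {
		char *ptr;		/* existing: virtual address cursor */
		int this_residual;	/* existing: bytes remaining */
		struct page *page;	/* proposed: page backing ptr */
		unsigned int offset;	/* proposed: offset within that page */
		/* ... */
	};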

-

James Morris
Aug 10, 2003, 11:00:06 AM
On Sat, 9 Aug 2003, David S. Miller wrote:

> > Also, I posted to cryptoapi-devel that I need a way to disable the
> > unconditional padding on the hash functions.
>
> James, comments?

Yes, a flag could be added for crypto_alloc_tfm() which disables padding
for digests (e.g. CRYPTO_TFM_DIGEST_NOPAD).

Given that there are a still a number of unresolved issues, e.g. making
the crypto api (or a subset thereof) mandatory, perhaps it would be more
appropriate to slate these changes for 2.7 and then backport to 2.6 once
stable.


- James
--
James Morris
<jmo...@intercode.com.au>

Christoph Hellwig
Aug 10, 2003, 11:00:15 AM
On Sun, Aug 10, 2003 at 09:03:22PM +0900, YOSHIFUJI Hideaki / 吉藤英明 wrote:
> BTW, drivers/scsi/3w-xxxx.c says:
>
> 1.02.00.029 - Add missing pci_free_consistent() in tw_allocate_memory().
> Replace pci_map_single() with pci_map_page() for highmem.
> Check for tw_setfeature() failure.
>
> Have problems in pci_map_single() with highmem already gone away?

pci_map_single can't support highmem, but the case you converted can't
be highmem either. There are other places in the driver where we get
handed scatterlists that can contain highmem pages, and those need to
be handled with pci_map_page or, better, with pci_map_sg.

Christoph Hellwig
Aug 10, 2003, 11:00:15 AM
On Sun, Aug 10, 2003 at 02:55:11PM +0100, Russell King wrote:
> Actually, I'd rather see Scsi_Pointer gain page + offset (or even better
> a single sg element) and get rid of these conversions.

Patches are welcome...

Christoph Hellwig
Aug 10, 2003, 11:00:14 AM
On Sun, Aug 10, 2003 at 04:51:21AM -0700, David S. Miller wrote:
> On Sun, 10 Aug 2003 12:31:48 +0100
> Christoph Hellwig <h...@infradead.org> wrote:
>
> > You probably want to use pci_map_single here instead..
>
> I don't think it's wise to mix two changes at once. Let's get
> the straightforward "obvious" shorthand change in, then we can
> add your enhancement.

Ok.

Matt Mackall
Aug 10, 2003, 1:50:08 PM
On Sun, Aug 10, 2003 at 11:18:43PM +1000, James Morris wrote:
> On Sat, 9 Aug 2003, Matt Mackall wrote:
>
> > out. Another thing I failed to mention at 3am is that most of the
> > speed increase actually comes from the following patch which wasn't in
> > baseline and isn't cryptoapi-specific. So I expect the cryptoapi
> > version is about 30% faster once you amortize the initialization
> > stuff.
> >
> > >>>>
> > Remove folding step and double throughput.
>
> Why did you remove this?

I suppose the comment you deleted was a little light on details, sigh.

The idea with the folding was that we can cover up any systemic
patterns in the returned hash by xoring the first half of the hash
with the second half of the hash. While this might seem like a good
technique intuitively, it's mathematically flawed.

Let's assume the simplest case of a two bit hash xy. "Patterns" here
are going to translate into a correlation between otherwise
well-distributed x and y. A perfectly uncorrelated system xy is going
to have two bits of entropy. A perfectly correlated (or
anti-correlated) system xy is going to have only 1 bit of entropy.

Now what happens when we fold these two bits together z=x^y? In the
uncorrelated case we end up with a well-distributed z. In the
correlated case, we end up with a constant 0 or 1, that is z = x^x = 0 or z = x^~x = 1, and
we've eliminated any entropy we once had. If correlation is less than
100%, then we get somewhere between 0 and 1 bits of entropy, but
always less than if we just returned z=x or z=y. This argument
naturally scales up to larger variables x and y.
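
The one-bit version of this can be checked mechanically (a standalone
illustration, not kernel code):

	#include <stdio.h>

	int main(void)
	{
		unsigned x;

		for (x = 0; x <= 1; x++) {
			/* perfectly correlated: y == x, so z == 0 always */
			printf("x=%u corr z=%u\n", x, x ^ x);
			/* perfectly anti-correlated: y == x^1, so z == 1 always */
			printf("x=%u anti z=%u\n", x, x ^ (x ^ 1));
		}
		return 0;
	}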

Ok, so that explains taking out the xor. But I also return xy and not
just x. With the former, every bit of input goes through SHA1 twice,
once in the input pool, and once in the output pool, along with lots
of feedback to foil backtracking attacks. In the latter, every output
bit is going through SHA four times, which is just overkill. If we
don't trust our hash to generate uniform, non-self-correlating output,
running additional rounds does not guarantee that we aren't magnifying
the problem. And it is of course making everything twice as expensive.

--
Matt Mackall : http://www.selenic.com : of or relating to the moon

Jamie Lokier
Aug 10, 2003, 10:20:05 PM
[Ted Ts'o added to recipients because he wrote this thing :)]

If you return xy, you are returning a strong digest of the pool state.
Even with the backtrack-prevention, if the attacker reads 20 bytes
from /dev/random and sees a _recognised_ pattern, they immediately
know the entire state of the secondary pool.

This can happen if, for example, a machine has just rebooted and
hasn't had time to collect much entropy since the initial state.

(In general, recognising the output of a cryptographic hash tells you
the input, because even though there are many possible inputs for that
output, it's far more likely that a systemic error is the cause of the
recognised output, rather than the cause being randomness).

This means that, for a short time after, they can predict further
output bits exactly. "Catastrophic reseeding" from the primary pool
limits how many bits they can predict like this - unless, if they're
lucky, it tells them the state of the primary pool too.

When you output less of the hash, this reduces the information
available to predict the state of the pool. However, SHA1 is large
enough that even half of the hash is a strong predictor, so returning
half of the hash still has this weakness.

(Ironically, a much weaker hash would prevent this mode of data leakage).

As far as I can tell, folding the hash result doesn't help, because
that simply creates a different kind of hash which can also be
recognised, and which is large enough to predict the pool state.

Which leaves...

I have just convinced myself of a flaw in the existing random.c -
which still holds in your patched version. Folding the result of the
strong hash is useless. Instead, the _input_ to the strong hash
should be folded, so that a single hash result (whether folded or not)
doesn't reveal the pool state even when it is recognised.

In other words, the hash should be calculated over fewer than all of
the pool bits each time. The set of pool bits used, or how multiple
bits are folded, needs to be varied in such a way that many hash
results would need to be all recognised to determine the pool state.

I think this is equivalent to saying that the effective hash function
needs to be weak enough not to reveal the pool state when it is
recognised, due to the pool state being recognisably "special"
(e.g. limited time after booting), while at the same time the hash
needs to be strong enough that it does not reveal the pool state when
the pool is random.

Enjoy,
-- Jamie

Jamie Lokier
Aug 10, 2003, 10:20:10 PM
Matt Mackall wrote:
> > > Ok, can I export some more cryptoapi primitives?

Why so complicated? Just move the "sha1_transform" function to its
own file in lib, and call it from both drivers/char/random.c and
crypto/sha1.c.

Problem solved.

Matt Mackall
Aug 10, 2003, 10:50:07 PM
On Mon, Aug 11, 2003 at 03:15:12AM +0100, Jamie Lokier wrote:
> Matt Mackall wrote:
> > > > Ok, can I export some more cryptoapi primitives?
>
> Why so complicated? Just move the "sha1_transform" function to its
> own file in lib, and call it from both drivers/char/random.c and
> crypto/sha1.c.
>
> Problem solved.

Because I can't eventually put random=sha384,aes on the command line
and get hardware acceleration.

--
Matt Mackall : http://www.selenic.com : of or relating to the moon

Matt Mackall
Aug 10, 2003, 10:50:07 PM

Except we're only ever hashing part of the pool.

> This can happen if, for example, a machine has just rebooted and
> hasn't had time to collect much entropy since the initial state.

In this case /dev/random simply won't generate output. Of course
/dev/urandom has an issue here, which is why it's always been recommended
practice to reseed the pool at boot as well.

> (In general, recognising the output of a cryptographic hash tells you
> the input, because even though there are many possible inputs for that
> output, it's far more likely that a systemic error is the cause of the
> recognised output, rather than the cause being randomness).

This is just as true of the folded 80 bits as it is of the full 160,
of course.


> This means that, for a short time after, they can predict further
> output bits exactly. "Catastrophic reseeding" from the primary pool
> limits how many bits they can predict like this - unless, if they're
> lucky, it tells them the state of the primary pool too.

That limit is currently 64 bits and in practice is probably zero
because we've just pulled more entropy from the input pool, so we
actually can't guess the other half of the output pool even if we have
scored a hash hit.

> When you output less of the hash, this reduces the information
> available to predict the state of the pool. However, SHA1 is large
> enough that even half of the hash is a strong predictor, so returning
> half of the hash still has this weakness.
>
> (Ironically, a much weaker hash would prevent this mode of data leakage).

Yes, the ideal implementation is to use a regular 1:1 block cipher
here. I've been looking at doing that for the output pool(s).
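
Roughly, and using nothing beyond the cryptoapi calls already visible
in the tcrypt.c diff earlier in the thread (the tfm setup and buffer
names are hypothetical; this is a sketch, not a proposed patch):

	/* A 1:1 block cipher reveals exactly as many bits as it
	 * outputs; there is no recognisable digest of the wider
	 * pool state to leak. */
	sg.page   = virt_to_page(outbuf);
	sg.offset = virt_to_pageoff(outbuf);
	sg.length = 16;			/* one cipher block */

	ret = crypto_cipher_encrypt(tfm, &sg, &sg, 16);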



> As far as I can tell, folding the hash result doesn't help, because
> that simply creates a different kind of hash which can also be
> recognised, and which is large enough to predict the pool state.
>
> Which leaves...
>
> I have just convinced myself of a flaw in the existing random.c -
> which still holds in your patched version. Folding the result of the
> strong hash is useless. Instead, the _input_ to the strong hash
> should be folded, so that a single hash result (whether folded or not)
> doesn't reveal the pool state even when it is recognised.

Actually, "folding" it is the opposite of what you're looking for:

> In other words, the hash should be calculated over fewer than all of
> the pool bits each time. The set of pool bits used, or how multiple
> bits are folded, needs to be varied in such a way that many hash
> results would need to be all recognised to determine the pool state.

And we're safe here. The default pool size is 1024 bits, of which we
hash 512. I could hash even fewer, say, 480 (and this would deal with the
cryptoapi padding stuff nicely).

> I think this equivalent to saying that the effective hash function
> needs to be weak enough not to reveal the pool state when it is
> recognised, due to the pool state being recognisably "special"
> (e.g. limited time after booting), while at the same time the hash
> needs to be strong enough that it does not reveal the pool state when
> the pool is random.
>
> Enjoy,

--

Matt Mackall : http://www.selenic.com : of or relating to the moon

David Wagner
Aug 11, 2003, 1:10:04 AM
Jamie Lokier wrote:
>If you return xy, you are returning a strong digest of the pool state.
>Even with the backtrack-prevention, if the attacker reads 20 bytes
>from /dev/random and sees a _recognised_ pattern, they immediately
>know the entire state of the secondary pool.

Irrelevant. I think you missed something.

If you pick a pattern in advance, the chance that your pattern appears
at the output of SHA1 is about 2^-160 (assuming SHA1 is secure). If you
pick 2^50 patterns (that takes an awfully big RAID array to store them
all!), then the chance that your pattern appears at the output of SHA1
is 2^-110. If you pick 2^50 patterns and poll /dev/urandom 2^50 times
to get 2^50 outputs, the chance that one of your patterns appears as one
of the /dev/urandom outputs is only 2^-60. In other words, your attack
has a success probability that is truly negligible.
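
Spelling the arithmetic out (a union bound; the only assumption is
that SHA1's output is uniform):

	2^50 patterns / 2^160 possible outputs = 2^-110 per output read
	2^50 reads x 2^-110 per read = 2^-60 overall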

You might as well argue that "if a cosmic ray hits memory and turns off
/dev/urandom, then things will fail". This is such an unlikely event
that we all ignore it. Likewise, the risk of special patterns appearing
at the output of SHA1 is also so unlikely that it can also be ignored.

Jamie Lokier
Aug 11, 2003, 1:10:07 AM
Matt Mackall wrote:
> And we're safe here. The default pool size is 1024 bits, of which we
> hash 512. I could hash even fewer, say, 480 (and this would deal with the
> cryptoapi padding stuff nicely).

Where is the pool size set to 1024 bits? I'm reading 2.5.75, and it
looks to me like the hash is over the whole pool, of 512 bits for the
primary and 128 bits for the secondary pool:

for (i = 0, x = 0; i < r->poolinfo.poolwords; i += 16, x+=2) {
HASH_TRANSFORM(tmp, r->pool+i);
add_entropy_words(r, &tmp[x%HASH_BUFFER_SIZE], 1);
}

-- Jamie

David S. Miller
Aug 11, 2003, 1:10:08 AM
On Mon, 11 Aug 2003 03:15:12 +0100
Jamie Lokier <ja...@shareable.org> wrote:

> Matt Mackall wrote:
> > > > Ok, can I export some more cryptoapi primitives?
>
> Why so complicated? Just move the "sha1_transform" function to its
> own file in lib, and call it from both drivers/char/random.c and
> crypto/sha1.c.

This is also broken.

The whole point of the 'tfm' the Crypto API makes you allocate is
that it provides all of the state and configuration information
needed to do the transforms.

There is no reason why random.c's usage of the crypto-API cannot be
done cleanly and efficiently such that it is both faster and smaller
in code size than what random.c uses now. All of this _WITHOUT_
bypassing and compromising the well designed crypto API interfaces to these
transformations.

Matt Mackall
Aug 11, 2003, 1:10:10 AM
On Mon, Aug 11, 2003 at 05:59:47AM +0100, Jamie Lokier wrote:
> Matt Mackall wrote:
> > And we're safe here. The default pool size is 1024 bits, of which we
> > hash 512. I could hash even fewer, say, 480 (and this would deal with the
> > cryptoapi padding stuff nicely).
>
> Where is the pool size set to 1024 bits? I'm reading 2.5.75, and it
> looks to me like the hash is over the whole pool, of 512 bits for the
> primary and 128 bits for the secondary pool:
>
> for (i = 0, x = 0; i < r->poolinfo.poolwords; i += 16, x+=2) {
^^^^

Unfortunately, there's an ugly mix of words, bytes, and bits here (and it
was actually broken for years because of it). The input pool is 4kbits
and the output pools are 1k.

--
Matt Mackall : http://www.selenic.com : of or relating to the moon

Jamie Lokier
Aug 11, 2003, 1:20:07 AM
David S. Miller wrote:
> > Why so complicated? Just move the "sha1_transform" function to its
> > own file in lib, and call it from both drivers/char/random.c and
> > crypto/sha1.c.
>
> This is also broken.

Not wanted, perhaps, but there is nothing broken about it.

> The whole point of the 'tfm' the Crypto API makes you allocate is
> that it provides all of the state and configuration information
> needed to do the transforms.

That'll be the array of 5 u32s, then.

> There is no reason why random.c's usage of the crypto-API cannot be
> done cleanly and efficiently such that it is both faster and resulting
> in smaller code size than what random.c uses now.

I don't disagree that Crypto API is flexible, possibly desirable for
being able to change which algorithm random.c uses, and using it will
certainly remove a lot of lines from random.c.

But the registration and lookup part just so random.c can ultimately
call sha1_transform with its well known 5-word state and 16-word data
input can't possibly be as small as calling sha1_transform directly.

> All of this _WITHOUT_ bypassing and compromising the well designed
> crypto API interfaces to these transformations.

I don't believe you'll get "all of this", i.e. the smaller code size.

The state for sha1_transform is an array of 5 u32s, that's all. Going
via Crypto API is just a complicated way to call it in this case.
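
Concretely, the direct call amounts to no more than this (a sketch,
using the initial values and names from random.c's extract path):

    __u32 tmp[85];          /* 5-word digest + 80 words of scratch */

    tmp[0] = 0x67452301;    /* standard SHA1 initial values */
    tmp[1] = 0xefcdab89;
    tmp[2] = 0x98badcfe;
    tmp[3] = 0x10325476;
    tmp[4] = 0xc3d2e1f0;
    SHATransform(tmp, r->pool + pos);   /* one 512-bit block */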

random.c does not use it in the conventional way, as a hash over an
arbitrary number of bytes, so that part of CryptoAPI is not relevant.
random.c also uses the intermediate states of the hash, to feed them
back into the pool.

I shall be very surprised if the supposed hardware acceleration can
make random.c faster with its current algorithm, because it operates
on such small chunks of data (64 bytes at a time - more I/O overhead
than crypto time, with hardware). A change of algorithm might do it,
though.

I shall be even more surprised if calling sha1_transform through
CryptoAPI results in a smaller kernel than what is done now (without
CryptoAPI linked in).

-- Jamie

Jamie Lokier

Aug 11, 2003, 1:30:09 AM
Matt Mackall wrote:
> On Mon, Aug 11, 2003 at 05:59:47AM +0100, Jamie Lokier wrote:
> > Matt Mackall wrote:
> > > And we're safe here. The default pool size is 1024 bits, of which we
> > > hash 512. I could hash even fewer, say, 480 (and this would deal with the
> > > cryptoapi padding stuff nicely).
> >
> > Where is the pool size set to 1024 bits? I'm reading 2.5.75, and it
> > looks to me like the hash is over the whole pool, of 512 bits for the
> > primary and 128 bits for the secondary pool:
> >
> > for (i = 0, x = 0; i < r->poolinfo.poolwords; i += 16, x+=2) {
> ^^^^
>
> Unfortunately, there's an ugly mix of words, bytes, and bits here (and it
> was actually broken for years because of it). The input pool is 4kbits
> and the output pools are 1k.

You're right about the sizes. But you said it hashes only half of the
pool. Where is that?

-- Jamie

David S. Miller

Aug 11, 2003, 1:30:11 AM
On Sun, 10 Aug 2003 14:55:11 +0100
Russell King <r...@arm.linux.org.uk> wrote:

> Actually, I'd rather see Scsi_Pointer gain page + offset (or even better
> a single sg element) and get rid of these conversions.

I think this is a great idea.

Jamie Lokier

Aug 11, 2003, 1:50:04 AM
David Wagner wrote:
> Jamie Lokier wrote:
> >If you return xy, you are returning a strong digest of the pool state.
> >Even with the backtrack-prevention, if the attacker reads 20 bytes
> >from /dev/random and sees a _recognised_ pattern, they immediately
> >know the entire state of the secondary pool.
>
> Irrelevant. I think you missed something.
>
> If you pick a pattern in advance, the chance that your pattern appears
> at the output of SHA1 is about 2^-160 (assuming SHA1 is secure).

This is true if you assume the pool state is random (totally
unpredictable by the attacker). Then, indeed, looking for known
patterns is useless.

However, if you _do_ spot a known pattern, there are two
possibilities: either it came up randomly, or it came up because the
pool state is not really random/unpredictable.

You may safely assume that if this happens, it's far more likely to be
a systemic problem, possibly of unknown type (explained below), than
it is to be due to randomness. This is because random hits occur with
probability on the order of 2^-160, assuming SHA1 is that good,
whereas we can't
rule out systemic failures with that much confidence no matter how
carefully we try.

It may be that an attacker knows about a systemic problem with our
machine that we don't know about. For example the attacker might know
our pool state well enough shortly after boot time, to have a chance
at matching a dictionary of 2^32 hashes. The attacker might have had
a chance to read our disk, which reseeding the pool at boot time does
not protect against.

With the right algorithm, we can protect against weaknesses of this kind.
The protection is not absolute, but ruling out a category of attacks
is good if we can do it.

-- Jamie

Matt Mackall

Aug 11, 2003, 2:00:12 AM
On Mon, Aug 11, 2003 at 06:20:35AM +0100, Jamie Lokier wrote:
> Matt Mackall wrote:
> > On Mon, Aug 11, 2003 at 05:59:47AM +0100, Jamie Lokier wrote:
> > > Matt Mackall wrote:
> > > > And we're safe here. The default pool size is 1024 bits, of which we
> > > > hash 512. I could hash even fewer, say, 480 (and this would deal with the
> > > > cryptoapi padding stuff nicely).
> > >
> > > Where is the pool size set to 1024 bits? I'm reading 2.5.75, and it
> > > looks to me like the hash is over the whole pool, of 512 bits for the
> > > primary and 128 bits for the secondary pool:
> > >
> > > for (i = 0, x = 0; i < r->poolinfo.poolwords; i += 16, x+=2) {
> > ^^^^
> >
> > Unfortunately, there's an ugly mix of words, bytes, and bits here (and it
> > was actually broken for years because of it). The input pool is 4kbits
> > and the output pools are 1k.
>
> You're right about the sizes. But you said it hashes only half of the
> pool. Where is that?

Hmmm, you may have something. I've been over this code in great depth
and I keep finding bits that didn't work quite the way I (or the
original author) thought they did.

The old version does:

    reset hash state
    for each 512 bit chunk in pool:
        hash 512 bits
        mix cumulative 160 bit result back in

    fold cumulative result
    return 80 bit result

..which is vulnerable to your (entirely theoretical) attack

The cryptoapi version I posted does:

    for each 512 bit chunk in pool:
        reset hash state
        hash 512 bits
        mix 160 bits back in

    return 160 bit result

..which is less vulnerable, but I won't say immune.

An ideal version would do:

    pick an offset into the pool
    hash somewhere in the neighborhood of 512 bits
    mix 160 bits back in
    update offset

    return 160 bit result

..which is not vulnerable and faster. I'll whip something up.
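
Something like this, as a sketch only - sha1_over() is a made-up name
for whatever raw transform ends up exported, and the bounds check is
illustrative:

    pos = r->pos;                        /* rotating start offset */
    if (pos > r->poolinfo->poolwords - 16)
            pos = 0;                     /* wrap before the pool's end */
    r->pos = pos + 1;                    /* next read starts a word later */

    sha1_over(r->pool + pos, 16, hash);  /* hash 16 words (512 bits) */
    add_entropy_words(r, hash, 5);       /* mix 160 bits back in */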

--
Matt Mackall : http://www.selenic.com : of or relating to the moon

Jamie Lokier

Aug 11, 2003, 2:30:12 AM

Remember that mixing 160 bits into the pool changes only 160 bits in
the pool, due to the way the mixing function works. Furthermore, they
are consecutive words.

This means that when you hash over the second chunk, there is a good
chance that the bits you mixed in from the first chunk have no effect
on the second hash.

That's severely flawed: it means that consecutive reads can return
identical hash results!
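
To see why, here is a sketch of the mixing step (the types and names
are illustrative, not the real kernel code, which also twists what it
adds):

    #include <stdint.h>

    /* Mixing in 5 words touches only 5 consecutive pool slots. */
    static void mix_words(uint32_t *pool, int poolwords,
                          int *add_ptr, const uint32_t in[5])
    {
            int i;

            for (i = 0; i < 5; i++) {
                    pool[*add_ptr] ^= in[i];
                    *add_ptr = (*add_ptr + 1) % poolwords;
            }
    }

If add_ptr happens to sit inside the first 512-bit chunk, a later hash
over the second chunk reads words this feedback never touched.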

> An ideal version would do:
>
> pick an offset into the pool
> hash somewhere in the neighborhood of 512 bits
> mix 160 bits back in
> update offset
>
> return 160 bit result
>
> ..which is not vulnerable and faster. I'll whip something up.

You must pick the offset carefully so that bits mixed back in affect
consecutive hash results which you are returning.

-- Jamie

David Mosberger

Aug 11, 2003, 2:30:16 PM

Yoshifuji> In article <20030810013041...@redhat.com> (at
Yoshifuji> Sun, 10 Aug 2003 01:30:41 -0700), "David S. Miller"
Yoshifuji> <da...@redhat.com> says:
>> > BTW, we spread ((long) ptr & ~PAGE_MASK); it seems ugly.
>> > Why don't we have virt_to_offset(ptr) or something like this?
>> > #define virt_to_offset(ptr) ((unsigned long) (ptr) & ~PAGE_MASK)
>> > Is this bad idea?

>> With some name like "virt_to_pageoff()" it sounds like a great
>> idea.

Yoshifuji> Okay. How about this? (I'm going to do the actual
Yoshifuji> conversion soon.)

It's a bad choice of name. X_to_Y() normally implies that X and Y are
basically different representations of the same thing (e.g., a page
pointer vs. a virtual address). However, virt_to_pageoff() is a
one-way translation, so it's misleading. In my opinion, it should be
called page_offset() or something like that.

--david

David Wagner

Aug 11, 2003, 3:30:23 PM
Jamie Lokier wrote:
>It may be that an attacker knows about a systemic problem with our
>machine that we don't know about. For example the attacker might know
>our pool state well enough shortly after boot time, to have a chance
>at matching a dictionary of 2^32 hashes. The attacker might have had
>a chance to read our disk, which reseeding the pool at boot time does
>not protect against.
>
>With the right algorithm, we can protect against weaknesses of this kind.

How? No matter what we do, the outputs are going to be a deterministic
function of the state of the pool. If the attacker can guess the entire
state of our pool (or narrow it down to 2^32 possibilities), we're screwed,
no matter what. Right?

I must be misunderstanding your point. What am I missing?

David S. Miller

Aug 11, 2003, 11:00:15 PM
On Mon, 11 Aug 2003 11:21:25 -0700
David Mosberger <dav...@napali.hpl.hp.com> wrote:

> It's a bad choice of name. X_to_Y() normally implies that X and Y are
> basically different representations of the same thing (e.g., a page
> pointer vs. a virtual address). However, virt_to_pageoff() is a
> one-way translation, so it's misleading.

By your arguments, virt_to_page() is also misnamed. It is not possible
to take the page pointer result and use that to get back to the virtual
address input to virt_to_page().

I think as a first step, it's more important to have consistent names
for the two translation routines used.

As a secondary step, we can fix virt_to_*() naming, as appropriate.

Ok?

Theodore Ts'o

Aug 12, 2003, 11:30:10 PM
On Sun, Aug 10, 2003 at 12:45:28PM -0500, Matt Mackall wrote:
> I suppose the comment you deleted was a little light on details, sigh.
>
> The idea with the folding was that we can cover up any systemic
> patterns in the returned hash by xoring the first half of the hash
> with the second half of the hash. While this might seem like a good
> technique intuitively, it's mathematically flawed.

No, that's not why the folding was done. You could have asked me
first....

First of all, I wasn't worried about simple correlations. There are
enough statistical tests done that we don't need to worry about xor
causing problems.

The real issue is discarding information. By folding, we hide
information about the output of the SHA hash, hopefully denying a Very
Sophisticated Attacker (say, at the level of a certain agency that
designed the SHA algorithm) information that might make it easier for
them to attack the SHA algorithm.

> just x. With the former, every bit of input goes through SHA1 twice,
> once in the input pool, and once in the output pool, along with lots
> of feedback to foil backtracking attacks. In the latter, every output
> bit is going through SHA four times, which is just overkill.

Speed really doesn't matter here. /dev/random is supposed to produce
something which is close to true randomness, not crypto
pseudo-randomness. So overkill is a good thing. It should only be
used for generating long-term keys, so speed is really not a concern.

- Ted

Theodore Ts'o

Aug 13, 2003, 12:00:09 AM
On Mon, Aug 11, 2003 at 03:09:19AM +0100, Jamie Lokier wrote:
> If you return xy, you are returning a strong digest of the pool state.
> Even with the backtrack-prevention, if the attacker reads 20 bytes
> from /dev/random and sees a _recognised_ pattern, they immediately
> know the entire state of the secondary pool.
>
> This can happen if, for example, a machine has just rebooted and
> hasn't had time to collect much entropy since the initial state.

No, for the following reasons:

1) By the time you go through all of the init scripts, there has been
quite a lot of randomness mixed into the pool, from the hard drive
interrupts if nothing else. So it would be fairly difficult for the
attacker to get any kind of recognized patterns.

2) There's a reason why random.c recommends that the init scripts do
the following at bootup:

echo "Initializing random number generator..."
random_seed=/var/run/random-seed
# Carry a random seed from start-up to start-up
# Load and then save the whole entropy pool
if [ -f $random_seed ]; then
    cat $random_seed >/dev/urandom
else
    touch $random_seed
fi
chmod 600 $random_seed
poolfile=/proc/sys/kernel/random/poolsize
[ -r $poolfile ] && bytes=`cat $poolfile` || bytes=512
dd if=/dev/urandom of=$random_seed count=1 bs=$bytes

and this at shutdown:

# Carry a random seed from shut-down to start-up
# Save the whole entropy pool
echo "Saving random seed..."
random_seed=/var/run/random-seed
touch $random_seed
chmod 600 $random_seed
poolfile=/proc/sys/kernel/random/poolsize
[ -r $poolfile ] && bytes=`cat $poolfile` || bytes=512
dd if=/dev/urandom of=$random_seed count=1 bs=$bytes

This is subtle. It's important that random data be saved across a
reboot, precisely to avoid the backtracking attack. It is also very
important that the seed file be overwritten immediately after it is used,
both (a) so that the pool is initialized to some other value if the
machine reboots uncleanly without the shutdown script being executed,
and (b) so that an attacker who breaches root cannot read out the
previously used seed file in an attempt to carry out the back-tracking
attack.

> This means that, for a short time after, they can predict further
> output bits exactly. "Catastrophic reseeding" from the primary pool
> limits how many bits they can predict like this - unless, if they're
> lucky, it tells them the state of the primary pool too.

Given the small amount of information which is transferred from the
primary pool to the secondary pool, it can't give them much
information about the primary pool. I thought very carefully about
this design....

> When you output less of the hash, this reduces the information
> available to predict the state of the pool. However, SHA1 is large
> enough that even half of the hash is a strong predictor, so returning
> half of the hash still has this weakness.
>
> (Ironically, a much weaker hash would prevent this mode of data leakage).
>

Nope, because of a very simple information theoretic argument. At
best, each hash can only reveal 80 bits of information, even if the
hash was totally weak. The secondary pool is 128 bytes, or 1024 bits.
A truly stupid/weak hash could at most leak 80 bits of information
from the pool. A stronger hash will leak less information, which is a
good thing, not a bad thing.

This, by the way, is why we need to do catastrophic reseeding; if we
wanted to be truly paranoid and reseeded every 1024 bits of
output, we would be guaranteed that there was no possible way
(using an information theoretic argument) that any information from
the secondary pool could be known. In fact, we reseed every 1024
bytes of input, which means we do rely on SHA being at least halfway
decent.

> As far as I can tell, folding the hash result doesn't help, because
> that simply creates a different kind of hash which can also be
> recognised, and which is large enough to predict the pool state.
>
> Which leaves...
>
> I have just convinced myself of a flaw in the existing random.c -
> which still holds in your patched version. Folding the result of the
> strong hash is useless. Instead, the _input_ to the strong hash
> should be folded, so that a single hash result (whether folded or not)
> doesn't reveal the pool state even when it is recognised.

No, whether you fold the input or the output, you're still creating a
new, slightly different hash. Think about it. If F(x) is the fold
operation, and H(x) is the hash operation, then

H'(x) = F(H(x))
H''(x) = H(F(x))

The reason why folding the output is better is that we're only leaking
80 bits at a time, as opposed to 160 bits at a time.

- Ted

Matt Mackall

Aug 13, 2003, 12:20:04 AM
On Tue, Aug 12, 2003 at 11:20:38PM -0400, Theodore Ts'o wrote:
> On Sun, Aug 10, 2003 at 12:45:28PM -0500, Matt Mackall wrote:
> > I suppose the comment you deleted was a little light on details, sigh.
> >
> > The idea with the folding was that we can cover up any systemic
> > patterns in the returned hash by xoring the first half of the hash
> > with the second half of the hash. While this might seem like a good
> > technique intuitively, it's mathematically flawed.
>
> No, that's not why the folding was done. You could have asked me
> first....

Didn't think you were terribly interested, considering your lack of
response to the accounting problems.

> First of all, I wasn't worried about simple correlations. There are
> enough statistical tests done that we don't need to worry about xor
> causing problems.

The argument applies to any and all structure. If X and Y are n-bit
words and some pairs (X,Y) are more likely than others even though X
and Y are perfectly distributed when looked at normally, the
distribution (and hence the entropy) of X^Y will be less ideal than X
or Y taken alone. No statistical tests will catch the case of
X=hash(a) and Y=hash(~a).

> The real issue is discarding information. By folding, we hide
> information about the output of the SHA hash, hopefully denying a
> Very Sophisticated Attacker (say, at the level of a certain agency
> that designed the SHA algorithm) information that might make it
> easier for them to attack the SHA algorithm.

I'll buy that. However, simply dropping half of the output would serve
this purpose better. As would hashing less than the entire pool on
each read.

Currently we have:

    hash (1024 or 4096) bits down to 160, mix back in (16 or 64), return 80

The patch I'm fiddling with for cryptoapi comparative benchmarking does:

    hash 512 to 160, mix back in 160, return 160

More ideal would be something like:

    hash 512 to 160, mix back in 32, return other 128

Actually, I can simply set a return/mix-back ratio per pool, now that
I've added a third one for urandom, and then we can raise the tinfoil
factor on /dev/random. Seem reasonable?

> > just x. With the former, every bit of input goes through SHA1 twice,
> > once in the input pool, and once in the output pool, along with lots
> > of feedback to foil backtracking attacks. In the latter, every output
> > bit is going through SHA four times, which is just overkill.
>
> Speed really doesn't matter here. /dev/random is supposed to produce
> something which is close to true randomness, not crypto
> pseudo-randomness. So overkill is a good thing. It should only be
> used for generating long-term keys, so speed is really not a concern.

Mostly agreed. But I would also like to:

a) cat /dev/urandom > /dev/hda and not have it take all day
b) enable syncookies with less of a performance penalty
c) generate other short-term keys, cookies, and UUIDs rapidly
d) not disable preemption for long stretches while hashing (a
limitation of cryptoapi)

--
Matt Mackall : http://www.selenic.com : of or relating to the moon

Matt Mackall

Aug 13, 2003, 1:10:06 AM
On Sun, Aug 10, 2003 at 09:54:22PM -0700, David S. Miller wrote:
> There is no reason why random.c's usage of the crypto-API cannot be
> done cleanly and efficiently such that it is both faster and smaller
> in code size than what random.c uses now. All of this _WITHOUT_
> bypassing and compromising the well-designed crypto API interfaces to
> these transformations.

Alright, here's the current state of this, with some numbers to back
it up. Below is my current random -> cryptoapi patch, which sets up
per-cpu crypto tfms at init time and is fairly straightforward. Would
be nice if it didn't block preemption while hashing but that's a
general downside of anything per_cpu. It applies on top of a bunch of
other small stuff that I'll get around to posting eventually. But this
should be enough for discussion.

I built three 2.6.0-test3-mm1 kernels with gcc 3.3.1 for my Athlon:

native   - current /dev/random SHA1 plus some WIP, no cryptoapi
unrolled - /dev/random modified to use cryptoapi's unrolled SHA1
           with its own SHA code dropped
rolled   - like unrolled but with a cryptoapi SHA1 implementation as
           close to the original as reasonable

   text    data    bss     dec    hex  filename  bandwidth  code  data
2413597  477694 157224 3048515 2e8443  native     17.6MB/s  1906   594
2421648  478951 157224 3057823 2ea89f  rolled     14.0MB/s  1913   599
2427280  478944 157224 3063448 2ebe98  unrolled   17.5MB/s  1918   598

(bandwidth here is 'cat /dev/random | pipeview > /dev/null')

So the size penalty for cryptoapi in itself looks to be about 8k+5k
currently. Going to the unrolled SHA1 costs another 5k(!).

I also built a couple custom kernels to let me calculate the overhead
of various parts of the process and the raw algorithm speed.

1/o = Null /dev/urandom hash function = 60.3MB/s
1/(o+c) = Null cryptoapi hash function = 32.9MB/s

This lets us calculate:

1/c = cryptoapi null through (aka overhead) = 72.4MB/s (mostly memcpy?)
1/n = native SHA1 implementation            = 24.9MB/s
1/r = "rolled" cryptoapi SHA1 - overhead    = 24.4MB/s (extra memset)
1/u = unrolled SHA1 in cryptoapi - overhead = 37.4MB/s
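
(o, c, n, r and u are times per megabyte, so the overhead figure comes
from subtracting inverses: c = 1/32.9 - 1/60.3 ~= 0.0138 s/MB, giving
1/c ~= 72.4MB/s.)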

So unrolled is about 50% faster and, looking at the size of the .o
files (2.2k vs 7.8k), almost 4 times bigger. And about a third of the
time spent hashing with the cryptoapi is bookkeeping and buffering
overhead.

So we've got a few ways to proceed:

a) keep two copies of SHA around
b) cryptoapi+sha become non-optional (+optional rolled version)
c) somebody exports raw SHA transform and cryptoapi stays optional

(The big downside of c) is that we won't be able to easily plug in
different transforms)

diff -urN -X dontdiff orig/drivers/char/mem.c work/drivers/char/mem.c
--- orig/drivers/char/mem.c 2003-07-13 22:34:32.000000000 -0500
+++ work/drivers/char/mem.c 2003-08-12 21:18:51.000000000 -0500
@@ -680,7 +680,6 @@
S_IFCHR | devlist[i].mode, devlist[i].name);
}

- rand_initialize();
#if defined (CONFIG_FB)
fbmem_init();
#endif
diff -urN -X dontdiff orig/drivers/char/random.c work/drivers/char/random.c
--- orig/drivers/char/random.c 2003-08-12 21:18:53.000000000 -0500
+++ work/drivers/char/random.c 2003-08-12 21:22:01.000000000 -0500
@@ -249,11 +249,16 @@
#include <linux/genhd.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
+#include <linux/crypto.h>
+#include <linux/cpu.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>

#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/irq.h>
#include <asm/io.h>
+#include <asm/scatterlist.h>

/*
* Configuration information
@@ -360,7 +365,7 @@
* modulo the generator polymnomial. Now, for random primitive polynomials,
* this is a universal class of hash functions, meaning that the chance
* of a collision is limited by the attacker's knowledge of the generator
- * polynomail, so if it is chosen at random, an attacker can never force
+ * polynomial, so if it is chosen at random, an attacker can never force
* a collision. Here, we use a fixed polynomial, but we *can* assume that
* ###--> it is unknown to the processes generating the input entropy. <-###
* Because of this important property, this is a good, collision-resistant
@@ -374,6 +379,8 @@
static DECLARE_WAIT_QUEUE_HEAD(random_read_wait);
static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);

+static DEFINE_PER_CPU(struct crypto_tfm *, sha_tfm);
+
/*
* Forward procedure declarations
*/
@@ -775,122 +782,6 @@
add_timer_randomness(disk->random, 0x100+MKDEV(disk->major, disk->first_minor));
}

-/******************************************************************
- *
- * Hash function definition
- *
- *******************************************************************/
-
-/*
- * This chunk of code defines a function
- * void SHATransform(__u32 digest[HASH_BUFFER_SIZE + HASH_EXTRA_SIZE],
- * __u32 const data[16])
- *
- * The function hashes the input data to produce a digest in the first
- * HASH_BUFFER_SIZE words of the digest[] array, and uses HASH_EXTRA_SIZE
- * more words for internal purposes. (This buffer is exported so the
- * caller can wipe it once rather than this code doing it each call,
- * and tacking it onto the end of the digest[] array is the quick and
- * dirty way of doing it.)
- *
- * For /dev/random purposes, the length of the data being hashed is
- * fixed in length, so appending a bit count in the usual way is not
- * cryptographically necessary.
- */
-
-#define HASH_BUFFER_SIZE 5
-#define HASH_EXTRA_SIZE 80
-
-/*
- * SHA transform algorithm, taken from code written by Peter Gutmann,
- * and placed in the public domain.
- */
-
-/* The SHA f()-functions. */
-
-#define f1(x,y,z) ( z ^ (x & (y^z)) ) /* Rounds 0-19: x ? y : z */
-#define f2(x,y,z) (x ^ y ^ z) /* Rounds 20-39: XOR */
-#define f3(x,y,z) ( (x & y) + (z & (x ^ y)) ) /* Rounds 40-59: majority */
-#define f4(x,y,z) (x ^ y ^ z) /* Rounds 60-79: XOR */
-
-/* The SHA Mysterious Constants */
-
-#define K1 0x5A827999L /* Rounds 0-19: sqrt(2) * 2^30 */
-#define K2 0x6ED9EBA1L /* Rounds 20-39: sqrt(3) * 2^30 */
-#define K3 0x8F1BBCDCL /* Rounds 40-59: sqrt(5) * 2^30 */
-#define K4 0xCA62C1D6L /* Rounds 60-79: sqrt(10) * 2^30 */
-
-#define ROTL(n,X) ( ( ( X ) << n ) | ( ( X ) >> ( 32 - n ) ) )
-
-#define subRound(a, b, c, d, e, f, k, data) \
- ( e += ROTL( 5, a ) + f( b, c, d ) + k + data, b = ROTL( 30, b ) )
-
-
-static void SHATransform(__u32 digest[85], __u32 const data[16])
-{
- __u32 A, B, C, D, E; /* Local vars */
- __u32 TEMP;
- int i;
-#define W (digest + HASH_BUFFER_SIZE) /* Expanded data array */
-
- /*
- * Do the preliminary expansion of 16 to 80 words. Doing it
- * out-of-line line like this is faster than doing it in-line on
- * register-starved machines like the x86, and not really any
- * slower on real processors.
- */
- memcpy(W, data, 16*sizeof(__u32));
- for (i = 0; i < 64; i++) {
- TEMP = W[i] ^ W[i+2] ^ W[i+8] ^ W[i+13];
- W[i+16] = ROTL(1, TEMP);
- }
-
- /* Set up first buffer and local data buffer */
- A = digest[ 0 ];
- B = digest[ 1 ];
- C = digest[ 2 ];
- D = digest[ 3 ];
- E = digest[ 4 ];
-
- /* Heavy mangling, in 4 sub-rounds of 20 iterations each. */
- for (i = 0; i < 80; i++) {
- if (i < 40) {
- if (i < 20)
- TEMP = f1(B, C, D) + K1;
- else
- TEMP = f2(B, C, D) + K2;
- } else {
- if (i < 60)
- TEMP = f3(B, C, D) + K3;
- else
- TEMP = f4(B, C, D) + K4;
- }
- TEMP += ROTL(5, A) + E + W[i];
- E = D; D = C; C = ROTL(30, B); B = A; A = TEMP;
- }
-
- /* Build message digest */
- digest[ 0 ] += A;
- digest[ 1 ] += B;
- digest[ 2 ] += C;
- digest[ 3 ] += D;
- digest[ 4 ] += E;
-
- /* W is wiped by the caller */
-#undef W
-}
-
-#undef ROTL
-#undef f1
-#undef f2
-#undef f3
-#undef f4
-#undef K1
-#undef K2
-#undef K3
-#undef K4
-#undef subRound
-
/*********************************************************************
*
* Entropy extraction routines
@@ -899,8 +790,6 @@

#define EXTRACT_ENTROPY_USER 1
#define EXTRACT_ENTROPY_LIMIT 2
-#define TMP_BUF_SIZE (HASH_BUFFER_SIZE + HASH_EXTRA_SIZE)
-#define SEC_XFER_SIZE (TMP_BUF_SIZE*4)

static ssize_t extract_entropy(struct entropy_store *r, void * buf,
size_t nbytes, int flags);
@@ -912,7 +801,7 @@
*/
static void reseed_pool(struct entropy_store *r, int margin, int wanted)
{
- __u32 tmp[TMP_BUF_SIZE];
+ __u32 tmp[32]; /* 256 bits */
int bytes;

DEBUG_ENT("reseed %s wants %d bits (margin %d)\n",
@@ -948,9 +837,10 @@
size_t nbytes, int flags)
{
ssize_t ret, i, pos;
- __u32 tmp[TMP_BUF_SIZE];
- __u32 x;
unsigned long cpuflags;
+ struct crypto_tfm *tfm;
+ struct scatterlist sg[1];
+ __u32 hash[5]; /* 160 bits */

/* Hold lock while accounting */
spin_lock_irqsave(&r->lock, cpuflags);
@@ -986,18 +876,15 @@
}

DEBUG_ENT("extract sleep (%d bytes left)\n", nbytes);
-
schedule();
-
DEBUG_ENT("extract wake\n");
}

- /* Hash the pool to get the output */
- tmp[0] = 0x67452301;
- tmp[1] = 0xefcdab89;
- tmp[2] = 0x98badcfe;
- tmp[3] = 0x10325476;
- tmp[4] = 0xc3d2e1f0;
+ /* Pick a position in the pool to hash from */
+ pos = r->pos;
+ if(pos > r->poolinfo->poolwords - 15)
+ pos=0;
+ r->pos = pos + 1;

/* Pick a position in the pool to hash from */
pos = r->pos;
@@ -1015,26 +902,32 @@
* backtrack prevention over catastrophic reseeding.
*/

- SHATransform(tmp, r->pool + pos);
- add_entropy_words(r, tmp, HASH_BUFFER_SIZE);
+ sg[0].page = virt_to_page(r->pool + pos);
+ sg[0].offset = ((long)(r->pool + pos) & ~PAGE_MASK);
+ /* avoid padding overhead in cryptoapi */
+ sg[0].length = 56;
+ tfm=get_cpu_var(sha_tfm);
+ crypto_digest_digest(tfm, sg, 1, (char *)hash);
+ put_cpu_var(sha_tfm);
+ add_entropy_words(r, hash, 5);

/* Copy data to destination buffer */
- i = min(nbytes, HASH_BUFFER_SIZE*sizeof(__u32));
+ i = min(nbytes, sizeof(hash));
if (flags & EXTRACT_ENTROPY_USER) {
- i -= copy_to_user(buf, (__u8 const *)tmp, i);
+ i -= copy_to_user(buf, (__u8 const *)hash, i);
if (!i) {
ret = -EFAULT;
break;
}
} else
- memcpy(buf, (__u8 const *)tmp, i);
+ memcpy(buf, (__u8 const *)hash, i);
nbytes -= i;
buf += i;
ret += i;
}

/* Wipe data just returned from memory */
- memset(tmp, 0, sizeof(tmp));
+ memset(hash, 0, sizeof(hash));

return ret;
}
@@ -1093,7 +986,25 @@
}
}

-void __init rand_initialize(void)
+static int random_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ if (action == CPU_UP_PREPARE)
+ {
+ per_cpu(sha_tfm, (long)hcpu) = crypto_alloc_tfm("sha1", 0);
+
+ printk(KERN_NOTICE "random cpu %ld sha_tfm = %p\n",
+ (long)hcpu, per_cpu(sha_tfm, (long)hcpu));
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block random_nb = {
+ .notifier_call = random_cpu_notify,
+};
+
+static int __init init_random(void)
{
int i;

@@ -1103,10 +1014,14 @@
"nonblocking");

if(!(input_pool && blocking_pool && nonblocking_pool))
- return;
+ return -ENOMEM;

- if(batch_entropy_init(BATCH_ENTROPY_SIZE))
- return;
+ if((i=batch_entropy_init(BATCH_ENTROPY_SIZE)) != 0)
+ return i;
+
+ random_cpu_notify(&random_nb, (unsigned long)CPU_UP_PREPARE,
+ (void *)(long)smp_processor_id());
+ register_cpu_notifier(&random_nb);

init_std_data(input_pool);
sysctl_init_random(input_pool);
@@ -1115,8 +1030,12 @@
irq_timer_state[i] = NULL;
memset(&keyboard_timer_state, 0, sizeof(struct timer_rand_state));
memset(&mouse_timer_state, 0, sizeof(struct timer_rand_state));
+
+ return 0;
}

+__initcall(init_random);
+
void rand_initialize_irq(int irq)
{
struct timer_rand_state *state;
@@ -1153,15 +1072,10 @@
static ssize_t
random_read(struct file * file, char * buf, size_t nbytes, loff_t *ppos)
{
- ssize_t n, retval = 0, count = 0;
+ ssize_t n, retval = 0, count = 0;

- if (nbytes == 0)
- return 0;
-
while (nbytes > 0) {
- n = nbytes;
- if (n > SEC_XFER_SIZE)
- n = SEC_XFER_SIZE;
+ n = min_t(size_t, nbytes, BLOCKING_POOL_SIZE/8);

/* We can take all the entropy in the input pool */
reseed_pool(blocking_pool, 0, n);
@@ -1839,13 +1753,16 @@
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)

static int syncookie_init;
-static __u32 syncookie_secret[2][16-3+HASH_BUFFER_SIZE];
+static __u32 syncookie_secret[2][16-3];

__u32 secure_tcp_syn_cookie(__u32 saddr, __u32 daddr, __u16 sport,
__u16 dport, __u32 sseq, __u32 count, __u32 data)
{
- __u32 tmp[16 + HASH_BUFFER_SIZE + HASH_EXTRA_SIZE];
- __u32 seq;
+ __u32 tmp[16]; /* 512 bits */
+ __u32 hash[5]; /* 160 bits */
+ __u32 seq;
+ struct crypto_tfm *tfm;
+ struct scatterlist sg[1];

/*
* Pick two random secrets the first time we need a cookie.
@@ -1866,22 +1783,27 @@
* MSS into the second hash value.
*/

- memcpy(tmp+3, syncookie_secret[0], sizeof(syncookie_secret[0]));
+ sg[0].page = virt_to_page(tmp);
+ sg[0].offset = ((long) tmp & ~PAGE_MASK);
+ sg[0].length = sizeof(tmp);
+
tmp[0]=saddr;
tmp[1]=daddr;
tmp[2]=(sport << 16) + dport;
- SHATransform(tmp+16, tmp);
- seq = tmp[17] + sseq + (count << COOKIEBITS);
+
+ memcpy(tmp+3, syncookie_secret[0], sizeof(syncookie_secret[0]));
+ tfm = get_cpu_var(sha_tfm);
+ crypto_digest_digest(tfm, sg, 1, (char *)hash);
+
+ seq = hash[1] + sseq + (count << COOKIEBITS);

memcpy(tmp+3, syncookie_secret[1], sizeof(syncookie_secret[1]));
- tmp[0]=saddr;
- tmp[1]=daddr;
- tmp[2]=(sport << 16) + dport;
tmp[3] = count; /* minute counter */
- SHATransform(tmp+16, tmp);
+ crypto_digest_digest(tfm, sg, 1, (char *)hash);
+ put_cpu_var(sha_tfm);

/* Add in the second hash and the data */
- return seq + ((tmp[17] + data) & COOKIEMASK);
+ return seq + ((hash[1] + data) & COOKIEMASK);
}

/*
@@ -1896,19 +1818,30 @@
__u32 check_tcp_syn_cookie(__u32 cookie, __u32 saddr, __u32 daddr, __u16 sport,
__u16 dport, __u32 sseq, __u32 count, __u32 maxdiff)
{
- __u32 tmp[16 + HASH_BUFFER_SIZE + HASH_EXTRA_SIZE];
- __u32 diff;
+ __u32 tmp[16]; /* 512 bits */
+ __u32 hash[5]; /* 160 bits */
+ __u32 diff;
+ struct crypto_tfm *tfm;
+ struct scatterlist sg[1];

if (syncookie_init == 0)
return (__u32)-1; /* Well, duh! */

- /* Strip away the layers from the cookie */
- memcpy(tmp+3, syncookie_secret[0], sizeof(syncookie_secret[0]));
+ sg[0].page = virt_to_page(tmp);
+ sg[0].offset = ((long) tmp & ~PAGE_MASK);
+ sg[0].length = sizeof(tmp);
+
tmp[0]=saddr;
tmp[1]=daddr;
tmp[2]=(sport << 16) + dport;
- SHATransform(tmp+16, tmp);
- cookie -= tmp[17] + sseq;
+
+ /* Strip away the layers from the cookie */
+ memcpy(tmp+3, syncookie_secret[0], sizeof(syncookie_secret[0]));
+ tfm = get_cpu_var(sha_tfm);
+ crypto_digest_digest(tfm, sg, 1, (char *)hash);
+ put_cpu_var(sha_tfm);
+
+ cookie -= hash[1] + sseq;
/* Cookie is now reduced to (count * 2^24) ^ (hash % 2^24) */

diff = (count - (cookie >> COOKIEBITS)) & ((__u32)-1 >> COOKIEBITS);
@@ -1916,13 +1849,12 @@
return (__u32)-1;

memcpy(tmp+3, syncookie_secret[1], sizeof(syncookie_secret[1]));
- tmp[0] = saddr;
- tmp[1] = daddr;
- tmp[2] = (sport << 16) + dport;
tmp[3] = count - diff; /* minute counter */
- SHATransform(tmp+16, tmp);
+ tfm = get_cpu_var(sha_tfm);
+ crypto_digest_digest(tfm, sg, 1, (char *)hash);
+ put_cpu_var(sha_tfm);

- return (cookie - tmp[17]) & COOKIEMASK; /* Leaving the data behind */
+ return (cookie - hash[1]) & COOKIEMASK; /* Leaving the data behind */
}
#endif


--
Matt Mackall : http://www.selenic.com : of or relating to the moon

Jim Carter

Aug 13, 2003, 12:00:13 PM
In response to a message of Theodore Ts'o <ty...@mit.edu> dated 2003-08-12
(Subject was: [RFC][PATCH] Make cryptoapi non-optional?):

I've recently come to the unpleasant conclusion that the reason
i810_rng.o (kernel 2.4.20) doesn't load on any of the numerous Dell
models I've tried is that the motherboards actually lack the
hardware random number generator. The motherboard of a Dell Dimension 4100
has a SST 49LF004A firmware hub, not an Intel 82802AB, and among those
chips plus the Winbond W39V040FA and the STM M50FW040, presumably most of
the ones competing for that market, only the Intel chip has the random
number generator.

It's a violation of modularity plus a very effective marketing ploy for
Intel to put the RNG on the BIOS flash RAM chip. Hiss, boo!

Dell has a very effective procedure for discouraging customer feedback, and
I have not been able to identify a useful recipient for product
suggestions. If any of you people happen to have the ear of someone at
Dell, could you please carry the message that there are people who
depend on crypto and will suggest to their management that all
machines must have a hardware random number generator - a requirement
that would leave Dell out of the running.

A similar suggestion might be made to Winbond, STM and SST.

James F. Carter Voice 310 825 2897 FAX 310 206 6673
UCLA-Mathnet; 6115 MSA; 405 Hilgard Ave.; Los Angeles, CA, USA 90095-1555
Email: ji...@math.ucla.edu http://www.math.ucla.edu/~jimc (q.v. for PGP key)

Jeff Garzik

Aug 13, 2003, 12:20:10 PM
On Wed, Aug 13, 2003 at 08:44:56AM -0700, Jim Carter wrote:
> In response to a message of Theodore Ts'o <ty...@mit.edu> dated 2003-08-12
> (Subject was: [RFC][PATCH] Make cryptoapi non-optional?):
>
> I've recently come to the unpleasant conclusion that the reason i810_rng.o
> (kernel 2.4.20) doesn't load on any of the numerous Dell models I've
> tried is that the motherboards actually lack the
> hardware random number generator.

This is correct: Intel dropped the RNG after the first few releases of
their ICH chipset (which is now up to ICH5). Not much Dell can do about
it.

Intel's RNG was slow anyway, compared to the AMD and now VIA RNGs, so
you didn't want it anyway :) I really like VIA's "xstore", which is an
RNG implemented via a CPU instruction. That way you don't need a kernel
driver at all, really.

Jeff

Jamie Lokier

Aug 13, 2003, 2:50:15 PM
Theodore Ts'o wrote:
> 1) By the time you go through all of the init scripts, there has been
> quite a lot of randomness mixed into the pool, from the hard drive
> interrupts if nothing else. So it would be fairly difficult for the
> attacker to get any kind of recognized patterns.

This is not true when booting from ramdisk or ROM. On some systems
everything - the initial CPU clock counter, relative timings, and all
the rest - is deterministic, locked to the same master clock.

On those systems, the random state has zero entropy until the first
network packet arrives or the pool can be seeded from saved (and well
hidden) state.

> 2) There's a reason why random.c recommends that the init scripts do
> the follwoing at bootup:

> [save & restore over boot]


> This is subtle. It's important that random data be saved across a
> reboot, precisely to avoid the backtracking attack.

Agreed. This is very, very important.

Unfortunately it isn't airtight if the attacker can see the file in
flight to your NFS-mounted diskless box, although booting over NFS
would tend to introduce enough entropy by itself so we are fine here :)

Everything important I have to say is below, and assumes one of:

1. some kind of weakness in saving/restoring/overwriting the seed.
2. other flaws which make the pool state non-random in a predictable way.
3. SHA revealing more than zero bits of useful entropy per hash.

If you don't believe in those scenarios, press "d" in your mailer now :)

> > When you output less of the hash, this reduces the information
> > available to predict the state of the pool. However, SHA1 is large
> > enough that even half of the hash is a strong predictor, so returning
> > half of the hash still has this weakness.
> >
> > (Ironically, a much weaker hash would prevent this mode of data leakage).
>
> Nope, because of a very simple information theoretic argument. At
> best, each hash can only reveal 80 bits of information, even if the
> hash was totally weak.

This is not true of a strong hash. If there is insufficient entropy
in the input, a strong hash reveals _more_ bits of input than it
outputs.

In general, if the input has d bits of entropy, where d << strong hash
output size, then a dictionary attack of size 2^d will determine the
whole input with virtual certainty, regardless of the input size.
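
A toy illustration of such a dictionary attack - toy_hash() here is a
stand-in for the strong hash, and none of this is the real random.c
code:

    #include <stdint.h>

    /* stand-in for the strong hash; purely illustrative */
    static uint32_t toy_hash(uint64_t input)
    {
            uint64_t h = input * 0x9e3779b97f4a7c15ULL;
            return (uint32_t)(h >> 32);
    }

    /* If the input is known to have only d bits of entropy, 2^d
     * trial hashes recover it from a single observed output. */
    static int dictionary_attack(uint32_t observed, unsigned d,
                                 uint64_t *recovered)
    {
            uint64_t guess;

            for (guess = 0; guess < ((uint64_t)1 << d); guess++) {
                    if (toy_hash(guess) == observed) {
                            *recovered = guess;
                            return 1;   /* input found */
                    }
            }
            return 0;
    }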

> The secondary pool is 128 bytes, or 1024 bits.
> A truely stupid/weak hash could at most leak 80 bits of information
> from the pool. A stronger hash will leak less information, which is a
> good thing, not at a bad thing.

Incorrect, _except_ when you are certain the pool has enough entropy.

> This by the way is why we need to do catastrophic reseeding; if we
> wanted to be truely paranoid, if we reseeded every 1024 bits of
> output, there we would be guaranteed that there was no possible way
> (using an information theoretic argument) that any information from
> the secondary pool could be known. In fact, we reseed every 1024
> bytes of input, which means we do rely on SHA being at least halfway
> decent.

This is true provided the primary pool always has enough entropy to
keep the secondary pool filled with enough entropy, from the
attacker's point of view, to prevent a dictionary matching attack.

It makes your point about retaining the entropy across reboots even
more important. When there is enough entropy, the dictionary attack
doesn't work - assuming the hash is not invertible, which we do assume.

Note that when I say "entropy from the attacker's point of view", I
don't mean entropy as estimated in random.c. By assuming SHA1 is
strong enough, when there is adequate entropy in the pool to prevent
dictionary attacks, "entropy from the attacker's point of view" does
not decrease when the pool is read - unlike random.c's estimated
value, which has a different meaning: "entropy assuming SHA1 is
breakable".

It should be noted that, from an information theoretic point of view,
if the strong hash leaks _any_ information about the pool state, this
reduces the pool entropy from the point of view of the attacker, and,
if successive hashes leak different information, may gradually open
up the pool to a dictionary attack regardless of state at boot time.

Practically, we are relying on the hash leaking _zero_ useful
information about the pool state which the attacker can accumulate to
produce a dictionary of possible pool states. That's just another way
of saying what you said: we rely on SHA being decent.

> > I have just convinced myself of a flaw in the existing random.c -
> > which still holds in your patched version. Folding the result of the
> > strong hash is useless. Instead, the _input_ to the strong hash
> > should be folded, so that a single hash result (whether folded or not)
> > doesn't reveal the pool state even when it is recognised.
>
> No, whether you fold the input or the output, you're still creating a
> new, slightly different hash. Think about it. If F(x) is the fold
> operation, and H(x) is the hash operation, then
>
> H'(x) = F(H(x))
> H''(x) = H(F(x))
>
> The reason why folding the output is better is that we're only leaking
> 80 bits at a time, as opposed to 160 bits at a time.

I disagree for the simple reason that a strong hash can reveal more
bits of input than it outputs, when there is insufficient entropy in
the input from the point of view of the attacker.

Outputting 80 bits instead of 160 bits makes very little difference,
if the pool has only 32 bits of entropy from the point of view of the attacker.

If the attacker knows the pool state is among a set of 2^32 states
which he knows or has calculated, either hash gives adequate
information to reconstruct the pool state. If the attacker is merely
testing to see whether the pool state is one of that set of 2^32, 160
bits of hash gives the attacker ~(1-2^-128) probability of determining
the pool state, whereas 80 bits gives the attacker ~(1-2^-48)
probability of determining it.

The point about folding the input, or only using part of the input, is
so that this kind of attack cannot determine the whole input from a
single hash output. However, the argument isn't that convincing,
because if there's a reason why the pool is in a predictable state,
it's quite likely that the _whole_ pool is in a state which is
predictable given part of the pool.

The only exception I see to that (and I don't claim to see all the
exceptions) is when the input mixer has changed some of the words
which aren't yet being used as input to the hash, but will be on
subsequent hashings. Then if the attacker develops perfect knowledge
of the input to one hash output, he would still not be able to predict
subsequent outputs.

Of course, all this talk of hashing over a limited part of the pool is
something you do already by having two pools, and hash only over the
bits in the secondary pool.

The subtlety, then, is knowing when to move information from the
primary pool to thwart a dictionary attack on a partially known pool state.

My conclusions
==============

- The only effect of Matt's change to limit the number of input bits
to the hash, is to increase the number of hash results that an
attacker needs, at times when a dictionary attack is feasible,
before a dictionary hit can predict future and past hash results
reliably, over whatever time period occurs between catastrophic
reseedings. There will be times when a single hash result is
enough to predict those results anyway - Matt's change doesn't
protect against all weak pool states. Note that this mode of
attack does not require a weakness of the hash; it relies on the
pool state being predictable due to inadequate seeding _or_
weakness of the hash.

- Folding the hash output bits has no useful effect. It reduces the
number of bits an attacker needs to read a whole hash result in
order to run a dictionary attack against a weak pool state, but
the attacker doesn't need the whole hash result to do that anyway.

- Folding the hash output bits does not protect against you-know-who
from inverting the hash, because it simply changes the hash
function with no justification for calling this a stronger one.

Anyone inverting SHA1 in order to predict random.c's past or
future outputs would need to extend the analysis into the
backtrack-protection feedback anyway, so such an attack would need
to be specialised for random.c, whether hash output folding is
present or not.

Final conclusion :)
===================

- Hashing over fewer input bits is of dubious benefit. But the
output folding may as well go because it isn't useful at all.

My 2p,
:)
-- Jamie

Jamie Lokier

Aug 13, 2003, 3:00:27 PM
Jeff Garzik wrote:
> Intel's RNG was slow anyway, compared to the AMG and now VIA RNGs, so
> you didn't want it anyway :) I really like VIA's "xstore", which is an
> RNG implemented via a CPU instruction. That way you don't need a kernel
> driver at all, really.

From an electronic point of view, assuming similar technology, and
circuits of similar size, I would expect a slower source to be able to
give better randomness than a very fast source.

Whether it does depends on the implementation, of course.

I don't know if "xstore" can execute on every clock cycle. But
imagine if it could execute once every 1GHz cycle, and if there was
only one RNG circuit, not lots of them in parallel. Then it would
seem difficult indeed to ensure that successive outputs of the RNG
were strongly independent. Whereas the same circuit, integrating over
a signal 10^6 times to return a value every 1ms, would return better
results but fewer of them.

That said, it might be better to read lots of results from "xstore"
and feed them into a known algorithm like random.c, than to read just
a few results from a slow Intel black box and trust the quality of its
source.

-- Jamie

Jamie Lokier

Aug 13, 2003, 3:50:08 PM
David Wagner wrote:
> >It may be that an attacker knows about a systemic problem with our
> >machine that we don't know about. For example the attacker might know
> >our pool state well enough shortly after boot time, to have a chance
> >at matching a dictionary of 2^32 hashes. The attacker might have had
> >a chance to read our disk, which reseeding the pool at boot time does
> >not protect against.
> >
> >With the right algorithm, we can protect against weaknesses of this kind.
>
> How? No matter what we do, the outputs are going to be a deterministic
> function of the state of the pool. If the attacker can guess the entire
> state of our pool (or narrow it down to 2^32 possibilities), we're screwed,
> no matter what. Right?

Right if the attacker can guess the entire pool state. If it's
narrowed to 2^32 possibilities, the question is, how hard do we make
it for the attacker to narrow it down further?

By hashing over fewer than all the internal state bits, we prevent the
attacker from narrowing down knowledge of the unhashed bits _from a
single hash_, under those conditions where it's possible for the
hashed bits to be a weak state while the unhashed bits may not be
determined by the hashed bits.

This can sometimes be true, theoretically, and sometimes it will not
be true.

If the attacker can read multiple hash results, though, they will
eventually have enough to guess the entire pool state anyway, if the
state is weak as a whole. Also, by hashing over fewer bits, some
kinds of partially-weak state, where a subset of bits is weak but
other bits are unguessable, become guessable where they would not be
with hashing over the whole state.

It's a convoluted enough mix of pros and cons that you can see why I
said, in my last reply to Matt & Ted, that hashing over fewer bits than in
the pool is of dubious value.

-- Jamie

Val Henson

Aug 14, 2003, 1:00:15 PM
On Tue, Aug 12, 2003 at 11:06:14PM -0500, Matt Mackall wrote:
> On Tue, Aug 12, 2003 at 11:20:38PM -0400, Theodore Ts'o wrote:
>
> > The real issue is discarding information. By folding, we hide
> > information about the output of the SHA hash, hopefully denying a
> > Very Sophisticated Attacker (say, at the level of a certain agency
> > that designed the SHA algorithm) information that might make it
> > easier for them to attack the SHA algorithm.
>
> I'll buy that. However simply dropping the data would serve this
> purpose better. As would hashing less than the entire pool on each
> read.

I agree. Throwing away 80 bits of the 160 bit output is much better
than folding the two halves together. In all the cases we've
discussed where folding might improve matters, throwing away half the
output would be even better.

-VAL

David Wagner

Aug 14, 2003, 3:50:17 PM
Val Henson wrote:
>Throwing away 80 bits of the 160 bit output is much better
>than folding the two halves together. In all the cases we've
>discussed where folding might improve matters, throwing away half the
>output would be even better.

I don't see where you are getting this from. Define
F(x) = first80bits(SHA(x))
G(x) = first80bits(SHA(x)) xor last80bits(SHA(x)).
What makes you think that F is a better (or worse) hash function than G?

I think there is little basis for discriminating between them.
If SHA is cryptographically secure, both F and G are fine.
If SHA is insecure, then all bets are off, and both F and G might be weak.
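
In code, over a 20-byte SHA1 digest, the two candidates are simply:

    #include <string.h>

    /* F: keep the first 80 bits of the digest */
    void F(const unsigned char sha[20], unsigned char out[10])
    {
            memcpy(out, sha, 10);
    }

    /* G: fold the two 80-bit halves together */
    void G(const unsigned char sha[20], unsigned char out[10])
    {
            int i;

            for (i = 0; i < 10; i++)
                    out[i] = sha[i] ^ sha[i + 10];
    }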

Chris Friesen

Aug 14, 2003, 4:10:13 PM
David Wagner wrote:

> I don't see where you are getting this from. Define
> F(x) = first80bits(SHA(x))
> G(x) = first80bits(SHA(x)) xor last80bits(SHA(x)).
> What makes you think that F is a better (or worse) hash function than G?
>
> I think there is little basis for discriminating between them.
> If SHA is cryptographically secure, both F and G are fine.

In this case we can save the bother of doing the xor, no?

Chris

--
Chris Friesen | MailStop: 043/33/F10
Nortel Networks | work: (613) 765-0557
3500 Carling Avenue | fax: (613) 765-2986
Nepean, ON K2H 8E9 Canada | email: cfri...@nortelnetworks.com

Jamie Lokier

Aug 14, 2003, 5:40:29 PM
David Wagner wrote:
> Val Henson wrote:
> >Throwing away 80 bits of the 160 bit output is much better
> >than folding the two halves together. In all the cases we've
> >discussed where folding might improve matters, throwing away half the
> >output would be even better.
>
> I don't see where you are getting this from. Define
> F(x) = first80bits(SHA(x))
> G(x) = first80bits(SHA(x)) xor last80bits(SHA(x)).
> What makes you think that F is a better (or worse) hash function than G?
>
> I think there is little basis for discriminating between them.
> If SHA is cryptographically secure, both F and G are fine.
> If SHA is insecure, then all bets are off, and both F and G might be weak.

I still do not see why either F or G are any more secure than SHA.

F, G and SHA are all supposedly strong hash functions, and I don't see
why the postulated folks capable of getting useful information about
the inputs to SHA would have any more difficulty getting useful
information about the inputs to F or G.

Unless we're postulating that SHA is deliberately weak, so that the
designers have a back door, that is not present in F or G.

Could some explain, please?

-- Jamie

Mike Fedyk

Aug 14, 2003, 8:20:10 PM
On Wed, Aug 13, 2003 at 07:36:28PM +0100, Jamie Lokier wrote:
> Unfortunately it isn't airtight if the attacker can see the file in
> flight to your NFS-mounted diskless box, although booting over NFS
> would tend to introduce enough entropy by itself so we are fine here :)

Assuming that all network drivers are contributing entropy...

Was Robert Love's patch for this ever merged?

Val Henson

Aug 14, 2003, 8:20:09 PM
On Thu, Aug 14, 2003 at 07:40:25PM +0000, David Wagner wrote:
> Val Henson wrote:
> >Throwing away 80 bits of the 160 bit output is much better
> >than folding the two halves together. In all the cases we've
> >discussed where folding might improve matters, throwing away half the
> >output would be even better.
>
> I don't see where you are getting this from. Define
> F(x) = first80bits(SHA(x))
> G(x) = first80bits(SHA(x)) xor last80bits(SHA(x)).
> What makes you think that F is a better (or worse) hash function than G?

See Matt Mackall's earlier post on correlation, excerpted at the end
of this message. Basically, with two strings x and y, the entropy of
x alone or y alone is always greater than or equal to the entropy of x
xored with y.

entropy(x) >= entropy(x xor y)
entropy(y) >= entropy(x xor y)

If you have the goal of throwing away some information so that the
attacker can't guess the state of the pool (which I'm not entirely
sure I agree with), then it is better to throw away half the hash than
to xor the two halves together.

> I think there is little basis for discriminating between them.
> If SHA is cryptographically secure, both F and G are fine.
> If SHA is insecure, then all bets are off, and both F and G might be weak.

There's plenty of basis. We have two goals:

1. Return as much random data as possible (maximize entropy).
2. Don't reveal the state of the entire pool.

Throwing away half the result is at least as good or better than
folding the result. There is no way in which folding is better than
halving, and folding is demonstrably worse if SHA-1's output is
correlated across the two halves in any way (which is almost certainly
true).

-VAL

From Matt Mackall:

Let's assume the simplest case of a two bit hash xy. "Patterns" here
are going to translate into a correlation between otherwise
well-distributed x and y. A perfectly uncorrelated system xy is going
to have two bits of entropy. A perfectly correlated (or
anti-correlated) system xy is going to have only 1 bit of entropy.

Now what happens when we fold these two bits together z=x^y? In the
uncorrelated case we end up with a well-distributed z. In the
correlated case, we end up with a constant, that is z=x^x=0 or z=x^~x=1, and
we've eliminated any entropy we once had. If correlation is less than
100%, then we get somewhere between 0 and 1 bits of entropy, but
always less than if we just returned z=x or z=y. This argument
naturally scales up to larger variables x and y.

Robert Love

Aug 14, 2003, 8:30:12 PM
On Thu, 2003-08-14 at 17:16, Mike Fedyk wrote:

> Assuming that all network drivers are contributing entropy...
>
> Was Robert Love's patch for this ever merged?

Nope.

Robert Love

Val Henson

Aug 14, 2003, 8:40:08 PM
On Thu, Aug 14, 2003 at 10:36:00PM +0100, Jamie Lokier wrote:
> David Wagner wrote:
> > Val Henson wrote:
> > >Throwing away 80 bits of the 160 bit output is much better
> > >than folding the two halves together. In all the cases we've
> > >discussed where folding might improve matters, throwing away half the
> > >output would be even better.
> >
> > I don't see where you are getting this from. Define
> > F(x) = first80bits(SHA(x))
> > G(x) = first80bits(SHA(x)) xor last80bits(SHA(x)).
> > What makes you think that F is a better (or worse) hash function than G?
> >
> > I think there is little basis for discriminating between them.
> > If SHA is cryptographically secure, both F and G are fine.
> > If SHA is insecure, then all bets are off, and both F and G might be weak.
>
> I still do not see why either F or G are any more secure than SHA.

They aren't, in the sense of cryptographically signing a document.
They do reveal less information about the input than SHA-1.

> F, G and SHA are all supposedly strong hash functions, and I don't see
> why the postulated folks capable of getting useful information about
> the inputs to SHA would have any more difficulty getting useful
> information about the inputs to F or G.
>
> Unless we're postulating that SHA is deliberately weak, so that the
> designers have a back door, that is not present in F or G.

That's exactly what Ted described as his reason for doing the folding
in the first place. Matt Mackall simply pointed out that a little bit
of information theory will show you that throwing away half the output
is more effective.

I think Matt's doing an excellent job. His understanding of the
mathematics involved certainly exceeds that of 99% of the operating
systems developers I've met.

-VAL

David Wagner

Aug 14, 2003, 9:50:04 PM
Val Henson wrote:
>On Thu, Aug 14, 2003 at 07:40:25PM +0000, David Wagner wrote:
>> I don't see where you are getting this from. Define
>> F(x) = first80bits(SHA(x))
>> G(x) = first80bits(SHA(x)) xor last80bits(SHA(x)).
>> What makes you think that F is a better (or worse) hash function than G?
>
>See Matt Mackall's earlier post on correlation, excerpted at the end
>of this message. Basically, with two strings x and y, the entropy of
>x alone or y alone is always greater than or equal to the entropy of x
>xored with y.
>
>entropy(x) >= entropy(x xor y)
>entropy(y) >= entropy(x xor y)

Sorry; that's not accurate. Here's a counterexample. Let x and y be
two 80-bit strings. Assume that x is either 0 or 1 (equal probability
for both possibilities). Assume y is either 0 or 2 (equal probability
for both possibilities), and is independent of x. Then
entropy(x) = 1 bit
entropy(y) = 1 bit
entropy(x xor y) = 2 bits
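
The counterexample is easy to verify by brute force with a throwaway C
sketch (purely illustrative):

/* Enumerate the example: x in {0,1}, y in {0,2}, independent and
 * uniform.  x^y hits each of {0,1,2,3} exactly once: two full bits. */
#include <stdio.h>

int main(void)
{
        int xs[2] = { 0, 1 }, ys[2] = { 0, 2 };
        int count[4] = { 0 }, i, j, v;

        for (i = 0; i < 2; i++)
                for (j = 0; j < 2; j++)
                        count[xs[i] ^ ys[j]]++;

        for (v = 0; v < 4; v++)
                printf("P(x^y = %d) = %d/4\n", v, count[v]);
        return 0;
}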

The difference between F and G is very small, and there is not much
basis for choosing one over the other.

Matt Mackall

Aug 14, 2003, 10:30:05 PM
On Fri, Aug 15, 2003 at 01:45:45AM +0000, David Wagner wrote:
> Val Henson wrote:
> >On Thu, Aug 14, 2003 at 07:40:25PM +0000, David Wagner wrote:
> >> I don't see where you are getting this from. Define
> >> F(x) = first80bits(SHA(x))
> >> G(x) = first80bits(SHA(x)) xor last80bits(SHA(x)).
> >> What makes you think that F is a better (or worse) hash function than G?
> >
> >See Matt Mackall's earlier post on correlation, excerpted at the end
> >of this message. Basically, with two strings x and y, the entropy of
> >x alone or y alone is always greater than or equal to the entropy of x
> >xored with y.
> >
> >entropy(x) >= entropy(x xor y)
> >entropy(y) >= entropy(x xor y)
>
> Sorry; that's not accurate. Here's a counterexample. Let x and y be
> two 80-bit strings. Assume that x is either 0 or 1 (equal probability
> for both possibilities). Assume y is either 0 or 2 (equal probability
> for both possibilities), and is independent of x. Then
> entropy(x) = 1 bit
> entropy(y) = 1 bit
> entropy(x xor y) = 2 bits

Indeed. But here we're already assuming that the entropies of x and y
are approximately the same as their sizes.

> The difference between F and G is very small, and there is not much
> basis for choosing one over the other.

The debate is really about the merits of F vs the original hash, and
it only came up because I had taken out the folding when trying to
benchmark $subject. My current code gets around that whole debate by
using F for /dev/random and (almost all of) the original hash for
/dev/urandom so we have both paranoid and fast. Now can we please
discuss the merits of $subject?

See this:

http://groups.google.com/groups?hl=en&lr=&ie=UTF-8&selm=jW3A.6xq.3%40gated-at.bofh.it&rnum=1

--
Matt Mackall : http://www.selenic.com : of or relating to the moon

Andries Brouwer

Aug 15, 2003, 3:40:04 AM
On Thu, Aug 14, 2003 at 06:17:13PM -0600, Val Henson wrote:

> See Matt Mackall's earlier post on correlation, excerpted at the end
> of this message. Basically, with two strings x and y, the entropy of
> x alone or y alone is always greater than or equal to the entropy of x
> xored with y.
>
> entropy(x) >= entropy(x xor y)
> entropy(y) >= entropy(x xor y)

Is this trolling? Are you serious?
Try to put z = x xor y and apply your insight to the strings x and z.

David S. Miller

Aug 15, 2003, 3:50:05 AM
On Fri, 15 Aug 2003 09:30:03 +0200
Andries Brouwer <ae...@win.tue.nl> wrote:

> On Thu, Aug 14, 2003 at 06:17:13PM -0600, Val Henson wrote:
>
> > entropy(x) >= entropy(x xor y)
> > entropy(y) >= entropy(x xor y)
>
> Is this trolling? Are you serious?

These lemmas are absolutely true. XOR is the worst way
to gain entropy because it means that if you are able
to know anything about either 'x' or 'y' then you are
able to know something about the resulting entropy.

It is the reason our ipv4 routing cache hashes used to
be exploitable.

Andries Brouwer

Aug 15, 2003, 4:00:12 AM
On Fri, Aug 15, 2003 at 12:40:04AM -0700, David S. Miller wrote:
> Andries Brouwer <ae...@win.tue.nl> wrote:
>
> > On Thu, Aug 14, 2003 at 06:17:13PM -0600, Val Henson wrote:
> >
> > > entropy(x) >= entropy(x xor y)
> > > entropy(y) >= entropy(x xor y)
> >
> > Is this trolling? Are you serious?
>
> > These lemmas are absolutely true.

David, did you read this line:

> > Try to put z = x xor y and apply your insight to the strings x and z.

Let us do it. Let z be an abbreviation for x xor y.

The lemma that you believe in, applied to x and z, says

entropy(x) >= entropy(x xor z)
entropy(z) >= entropy(x xor z)

But x xor z equals y, so you believe for arbitrary strings x and y that

entropy(x) >= entropy(y)
entropy(x xor y) >= entropy(y).

This "lemma", formulated in this generality, is just plain nonsense.

Andries

Måns Rullgård

Aug 15, 2003, 4:10:09 AM
Andries Brouwer <ae...@win.tue.nl> writes:

>> > > entropy(x) >= entropy(x xor y)
>> > > entropy(y) >= entropy(x xor y)
>> >
>> > Is this trolling? Are you serious?
>>
>> These lemmas are absolutely true.
>
> David, did you read this line:
>
>> > Try to put z = x xor y and apply your insight to the strings x and z.
>
> Let us do it. Let z be an abbreviation for x xor y.
>
> The lemma that you believe in, applied to x and z, says
>
> entropy(x) >= entropy(x xor z)
> entropy(z) >= entropy(x xor z)
>
> But x xor z equals y, so you believe for arbitrary strings x and y that
>
> entropy(x) >= entropy(y)
> entropy(x xor y) >= entropy(y).
>
> This "lemma", formulated in this generality, is just plain nonsense.

Not quite nonsense, but it would mean that for any strings x and y,

entropy(x) == entropy(y),

which seems incorrect.

--
Måns Rullgård
m...@users.sf.net

Nick Piggin

Aug 15, 2003, 4:20:07 AM

David S. Miller wrote:

>On Fri, 15 Aug 2003 09:30:03 +0200
>Andries Brouwer <ae...@win.tue.nl> wrote:
>
>
>>On Thu, Aug 14, 2003 at 06:17:13PM -0600, Val Henson wrote:
>>
>>
>>>entropy(x) >= entropy(x xor y)
>>>entropy(y) >= entropy(x xor y)
>>>
>>Is this trolling? Are you serious?
>>
>
>These lemma are absolutely true. XOR is the worst way
>to gain entropy because it means that if you are able
>to know anything about either 'x' or 'y' then you are
>able to know something about the resulting entropy.
>

_Either_ x or y? Surely not! 1 ^ ? is still unknown: you still have 1
bit of entropy.

Nick Piggin

Aug 15, 2003, 4:20:07 AM

Måns Rullgård wrote:

>Andries Brouwer <ae...@win.tue.nl> writes:
>
>
>>>>>entropy(x) >= entropy(x xor y)
>>>>>entropy(y) >= entropy(x xor y)
>>>>>
>>>>Is this trolling? Are you serious?
>>>>
>>>These lemmas are absolutely true.
>>>
>>David, did you read this line:
>>
>>
>>>>Try to put z = x xor y and apply your insight to the strings x and z.
>>>>
>>Let us do it. Let z be an abbreviation for x xor y.
>>
>>The lemma that you believe in, applied to x and z, says
>>
>> entropy(x) >= entropy(x xor z)
>> entropy(z) >= entropy(x xor z)
>>
>>But x xor z equals y, so you believe for arbitrary strings x and y that
>>
>> entropy(x) >= entropy(y)
>> entropy(x xor y) >= entropy(y).
>>
>>This "lemma", formulated in this generality, is just plain nonsense.
>>
>
>Not quite non-sense, but it would mean that for any strings x and y,
>
> entropy(x) == entropy(y),
>
>which seems incorrect.
>

Well, just the line entropy(x) >= entropy(y) is incorrect, i.e. proof
by contradiction.

Jamie Lokier

Aug 15, 2003, 7:50:10 AM
Val Henson wrote:
> > I still do not see why either F or G are any more secure than SHA.
>
> They aren't, in the sense of cryptographically signing a document.
> They do reveal less information about the input than SHA-1.

Not really. There are two ways in which these functions can reveal
something about the input:

1. Non-random input. An 80 bit signature is virtually as good as a
160 bit signature at confirming an attacker's hypothesis about
the input being among a set of known inputs. It's not like the
kernel is deliberately trying to induce collisions, which is the
usual reason for wanting more signature bits.

2. Random input, i.e. plenty of real entropy. If there is a weakness
in the hash which NSA can use to determine the bit state, then
it's true that F or G _might_ reveal half of the information that
SHA would, if the weakness is also present in F and G. Or it
might reveal much less (Ted's hypothesis for F), or more (see 1).

But! When someone reads x bytes from /dev/urandom, twice as many
hash transforms are performed with F or G as with SHA.

So, if someone reads x bytes, the amount of information revealed
through weakness could be less or more with F or G than with SHA.

> > Unless we're postulating that SHA is deliberately weak, so that the
> > designers have a back door, that is not present in F or G.
>
> That's exactly what Ted described as his reason for doing the folding
> in the first place.

At least that reason makes sense :)

> Matt Mackall simply pointed out that a little bit of information
> theory will show you that throwing away half the output is more
> effective.

No, that depends on the nature of the hypothetical back door.

They could have engineered a back door which is completely present
when you throw the second half of the bits away. Or any half of the
bits. Or when you fold the bits together. Or which reveals some but
less information when you do any of these.

In other words, you can't protect against a deliberate back door in
SHA, you can only try to scramble the output in some way that NSA
didn't prepare for. And then you cannot be sure you succeeded.

-- Jamie

Jamie Lokier

Aug 15, 2003, 9:00:06 AM
Val Henson wrote:
> See Matt Mackall's earlier post on correlation, excerpted at the end
> of this message. Basically, with two strings x and y, the entropy of
> x alone or y alone is always greater than or equal to the entropy of x
> xored with y.
>
> entropy(x) >= entropy(x xor y)
> entropy(y) >= entropy(x xor y)

No.

Let x have zero entropy, i.e. a known value.
Let y be random, i.e. entropy(y) == bits(y), and non-empty, i.e. bits(y) > 0.

Then,
entropy(x xor y) == entropy(y)

=> entropy(x) < entropy(x xor y)

In general, either condition is possible depending on the relationship
between x and y.

Entropy vs. information hiding
==============================

For our purpose, maximising entropy in F or G is one goal, to ensure
each output bit is maximally independent, but another goal is
preventing knowledge of the pool state, and that's a _different_ goal.

Reducing entropy in the output actually makes acquiring knowledge of
the pool state _more difficult_.

This is because the amount of information you can deduce about the
pool state is limited by (a) the output entropy and (b) the strength
of the hash function.

To xor or not to xor
====================

For an attacker unable to break SHA, F or G are equally unbreakable.
For an attacker who knows a weakness in SHA, then whether F or G is
stronger depends entirely on the nature of that weakness. It could
even be that F is stronger against one attacker and G is stronger
against a different one.

Which begs the question, why throw away 50% of the bits? Why not 25%,
or 75%? The choice is arbitrary and not based on any known (perhaps
not knowable) properties of the hash function. We are speculating
about weaknesses, yet cannot know which weaknesses to protect against.

-- Jamie

Matt Mackall

Aug 15, 2003, 12:20:07 PM
On Fri, Aug 15, 2003 at 10:06:42AM +0200, Måns Rullgård wrote:
> Andries Brouwer <ae...@win.tue.nl> writes:
>
> >> > > entropy(x) >= entropy(x xor y)
> >> > > entropy(y) >= entropy(x xor y)
> >> >
> >> > Is this trolling? Are you serious?
> >>
> >> These lemmas are absolutely true.
> >
> > David, did you read this line:
> >
> >> > Try to put z = x xor y and apply your insight to the strings x and z.
> >
> > Let us do it. Let z be an abbreviation for x xor y.
> >
> > The lemma that you believe in, applied to x and z, says
> >
> > entropy(x) >= entropy(x xor z)
> > entropy(z) >= entropy(x xor z)
> >
> > But x xor z equals y, so you believe for arbitrary strings x and y that
> >
> > entropy(x) >= entropy(y)
> > entropy(x xor y) >= entropy(y).
> >
> > This "lemma", formulated in this generality, is just plain nonsense.
>
> Not quite non-sense, but it would mean that for any strings x and y,
>
> entropy(x) == entropy(y),
>
> which seems incorrect.

No, it's a premise stated at the beginning of the thread. We're
assuming perfect distribution for x and y. The problem here is that x
and y can be dependent or independent. If they're independent, then
there's no issue. If they're dependent (for instance correlated or
anticorrelated) then x^y biases toward zero or one. Which clearly has
less entropy.

--
Matt Mackall : http://www.selenic.com : of or relating to the moon

Matt Mackall

Aug 15, 2003, 12:40:06 PM
On Fri, Aug 15, 2003 at 09:30:03AM +0200, Andries Brouwer wrote:
> On Thu, Aug 14, 2003 at 06:17:13PM -0600, Val Henson wrote:
>
> > See Matt Mackall's earlier post on correlation, excerpted at the end
> > of this message. Basically, with two strings x and y, the entropy of
> > x alone or y alone is always greater than or equal to the entropy of x
> > xored with y.
> >
> > entropy(x) >= entropy(x xor y)
> > entropy(y) >= entropy(x xor y)
>
> Is this trolling? Are you serious?
> Try to put z = x xor y and apply your insight to the strings x and z.

Val left out the assumption that x and y are already perfectly
distributed, in and of themselves.

--
Matt Mackall : http://www.selenic.com : of or relating to the moon

Andries Brouwer

Aug 15, 2003, 1:10:11 PM
On Fri, Aug 15, 2003 at 10:03:24AM -0500, Matt Mackall wrote:
> On Fri, Aug 15, 2003 at 09:30:03AM +0200, Andries Brouwer wrote:
> > On Thu, Aug 14, 2003 at 06:17:13PM -0600, Val Henson wrote:
> >
> > > See Matt Mackall's earlier post on correlation, excerpted at the end
> > > of this message. Basically, with two strings x and y, the entropy of
> > > x alone or y alone is always greater than or equal to the entropy of x
> > > xored with y.
> > >
> > > entropy(x) >= entropy(x xor y)
> > > entropy(y) >= entropy(x xor y)
> >
> > Is this trolling? Are you serious?
> > Try to put z = x xor y and apply your insight to the strings x and z.
>
> Val left out the assumption that x and y are already perfectly
> distributed, in and of themselves.

I see. "Perfectly distributed" does not sound like a good technical term.
Surely you do not mean "uniformly distributed".
Maybe you mean "of maximal possible entropy"?

Then the statement has nothing to do with the xor and is just
entropy(x) >= entropy(a), for an arbitrary string a,
assuming that x has maximal possible entropy.

Well, yes, indeed.

Val Henson

Aug 15, 2003, 4:30:15 PM
On Fri, Aug 15, 2003 at 09:55:03AM +0200, Andries Brouwer wrote:
> The lemma that you believe in, applied to x and z, says
>
> entropy(x) >= entropy(x xor z)
> entropy(z) >= entropy(x xor z)
[snip]

> This "lemma", formulated in this generality, is just plain nonsense.

Indeed. In the context of x = first 80 bits of the SHA-1 hash, y =
second 80 bits of the SHA-1 hash, we are assuming that the entropy of
x and of y is basically maximal (close to 80 bits each). Then xoring
them together gives us no benefit over taking just one or the other,
and may reduce the entropy if the output of SHA-1 is correlated
between the two halves in any way (which seems more likely than not).

Including a little more context, then:

If x and y are each perfectly distributed (entropy equal to their length), then:

entropy(x) >= entropy(x xor y)
entropy(y) >= entropy(x xor y)

Amusingly, someone already worked their way backwards from my post
without the benefit of any context to find that it implied that
entropy(x) == entropy(y). I should know better than to assume that
people have read the entire thread before posting.

Apologies to Matt for sidetracking the discussion. He has come up
with a nice solution despite our kibitzing.

-VAL

Jamie Lokier

Aug 15, 2003, 6:10:08 PM
Matt Mackall wrote:
> Val left out the assumption that x and y are already perfectly
> distributed, in and of themselves.

That is equivalent to saying all the bits in x are independent of each
other, and the same for y.

It is not correct to assume that, and at the same time consider a
non-zero correlation between specific pairs of bits in x and y to
prove your result.

This is why:

There may be correlations between different bits of SHA output, but if
so, they are just as likely, a priori, between any pair of bits.

Your assumption is that bits in x are independent of each other, and
unpredictable, and the same for y, yet there may be correlations
between the n'th bit of x and n'th bit of y for 0<=n<=79.

Given that, and the existence of a non-zero correlation:

entropy(x xor y) < entropy(x)
entropy(x xor y) < entropy(y)

Which justifies dropping half the bits instead of folding.

But wait!

Why should we drop the second 80 bits, instead of (say) every other bit?
All the bits from SHA are equivalent, right?

So, let's define some permutations. Remember, x and y are 80 bits wide.

a = [ x[ 0..39], y[ 0..39] ]
b = [ x[40..79], y[40..79] ]

Now, making exactly the same assumptions (x is perfectly distributed,
y the same), considering the same possible non-zero correlation (n'th
bit of x with n'th bit of y for any 0<=n<=79), we see it is _possible_
for the following to be true, although these assumptions alone don't
make it so:

entropy(a xor b) > entropy(a) (is possible)
entropy(a xor b) > entropy(b) (is possible)

This is because of the dependence between bits _within_ a or b,
causing entropy(a) < entropy(x) or entropy(b) < entropy(x). Whereas
entropy(a xor b) = entropy(x) if x[n] is independent of y[n'] for
40<=n<=79 and 40<=n'<=79, which is one additional assumption that can
make the above statement true.

Having just shown that, within the assumptions which prove entropy(x
xor y) < entropy(x), it is possible to have entropy(a xor b) >
entropy(a), where [a,b] is a permutation of [x,y], we see:

Your assumptions imply you should drop certain bits instead of other
bits, otherwise dropping can be worse than folding.

This is clearly absurd :)


Eh?
---

The reasoning error is to assume that certain pairs of bits are
independent and entertain the possibility of certain other pairs of
bits being dependent.

Of course bias in choosing those pairs favours one way of reducing the
hash output compared with another.

What happens when you remove the bias? Now that is a more interesting question.

Perhaps you can show that dropping is better than folding, given no
bias in your correlation assumptions, but I don't see that in the
claim quoted at the start of this message.


Enjoy,
-- Jamie

Jamie Lokier

Aug 15, 2003, 6:10:19 PM
Andries Brouwer wrote:
> Then the statement has nothing to do with the xor and is just
> entropy(x) >= entropy(a), for an arbitrary string a,
> assuming that x has maximal possible entropy.
>
> Well, yes, indeed.

I think you just said what I said in a long, complicated message a few
minutes ago, but much more succinctly.

:)
-- Jamie

Jamie Lokier

Aug 15, 2003, 6:20:12 PM
Matt Mackall wrote:
> No, it's a premise stated at the beginning of the thread. We're
> assuming perfect distribution for x and y. The problem here is that x
> and y can be dependent or independent. If they're independent, then
> there's no issue. If they're dependent (for instance correlated or
> anticorrelated) then x^y biases toward zero or one. Which clearly has
> less entropy.

Sure, but that only holds when you assume a specific mix of
independence and dependence among the bits.

(Bits within x are independent of each other, and also within y, while
at the same time x and y are dependent.)

In general, bits from x^y do not have more bias towards zero or one
than bits from x or y alone. Consider an extreme:

x = [ random_bit_0, 0 ]
y = [ 0, random_bit_1 ]

Then:

entropy(x) = entropy(y) = 1
entropy(x^y) = 2

This is no more arbitrary a mix of dependence and independence than
your assumption.

-- Jamie

Theodore Ts'o

Aug 15, 2003, 6:20:11 PM
On Tue, Aug 12, 2003 at 11:06:14PM -0500, Matt Mackall wrote:
>
> The argument applies to any and all structure. If X and Y are n-bit
> words and some pairs (X,Y) are more likely than others even though X
> and Y are perfectly distributed when looked at normally, the
> distribution (and hence the entropy) of X^Y will be less ideal than X
> or Y taken alone. No statistical tests will catch the case of
> X=hash(a) and Y=hash(~a).

Right, but that's also easy to test. You simply test the statistical
randomness of the folded hash. In any case, if there were any kind of
correlation between any two bits in the SHA hash, that would be a
significant weakness in the hash, and it would have been detected
before now.

> I'll buy that. However simply dropping the data would serve this
> purpose better. As would hashing less than the entire pool on each
> read.

Dropping the data does serve this purpose, yes. But it has the
downside of exposing half the hash directly to the attacker. This
probably shouldn't be an issue, but if you assume no statistical
correlation between any two bits, folding will hide half of the hash.

In any case, it probably doesn't make that much difference either way,
assuming that SHA is anywhere near competently designed.

> > Speed really doesn't matter here. /dev/random is supposed to produce
> > something which is close to true randomness, not crypto
> > pseudo-randomness. So overkill is a good thing. It should only be
> > used for generating long-term keys, so speed is really not a concern.
>
> Mostly agreed. But I would also like to:
>
> a) cat /dev/urandom > /dev/hda and not have it take all day

You shouldn't be using /dev/urandom this way. OpenBSD has a
/dev/frandom which is just a quick pseudo-random generator. I've
resisted this, since there's no reason why this should be done in the
kernel. User-space is perfectly capable of taking 160 bits of
randomness out of /dev/urandom, and using it to seed a SHA-based
random number generator. However, if people insist on doing silly
things like "cat /dev/urandom > /dev/hda", and worse yet, screwing
with the /dev/random and /dev/urandom with the excuse that speed is of
importance, then perhaps following OpenBSD and implementing
/dev/frandom is a better choice.

> b) enable syncookies with less of a performance penalty

Syncookies doesn't use the mainline random number generator at all.
It uses a syncookie secret generated using the high quality random
number generator, and it uses a single crank of the SHA hash to
generate the random number.
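
The shape of that scheme, reduced to a toy: one secret plus a single
hash crank per connection attempt. The sketch below is an invented
simplification for illustration only (the field layout, the mixing,
and all names here are made up, not the kernel's actual syncookie
code), using OpenSSL's SHA1() for the crank:

/* syncookie_sketch.c: toy "secret + one hash crank" pattern.
 * NOT the kernel's real syncookie algorithm.  Build: cc ... -lcrypto */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <openssl/sha.h>

/* Generated once from a high-quality source such as /dev/random. */
static unsigned char syncookie_secret[16];

static uint32_t cookie(uint32_t saddr, uint32_t daddr,
                       uint16_t sport, uint16_t dport, uint32_t count)
{
        unsigned char buf[sizeof(syncookie_secret) + 16];
        unsigned char digest[SHA_DIGEST_LENGTH];
        uint32_t out;

        memcpy(buf, syncookie_secret, sizeof(syncookie_secret));
        memcpy(buf + 16, &saddr, 4);
        memcpy(buf + 20, &daddr, 4);
        memcpy(buf + 24, &sport, 2);
        memcpy(buf + 26, &dport, 2);
        memcpy(buf + 28, &count, 4);    /* coarse time counter */

        SHA1(buf, sizeof(buf), digest); /* the single hash crank */
        memcpy(&out, digest, 4);
        return out;
}

int main(void)
{
        /* Toy values; real code must also validate returning cookies
         * against recent values of the time counter. */
        printf("cookie = %08x\n",
               (unsigned)cookie(0x0a000001, 0x0a000002, 12345, 80, 42));
        return 0;
}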

> c) generate other short-term keys, cookies, and UUIDs rapidly

In the kernel or in user space? In either case, the right answer is
to do something similar to what Syncookies does --- pull a seed from
/dev/urandom, and then use simple SHA-based PRNG to generate
short-term keys, cookies, or UUIDs.
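
A minimal userspace sketch of that advice (the names are invented and
OpenSSL's SHA1() is assumed for the hash): seed once from
/dev/urandom, then stretch the seed with SHA-1 run in counter mode, so
the seed itself never leaves the process.

/* sha_prng.c: illustrative only.  Build: cc sha_prng.c -lcrypto */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <openssl/sha.h>

static unsigned char seed[SHA_DIGEST_LENGTH];   /* 160 bits */
static uint64_t counter;

static int prng_init(void)
{
        FILE *f = fopen("/dev/urandom", "rb");
        size_t got = 0;

        if (f) {
                got = fread(seed, 1, sizeof(seed), f);
                fclose(f);
        }
        return got == sizeof(seed) ? 0 : -1;
}

/* Each output block is SHA1(seed || counter). */
static void prng_bytes(unsigned char *buf, size_t len)
{
        unsigned char block[sizeof(seed) + sizeof(counter)];
        unsigned char digest[SHA_DIGEST_LENGTH];

        memcpy(block, seed, sizeof(seed));
        while (len) {
                size_t n = len < sizeof(digest) ? len : sizeof(digest);

                memcpy(block + sizeof(seed), &counter, sizeof(counter));
                counter++;
                SHA1(block, sizeof(block), digest);
                memcpy(buf, digest, n);
                buf += n;
                len -= n;
        }
}

int main(void)
{
        unsigned char key[16];
        size_t i;

        if (prng_init())
                return 1;
        prng_bytes(key, sizeof(key));           /* a short-term key */
        for (i = 0; i < sizeof(key); i++)
                printf("%02x", key[i]);
        printf("\n");
        return 0;
}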

> d) not disable preemption for long stretches while hashing (a
> limitation of cryptoapi)

Sounds like a bug in CryptoAPI that should be fixed in CryptoAPI.

- Ted

Theodore Ts'o

Aug 15, 2003, 6:40:08 PM
On Thu, Aug 14, 2003 at 06:17:13PM -0600, Val Henson wrote:
> There is no way in which folding is better than
> halving, and folding is demonstrably worse if SHA-1's output is
> correlated across the two halves in any way (which is almost certainly
> true).

If there is any kind of correlation between any two individual bits, such
that XOR'ing them together resulted in a pattern which was not
statistically random, it would be a significant weakness in the SHA
algorithm.

Consider that a requirement of a secure cryptographic hash is one
where for any (x,y) pair where y=H(x), finding another x' where
y=H(x') is no easier than brute force search. If two bits in the
output of the hash have any kind of statistically detectable
correlation, then it effectively reduces the size of the output space
of the hash, and so it would take less effort than a brute force
search of (on average) 2**80 random inputs before finding a hash
collision.

- Ted

James Morris

Aug 15, 2003, 7:40:10 PM
On Fri, 15 Aug 2003, Theodore Ts'o wrote:

> > d) not disable preemption for long stretches while hashing (a
> > limitation of cryptoapi)
>
> Sounds like a bug in CryptoAPI that should be fixed in CryptoAPI.

This is for the case of hashing from a per-cpu context, which is an
inherently unsafe context for introducing schedule points. This is not
a crypto API-specific problem.


- James
--
James Morris
<jmo...@intercode.com.au>

Matt Mackall

Aug 15, 2003, 8:00:24 PM
On Fri, Aug 15, 2003 at 06:12:11PM -0400, Theodore Ts'o wrote:

> However, if people insist on doing silly things like "cat
> /dev/urandom > /dev/hda", and worse yet, screwing with the
> /dev/random and /dev/urandom with the excuse that speed is of
> importance, then perhaps following OpenBSD and implementing
> /dev/frandom is a better choice.

Sigh..

I posted a proof-of-concept patch for discussion on $SUBJECT. In that
patch, I removed the folding for the purposes of having a reasonable
comparison between cryptoapi and native. Cryptoapi does full
FIPS-180-1 padding and thus does twice as much hashing on 512-bit
inputs. Removing the folding was a convenient and obvious way of
addressing that for the purposes of discussing $SUBJECT until a good
way to work around the extra padding was found. I've already
indicated my willingness to accept your
SHA-may-be-backdoored-and-somehow-leverageable argument, so can we
kindly discuss $SUBJECT instead of this trivia?

As for "screwing with /dev/random", it's got rather more serious and
longstanding problems than speed that I've been addressing. For
instance, I'm pretty sure there was never a time when entropy
accounting wasn't racy let alone wrong, SMP or no (fixed in -mm, thank
you). Nor has there ever been a time when change_poolsize() wasn't an
oops waiting to happen (patch queued for resend).

And frankly, given that my local version of /dev/urandom does 18MB/s
without starving /dev/random, I see no reason to reinvent strong PRNGs
in userspace for most applications.

--
Matt Mackall : http://www.selenic.com : of or relating to the moon

Andrew Morton

Aug 15, 2003, 8:30:08 PM
Matt Mackall <m...@selenic.com> wrote:
>
> I'm pretty sure there was never a time when entropy
> accounting wasn't racy let alone wrong, SMP or no (fixed in -mm, thank
> you).

Well, it has been argued that the lack of locking in the random driver is a
"feature", adding a little more unpredictability.

Now I don't know if that makes sense or not, but the locking certainly has
a cost. If it doesn't actually fix anything then that cost becomes a
waste.

IOW: what bug does that locking fix?

Jamie Lokier

Aug 15, 2003, 9:10:04 PM
Andrew Morton wrote:
> Matt Mackall <m...@selenic.com> wrote:
> > I'm pretty sure there was never a time when entropy
> > accounting wasn't racy let alone wrong, SMP or no (fixed in -mm, thank
> > you).
>
> Well, it has been argued that the lack of locking in the random driver is a
> "feature", adding a little more unpredictability.

Dodgy. Does lack of locking mean users can trick /dev/random into
thinking it has more entropy than it does? Or let them detect the
time when /dev/random gains entropy, without reading it?

> Now I don't know if that makes sense or not, but the locking certainly has
> a cost. If it doesn't actually fix anything then that cost becomes a
> waste.

Per-cpu random pools, perhaps :)

-- Jamie

Matt Mackall

Aug 16, 2003, 12:50:05 AM
On Fri, Aug 15, 2003 at 05:05:32PM -0700, Andrew Morton wrote:
> Matt Mackall <m...@selenic.com> wrote:
> >
> > I'm pretty sure there was never a time when entropy
> > accounting wasn't racy let alone wrong, SMP or no (fixed in -mm, thank
> > you).
>
> Well, it has been argued that the lack of locking in the random driver is a
> "feature", adding a little more unpredictability.

> Now I don't know if that makes sense or not, but the locking certainly has
> a cost. If it doesn't actually fix anything then that cost becomes a
> waste.
>
> IOW: what bug does that locking fix?

Ok, the brief summary is that we're keeping a count of the number of
bits of unguessable environmental data (aka entropy) we've collected.
If we never give out more bits than we take in, we are immune to even
brute force attacks on the state of the pool and therefore the outputs
are unguessable as well (which is why /dev/random blocks where
/dev/urandom does not). If we fail to account for outputs properly, we
lose this property.

a) extract_entropy (pool->lock)

This can be called simultaneously from different contexts and a race
will result in extracting more bits from the pool than are accounted
for or available. This could also underflow. I use the new pool lock
here to cover the accounting - we rely on the hashing and mixing to
evenly distribute bits in the extraction itself and we have feedback
during the extraction as well.
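
A minimal sketch of that fix, with invented names (the real random.c
structures differ): the availability check and the debit happen under
one spinlock, so two concurrent readers can never both claim the same
bits.

/* Sketch only -- assumed names, not the actual driver code. */
#include <linux/spinlock.h>

struct entropy_pool {
        spinlock_t lock;
        int entropy_count;              /* credited bits */
};

/* Returns how many bits this caller may actually extract. */
static int reserve_entropy(struct entropy_pool *pool, int nbits)
{
        unsigned long flags;

        spin_lock_irqsave(&pool->lock, flags);
        if (nbits > pool->entropy_count)
                nbits = pool->entropy_count;    /* never over-draw */
        pool->entropy_count -= nbits;
        spin_unlock_irqrestore(&pool->lock, flags);

        return nbits;
}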

For nitpickers, there remains a vanishingly small possibility that
two readers could get identical results from the pool by hitting a
window immediately after reseeding, after accounting, _and_ after
feedback mixing. I have another patch I'm still playing with that
addresses this by hashing a different subset of the pool on each call
rather than the entire pool, with the position covered by the pool
lock during accounting. (This has the controversial side effect of
doubling bandwidth.)

Note that I added a flag here to limit the amount of entropy pulled
to what's available. Random_read() formerly would check for the
number of available bits and then ask for them, and extract_entropy
would copy out that many bits even if an intervening caller raced and
depleted the pool.

The possible contention issue here is that /dev/random and
/dev/urandom+get_random_bytes() currently share the same pool and
get_random_bytes gets called moderately frequently by things like
sequence number generation by numerous places in the network layer
and various things in filesystems. I suspect this won't ever be a
noticeable lock but see below.

[There was also a cute sleeping problem here with random_read.
random_read used old-style open-coded sleeping, marked itself
TASK_INTERRUPTIBLE, then called extract_entropy, which would do a
conditional reschedule, and fall asleep until the next wake up,
despite having enough entropy to fulfill the request.]

b) add_entropy_words (pool->lock)

This function folds new words into the pool. The pool lock is taken
to prevent a race whereby both callers read the same value of the
add_ptr and mix on top of each other, again causing an overestimate
of the entropy in the pool. The lock could be taken and released
inside the loop, but this tends to get called for a small number of
words (usually <=8).

c) credit_entropy_store (pool->lock)

Locking here actually prevents underaccounting, but it also prevents
overflow. Short and sweet.
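
In the same sketched style (names still assumed, companion to the
reserve_entropy() sketch above), the crediting side shows both
properties in a few lines: the lock serializes the read-modify-write,
and the clamp prevents overflow past the pool size.

static void credit_entropy(struct entropy_pool *pool, int nbits,
                           int pool_size_bits)
{
        unsigned long flags;

        spin_lock_irqsave(&pool->lock, flags);
        pool->entropy_count += nbits;
        if (pool->entropy_count > pool_size_bits)
                pool->entropy_count = pool_size_bits;   /* cap at pool size */
        spin_unlock_irqrestore(&pool->lock, flags);
}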

d) RNDGETPOOL ioctl (pool->lock)

The intent of this ioctl is to take an atomic snapshot of the entropy
pool, entropy count, etc., presumably for debugging purposes. For
purpose of getting random data, one should simply read(). With the
addition of a second pool, it's no longer useful for debugging, but
could be useful for attackers, and I'd like it to just return -ENOTTY. For
now, it takes the pool lock. Should be no users, shouldn't increase
contention.

e) batch_entropy_store && batch_entropy_process (batch_lock)

This gets really messy without locking. This is a circular buffer
that gets filled typically from interrupts and emptied by a
workqueue. Lack of locking here could cause tail to pass head, etc.,
dropping samples on the ground or replaying a whole buffer's worth of old
samples. Rather than hold the lock while we mix the queue, we copy
the sample ring. (I have a better patch to do this with a flip
buffer.)
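
A sketch of that copy-then-mix pattern, again with invented names: the
batch lock is held only long enough to snapshot the ring, so the
(slower) mixing into the pool runs without blocking interrupt-time
producers.

/* Sketch only; the real batch_entropy_* code differs. */
#include <linux/spinlock.h>
#include <linux/types.h>

#define BATCH_MAX 256                   /* must be a power of two */

static spinlock_t batch_lock = SPIN_LOCK_UNLOCKED;
static u32 batch_ring[BATCH_MAX];
static unsigned int batch_head, batch_tail;

static void batch_process(void)
{
        u32 snapshot[BATCH_MAX];
        unsigned int head, tail, n = 0;
        unsigned long flags;

        /* Copy the samples out under the lock... */
        spin_lock_irqsave(&batch_lock, flags);
        head = batch_head;
        tail = batch_tail;
        while (tail != head) {
                snapshot[n++] = batch_ring[tail];
                tail = (tail + 1) & (BATCH_MAX - 1);
        }
        batch_tail = tail;
        spin_unlock_irqrestore(&batch_lock, flags);

        /* ...then mix them into the pool without holding the lock;
         * mixing snapshot[0..n-1] into the input pool goes here. */
}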

[By the way, whoever did the workqueue conversion for 2.5 changed this
code to wake up the processing worker when the sample pool was half
full rather than on every sample, but got the test not quite right. I had
to stare at this for a bit:

new = (batch_head+1) & (batch_max-1);
if ((unsigned)(new - batch_tail) >= (unsigned)(batch_max / 2))
{

There was a further problem where extract_entropy was adding zero
entropy timing samples and filling the sample buffer with useless
samples. Occasionally the above bug would keep it from scheduling
batch processing in this case and the code could block permanently,
throwing out all new samples. Ouch.]

f) change_poolsize (queued for resend)

The /proc/sys/kernel/random sysctl is writable and lets you change
the pointer to the input pool out from under callers (boom). Short of
expanding the pool lock to cover all operations on the pool, there's
no clean fix for this. And it's not really worth the trouble. As it takes
quite a number of transactions to empty the 4k+1kbits of entropy we can
currently hold (enough for dozens of strong keys), if we end up
waiting for entropy regularly, queueing theory tells us that demand is
exceeding our input rate and we're going to lose no matter how big
the buffer is.

On the other hand, the input pool is only 4kbits and rather than keep
this feature I can save most of those 512 bytes for everyone by deleting the
resizing code.

g) urandom starves/races random (queued for resend)

Readers of /dev/urandom and get_random_bytes (both nonblock) pull from
the same pool as /dev/random readers and without limit. As there are
numerous users of get_random_bytes as pointed out above, /dev/random
readers can easily be starved (and previously, race on wakeup), even
by remote readers. This is rather a problem for the classic
entropy-source-limited headless web server which may very well be
trying to use both in, for example, a departmental certificate
authority.

My solution is to clean up the pool creation code and add a second
output pool for nonblocking users. The pool reseeding logic is
cleaned up to address a bunch of corner cases and has a low watermark
parameter so that the nonblocking users can avoid draining the input
pool entirely. The current default is to not let nonblocking readers
draw the input pool below the point where blocking readers can do two
catastrophic reseeds.
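
The watermark rule itself fits in a few lines; a sketch reusing the
assumed struct entropy_pool from above (the real reseed logic handles
more corner cases than this):

/* Nonblocking readers may draw the input pool down to low_watermark
 * but no further; blocking readers are not limited by the floor. */
static int reseed_allowance(struct entropy_pool *input, int wanted,
                            int low_watermark, int nonblocking_reader)
{
        int floor = nonblocking_reader ? low_watermark : 0;
        int avail = input->entropy_count - floor;

        if (avail <= 0)
                return 0;
        return wanted < avail ? wanted : avail;
}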

The cleanup of the pool code lets this easily become per_cpu output
pools for the non-blocking readers with about 10 lines of code if the
above-mentioned contention is an issue. I haven't tried this yet, but
I already did per_cpu for the cryptoapi stuff and it should be about
the same.

We could go completely lockless for the nonblocking pool also, but
that would require some code duplication.

I can detail some of the non-race accounting issues I fixed but that
message would be similarly long and more tedious.

--
Matt Mackall : http://www.selenic.com : of or relating to the moon

Matt Mackall

Aug 16, 2003, 1:00:08 AM
On Sat, Aug 16, 2003 at 01:58:07AM +0100, Jamie Lokier wrote:
> Andrew Morton wrote:
> > Matt Mackall <m...@selenic.com> wrote:
> > > I'm pretty sure there was never a time when entropy
> > > accounting wasn't racy let alone wrong, SMP or no (fixed in -mm, thank
> > > you).
> >
> > Well, it has been argued that the lack of locking in the random driver is a
> > "feature", adding a little more unpredictability.
>
> Dodgy. Does lack of locking mean users can trick /dev/random into
> thinking it has more entropy than it does? Or let them detect the
> time when /dev/random gains entropy, without reading it?

Yes to the first, detailed at great length in a separate message. You
can do timing attacks on the inputs either way. I'll repost my fix for
it eventually, it's low on the list.



> > Now I don't know if that makes sense or not, but the locking certainly has
> > a cost. If it doesn't actually fix anything then that cost becomes a
> > waste.
>
> Per-cpu random pools, perhaps :)

I really doubt contention here is significant, but will take about ten
lines to address if these locks ever show up on someone's Specweb
benchmark.

--
Matt Mackall : http://www.selenic.com : of or relating to the moon

Andrew Morton

Aug 16, 2003, 1:10:05 AM
Matt Mackall <m...@selenic.com> wrote:
>
> a) extract_entropy (pool->lock)
>
> ...

>
> [There was also a cute sleeping problem here with random_read.
> random_read used old-style open-coded sleeping, marked itself
> TASK_INTERRUPTIBLE, then called extract_entropy, which would do a
> conditional reschedule, and fall asleep until the next wake up,
> despite having enough entropy to fulfill the request.]

OK. There was no description of all this in your initial patch, so I don't
know which patches solve these problems.

> ...


> [By the way, whoever did the workqueue conversion for 2.5 changed this
> code to wakeup the processing worker when the sample pool was half
> full rather on every sample but got the test not quite right.

That would have been me. The context switch rate due to
add_disk_randomness() was much too high (1000/sec iirc), so that was an
attempt to reduce the wakeup rate.

What we really should try to do is to not pass the work to keventd at all:
just do the entropy addition right there in interrupt context, with
appropriate locking.


> f) change_poolsize (queued for resend)

Queued for resend because I just didn't know what to do with the patches.

I do not understand the random driver, and judging from the past couple of
days' discussions I'm not likely to. I don't know how to test it and I
doubt if anyone else is testing it in sufficient depth.

And, to add to my dilemma, the random driver is security-related and
something which scary propeller-headed types get all emotional about :)

So getting these changes in will be hard. It would help a lot if they
could be presented individually and if Ted, Jamie and/or other interested
parties were to review and ack them.

> g) urandom starves/races random (queued for resend)
>
> Readers of /dev/urandom and get_random_bytes (both nonblock) pull from
> the same pool as /dev/random readers and without limit. As there are
> numerous users of get_random_bytes as pointed out above, /dev/random
> readers can easily be starved (and previously, race on wakeup), even
> by remote readers. This is rather a problem for the classic
> entropy-source-limited headless web server which may very well be
> trying to use both in, for example, a departmental certificate
> authority.
>
> My solution is to clean up the pool creation code and add a second
> output pool for nonblocking users. The pool reseeding logic is
> cleaned up to address a bunch of corner cases and has a low watermark
> parameter so that the nonblocking users can avoid draining the input
> pool entirely. The current default is to not let nonblocking readers
> draw the input pool below the point where blocking readers can do two
> catastrophic reseeds.
>
> The cleanup of the pool code lets this easily become per_cpu output
> pools for the non-blocking readers with about 10 lines of code if the
> above-mentioned contention is an issue. I haven't tried this yet, but
> I already did per_cpu for the cryptoapi stuff and it should be about
> the same.
>
> We could go completely lockless for the nonblocking pool also, but
> that would require some code duplication.

hrm. That's one which I actually understood. There is yet hope.
