seperator error in __mask_snprintf

Joe Korty

unread,

Jan 14, 2004, 7:28:56 PM1/14/04

to

On Wed, Jan 14, 2004 at 03:03:31PM -0800, Paul Jackson wrote:
> Joe - question - is there any good reason not to use Paul M's
> suggestion, eor'ing the index with 1 on 64 bit big endian hardware?
> I have a patch about ready (as soon as I can get time on my big system
> to test it) that uses the eor 1 idea.

In principle it should work fine. The details will be in the code
of course.

I've been working on-and-off on fixing the equivalent endian problem
in __mask_parse_len. How is that part going for you? I haven't yet
decided if I want to post it.

--
"Money can buy bandwidth, but latency is forever" -- John Mashey
Joe
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Paul Jackson

unread,

Jan 14, 2004, 7:41:55 PM1/14/04

to

> I've been working on-and-off on fixing the equivalent endian problem
> in __mask_parse_len. How is that part going for you? I haven't yet
> decided if I want to post it.

I have coded both - it was a simple matter of replacing each:

wordp[i]

with:

wordp[M32X(i)]

where M32X (for "Mask 32-bit indeX") is defined:

#include <asm/byteorder.h>
#if BITS_PER_LONG == 64 && defined(__BIG_ENDIAN)
#define M32X(i) ((i)^1)
#elif BITS_PER_LONG == 32 || defined(__LITTLE_ENDIAN)
#define M32X(i) (i)
#endif

I spent more time writing the comments explaining it than I did writing
the code.

--
I won't rest till it's the best ...
Programmer, Linux Scalability
Paul Jackson <p...@sgi.com> 1.650.933.1373

Paul Jackson

unread,

Jan 14, 2004, 11:43:51 PM1/14/04

to

Ok - here is a patch that should fix lib/mask.c displaying and parsing
cpumasks for 64 bit big endian architectures. It has only been tested
on 64 bit little endian architectures ;). Technically, it is against
2.6.1-mm3, though it should work against any kernel with a lib/mask.c
file, since this is the first change to that file.

The comments in the patch explain the details.

Thanks to Paul Mackerras for the "eor-1" coding suggestion, and to Joe
Korty for keeping me honest.

Tomorrow I will attempt a patch to change the ascii format for masks to
zero-fill each word (8 ascii hex chars, representing 32 bits).

# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
# ChangeSet 1.1507 -> 1.1508
# lib/mask.c 1.1 -> 1.2
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 04/01/14 p...@sgi.com 1.1508
# Swizzle u32 wordp[] indexing to handle 64 bit big-endian arch.
# --------------------------------------------
#
diff -Nru a/lib/mask.c b/lib/mask.c
--- a/lib/mask.c Wed Jan 14 20:29:14 2004
+++ b/lib/mask.c Wed Jan 14 20:29:14 2004
@@ -33,15 +33,16 @@
* about any such struct details, relying on inline macros in
* files such as cpumask.h to pass in an unsigned long pointer
* and a length (in bytes), describing the mask contents.
- * The 32bit words in the array are in little-endian order,
- * low order word first. Beware that this is the reverse order
- * of the ascii representation.
+ *
+ * This layout of masks is determined by the macros in bitops.h,
+ * which pre-date masks. The bitop operations were formalized
+ * before the mask data type to which they apply.
*
* Even though the size of the input mask is provided in bytes,
- * the following code may assume that the mask is a multiple of
- * 32 or 64 bit words long, and ignore any fractional portion
- * of a word at the end. The main reason the size is passed in
- * bytes is because it is so easy to write 'sizeof(somemask_t)'
+ * the following code may assume that the mask size is a multiple
+ * of sizeof(long), and ignore any fractional portion of a word
+ * at the end. The main reason the size is passed in bytes is
+ * because it is so easy to write 'sizeof(somemask_t)'.
* in the macros.
*
* Masks are not a single,simple type, like classic 'C'
@@ -72,6 +73,36 @@
#define BASE 16 /* masks are input in hex (base 16) */
#define NUL ((char)'\0') /* nul-terminator */

+/*
+ * The following M32X() macro and associated comment really
+ * belong in include/linux/mask.h, which declares the 'mask'
+ * data type to which bitops apply. However mask.h doesn't
+ * exist yet in the main Linux development line ...
+ */
+
+/*
+ * Masks are arrays of unsigned long. This is almost the same
+ * as an array of unsigned ints, except on 64 bit big endian
+ * architectures, in which the two 32-bit int halves of each long
+ * are reversed (big 32-bit halfword first, naturally).
+ *
+ * Use this M32X (for "Mask 32-bit indeX") macro to index the
+ * i-th word of a mask declared as an array of 32 bit words.
+ *
+ * Usage example accessing 32-bit words in mask[] in order,
+ * smallest first:
+ * u32 mask[MASKLENGTH];
+ * int i;
+ * for (i = 0; i < MASKLENGTH; i++)
+ * ... mask[M32X(i)] ...
+ */
+#include <asm/byteorder.h>
+#if BITS_PER_LONG == 64 && defined(__BIG_ENDIAN)
+#define M32X(i) ((i)^1)
+#elif BITS_PER_LONG == 32 || defined(__LITTLE_ENDIAN)
+#define M32X(i) (i)
+#endif
+
/**
* __mask_snprintf_len - represent multi-word bit mask as string.
* @buf: The buffer to place the result into
@@ -93,10 +124,11 @@
int len = 0;
char *sep = "";

- while (i >= 1 && wordp[i] == 0)
+ while (i >= 1 && wordp[M32X(i)] == 0)
i--;
while (i >= 0) {
- len += snprintf(buf+len, buflen-len, "%s%x", sep, wordp[i]);
+ len += snprintf(buf+len, buflen-len,
+ "%s%x", sep, wordp[M32X(i)]);
sep = ",";
i--;
}
@@ -165,13 +197,13 @@
t = simple_strtoull(p, 0, BASE);
if (t != (u32)t)
return -EOVERFLOW;
- wordp[j++] = t;
+ wordp[M32X(j++)] = t;
}
--j;
while (i < j) {
- u32 t = wordp[i];
- wordp[i] = wordp[j];
- wordp[j] = t;
+ u32 t = wordp[M32X(i)];
+ wordp[M32X(i)] = wordp[M32X(j)];
+ wordp[M32X(j)] = t;
i++, --j;
}
return 0;

Andrew Morton

unread,

Jan 15, 2004, 11:19:28 AM1/15/04

to

Paul Jackson <p...@sgi.com> wrote:
>
> Ok - here is a patch that should fix lib/mask.c displaying and parsing
> cpumasks for 64 bit big endian architectures.

Gad.

> + * This layout of masks is determined by the macros in bitops.h,
> + * which pre-date masks. The bitop operations were formalized
> + * before the mask data type to which they apply.

So why not simply iterate across it with test_bit()?

val = 0;
for (bit = maskbytes * 8; bit >= 0; bit--) {
val <<= 1;
if (__test_bit(maskp, bit))
val |= 1;
if ((bit & 15) == 15) {
sprintf(buf, "%x", val);
buf++;
val = 0;
}
}

(plus bounds checking, null-termination, etc)? It is hardly
performance-critical.

Joe Korty

unread,

Jan 15, 2004, 1:21:11 PM1/15/04

to

Paul and Andrew,
This patch captures what I am looking for in bitmap display and input.

The biggest feature is the folding of all functionality into the bitmap.h
family, with corresponding name and function call argument changes.

bitmap size is now expressed in #bits rather than #bytes. The cpumask
parse and display functions now call the bitmap functions with NR_CPUS.
Thus on display exactly NR_CPUS bits are displayed, no more and no less,
and on input no value is accepted which is larger than what can be stored
in exactly NR_CPUS bits.

ChangeLog:
o move into the bitmap.h family, rename and refactor interface to match
o bitmap size resolution changed from byte to bit
o no limits on string length when parsing
o chunking (digits between commas) changed from 8 to 4 digits
o display no longer affected by sizeof(unsigned long).
o no alloca usage
o works correctly independent of the size of an unsigned long.
o works on big and little endian machine.

Downsides:
o code is subtle and fragile. Corner cases may be present.

Most of the subtlety is for handling bitmask size resolution down to
the bit and for detecting illegal syntax such as "3,,2" "," and "".

These routines have been tested in user-land on both a big and little
endian machine. Testing focused mainly on standard and corner bitmap
sizes (1,2,3,4,5,7,8,15,16,17,31,32,33,36,37,48,64 and 96), for a wide
variety of legal syntax and illegal syntax (eg legal: "0", "00", "0,0",
"0,0,0,00000000000017,0", "dead,beaf", "a,beaf", "beaf,a").

Regards,
Joe

diff -Nua 2.6/lib/Makefile.0 2.6/lib/Makefile
--- 2.6/lib/Makefile.0 2004-01-15 12:10:41.000000000 -0500
+++ 2.6/lib/Makefile 2004-01-15 11:37:01.000000000 -0500
@@ -5,7 +5,7 @@

lib-y := errno.o ctype.o string.o vsprintf.o cmdline.o \
bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \
- kobject.o idr.o div64.o parser.o int_sqrt.o mask.o
+ kobject.o idr.o div64.o parser.o int_sqrt.o bitmap.o

lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
diff -Nua 2.6/lib/bitmap.c.0 2.6/lib/bitmap.c
--- 2.6/lib/bitmap.c.0 2004-01-15 12:10:41.000000000 -0500
+++ 2.6/lib/bitmap.c 2004-01-15 12:08:10.000000000 -0500
@@ -0,0 +1,130 @@
+/*
+ * lib/bitmask.c - bitmask manipulation routines too big to go into bitmask.h
+ *
+ * Copyright 2002, Concurrent Computer Corporation, and distributed under
+ * the GNU GPL license version 2. For other license arrangements, contact
+ * Concurrent Computer Corporation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/errno.h>
+#include <linux/bitmap.h>
+#include <asm/uaccess.h>
+
+#define CHUNKSZ 16
+
+#define ROUNDUP_POWER2(val,modulus) (((val) + (modulus) - 1) & ~((modulus) - 1))
+
+
+/**
+ * bitmap_snprintf - convert bitmap to an ASCII hex string.
+ * @buf: byte buffer into which string is placed
+ * @buflen: reserved size of @buf, in bytes
+ * @maskp: pointer to bitmap to convert
+ * @nmaskbits: size of bitmap, in bits
+ *
+ * Exactly nmaskbits bits are displayed. Hex digits are grouped into
+ * fours seperated by commas.
+ */
+
+int bitmap_snprintf(char *buf, unsigned int buflen,
+ const unsigned long *maskp, unsigned int nmaskbits)
+{
+ int i, word, bit, len = 0;
+ unsigned long val;
+ const char *sep = "";
+ int chunksz;
+ u32 chunkmask;
+
+ chunksz = nmaskbits & (CHUNKSZ - 1);
+ if (chunksz == 0)
+ chunksz = CHUNKSZ;
+
+ i = ROUNDUP_POWER2(nmaskbits, CHUNKSZ) - CHUNKSZ;
+ for (; i >= 0; i -= CHUNKSZ) {
+ chunkmask = ((1ULL << chunksz) - 1);
+ word = i / BITS_PER_LONG;
+ bit = i % BITS_PER_LONG;
+ val = (maskp[word] >> bit) & chunkmask;
+ len += snprintf(buf+len, buflen-len, "%s%0*lx", sep,
+ (chunksz+3)/4, val);
+ chunksz = CHUNKSZ;
+ sep = ",";
+ }
+ return len;
+}
+/**
+* bitmap_parse - convert an ASCII hex string into a bitmap.
+* @buf: pointer to buffer in user space containing string.
+* @buflen: buffer size in bytes. If string is smaller than this
+* then it must be terminated by a \0.
+* @maskp: pointer to bitmap that will contain result.
+* @nmaskbits: size of bitmap, in bits.
+*
+* Commas group hex digits into chunks. Each chunk defines exactly 16
+* bits of the resultant bitmask. No chunk may specify a value larger
+* than 16 bits (-EOVERFLOW), and if a chunk specifies a smaller value
+* leading 0-bits are appended.
+*/
+int bitmap_parse(const char __user *ubuf, unsigned int ubuflen,
+ unsigned long *maskp, unsigned int nmaskbits)
+{
+ int i, j, c, d, n, nt;
+ int nchunks = ROUNDUP_POWER2(nmaskbits, CHUNKSZ) / CHUNKSZ;
+ u32 chunk, firstchunk = 0;
+ int chunkoff = nmaskbits & (CHUNKSZ - 1);
+ u32 chunkmask = ~((1U << chunkoff) - 1);
+
+ bitmap_clear(maskp, nmaskbits);
+
+ i = nt = 0;
+ while (ubuflen) {
+ chunk = c = n = 0;
+ while (ubuflen) {
+ if (get_user(c, ubuf++))
+ return -EFAULT;
+ ubuflen--;
+ if (!c || c == ',')
+ break;
+ if (isspace(c))
+ continue;
+ if (!isxdigit(c))
+ return -EINVAL;
+ if (chunk & ~((1UL << (CHUNKSZ - 4)) - 1))
+ return -EOVERFLOW;
+ d = isdigit(c) ? (c - '0') : (toupper(c) - 'A' + 10);
+ chunk = (chunk << 4) | d;
+ n++; nt++;
+ }
+ if (n==0) {
+ if (c == ',')
+ return -EINVAL;
+ if (!c)
+ break;
+ }
+ if (i >= nchunks)
+ return -EOVERFLOW;
+ if (i > 0 || chunk) {
+ bitmap_shift_right(maskp, maskp, CHUNKSZ, nmaskbits);
+ for (j = 0; j < 32; j++) {
+ if (chunk & (1 << j)) {
+ set_bit(j, maskp);
+ }
+ }
+ i++;
+ if (i == 1)
+ firstchunk = chunk;
+ if ( i == nchunks) {
+ if (chunkoff && (firstchunk & chunkmask)) {
+ /* input value > 2^nmaskbits -1 */
+ return -EOVERFLOW;
+ }
+ }
+ }
+ }
+ if (nt == 0)
+ return -EINVAL;
+ return 0;
+}
diff -Nua 2.6/include/linux/bitmap.h.0 2.6/include/linux/bitmap.h
--- 2.6/include/linux/bitmap.h.0 2004-01-15 12:10:41.000000000 -0500
+++ 2.6/include/linux/bitmap.h 2004-01-15 11:44:21.000000000 -0500
@@ -154,6 +154,12 @@
}
#endif

+extern int bitmap_snprintf(char *buf, unsigned int buflen,
+ const unsigned long *maskp, unsigned int nmaskbits);
+
+extern int bitmap_parse(const char __user *ubuf, unsigned int ubuflen,
+ unsigned long *maskp, unsigned int nmaskbits);
+
#endif /* __ASSEMBLY__ */

#endif /* __LINUX_BITMAP_H */
diff -Nua 2.6/include/linux/cpumask.h.0 2.6/include/linux/cpumask.h
--- 2.6/include/linux/cpumask.h.0 2004-01-15 12:10:41.000000000 -0500
+++ 2.6/include/linux/cpumask.h 2004-01-15 11:41:26.000000000 -0500
@@ -2,6 +2,7 @@
#define __LINUX_CPUMASK_H

#include <linux/threads.h>
+#include <linux/bitmap.h>
#include <asm/cpumask.h>
#include <asm/bug.h>

@@ -35,16 +36,10 @@
cpu < NR_CPUS; \
cpu = next_online_cpu(cpu,map))

-extern int __mask_snprintf_len(char *buf, unsigned int buflen,
- const unsigned long *maskp, unsigned int maskbytes);
-
#define cpumask_snprintf(buf, buflen, map) \
- __mask_snprintf_len(buf, buflen, cpus_addr(map), sizeof(map))
-
-extern int __mask_parse_len(const char __user *ubuf, unsigned int ubuflen,
- unsigned long *maskp, unsigned int maskbytes);
+ bitmap_snprintf(buf, buflen, cpus_addr(map), NR_CPUS)

#define cpumask_parse(buf, buflen, map) \
- __mask_parse_len(buf, buflen, cpus_addr(map), sizeof(map))
+ bitmap_parse(buf, buflen, cpus_addr(map), NR_CPUS)

#endif /* __LINUX_CPUMASK_H */
diff -Nua 2.6/lib/mask.c.0 2.6/lib/mask.c
--- 2.6/lib/mask.c.0 2004-01-15 12:10:41.000000000 -0500
+++ 2.6/lib/mask.c 1969-12-31 19:00:00.000000000 -0500
@@ -1,178 +0,0 @@
-/*
- * lib/mask.c
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2003 Silicon Graphics, Inc. All Rights Reserved.
- */
-
-/*
- * Routines to manipulate multi-word bit masks, such as cpumasks.
- *
- * The ascii representation of multi-word bit masks displays each
- * 32bit word in hex (not zero filled), and for masks longer than
- * one word, uses a comma separator between words. Words are
- * displayed in big-endian order most significant first. And hex
- * digits within a word are also in big-endian order, of course.
- *
- * Examples:
- * A mask with just bit 0 set displays as "1".
- * A mask with just bit 127 set displays as "80000000,0,0,0".
- * A mask with just bit 64 set displays as "1,0,0".
- * A mask with bits 0, 1, 2, 4, 8, 16, 32 and 64 set displays
- * as "1,1,10117". The first "1" is for bit 64, the second
- * for bit 32, the third for bit 16, and so forth, to the
- * "7", which is for bits 2, 1 and 0.
- * A mask with bits 32 through 39 set displays as "ff,0".
- *
- * The internal binary representation of masks is as one or
- * an array of unsigned longs, perhaps wrapped in a struct for
- * convenient use as an lvalue. The following code doesn't know
- * about any such struct details, relying on inline macros in
- * files such as cpumask.h to pass in an unsigned long pointer
- * and a length (in bytes), describing the mask contents.

- * The 32bit words in the array are in little-endian order,
- * low order word first. Beware that this is the reverse order
- * of the ascii representation.

- *
- * Even though the size of the input mask is provided in bytes,

- * the following code may assume that the mask is a multiple of
- * 32 or 64 bit words long, and ignore any fractional portion
- * of a word at the end. The main reason the size is passed in
- * bytes is because it is so easy to write 'sizeof(somemask_t)'

- * in the macros.
- *
- * Masks are not a single,simple type, like classic 'C'
- * nul-term strings. Rather they are a family of types, one
- * for each different length. Inline macros are used to pick
- * up the actual length, where it is known to the compiler, and
- * pass it down to these routines, which work on any specified
- * length array of unsigned longs. Poor man's templates.
- *
- * Many of the inline macros don't call into the following
- * routines. Some of them call into other kernel routines,
- * such as memset(), set_bit() or ffs(). Some of them can
- * accomplish their task right inline, such as returning the
- * size or address of the unsigned long array, or optimized
- * versions of the macros for the most common case of an array
- * of a single unsigned long.
- */
-
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/ctype.h>
-#include <linux/slab.h>
-#include <linux/errno.h>
-#include <linux/gfp.h>
-#include <asm/uaccess.h>
-
-#define MAX_HEX_PER_BYTE 4 /* dont need > 4 hex chars to encode byte */
-#define BASE 16 /* masks are input in hex (base 16) */
-#define NUL ((char)'\0') /* nul-terminator */
-
-/**
- * __mask_snprintf_len - represent multi-word bit mask as string.
- * @buf: The buffer to place the result into
- * @buflen: The size of the buffer, including the trailing null space
- * @maskp: Points to beginning of multi-word bit mask.
- * @maskbytes: Number of bytes in bit mask at maskp.
- *
- * This routine is expected to be called from a macro such as:
- *
- * #define cpumask_snprintf(buf, buflen, mask) \
- * __mask_snprintf_len(buf, buflen, cpus_addr(mask), sizeof(mask))
- */
-
-int __mask_snprintf_len(char *buf, unsigned int buflen,
- const unsigned long *maskp, unsigned int maskbytes)
-{
- u32 *wordp = (u32 *)maskp;
- int i = maskbytes/sizeof(u32) - 1;
- int len = 0;
- char *sep = "";
-

- while (i >= 1 && wordp[i] == 0)

- i--;
- while (i >= 0) {

- len += snprintf(buf+len, buflen-len, "%s%x", sep, wordp[i]);

- sep = ",";
- i--;
- }
- return len;
-}
-
-/**
- * __mask_parse_len - parse user string into maskbytes mask at maskp
- * @ubuf: The user buffer from which to take the string
- * @ubuflen: The size of this buffer, including the terminating char
- * @maskp: Place resulting mask (array of unsigned longs) here
- * @masklen: Construct mask at @maskp to have exactly @masklen bytes
- *
- * @masklen is a multiple of sizeof(unsigned long). A mask of
- * @masklen bytes is constructed starting at location @maskp.
- * The value of this mask is specified by the user provided
- * string starting at address @ubuf. Only bytes in the range
- * [@ubuf, @ubuf+@ubuflen) can be read from user space, and
- * reading will stop after the first byte that is not a comma
- * or valid hex digit in the characters [,0-9a-fA-F], or at
- * the point @ubuf+@ubuflen, whichever comes first.
- *
- * Since the user only needs about 2.25 chars per byte to encode
- * a mask (one char per nibble plus one comma separator or nul
- * terminator per byte), we blow them off with -EINVAL if they
- * claim a @ubuflen more than 4 (MAX_HEX_PER_BYTE) times maskbytes.
- * An empty word (delimited by two consecutive commas, for example)
- * is taken as zero. If @buflen is zero, the entire @maskp is set
- * to zero.
- *
- * If the user provides fewer comma-separated ascii words
- * than there are 32 bit words in maskbytes, we zero fill the
- * remaining high order words. If they provide more, they fail
- * with -EINVAL. Each comma-separate ascii word is taken as
- * a hex representation; leading zeros are ignored, and do not
- * imply octal. '00e1', 'e1', '00E1', 'E1' are all the same.
- * If user passes a word that is larger than fits in a u32,
- * they fail with -EOVERFLOW.
- */
-
-int __mask_parse_len(const char __user *ubuf, unsigned int ubuflen,
- unsigned long *maskp, unsigned int maskbytes)
-{
- char buf[maskbytes * MAX_HEX_PER_BYTE + sizeof(NUL)];
- char *bp = buf;
- u32 *wordp = (u32 *)maskp;
- char *p;
- int i, j;
-
- if (ubuflen > maskbytes * MAX_HEX_PER_BYTE)
- return -EINVAL;
- if (copy_from_user(buf, ubuf, ubuflen))
- return -EFAULT;
- buf[ubuflen] = NUL;
-
- /*
- * Put the words into wordp[] in big-endian order,
- * then go back and reverse them.
- */
- memset(wordp, 0, maskbytes);
- i = j = 0;
- while ((p = strsep(&bp, ",")) != NULL) {
- unsigned long long t;
- if (j == maskbytes/sizeof(u32))
- return -EINVAL;
- t = simple_strtoull(p, 0, BASE);
- if (t != (u32)t)
- return -EOVERFLOW;

- wordp[j++] = t;

- }
- --j;
- while (i < j) {

- u32 t = wordp[i];
- wordp[i] = wordp[j];
- wordp[j] = t;

- i++, --j;
- }
- return 0;
-}

Paul Jackson

unread,

Jan 15, 2004, 5:57:32 PM1/15/04

to

Andrew wrote:
> Gad.

Could you elaborate a bit on this critique, Andrew?

You sketch an alternative, that loops by bit, with an sprintf each
nibble, instead of looping by u32 word. By the time that alternative is
fancied up to handle (optionally) suppression of leading zero words and
(vital, for very long masks) a 32-bit word separator, and various other
details, I doubt that it will be any simpler than the corresponding bit
of code in my patch:

int i = maskbytes/sizeof(u32) - 1;

int len = 0;
char *sep = "";

while (i >= 1 && wordp[M32X(i)] == 0)

i--;
while (i >= 0) {

len += snprintf(buf+len, buflen-len,

"%s%x", sep, wordp[M32X(i)]);
sep = ",";
i--;
}

I am at a loss to understand why the above u32 loop version of this code
is so much worse that it only merits a "Gad".

Did I provide too many comments?

> It is hardly performance-critical.

I quite agree on that. One thing I _do_ try to optimize in most code is
the number of "fussy details". The few operations, conditions, special
cases, and such, the better, given the finite limits of human brain power.

--
I won't rest till it's the best ...
Programmer, Linux Scalability
Paul Jackson <p...@sgi.com> 1.650.933.1373

Paul Jackson

unread,

Jan 15, 2004, 7:21:07 PM1/15/04

to

> This patch captures what I am looking for in bitmap display and input.

Interesting. I appear to have provoked Joe into a burst of coding.
Now, if I had any smarts, I would stand aside and let Joe own this,
just as Bill Irwin did when I posted my initial lib/mask.c patch a
couple months ago.

Andrew:

If you find Joe's coding more to your liking than my "Gad" style,
I will bless this, and after tossing in a few parting shots, will
stand aside. It meets my essential needs, which were:
- chunked output (a comma every 16 or 32 bits),
- symmetric input and output formats, and
- display and parsing code generic to diverse bitmap sizes.

My actual recommendation, if however you are still undecided, is:
- my patch of last night (with the M32X() 64 bit big endian fix),
- Joe's recommended format, zero-filling to chunksize each word,
- Joe's renaming/refactoring from lib/mask.c to lib/bitmap.h, and
- a chunksize of 16 rather than 32 (Joe likes 16, I don't care).

The essential differences between Joe's and my proposals that I see are:
- Joe's has more code, especially in the parsing routine,
- Joe's bitmap size resolution is bits, not words, and
- I use an implied alloca of 4 x sizeof(mask) bytes.

Comments on Joe's patch:

> ChangeLog:

Good job of summarizing for the Changelog the changes.

> o move into the bitmap.h family, rename and refactor interface to match

I think I like this - good.

> o bitmap size resolution changed from byte to bit

Why? This adds a fair bit of complexity to the code, I suspect.
I am not aware of a need for this, but if there is one, ok.

> o chunking (digits between commas) changed from 8 to 4 digits

Ok - either 8 or 4 works for me. This detail should be decided
by those working on 16 to 32 cpu systems, who will notice this
choice the most. Those of us on larger or smaller systems are
going to see, or not see, separators in either case.

> o display no longer affected by sizeof(unsigned long).

A bit of a misstatement. The display was only affected by the
chunksize, one of 32 (sizeof(u32)*8) or 16 (CHUNKSZ).

> o no alloca usage

True. Though on the other hand, you need to roll your own parsing
code of comma-separated chunks, instead of getting by with using
strsep(). So you trade alloca usage for code complexity. Either
way works - coders choice. I agree that we disagree on this tradeoff.

> o works correctly independent of the size of an unsigned long.

And my version doesn't?

> o works on big and little endian machine.

With my M32X() eor-1 fix, so did mine.

> + * lib/bitmask.c - bitmask manipulation routines too big to go into bitmask.h

Typo? Did you mean bitmap.c and bitmap.h, not bitmask?

> +int bitmap_parse(const char __user *ubuf, unsigned int ubuflen,
> + unsigned long *maskp, unsigned int nmaskbits)

This routine has quite a bit more code detail than my corresponding
parsing routine. I hope that:
(1) providing bit-level resolution, and
(2) removing the implied alloca
justifies this increase in code detail.

> + if (isspace(c))
> + continue;

So a space embedded in a hex number is skipped? That is, your code
parses "dead,beef" and "de a d, bee f" the same? This seems strange.
Perhaps you would prefer to suppress only leading spaces in each chunk:

if (!n && isspace(c))
continue;

> + for (j = 0; j < 32; j++) {

What's this "32", an unrepentant CHUNKSZ?

Joe Korty

unread,

Jan 15, 2004, 7:52:30 PM1/15/04

to

On Thu, Jan 15, 2004 at 04:17:32PM -0800, Paul Jackson wrote:
> > This patch captures what I am looking for in bitmap display and input.
>
> Interesting. I appear to have provoked Joe into a burst of coding.
> Now, if I had any smarts, I would stand aside and let Joe own this,
> just as Bill Irwin did when I posted my initial lib/mask.c patch a
> couple months ago.

I would prefer that we provoke each other until Andrew finds something
that he likes:)

> If you find Joe's coding more to your liking than my "Gad" style,
> I will bless this, and after tossing in a few parting shots, will
> stand aside. It meets my essential needs, which were:

I like the output parser but am not too happy with the input parser.
I would prefer that the commas on input be optional, and when present
silently skipped over. That would greatly simplify the code and has
the added benefit that masks generated by programs (which by default
have no commas) be feedable to the parser, while still supporting
the human readable form for input.

>> o bitmap size resolution changed from byte to bit
>
> Why? This adds a fair bit of complexity to the code, I suspect.
> I am not aware of a need for this, but if there is one, ok.

I think it important that we display exactly what the bitmask represents,
no more and no less. This is a philosophical point, to be sure.

In any case I think we should not have the display change from one
machine to the next, simply because the size of the underlying
'unsigned long' changed underneath us. The fact that bitmasks are
encoded in unsigned longs is entirely an internal bitmask.h affair.

> So a space embedded in a hex number is skipped? That is, your code
> parses "dead,beef" and "de a d, bee f" the same? This seems strange.
> Perhaps you would prefer to suppress only leading spaces in each chunk:
>
> if (!n && isspace(c))
> continue;

Yes, this would be better.

>> + for (j = 0; j < 32; j++) {
> What's this "32", an unrepentant CHUNKSZ?

Oops. Thanks, good catch.

--
"Money can buy bandwidth, but latency is forever" -- John Mashey
Joe

Andrew Morton

unread,

Jan 15, 2004, 8:09:26 PM1/15/04

to

Paul Jackson <p...@sgi.com> wrote:
>
> Andrew wrote:
> > Gad.
>
> Could you elaborate a bit on this critique, Andrew?

The code seems very complex for such a conceptually simple problem.

And doubts remain whether it is correct on particular wordsize/endianness
machines.

It seems that there is a layering violation in which this code is poking
around inside representations which should known only to the particular
architecture. Hence, if we can find a way to provide this function using
existing architecture API's (test_bit) then everything is solved.

If this code needs some arch-provided support function then so be it.
That's better than adding a tangle of ifdefs in generic code.

> Did I provide too many comments?

You can never provide too many comments!

Anyway, please wake me up again when you and Joe have finished. (I agree
that making that function take a number of bits is better than taking a
number of bytes btw).

Paul Jackson

unread,

Jan 15, 2004, 8:53:01 PM1/15/04

to

Joe wrote:
> I would prefer that we provoke each other until Andrew finds something
> that he likes:)

Drat ;). Perhaps Andrew will elaborate on his "gad", and I will have
some clue of whether it is worth responding with another variant.
Clearly something in my last patch looked ugly to him.

> I would prefer that the commas on input be optional, and when present
> silently skipped over.

This wasn't an option before, in my variations that didn't zero-fill each
chunk. But with your zero-fill, which I agree is better, this is possible.

Not a bad idea. Another instance of Postel's Prescription:

Be liberal in what you accept, and conservative in what you send.
http://www.postel.org/postel.html
Referenced in: Basics of the Unix Philosophy
http://www.faqs.org/docs/artu/ch01s06.html

Though possibly a violation of the Rule of Composition:

Design programs to be connected to other programs.

If commas are optional on input, then a user level program that
generates such masks could not always feed them to another user level
program that reads them. The reader might require exact comma placement
that the producer didn't provide.

Since the primary "documentation" for this format will be what is
displayed when someone does "cat /proc/irq/prof_cpu_mask", I rather
think that the design format should be exactly what displays here,
including exact comma separation, both displaying and parsing.

> I think it important that we display exactly what the bitmask represents,
> no more and no less. This is a philosophical point, to be sure.

Purely philosophical, as best as I can tell. All the other bitmask
stuff works on objects that are some number of words in size. I see no
use whatsoever for this additional complexity.

Unless you can show me a need for this, I think it is overdesign.

> In any case I think we should not have the display change from one
> machine to the next, simply because the size of the underlying
> 'unsigned long' changed underneath us.

Huh? I am unware that my code displays differently depending on whether
the underling "unsigned long" is 4 or 8 bytes.

--
I won't rest till it's the best ...
Programmer, Linux Scalability
Paul Jackson <p...@sgi.com> 1.650.933.1373

Paul Jackson

unread,

Jan 15, 2004, 9:58:47 PM1/15/04

to

Thanks for the feedback, Andrew.

Two points you made that I now agree with:

* Better that this code uses existing arch-dependent bitops,
than that it have code pretending to be generic hiding
additional arch specific dependencies.

* Having just realized that the other existing include/bitmap.h
calls take a count of bits, not bytes, I now agree with you
and Joe that it should be bit counts.

> You can never provide too many comments!

Good - I'm on solid ground there ;)

> Anyway, please wake me up again when you and Joe have finished.

ok

Paul Jackson

unread,

Jan 16, 2004, 12:17:28 AM1/16/04

to

I give, Joe. Given the several details that are better with your
solution, I endorse your solution, with the couple of minor edits you
have in the pipeline.

It pains me to see the minor code growth (parsing went from 391 bytes
of machine code to 625), with non-trivial code duplication of the
simple_stroull() routine, and admitted increase in code complexity.

But, yes, better bits than bytes, better not to alloca(), and
better using existing bitops than misplaced arch dependencies.

And better you than me ... tag - you're it <grin>.

Bonus question:

Should any of the other inline routines in include/bitmap.h
be moved to your new file lib/bitmap.c?

Others have commented that too much stuff is marked inline,
and this might be such a case.

For example, I count about a dozen copies of bitmask_empty(),
mostly as cpus_empty(), in various generic and i386 files,
each one worth perhaps 80 bytes of kernel text space.

Andrew Morton

unread,

Jan 16, 2004, 12:31:10 AM1/16/04

to

Paul Jackson <p...@sgi.com> wrote:
>
> Should any of the other inline routines in include/bitmap.h
> be moved to your new file lib/bitmap.c?

Yup. The below patch will be in 2.6.1-mm4:

uninline bitmap functions

- A couple of them are using alloca (via DECLARE_BITMAP) and this generates
a cannot-inline warning with -Winline.

- These functions are too big to inline anwyay.

---

include/linux/bitmap.h | 140 ++++---------------------------------------------
lib/Makefile | 3 -
lib/bitmap.c | 140 +++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 156 insertions(+), 127 deletions(-)

diff -puN /dev/null lib/bitmap.c
--- /dev/null 2002-08-30 16:31:37.000000000 -0700
+++ 25-akpm/lib/bitmap.c 2004-01-14 02:52:02.000000000 -0800
@@ -0,0 +1,140 @@
+#include <linux/bitmap.h>
+#include <linux/module.h>
+
+int bitmap_empty(const unsigned long *bitmap, int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+ for (k = 0; k < lim; ++k)
+ if (bitmap[k])
+ return 0;
+
+ if (bits % BITS_PER_LONG)
+ if (bitmap[k] & ((1UL << (bits % BITS_PER_LONG)) - 1))
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL(bitmap_empty);
+
+int bitmap_full(const unsigned long *bitmap, int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+ for (k = 0; k < lim; ++k)
+ if (~bitmap[k])
+ return 0;
+
+ if (bits % BITS_PER_LONG)
+ if (~bitmap[k] & ((1UL << (bits % BITS_PER_LONG)) - 1))
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL(bitmap_full);
+
+int bitmap_equal(const unsigned long *bitmap1,
+ unsigned long *bitmap2, int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;;
+ for (k = 0; k < lim; ++k)
+ if (bitmap1[k] != bitmap2[k])
+ return 0;
+
+ if (bits % BITS_PER_LONG)
+ if ((bitmap1[k] ^ bitmap2[k]) &
+ ((1UL << (bits % BITS_PER_LONG)) - 1))
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL(bitmap_equal);
+
+void bitmap_complement(unsigned long *bitmap, int bits)
+{
+ int k;
+
+ for (k = 0; k < BITS_TO_LONGS(bits); ++k)
+ bitmap[k] = ~bitmap[k];
+}
+EXPORT_SYMBOL(bitmap_complement);
+
+void bitmap_shift_right(unsigned long *dst,
+ const unsigned long *src, int shift, int bits)
+{
+ int k;
+ DECLARE_BITMAP(__shr_tmp, bits);
+
+ bitmap_clear(__shr_tmp, bits);
+ for (k = 0; k < bits - shift; ++k)
+ if (test_bit(k + shift, src))
+ set_bit(k, __shr_tmp);
+ bitmap_copy(dst, __shr_tmp, bits);
+}
+EXPORT_SYMBOL(bitmap_shift_right);
+
+void bitmap_shift_left(unsigned long *dst,
+ const unsigned long *src, int shift, int bits)
+{
+ int k;
+ DECLARE_BITMAP(__shl_tmp, bits);
+
+ bitmap_clear(__shl_tmp, bits);
+ for (k = bits; k >= shift; --k)
+ if (test_bit(k - shift, src))
+ set_bit(k, __shl_tmp);
+ bitmap_copy(dst, __shl_tmp, bits);
+}
+EXPORT_SYMBOL(bitmap_shift_left);
+
+void bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k;
+ int nr = BITS_TO_LONGS(bits);
+
+ for (k = 0; k < nr; k++)
+ dst[k] = bitmap1[k] & bitmap2[k];
+}
+EXPORT_SYMBOL(bitmap_and);
+
+void bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k;
+ int nr = BITS_TO_LONGS(bits);
+
+ for (k = 0; k < nr; k++)
+ dst[k] = bitmap1[k] | bitmap2[k];
+}
+EXPORT_SYMBOL(bitmap_or);
+
+#if BITS_PER_LONG == 32
+int bitmap_weight(const unsigned long *bitmap, int bits)
+{
+ int k, w = 0, lim = bits/BITS_PER_LONG;
+
+ for (k = 0; k < lim; k++)
+ w += hweight32(bitmap[k]);
+
+ if (bits % BITS_PER_LONG)
+ w += hweight32(bitmap[k] &
+ ((1UL << (bits % BITS_PER_LONG)) - 1));
+
+ return w;
+}
+#else
+int bitmap_weight(const unsigned long *bitmap, int bits)
+{
+ int k, w = 0, lim = bits/BITS_PER_LONG;
+
+ for (k = 0; k < lim; k++)
+ w += hweight64(bitmap[k]);
+
+ if (bits % BITS_PER_LONG)
+ w += hweight64(bitmap[k] &
+ ((1UL << (bits % BITS_PER_LONG)) - 1));
+
+ return w;
+}
+#endif
+EXPORT_SYMBOL(bitmap_weight);
+
diff -puN include/linux/bitmap.h~uninline-bitmap-functions include/linux/bitmap.h
--- 25/include/linux/bitmap.h~uninline-bitmap-functions 2004-01-14 02:36:26.000000000 -0800
+++ 25-akpm/include/linux/bitmap.h 2004-01-14 02:36:26.000000000 -0800
@@ -10,57 +10,11 @@
#include <linux/bitops.h>
#include <linux/string.h>

-static inline int bitmap_empty(const unsigned long *bitmap, int bits)
-{
- int k, lim = bits/BITS_PER_LONG;
- for (k = 0; k < lim; ++k)
- if (bitmap[k])
- return 0;
-
- if (bits % BITS_PER_LONG)
- if (bitmap[k] & ((1UL << (bits % BITS_PER_LONG)) - 1))
- return 0;
-
- return 1;
-}
-
-static inline int bitmap_full(const unsigned long *bitmap, int bits)
-{
- int k, lim = bits/BITS_PER_LONG;
- for (k = 0; k < lim; ++k)
- if (~bitmap[k])
- return 0;
-
- if (bits % BITS_PER_LONG)
- if (~bitmap[k] & ((1UL << (bits % BITS_PER_LONG)) - 1))
- return 0;
-
- return 1;
-}
-
-static inline int bitmap_equal(const unsigned long *bitmap1,
- unsigned long *bitmap2, int bits)
-{
- int k, lim = bits/BITS_PER_LONG;;
- for (k = 0; k < lim; ++k)
- if (bitmap1[k] != bitmap2[k])
- return 0;
-
- if (bits % BITS_PER_LONG)
- if ((bitmap1[k] ^ bitmap2[k]) &
- ((1UL << (bits % BITS_PER_LONG)) - 1))
- return 0;
-
- return 1;
-}
-
-static inline void bitmap_complement(unsigned long *bitmap, int bits)
-{
- int k;
-
- for (k = 0; k < BITS_TO_LONGS(bits); ++k)
- bitmap[k] = ~bitmap[k];
-}
+int bitmap_empty(const unsigned long *bitmap, int bits);
+int bitmap_full(const unsigned long *bitmap, int bits);
+int bitmap_equal(const unsigned long *bitmap1,
+ unsigned long *bitmap2, int bits);
+void bitmap_complement(unsigned long *bitmap, int bits);

static inline void bitmap_clear(unsigned long *bitmap, int bits)
{
@@ -78,81 +32,15 @@ static inline void bitmap_copy(unsigned
memcpy(dst, src, BITS_TO_LONGS(bits)*sizeof(unsigned long));
}

-static inline void bitmap_shift_right(unsigned long *dst,
- const unsigned long *src, int shift, int bits)
-{
- int k;
- DECLARE_BITMAP(__shr_tmp, bits);
-
- bitmap_clear(__shr_tmp, bits);
- for (k = 0; k < bits - shift; ++k)
- if (test_bit(k + shift, src))
- set_bit(k, __shr_tmp);
- bitmap_copy(dst, __shr_tmp, bits);
-}
-
-static inline void bitmap_shift_left(unsigned long *dst,
- const unsigned long *src, int shift, int bits)
-{
- int k;
- DECLARE_BITMAP(__shl_tmp, bits);
-
- bitmap_clear(__shl_tmp, bits);
- for (k = bits; k >= shift; --k)
- if (test_bit(k - shift, src))
- set_bit(k, __shl_tmp);
- bitmap_copy(dst, __shl_tmp, bits);
-}
-
-static inline void bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits)
-{
- int k;
- int nr = BITS_TO_LONGS(bits);
-
- for (k = 0; k < nr; k++)
- dst[k] = bitmap1[k] & bitmap2[k];
-}
-
-static inline void bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits)
-{
- int k;
- int nr = BITS_TO_LONGS(bits);
-
- for (k = 0; k < nr; k++)
- dst[k] = bitmap1[k] | bitmap2[k];
-}
-
-#if BITS_PER_LONG == 32
-static inline int bitmap_weight(const unsigned long *bitmap, int bits)
-{
- int k, w = 0, lim = bits/BITS_PER_LONG;
-
- for (k = 0; k < lim; k++)
- w += hweight32(bitmap[k]);
-
- if (bits % BITS_PER_LONG)
- w += hweight32(bitmap[k] &
- ((1UL << (bits % BITS_PER_LONG)) - 1));
-
- return w;
-}
-#else
-static inline int bitmap_weight(const unsigned long *bitmap, int bits)
-{
- int k, w = 0, lim = bits/BITS_PER_LONG;
-
- for (k = 0; k < lim; k++)
- w += hweight64(bitmap[k]);
-
- if (bits % BITS_PER_LONG)
- w += hweight64(bitmap[k] &
- ((1UL << (bits % BITS_PER_LONG)) - 1));
-
- return w;
-}
-#endif
+void bitmap_shift_right(unsigned long *dst,
+ const unsigned long *src, int shift, int bits);
+void bitmap_shift_left(unsigned long *dst,
+ const unsigned long *src, int shift, int bits);
+void bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
+void bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
+int bitmap_weight(const unsigned long *bitmap, int bits);

#endif /* __ASSEMBLY__ */

diff -puN lib/Makefile~uninline-bitmap-functions lib/Makefile
--- 25/lib/Makefile~uninline-bitmap-functions 2004-01-14 02:36:26.000000000 -0800
+++ 25-akpm/lib/Makefile 2004-01-14 02:36:26.000000000 -0800
@@ -5,7 +5,8 @@

lib-y := errno.o ctype.o string.o vsprintf.o cmdline.o \
bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \
- kobject.o idr.o div64.o parser.o int_sqrt.o mask.o

+ kobject.o idr.o div64.o parser.o int_sqrt.o mask.o \
+ bitmap.o

lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o

_

William Lee Irwin III

unread,

Jan 16, 2004, 12:56:02 AM1/16/04

to

Paul Jackson <p...@sgi.com> wrote:
>> Should any of the other inline routines in include/bitmap.h
>> be moved to your new file lib/bitmap.c?

On Thu, Jan 15, 2004 at 09:26:13PM -0800, Andrew Morton wrote:
> Yup. The below patch will be in 2.6.1-mm4:
> uninline bitmap functions
> - A couple of them are using alloca (via DECLARE_BITMAP) and this generates
> a cannot-inline warning with -Winline.
> - These functions are too big to inline anwyay.

Uninlining is good (I originally wanted to avoid controversy by not
having them compiled in unless they were called), but it's also possible
to remove the stack allocations entirely with more sophisticated
implementations. These are actually quite dumb as implementations of the
bitmap operations, and meant to look simple as opposed to performing well
or being well-behaved with respect to stackspace.

-- wli

Andi Kleen

unread,

Jan 16, 2004, 3:28:35 AM1/16/04

to

Andrew Morton <ak...@osdl.org> writes:

> +void bitmap_complement(unsigned long *bitmap, int bits)
> +{
> + int k;
> +
> + for (k = 0; k < BITS_TO_LONGS(bits); ++k)
> + bitmap[k] = ~bitmap[k];
> +}
> +EXPORT_SYMBOL(bitmap_complement);

I actually had to change that one in my NUMA API patchkit (which uses
bitmap functions for its node maps), because gcc 3.2 miscompiled the
loop.

Please add something like that (looks silly, but makes a big
difference):

static inline void bitmap_complement(unsigned long *bitmap, int bits)

{
int k;
+ int max = BITS_TO_LONGS(bits);

- for (k = 0; k < BITS_TO_LONGS(bits); ++k)

+ for (k = 0; k < max; ++k)

bitmap[k] = ~bitmap[k];

-Andi

Andrew Morton

unread,

Jan 16, 2004, 3:38:38 AM1/16/04

to

Andi Kleen <a...@muc.de> wrote:
>
> Andrew Morton <ak...@osdl.org> writes:
>
> > +void bitmap_complement(unsigned long *bitmap, int bits)
> > +{
> > + int k;
> > +
> > + for (k = 0; k < BITS_TO_LONGS(bits); ++k)
> > + bitmap[k] = ~bitmap[k];
> > +}
> > +EXPORT_SYMBOL(bitmap_complement);
>
> I actually had to change that one in my NUMA API patchkit (which uses
> bitmap functions for its node maps), because gcc 3.2 miscompiled the
> loop.
>
> Please add something like that (looks silly, but makes a big
> difference):
>
> static inline void bitmap_complement(unsigned long *bitmap, int bits)
> {
> int k;
> + int max = BITS_TO_LONGS(bits);
>
> - for (k = 0; k < BITS_TO_LONGS(bits); ++k)
> + for (k = 0; k < max; ++k)
> bitmap[k] = ~bitmap[k];

OK. bitmap_and() and bitmap_or() were converted to this form a while back
because they too were hitting the bug. On ia32. It might no longer
happen now they're uninlined but whatever - you don't have to look
at the code and think "gee, I hope the compiler moves that arith out
of the loop".

Andi Kleen

unread,

Jan 16, 2004, 5:19:29 AM1/16/04

to

> > static inline void bitmap_complement(unsigned long *bitmap, int bits)
> > {
> > int k;
> > + int max = BITS_TO_LONGS(bits);
> >
> > - for (k = 0; k < BITS_TO_LONGS(bits); ++k)
> > + for (k = 0; k < max; ++k)
> > bitmap[k] = ~bitmap[k];
>
> OK. bitmap_and() and bitmap_or() were converted to this form a while back
> because they too were hitting the bug. On ia32. It might no longer
> happen now they're uninlined but whatever - you don't have to look
> at the code and think "gee, I hope the compiler moves that arith out
> of the loop".

BTW I think the original point of the inlining was that when you have bits ==
BITS_PER_LONG the function could collapse to a single ~ operation using
the compiler's optimizer. That would be the case with nodemasks on x86-64.
As for the NUMA API i don't care particularly because there is no bitmap
manipulation in any fast path. Avoiding miscompilations is definitely
higher priority.

-Andi

Joe Korty

unread,

Jan 16, 2004, 9:28:31 AM1/16/04

to

On Thu, Jan 15, 2004 at 09:14:02PM -0800, Paul Jackson wrote:
> I give, Joe. Given the several details that are better with your
> solution, I endorse your solution, with the couple of minor edits you
> have in the pipeline.
>
> It pains me to see the minor code growth (parsing went from 391 bytes
> of machine code to 625), with non-trivial code duplication of the
> simple_stroull() routine, and admitted increase in code complexity.
>
> But, yes, better bits than bytes, better not to alloca(), and
> better using existing bitops than misplaced arch dependencies.

First of all, I don't like my parser anymore so I hope you don't back
out, Paul. Perhaps all that is needed to make your parser acceptable
to Andrew is 1) tweak it to use bitmap_shift_right / set_bit, and 2)
use nbits in the interface but immediately convert it to the nbytes that
the algorithm actually wants.

Over the weekend, I may poke at my version and look over yours again
and perhaps yet a third version will come out of this. Which is a good
thing since lots of choices to pick and merge from is what is best for
Andrew and for Linux.

Joe

Matthew Dobson

unread,

Jan 16, 2004, 6:39:56 PM1/16/04

to

FWIW, I think a 32-bit chunk-size is preferable. I personally don't
think "dead,beef" is easier to read than "deadbeef", nor "0073,0f0f"
compared to "00730f0f". On the small CPU count, ie: <=32, either
version is pretty readable, but on larger CPU count boxen you're going
to overflow your brain counting groups of 4 versus groups of 8. I can't
believe I just called a 32 CPU box "small". My world perspective is a
bit skewed... ;)

Also, on the input side, a lot of apps will output a 32-bit CPU mask.
With commas separating every 32 bits, we can feed an "uncommafied" mask
to the kernel and it won't barf. If we go with 16-bit chunks, we'll
have to "commafy" these 32-bit bitmasks to feed them to the kernel.

As a NUMA guy who deals with largish CPU count machines daily, that's my
2 cents...

-Matt

Paul Jackson wrote:
>>This patch captures what I am looking for in bitmap display and input.
>
>
> Interesting. I appear to have provoked Joe into a burst of coding.
> Now, if I had any smarts, I would stand aside and let Joe own this,
> just as Bill Irwin did when I posted my initial lib/mask.c patch a
> couple months ago.
>
>

> Andrew:

>
> If you find Joe's coding more to your liking than my "Gad" style,
> I will bless this, and after tossing in a few parting shots, will
> stand aside. It meets my essential needs, which were:

> - chunked output (a comma every 16 or 32 bits),
> - symmetric input and output formats, and
> - display and parsing code generic to diverse bitmap sizes.
>
> My actual recommendation, if however you are still undecided, is:
> - my patch of last night (with the M32X() 64 bit big endian fix),
> - Joe's recommended format, zero-filling to chunksize each word,
> - Joe's renaming/refactoring from lib/mask.c to lib/bitmap.h, and
> - a chunksize of 16 rather than 32 (Joe likes 16, I don't care).
>
> The essential differences between Joe's and my proposals that I see are:
> - Joe's has more code, especially in the parsing routine,
> - Joe's bitmap size resolution is bits, not words, and
> - I use an implied alloca of 4 x sizeof(mask) bytes.
>
>
> Comments on Joe's patch:
>
>
>>ChangeLog:
>
>
> Good job of summarizing for the Changelog the changes.
>
>
>>o move into the bitmap.h family, rename and refactor interface to match
>
>
> I think I like this - good.
>
>

>> o bitmap size resolution changed from byte to bit
>
>
> Why? This adds a fair bit of complexity to the code, I suspect.
> I am not aware of a need for this, but if there is one, ok.
>
>

> So a space embedded in a hex number is skipped? That is, your code
> parses "dead,beef" and "de a d, bee f" the same? This seems strange.
> Perhaps you would prefer to suppress only leading spaces in each chunk:
>
> if (!n && isspace(c))
> continue;
>
>

>>+ for (j = 0; j < 32; j++) {
>
>
> What's this "32", an unrepentant CHUNKSZ?
>

Paul Jackson

unread,

Jan 17, 2004, 4:16:12 AM1/17/04

to

> I think a 32-bit chunk-size is preferable.

I see that Joe has been persuaded by your points,
in his latest patch.

Good - 32 it is. Thanks.

--
I won't rest till it's the best ...
Programmer, Linux Scalability
Paul Jackson <p...@sgi.com> 1.650.933.1373

Paul Jackson

unread,

Jan 17, 2004, 5:10:32 AM1/17/04

to

Joe wrote:
> First of all, I don't like my parser anymore so I hope you don't back
> out, Paul.

Well, I see Joe has already refined his version, as version 3
in my inbox.

Here is a half-baked refinement of mine, based on Joe's version 2
of a couple days ago:

o It is called with a bit count, not a byte count.
o It uses kmalloc()/kfree(), instead of implied alloca().
o The M32X() eor-1 index swizzling was moved to linux/bitops.h
o The CHUNKSZ is 32. Thanks, Matthew.

However:
- The parsing code is in my u32 word style, but the printing
code in Joe's bit fiddling style.
- It has never been tested.
- It is now only moderately "simpler" than Joe's version 3,
in my view. Source line count may well favor Joe's by
now.
- While I moved the ifdef'd defines of the magic eor-1 index
macro to linux/bitops.h, it's still an ifdef'd define,
because I still think that is the most robust and maintainable
way of stating this detail.
- It has not been upgraded with any relavent refinements from
Joe's version 3.

My take is that we should go with Joe's. Personally, I don't like, and
avoid at great effort, bit coding. However, this stuff _is_ all about
bits, and for the purposes of maintainability, I conjecture that it is
better to have code in a style that future hackers of this code are most
likely to be comfortable with. My careful use of C types to avoid bit
details is a minor negative, even if it does reduce the code size a
little.

If there is a concensus to the contrary, and a hue and cry for my u32
word style of coding, then the printing routine should be redone in the
same style, and the other negatives above addressed, before this is
accepted.

# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:

# ChangeSet 1.1506 -> 1.1507
# lib/bitmap.c 1.1 -> 1.2
# include/linux/bitops.h 1.6 -> 1.7

#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------

# 04/01/17 p...@sgi.com 1.1507
# rework bitmap_parse - takes bits, u32 word style algorithm
# --------------------------------------------
#
diff -Nru a/include/linux/bitops.h b/include/linux/bitops.h
--- a/include/linux/bitops.h Sat Jan 17 01:47:35 2004
+++ b/include/linux/bitops.h Sat Jan 17 01:47:35 2004
@@ -130,4 +130,29 @@
return sizeof(w) == 4 ? generic_hweight32(w) : generic_hweight64(w);
}

+/*
+ * Bitops apply to arrays of unsigned long. This is almost
+ * the same as an array of unsigned ints, except on 64 bit big
+ * endian architectures, in which the two 32-bit int halves of
+ * each long are reversed (big 32-bit halfword first, naturally).
+ *
+ * Use this BIT32X (for "BITop 32-bit indeX") macro to index the
+ * i-th word of a bit mask declared as an array of 32 bit words.

+ *
+ * Usage example accessing 32-bit words in mask[] in order,
+ * smallest first:
+ * u32 mask[MASKLENGTH];
+ * int i;
+ * for (i = 0; i < MASKLENGTH; i++)

+ * ... mask[BIT32X(i)] ...
+ */
+#ifndef BIT32X

+#include <asm/byteorder.h>
+#if BITS_PER_LONG == 64 && defined(__BIG_ENDIAN)

+#define BIT32X(i) ((i)^1)

+#elif BITS_PER_LONG == 32 || defined(__LITTLE_ENDIAN)

+#define BIT32X(i) (i)
+#endif
+#endif
+
#endif
diff -Nru a/lib/bitmap.c b/lib/bitmap.c
--- a/lib/bitmap.c Sat Jan 17 01:47:35 2004
+++ b/lib/bitmap.c Sat Jan 17 01:47:35 2004
@@ -11,11 +11,12 @@
#include <linux/ctype.h>
#include <linux/errno.h>
#include <linux/bitmap.h>
+#include <linux/slab.h>
#include <asm/uaccess.h>

-#define CHUNKSZ 16
+#define CHUNKSZ 32

-#define ROUNDUP_POWER2(val,modulus) (((val) + (modulus) - 1) & ~((modulus) - 1))
+#define ROUND_UP(val,modulus) (((val) + (modulus) - 1) & ~((modulus) - 1))

/**
@@ -25,8 +26,8 @@

* @maskp: pointer to bitmap to convert

* @nmaskbits: size of bitmap, in bits

*
- * Exactly nmaskbits bits are displayed. Hex digits are grouped into
- * fours seperated by commas.
+ * Exactly nmaskbits bits are displayed. Hex digits are put
+ * in groups of eight, seperated by commas.
*/

int bitmap_snprintf(char *buf, unsigned int buflen,

@@ -42,7 +43,7 @@
if (chunksz == 0)
chunksz = CHUNKSZ;

- i = ROUNDUP_POWER2(nmaskbits, CHUNKSZ) - CHUNKSZ;
+ i = ROUND_UP(nmaskbits, CHUNKSZ) - CHUNKSZ;

for (; i >= 0; i -= CHUNKSZ) {

chunkmask = ((1ULL << chunksz) - 1);

word = i / BITS_PER_LONG;

@@ -55,6 +56,11 @@
}
return len;
}
+
+#define MAX_HEX_PER_BYTE 4 /* dont need > 4 hex chars to encode byte */
+#define BASE 16 /* masks are input in hex (base 16) */
+#define NUL ((char)'\0') /* nul-terminator */
+
/**

* bitmap_parse - convert an ASCII hex string into a bitmap.

* @buf: pointer to buffer in user space containing string.

@@ -63,68 +69,89 @@
* @maskp: pointer to bitmap that will contain result.
* @nmaskbits: size of bitmap, in bits.
*
-* Commas group hex digits into chunks. Each chunk defines exactly 16
+* Commas group hex digits into chunks. Each chunk defines exactly 32

* bits of the resultant bitmask. No chunk may specify a value larger

-* than 16 bits (-EOVERFLOW), and if a chunk specifies a smaller value
+* than 32 bits (-EOVERFLOW), and if a chunk specifies a smaller value
* leading 0-bits are appended.
+*
+* Since the user only needs about 2.25 chars per byte to encode
+* a mask (one char per nibble plus one comma separator or nul
+* terminator per byte), we blow them off with -EINVAL if they
+* claim a @ubuflen more than 4 (MAX_HEX_PER_BYTE) times maskbytes.
+* An empty word (delimited by two consecutive commas, for example)
+* is taken as zero. If @buflen is zero, the entire @maskp is set
+* to zero.
+*
+* If the user provides fewer comma-separated ascii words
+* than there are 32 bit words in maskbytes, we zero fill the
+* remaining high order words. If they provide more, they fail
+* with -EINVAL. Each comma-separate ascii word is taken as
+* a hex representation; leading zeros are ignored, and do not
+* imply octal. '00e1', 'e1', '00E1', 'E1' are all the same.
+* If user passes a word that is larger than fits in a u32,
+* they fail with -EOVERFLOW.
*/
+

int bitmap_parse(const char __user *ubuf, unsigned int ubuflen,

unsigned long *maskp, unsigned int nmaskbits)

{
- int i, j, c, d, n, nt;
- int nchunks = ROUNDUP_POWER2(nmaskbits, CHUNKSZ) / CHUNKSZ;
- u32 chunk, firstchunk = 0;
- int chunkoff = nmaskbits & (CHUNKSZ - 1);
- u32 chunkmask = ~((1U << chunkoff) - 1);
-
- bitmap_clear(maskp, nmaskbits);
-
- i = nt = 0;
- while (ubuflen) {
- chunk = c = n = 0;
- while (ubuflen) {
- if (get_user(c, ubuf++))
- return -EFAULT;
- ubuflen--;
- if (!c || c == ',')
- break;
- if (isspace(c))
- continue;
- if (!isxdigit(c))
- return -EINVAL;
- if (chunk & ~((1UL << (CHUNKSZ - 4)) - 1))
- return -EOVERFLOW;
- d = isdigit(c) ? (c - '0') : (toupper(c) - 'A' + 10);
- chunk = (chunk << 4) | d;
- n++; nt++;
- }
- if (n==0) {
- if (c == ',')
- return -EINVAL;
- if (!c)
- break;
+ char *buf; /* copy user's ubuf[] to here */
+ char *bp; /* strsep() cursor over buf[] */
+ unsigned maskbytes; /* bytes in nmaskbits, round to sizeof(u32) */
+ u32 *wordp; /* access output maskp as array of u32 */
+ char *p; /* scan comma separated input hex words */
+ int i, j; /* index output wordp[] */
+ int lwbits; /* num bits ok set in last word (0 == all) */
+ int ret; /* return value */
+
+ maskbytes = ROUND_UP(nmaskbits, CHUNKSZ)/CHUNKSZ;
+ if (ubuflen > maskbytes * MAX_HEX_PER_BYTE)
+ return -EINVAL;
+ buf = kmalloc(maskbytes * MAX_HEX_PER_BYTE + sizeof(NUL), GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ if (copy_from_user(buf, ubuf, ubuflen)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ buf[ubuflen] = NUL;
+
+ /*
+ * Put the words into wordp[] in big-endian order,
+ * make sure no bits above nmaskbits set in last word,
+ * and then go back and reverse the words.
+ */
+ wordp = (u32 *)maskp;
+ memset(maskp, 0, ROUND_UP(maskbytes, sizeof(*maskp)));
+ i = j = 0;
+ bp = buf;
+ while ((p = strsep(&bp, ",")) != NULL) {
+ unsigned long long t;
+ if (j == maskbytes/sizeof(u32)) {
+ ret = -EINVAL;
+ goto out;
}
- if (i >= nchunks)
- return -EOVERFLOW;
- if (i > 0 || chunk) {
- bitmap_shift_right(maskp, maskp, CHUNKSZ, nmaskbits);
- for (j = 0; j < 32; j++) {
- if (chunk & (1 << j)) {
- set_bit(j, maskp);
- }
- }
- i++;
- if (i == 1)
- firstchunk = chunk;
- if ( i == nchunks) {
- if (chunkoff && (firstchunk & chunkmask)) {
- /* input value > 2^nmaskbits -1 */
- return -EOVERFLOW;
- }
- }
+ t = simple_strtoull(p, 0, BASE);
+ if (t != (u32)t) {
+ ret = -EOVERFLOW;
+ goto out;
}
+ wordp[BIT32X(j++)] = t;
}
- if (nt == 0)
- return -EINVAL;
- return 0;
+ lwbits = nmaskbits % CHUNKSZ;
+ if (lwbits && (wordp[0] & ~((1<<lwbits) - 1))) {
+ ret = -EOVERFLOW;
+ goto out;
+ }
+ --j;
+ while (i < j) {
+ u32 t = wordp[BIT32X(i)];
+ wordp[BIT32X(i)] = wordp[BIT32X(j)];
+ wordp[BIT32X(j)] = t;
+ i++, --j;
+ }
+ ret = 0;
+ out:
+ kfree(buf);
+ return ret;

seperator error in __mask_snprintf_len

Joe Korty

Paul Jackson

Paul Jackson

Andrew Morton

Joe Korty

Paul Jackson

Paul Jackson

Joe Korty

Andrew Morton

Paul Jackson

Paul Jackson

Paul Jackson

Andrew Morton

William Lee Irwin III

Andi Kleen

Andrew Morton

Andi Kleen

Joe Korty

Matthew Dobson

Paul Jackson

Paul Jackson