git: 27e60668bf29 - stable/13 - libc: Add strverscmp(3) and versionsort(3)

23 views
Skip to first unread message

Konstantin Belousov

unread,
Aug 30, 2022, 9:33:06 PM8/30/22
to src-com...@freebsd.org, dev-commi...@freebsd.org, dev-commits-...@freebsd.org
The branch stable/13 has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=27e60668bf29dc4da3409168967974e7ee3ffddc

commit 27e60668bf29dc4da3409168967974e7ee3ffddc
Author: Aymeric Wibo <obi...@gmail.com>
AuthorDate: 2022-08-24 23:20:13 +0000
Commit: Konstantin Belousov <k...@FreeBSD.org>
CommitDate: 2022-08-31 01:20:28 +0000

libc: Add strverscmp(3) and versionsort(3)

(cherry picked from commit 05c9a0158f6837bb3a3781e4ed75f66115f6415a)
---
include/dirent.h | 1 +
include/string.h | 1 +
lib/libc/gen/Makefile.inc | 3 +-
lib/libc/gen/Symbol.map | 1 +
lib/libc/gen/scandir.3 | 22 +++++++-
lib/libc/gen/scandir.c | 7 +++
lib/libc/string/Makefile.inc | 4 +-
lib/libc/string/Symbol.map | 1 +
lib/libc/string/strverscmp.3 | 56 ++++++++++++++++++++
lib/libc/string/strverscmp.c | 91 ++++++++++++++++++++++++++++++++
lib/libc/tests/string/Makefile | 5 +-
lib/libc/tests/string/strverscmp_test.c | 93 +++++++++++++++++++++++++++++++++
12 files changed, 278 insertions(+), 7 deletions(-)

diff --git a/include/dirent.h b/include/dirent.h
index 047206258471..751a016838c0 100644
--- a/include/dirent.h
+++ b/include/dirent.h
@@ -108,6 +108,7 @@ int alphasort(const struct dirent **, const struct dirent **);
int dirfd(DIR *);
#endif
#if __BSD_VISIBLE
+int versionsort(const struct dirent **, const struct dirent **);
DIR *__opendir2(const char *, int);
int fdclosedir(DIR *);
ssize_t getdents(int, char *, size_t);
diff --git a/include/string.h b/include/string.h
index afa90c3f2536..29683eefa15b 100644
--- a/include/string.h
+++ b/include/string.h
@@ -81,6 +81,7 @@ char *strcat(char * __restrict, const char * __restrict);
char *strchr(const char *, int) __pure;
#if __BSD_VISIBLE
char *strchrnul(const char*, int) __pure;
+int strverscmp(const char *, const char *) __pure;
#endif
int strcmp(const char *, const char *) __pure;
int strcoll(const char *, const char *);
diff --git a/lib/libc/gen/Makefile.inc b/lib/libc/gen/Makefile.inc
index a8037564a095..6b1588b4f781 100644
--- a/lib/libc/gen/Makefile.inc
+++ b/lib/libc/gen/Makefile.inc
@@ -495,7 +495,8 @@ MLINKS+=rand48.3 _rand48.3 \
MLINKS+=recv.2 recvmmsg.2
MLINKS+=scandir.3 alphasort.3 \
scandir.3 scandirat.3 \
- scandir.3 scandir_b.3
+ scandir.3 scandir_b.3 \
+ scandir.3 versionsort.3
MLINKS+=sem_open.3 sem_close.3 \
sem_open.3 sem_unlink.3
MLINKS+=sem_wait.3 sem_trywait.3
diff --git a/lib/libc/gen/Symbol.map b/lib/libc/gen/Symbol.map
index 74676ffe2270..0a20a41d0e20 100644
--- a/lib/libc/gen/Symbol.map
+++ b/lib/libc/gen/Symbol.map
@@ -443,6 +443,7 @@ FBSD_1.7 {
sched_getaffinity;
sched_setaffinity;
sched_getcpu;
+ versionsort;
__cpuset_alloc;
__cpuset_free;
};
diff --git a/lib/libc/gen/scandir.3 b/lib/libc/gen/scandir.3
index b533b33ce7a7..07d8074ae592 100644
--- a/lib/libc/gen/scandir.3
+++ b/lib/libc/gen/scandir.3
@@ -35,7 +35,8 @@
.Nm scandir ,
.Nm scandirat ,
.Nm scandir_b ,
-.Nm alphasort
+.Nm alphasort ,
+.Nm versionsort
.Nd scan a directory
.Sh LIBRARY
.Lb libc
@@ -65,6 +66,8 @@
.Fc
.Ft int
.Fn alphasort "const struct dirent **d1" "const struct dirent **d2"
+.Ft int
+.Fn versionsort "const struct dirent **d1" "const struct dirent **d2"
.Sh DESCRIPTION
The
.Fn scandir
@@ -106,6 +109,13 @@ is a routine which can be used for the
argument to sort the array alphabetically using
.Xr strcoll 3 .
.Pp
+The
+.Fn versionsort
+function is a routine which can be used for the
+.Fa compar
+argument to sort the array naturally using
+.Xr strverscmp 3 .
+.Pp
The memory allocated for the array can be deallocated with
.Xr free 3 ,
by freeing each pointer in the array and then the array itself.
@@ -161,7 +171,12 @@ cannot allocate enough memory to hold all the data structures.
.Xr malloc 3 ,
.Xr qsort 3 ,
.Xr strcoll 3 ,
+.Xr strverscmp 3 ,
.Xr dir 5
+.Sh STANDARDS
+The
+.Fn versionsort
+function is a GNU extension and conforms to no standard.
.Sh HISTORY
The
.Fn scandir
@@ -171,5 +186,8 @@ functions appeared in
.Bx 4.2 .
The
.Fn scandirat
-function was added in
+and
+.Fn
+versionsort
+functions were added in
.Fx 14.0 .
diff --git a/lib/libc/gen/scandir.c b/lib/libc/gen/scandir.c
index 3a891b0ad3f2..8a260adcd2f3 100644
--- a/lib/libc/gen/scandir.c
+++ b/lib/libc/gen/scandir.c
@@ -191,6 +191,13 @@ alphasort(const struct dirent **d1, const struct dirent **d2)
return (strcoll((*d1)->d_name, (*d2)->d_name));
}

+int
+versionsort(const struct dirent **d1, const struct dirent **d2)
+{
+
+ return (strverscmp((*d1)->d_name, (*d2)->d_name));
+}
+
static int
alphasort_thunk(void *thunk, const void *p1, const void *p2)
{
diff --git a/lib/libc/string/Makefile.inc b/lib/libc/string/Makefile.inc
index 6945155812af..b06731386c41 100644
--- a/lib/libc/string/Makefile.inc
+++ b/lib/libc/string/Makefile.inc
@@ -16,7 +16,7 @@ MISRCS+=bcmp.c bcopy.c bzero.c explicit_bzero.c \
strcspn.c strdup.c strerror.c strlcat.c strlcpy.c strlen.c strmode.c \
strncat.c strncmp.c strncpy.c strndup.c strnlen.c strnstr.c \
strpbrk.c strrchr.c strsep.c strsignal.c strspn.c strstr.c strtok.c \
- strxfrm.c swab.c \
+ strverscmp.c strxfrm.c swab.c \
timingsafe_bcmp.c \
timingsafe_memcmp.c \
wcpcpy.c wcpncpy.c wcscasecmp.c wcscat.c \
@@ -37,7 +37,7 @@ MAN+= bcmp.3 bcopy.3 bstring.3 bzero.3 ffs.3 index.3 memccpy.3 memchr.3 \
memcmp.3 memcpy.3 memmem.3 memmove.3 memset.3 strcasecmp.3 strcat.3 \
strchr.3 strcmp.3 strcoll.3 strcpy.3 strdup.3 strerror.3 \
string.3 strlcpy.3 strlen.3 strmode.3 strpbrk.3 strsep.3 \
- strspn.3 strstr.3 strtok.3 strxfrm.3 swab.3 \
+ strspn.3 strstr.3 strtok.3 strverscmp.3 strxfrm.3 swab.3 \
timingsafe_bcmp.3 \
wcscoll.3 wcstok.3 \
wcswidth.3 wcsxfrm.3 wmemchr.3
diff --git a/lib/libc/string/Symbol.map b/lib/libc/string/Symbol.map
index ec45d7fd7ddb..4fcd194bafd8 100644
--- a/lib/libc/string/Symbol.map
+++ b/lib/libc/string/Symbol.map
@@ -116,6 +116,7 @@ FBSD_1.6 {

FBSD_1.7 {
mempcpy;
+ strverscmp;
wmempcpy;
};

diff --git a/lib/libc/string/strverscmp.3 b/lib/libc/string/strverscmp.3
new file mode 100644
index 000000000000..e4413fb96e36
--- /dev/null
+++ b/lib/libc/string/strverscmp.3
@@ -0,0 +1,56 @@
+.\" SPDX-License-Identifier: BSD-2-Clause
+.\" Copyright (c) 2022 Aymeric Wibo <obi...@gmail.com>
+.Dd July 11, 2022
+.Dt STRVERSCMP 3
+.Os
+.Sh NAME
+.Nm strverscmp
+.Nd compare strings according to natural order
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In string.h
+.Ft int
+.Fn strverscmp "const char *s1" "const char *s2"
+.Sh DESCRIPTION
+The
+.Fn strverscmp
+function
+compares the null-terminated strings
+.Fa s1
+and
+.Fa s2
+according to their natural order
+and returns an integer greater than, equal to, or less than 0,
+depending on whether
+.Fa s1
+is greater than, equal to, or less than
+.Fa s2 .
+.Pp
+More specifically, this natural order is found by iterating over both
+strings until a difference is found.
+If the difference is between non-decimal characters,
+.Fn strverscmp
+acts like
+.Xr strcmp 3
+(thus, the ordering would be "a", "b", "train").
+If a decimal digit is found, the whole number is read and compared
+(thus, the ordering would be "9", "10", "420" which is different to lexicographic order,
+what
+.Xr strcmp 3
+would have done).
+Numbers with leading zeroes are interpreted as fractional parts (even without a decimal point),
+and numbers with more leading zeroes are placed before numbers with fewer leading zeroes
+(thus, the ordering would be "000", "00", "01", "010", "09", "0", "1", "9", "10").
+.Sh SEE ALSO
+.Xr strcmp 3 ,
+.Xr versionsort 3
+.Sh STANDARDS
+The
+.Fn strverscmp
+function is a GNU extension and conforms to no standard.
+.Sh HISTORY
+The
+.Fn strverscmp
+function was added in
+.Fx 14.0 .
diff --git a/lib/libc/string/strverscmp.c b/lib/libc/string/strverscmp.c
new file mode 100644
index 000000000000..6051adb35499
--- /dev/null
+++ b/lib/libc/string/strverscmp.c
@@ -0,0 +1,91 @@
+/*-
+* SPDX-License-Identifier: BSD-2-Clause
+* Copyright (c) 2022 Aymeric Wibo <obi...@gmail.com>
+*/
+
+#include <ctype.h>
+#include <stddef.h>
+
+int
+strverscmp(const char *s1, const char *s2)
+{
+ size_t digit_count_1, digit_count_2;
+ size_t zeros_count_1, zeros_count_2;
+ const unsigned char *num_1, *num_2;
+ const unsigned char *u1 = __DECONST(const unsigned char *, s1);
+ const unsigned char *u2 = __DECONST(const unsigned char *, s2);
+
+ /*
+ * If pointers are the same, no need to go through to process of
+ * comparing them.
+ */
+ if (s1 == s2)
+ return (0);
+
+ while (*u1 != '\0' && *u2 != '\0') {
+ /* If either character is not a digit, act like strcmp(3). */
+
+ if (!isdigit(*u1) || !isdigit(*u2)) {
+ if (*u1 != *u2)
+ return (*u1 - *u2);
+ u1++;
+ u2++;
+ continue;
+ }
+ if (*u1 == '0' || *u2 == '0') {
+ /*
+ * Treat leading zeros as if they were the fractional
+ * part of a number, i.e. as if they had a decimal point
+ * in front. First, count the leading zeros (more zeros
+ * == smaller number).
+ */
+ zeros_count_1 = 0;
+ zeros_count_2 = 0;
+ for (; *u1 == '0'; u1++)
+ zeros_count_1++;
+ for (; *u2 == '0'; u2++)
+ zeros_count_2++;
+ if (zeros_count_1 != zeros_count_2)
+ return (zeros_count_2 - zeros_count_1);
+
+ /* Handle the case where 0 < 09. */
+ if (!isdigit(*u1) && isdigit(*u2))
+ return (1);
+ if (!isdigit(*u2) && isdigit(*u1))
+ return (-1);
+ } else {
+ /*
+ * No leading zeros; we're simply comparing two numbers.
+ * It is necessary to first count how many digits there
+ * are before going back to compare each digit, so that
+ * e.g. 7 is not considered larger than 60.
+ */
+ num_1 = u1;
+ num_2 = u2;
+
+ /* Count digits (more digits == larger number). */
+ for (; isdigit(*u1); u1++)
+ ;
+ for (; isdigit(*u2); u2++)
+ ;
+ digit_count_1 = u1 - num_1;
+ digit_count_2 = u2 - num_2;
+ if (digit_count_1 != digit_count_2)
+ return (digit_count_1 - digit_count_2);
+
+ /*
+ * If there are the same number of digits, go back to
+ * the start of the number.
+ */
+ u1 = num_1;
+ u2 = num_2;
+ }
+
+ /* Compare each digit until there are none left. */
+ for (; isdigit(*u1) && isdigit(*u2); u1++, u2++) {
+ if (*u1 != *u2)
+ return (*u1 - *u2);
+ }
+ }
+ return (*u1 - *u2);
+}
diff --git a/lib/libc/tests/string/Makefile b/lib/libc/tests/string/Makefile
index c6a98572564d..eacf7e15c27c 100644
--- a/lib/libc/tests/string/Makefile
+++ b/lib/libc/tests/string/Makefile
@@ -4,10 +4,11 @@ ATF_TESTS_C+= memcmp_test
ATF_TESTS_C+= memset_s_test
ATF_TESTS_C+= stpncpy_test
ATF_TESTS_C+= strerror2_test
-ATF_TESTS_C+= wcscasecmp_test
-ATF_TESTS_C+= wcsnlen_test
+ATF_TESTS_C+= strverscmp_test
ATF_TESTS_C+= strxfrm_test
+ATF_TESTS_C+= wcscasecmp_test
ATF_TESTS_C+= wcscoll_test
+ATF_TESTS_C+= wcsnlen_test

# TODO: popcount, stresep

diff --git a/lib/libc/tests/string/strverscmp_test.c b/lib/libc/tests/string/strverscmp_test.c
new file mode 100644
index 000000000000..fd6a2620cb48
--- /dev/null
+++ b/lib/libc/tests/string/strverscmp_test.c
@@ -0,0 +1,93 @@
+/*-
+* SPDX-License-Identifier: BSD-2-Clause
+* Copyright (c) 2022 Aymeric Wibo <obi...@gmail.com>
+*/
+
+#include <atf-c.h>
+#include <string.h>
+
+static void
+check_all(size_t len, const char *ordered[len])
+{
+ const char *a, *b;
+
+ for (size_t i = 0; i < len; i++) {
+ for (size_t j = 0; j < len; j++) {
+ a = ordered[i];
+ b = ordered[j];
+
+ if (i == j)
+ ATF_CHECK_MSG(
+ strverscmp(a, b) == 0,
+ "strverscmp(\"%s\", \"%s\") == 0",
+ a, b
+ );
+ else if (i < j)
+ ATF_CHECK_MSG(
+ strverscmp(a, b) < 0,
+ "strverscmp(\"%s\", \"%s\") < 0",
+ a, b
+ );
+ else if (i > j)
+ ATF_CHECK_MSG(
+ strverscmp(a, b) > 0,
+ "strverscmp(\"%s\", \"%s\") > 0",
+ a, b
+ );
+ }
+ }
+}
+
+#define CHECK_ALL(...) do { \
+ const char *ordered[] = { __VA_ARGS__ }; \
+ check_all(sizeof(ordered) / sizeof(*ordered), ordered); \
+} while (0)
+
+ATF_TC_WITHOUT_HEAD(strcmp_functionality);
+ATF_TC_BODY(strcmp_functionality, tc)
+{
+ CHECK_ALL("", "a", "b");
+}
+
+/* from Linux man page strverscmp(3) */
+
+ATF_TC_WITHOUT_HEAD(vers_ordering);
+ATF_TC_BODY(vers_ordering, tc)
+{
+ CHECK_ALL("000", "00", "01", "010", "09", "0", "1", "9", "10");
+}
+
+ATF_TC_WITHOUT_HEAD(natural_ordering);
+ATF_TC_BODY(natural_ordering, tc)
+{
+ CHECK_ALL("jan1", "jan2", "jan9", "jan10", "jan11", "jan19", "jan20");
+}
+
+/* https://sourceware.org/bugzilla/show_bug.cgi?id=9913 */
+
+ATF_TC_WITHOUT_HEAD(glibc_bug_9913);
+ATF_TC_BODY(glibc_bug_9913, tc)
+{
+ CHECK_ALL(
+ "B0075022800016.gbp.corp.com",
+ "B007502280067.gbp.corp.com",
+ "B007502357019.GBP.CORP.COM"
+ );
+}
+
+ATF_TC_WITHOUT_HEAD(semver_ordering);
+ATF_TC_BODY(semver_ordering, tc)
+{
+ CHECK_ALL("2.6.20", "2.6.21");
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+ ATF_TP_ADD_TC(tp, strcmp_functionality);
+ ATF_TP_ADD_TC(tp, vers_ordering);
+ ATF_TP_ADD_TC(tp, natural_ordering);
+ ATF_TP_ADD_TC(tp, glibc_bug_9913);
+ ATF_TP_ADD_TC(tp, semver_ordering);
+
+ return (atf_no_error());
+}

Reply all
Reply to author
Forward
0 new messages