the Tcl language has the [lsort] command with `dictionary'
sorting for strings; it means that when two strings have an
embedded number, the numbers are considered as integers by
the comparison function; so "ciao3" sorts before "ciao10".
I wonder if someone has already written such comparison
function in pure Scheme and is distributing it under a Libre
license, so that I can shamelessly include it in my
libraries. :-)
--
Marco Maggi
I made the function below for you. I place it in the public domain.
I don't know if it does exactly what you want.
(define (my-string<? a b)
(define str-num cons)
(define str-num-str car)
(define str-num-num cdr)
(define (->string x)
(if (string? x) x (str-num-str x)))
(define (parts x)
(define (digit? c)
(memv c (string->list "0123456789")))
(let loop ((x (reverse (string->list x)))
(str (list))
(num (list))
(accum (list)))
(define (accum-str)
(if (null? str)
accum
(cons (list->string str) accum)))
(define (accum-num)
(if (null? num)
accum
(let ((s (list->string num)))
(cons (str-num s (string->number s))
accum))))
(cond
((null? x)
(cond ((not (null? str))
(assert (null? num))
(accum-str))
((not (null? num))
(assert (null? str))
(accum-num))
(else accum)))
((digit? (car x))
(loop (cdr x)
(list)
(cons (car x) num)
(accum-str)))
(else
(loop (cdr x)
(cons (car x) str)
(list)
(accum-num))))))
(let loop ((a (parts a))
(b (parts b)))
(cond
((null? a)
(not (null? b)))
((null? b) #F)
((and (string? (car a))
(string? (car b)))
(if (string=? (car a) (car b))
(loop (cdr a) (cdr b))
(string<? (car a) (car b))))
((and (not (string? (car a)))
(not (string? (car b))))
(if (= (str-num-num (car a))
(str-num-num (car b)))
(loop (cdr a) (cdr b))
(< (str-num-num (car a))
(str-num-num (car b)))))
(else
(string<? (->string (car a))
(->string (car b)))))))
> (my-string<? "123" "45")
#F
> (my-string<? "ciao3" "ciao10")
#T
> (my-string<? "foo4bar3zab10" "foo4bar3zab2")
#F
> (my-string<? "foo4bar3zab" "foo4bar10")
#T
> (my-string<? "foo12" "12foo")
#F
> (my-string<? "12bar" "foobar")
#T
> (my-string<? "12.3" "12.10")
#T
> (my-string<? "12.3" "12,10")
#F
> (list-sort my-string<? (quote ("foo123" "foo42" "foo7")))
("foo7" "foo42" "foo123")
--
: Derick
----------------------------------------------------------------
It looks like it does. Thanks!
--
Marco Maggi
(import (rnrs)
(checks))
(define (my-string<? a b)
(define (%string->parts input-string)
;;Split a string into the list of its parts. Example:
;;
;; "foo4bar3zab10"
;;
;;becomes:
;;
;; ("foo" ("4" . 4) "bar" ("3" . 3) "zab" ("10" . 10))
;;
(let loop ((chars (reverse (string->list input-string)))
(str '())
(num '())
(parts '()))
(define (%accumulate-string-part)
(if (null? str)
parts
(cons (list->string str) parts)))
(define (%accumulate-number-part)
(if (null? num)
parts
(let ((s (list->string num)))
(cons `(,s . ,(string->number s)) parts))))
(cond ((null? chars)
(cond ((not (null? str))
(assert (null? num))
(%accumulate-string-part))
((not (null? num))
(assert (null? str))
(%accumulate-number-part))
(else parts)))
((memv (car chars) '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9))
(loop (cdr chars)
'()
(cons (car chars) num)
(%accumulate-string-part)))
(else
(loop (cdr chars)
(cons (car chars) str)
'()
(%accumulate-number-part))))))
(let loop ((a (%string->parts a))
(b (%string->parts b)))
(cond ((null? a)
(not (null? b)))
((null? b) #f)
((and (string? (car a))
(string? (car b)))
(if (string=? (car a) (car b))
(loop (cdr a) (cdr b))
(string<? (car a) (car b))))
((and (not (string? (car a)))
(not (string? (car b))))
(let ((num-a (cdar a))
(num-b (cdar b)))
(if (= num-a num-b)
(loop (cdr a) (cdr b))
(< num-a num-b))))
((string? (car a))
(string<? (car a) (caar b)))
(else
(string<? (caar a) (car b))))))
(check-set-mode! 'report-failed)
(check
(my-string<? "" "")
=> #f)
(check
(my-string<? "a" "")
=> #f)
(check
(my-string<? "" "a")
=> #t)
(check
(my-string<? "a" "a")
=> #f)
(check
(my-string<? "1" "")
=> #f)
(check
(my-string<? "" "1")
=> #t)
(check
(my-string<? "1" "1")
=> #f)
(check
(my-string<? "1" "2")
=> #t)
(check
(my-string<? "2" "1")
=> #f)
(check
(my-string<? "a" "ab")
=> #t)
(check
(my-string<? "ab" "a")
=> #f)
(check
(my-string<? "a" "a1")
=> #t)
(check
(my-string<? "a1" "a")
=> #f)
(check
(my-string<? "1" "1a")
=> #t)
(check
(my-string<? "1a" "1")
=> #f)
;;; --------------------------------------------------------------------
(check
(my-string<? "123" "45")
=> #f)
(check
(my-string<? "45" "123")
=> #t)
(check
(my-string<? "ciao3" "ciao10")
=> #t)
(check
(my-string<? "ciao10" "ciao3")
=> #f)
(check
(my-string<? "foo4bar3zab10" "foo4bar3zab2")
=> #f)
(check
(my-string<? "foo4bar3zab2" "foo4bar3zab10")
=> #t)
(check
(my-string<? "foo4bar3zab" "foo4bar10")
=> #t)
(check
(my-string<? "foo4bar10" "foo4bar3zab")
=> #f)
(check
(my-string<? "foo12" "12foo")
=> #f)
(check
(my-string<? "12foo" "foo12")
=> #t)
(check
(my-string<? "12bar" "foobar")
=> #t)
(check
(my-string<? "12.3" "12.10")
=> #t)
(check
(my-string<? "12.10" "12.3")
=> #f)
(check
(my-string<? "12.3" "12,10")
=> #f)
(check
(my-string<? "12,10" "12.3")
=> #t)
(check
(list-sort my-string<? (quote ("foo123" "foo42" "foo7")))
=> '("foo7" "foo42" "foo123"))
(check-report)
--
Marco Maggi
Hahaha :) I'm going through a phase of trying no-reader-sugar--extra-
parens--assume-compilers-are-sophisticated style.
> (import (rnrs)
> (checks))
What the hell is this (checks) library!? Whose is it? It doesn't
tell us. What if Jack and Jill also make (checks) libraries? You are
making us have to endlessly reconfigure our search paths to use
different same-named libraries and making it impossible to use
multiple such libraries in the same Scheme system instance. I will be
boycotting such no-unique-global-namespace libraries...
--
: Derick
----------------------------------------------------------------
It is whatever lightweight testing SRFI is on your system.
Wait... what's its number? 23? No. 82? Neither. 49!
Nope. I have to start the browser and do a search... ;-)
--
Marco Maggi
That's not related to the issues I'm talking about. I agree
remembering SRFI numbers can be annoying (but I understand why they're
necessary, and I always remember (srfi :78 lightweight-testing)
because I use it frequently).
The issues are the three I said:
On Nov 16, 12:31 am, Derick Eddington <derick.edding...@gmail.com>
wrote:
> Whose is it? It doesn't
> tell us. What if Jack and Jill also make (checks) libraries? You are
> making us have to endlessly reconfigure our search paths to use
> different same-named libraries and making it impossible to use
> multiple such libraries in the same Scheme system instance.
I suggest something like: (nausicaa checks)
--
: Derick
----------------------------------------------------------------
I compared the execution times of your changes, incrementally, to my
original function.
Changing
(define (digit? c)
(memv c (string->list "0123456789")))
to
(define (digit? c)
(memv c (quote (#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9))))
makes a huge improvement with Ikarus and Larceny. I knew the string-
>list would really slow it down if it was not optimized to be the same
as the other. It looks like neither Ikarus nor Larceny do. I knew
string->list is specified to "return a newly allocated list" and that
makes optimizing it more challenging, and I was hoping analysis would
see the list goes into and out of the memv and then it goes to an if
test and so it cannot possibly need to be newly allocated and so the
string->list can be optimized.
I also tried changing it to
(define (digit? c) (char<=? #\0 c #\9))
but that doesn't make a difference compared to the memv on the
quote'ed list. I didn't use char<=? because I didn't want to assume
any character ordering.
Your other changes don't make a difference.
--
: Derick
----------------------------------------------------------------
Yes. The "Chapter Ordering" package in SLIB does:
http://people.csail.mit.edu/jaffer/slib_7.html#SEC223
> (require 'chapter-order)
;loading /home/jaffer/slib/chap
;done loading /home/jaffer/slib/chap.scm
#<unspecified>
> (chap:string<? "ciao3" "ciao10")
#t
I don't know enough Scheme to convert this from Ruby.
Perhaps someone else can make it work.
def split_into_strings_and_ints str
tmp = str.split( /(\d+)/ )
# The odd-numbered strings contain numerals. Convert to integers.
tmp.each_index{|i| tmp[i] = tmp[i].to_i if i.odd? }
tmp
end
list = %w(zzz foo7 foo42 foo123 8bar 22bar foo33bar88 foo44bar99
foo44bar222 aaa)
p list.map{|s| split_into_strings_and_ints( s ) }
puts list.sort_by{|s| split_into_strings_and_ints( s ) }
=== output ===
[["zzz"], ["foo", 7], ["foo", 42], ["foo", 123], ["", 8, "bar"],
["", 22, "bar"], ["foo", 33, "bar", 88], ["foo", 44, "bar", 99],
["foo", 44, "bar", 222], ["aaa"]]
8bar
22bar
aaa
foo7
foo33bar88
foo42
foo44bar99
foo44bar222
foo123
zzz
--
>Marco Maggi wrote:
>> the Tcl language has the [lsort] command with `dictionary'
>> sorting for strings; it means that when two strings have an
>> embedded number, the numbers are considered as integers by
>> the comparison function; so "ciao3" sorts before "ciao10".
>>
>> I wonder if someone has already written such comparison
>> function in pure Scheme and is distributing it under a Libre
>> license, so that I can shamelessly include it in my
>> libraries. :-)
>I don't know enough Scheme to convert this from Ruby.
>Perhaps someone else can make it work.
Try this on for size. It's a little basic in that I'm not using any
extra features of Scheme except for LET-VALUES. It's probably better to
use a proper abstraction here, but *shrug*. It works for the simple
tests. It's not robust, but you get the idea.
Aaron W. Hsu
(define (lexi-sort strings)
(list-sort lexi-sort<?
(map split-strings/ints strings)))
(define (lexi-sort<? a b)
(let loop ([a a] [b b])
(cond
[(not (pair? a)) (if (pair? b) #t #f)]
[(not (pair? b)) (if (pair? a) #f #t)]
[(string? (car a))
(if (string? (car b))
(if (string=? (car a) (car b))
(loop (cdr a) (cdr b))
(string<? (car a) (car b)))
#t)]
[(string? (car b)) #f]
[else
(if (= (car a) (car b))
(loop (cdr a) (cdr b))
(< (car a) (car b)))])))
(define (split-strings/ints str)
(define str-len (string-length str))
(define (grab-string i)
(let loop ([res '()] [j i])
(cond
[(negative? j) (values (list->string res) j)]
[(char-numeric? (string-ref str j))
(values (list->string res) j)]
[else (loop (cons (string-ref str j) res) (- j 1))])))
(define (grab-int i)
(let loop ([n 0] [place 1] [j i])
(cond
[(negative? j) (values n j)]
[(char-numeric? (string-ref str j))
(loop (+ n (* place (int-val (string-ref str j))))
(* place 10)
(- j 1))]
[else (values n j)])))
(let loop ([res '()] [i (- str-len 1)])
(cond
[(negative? i) res]
[(char-numeric? (string-ref str i))
(let-values ([(n j) (grab-int i)])
(loop (cons n res) j))]
[else
(let-values ([(seq j) (grab-string i)])
(loop (cons seq res) j))])))
(define (int-val x)
(- (char->integer x) (char->integer #\0)))
--
A professor is one who talks in someone else's sleep.
Bigloo Scheme has string-natural-compare3:
1:=> (string-natural-compare3 "Chapter 8" "Chapter 22")
-1
1:=> (string-natural-compare3 "Chapter 88" "Chapter 22")
1
1:=> (string-natural-compare3 "Chapter 88" "Chapter 88")
0
1:=> (sort (lambda(a b)(negative?(string-natural-compare3 a b)))
'("ciao10" "ciao3"))
(ciao3 ciao10)
--
Using the pregexp package and Bigloo:
; Unlike split, this includes the separators in the list.
; (Warning: the text must not contain "\001".)
(define (shatter splitter text)
(pregexp-split "\001"
(pregexp-replace* splitter text "\001\\&\001")))
; Returns 0 if equal; negative integer if a<b;
; otherwise positive integer.
(define (string<=> a b)
(if (string=? a b) 0 (if (string<? a b) -1 1)))
; If the strings can be converted to integers, they are compared as
; numbers. Returns 0 if equal; negative integer if a<b; otherwise
; positive integer.
(define (compare-as-string-or-number a b)
(let [(aa (string->number a))]
(if aa
(- aa (string->number b))
(string<=> a b))))
; Returns 0 if equal; negative integer if list1<list2; otherwise,
; positive integer.
(define (string-list-natural<=> list1 list2)
(let [(len (min (length list1) (length list2)))]
(or
(any (lambda (a b)
(let [(n (compare-as-string-or-number a b))]
(and (not (zero? n)) n)))
(take list1 len) (take list2 len))
(- (length list1) (length list2)))))
; Runs of numerals in the strings are compared as numbers.
; Returns 0 if equal; negative integer if a<b; otherwise,
; positive integer.
(define (string-natural<=> a b)
(string-list-natural<=>
(shatter "\\d+" a)
(shatter "\\d+" b)))
;;; Test.
(pp (sort (lambda (a b) (negative? (string-natural<=> a b)))
(list "zz" "z" "a8" "a22" "aa8" "aa22" "aa" "a"
"foo8bar" "foo22bar" "foo8bar2" "foo22bar55" "foo22bar8"
"4.22.100.7" "4.22.6.7" "4.5.6.7"
)))
=== output ===
("4.5.6.7"
"4.22.6.7"
"4.22.100.7"
"a"
"a8"
"a22"
"aa"
"aa8"
"aa22"
"foo8bar"
"foo8bar2"
"foo22bar"
"foo22bar8"
"foo22bar55"
"z"
"zz")
--