Feedback on idiomatic clojure

33 views
Skip to first unread message

Damon Snyder

unread,
Aug 18, 2010, 6:46:10 PM8/18/10
to Clojure
Hi Everyone,
I'm looking for some feedback on a small get-your-feet wet program I
wrote in clojure (my first). In particular, I hope to get feedback on
how to better make use of clojure's data structures and functional
programming in general.

Below is the toy program and the sample output. It reads in words from
the OS X dictionary file, catalogs the anagrams, and then prints out
the top ten by frequency. Although performance is not my primary
concern here, it is a bit slow. It takes about 15s on my mac book
pro.

As an example, is this the "proper" way to update multiple fields at
once in a nested hash-map:

(update-in (update-in anagrams [akey :count] inc) [akey :words] conj
word)))

I understand this as returning an copy of an updated copy of an
updated copy.

Any feedback would be appreciated. Thanks,
Damon

## begin
(defn f-to-seq[file]
(with-open [rdr (java.io.BufferedReader.
(java.io.FileReader. file))]
(doall (line-seq rdr))))

(defn str-sort[str]
(if (nil? str)
str
(String. (into-array (. Character TYPE) (sort str)))))

(defn str-to-lower[str]
(if (nil? str)
str
(.toLowerCase str)))


(defn anagram-add[anagrams akey word]
(if (empty? (get anagrams akey))
(assoc anagrams akey (hash-map :count 1, :words (list word)))
(update-in (update-in anagrams [akey :count] inc) [akey :words]
conj word)))

(defn word[seq] (first seq))

(defn build-anagrams[seq]
(loop [seq seq
akey (str-sort (str-to-lower (word seq)))
anagrams {}]
(if (empty? seq)
anagrams
(recur (rest seq)
(str-sort (str-to-lower (word (rest seq))))
(anagram-add anagrams akey (word seq))))))


(defn print-anagram[v]
(println (str (:count (first (rest v))) " " (first v) ": " (:words
(first (rest v))))))

(defn print-anagrams[ana]
(doseq [v ana]
(print-anagram v)))

(defn anagram-key[elem] (:count (first (rest elem))))

(def *words* (f-to-seq "/usr/share/dict/web2"))
(def *anagrams* (sort-by anagram-key (build-anagrams *words*)))
(print-anagrams (take 10 (reverse *anagrams*)))

## output
11 agnor: ("Ronga" "rogan" "organ" "orang" "Orang" "nagor" "groan"
"grano" "goran" "argon" "angor")
10 aelps: ("speal" "spale" "slape" "sepal" "saple" "salep" "Pales"
"Lepas" "lapse" "Elaps")
9 eerst: ("tsere" "terse" "stree" "stere" "steer" "reset" "reest"
"estre" "ester")
9 aeerst: ("teaser" "staree" "seater" "saeter" "reseat" "Eastre"
"easter" "Easter" "asteer")
9 aacinr: ("narica" "crania" "Crania" "carina" "Carian" "canari"
"Canari" "arnica" "acinar")
9 acert: ("trace" "recta" "react" "creta" "creat" "crate" "cater"
"carte" "caret")
8 ailr: ("rial" "rail" "lira" "liar" "lari" "Lari" "lair" "aril")
8 aelpt: ("tepal" "pleat" "plate" "petal" "pelta" "patel" "palet"
"leapt")
8 airst: ("Trias" "tisar" "tarsi" "stria" "stair" "sitar" "astir"
"arist")
8 aemrt: ("Trema" "trame" "terma" "tamer" "ramet" "metra" "mater"
"armet")

Nicolas Oury

unread,
Aug 19, 2010, 7:37:53 AM8/19/10
to clo...@googlegroups.com
A few remarks:

>
> ## begin
> (defn f-to-seq[file]
>  (with-open [rdr (java.io.BufferedReader.
>                    (java.io.FileReader. file))]
>    (doall (line-seq rdr))))
>
Do you need the doall?

> (defn str-sort[str]
>  (if (nil? str)
>    str
>  (String. (into-array (. Character TYPE) (sort str)))))
>

(and str


(String. (into-array (. Character TYPE) (sort str))))

(if str is nil this will return nil)


> (defn anagram-add[anagrams akey word]
>  (if (empty? (get anagrams akey))
>    (assoc anagrams akey (hash-map :count 1, :words (list word)))
>    (update-in (update-in anagrams [akey :count] inc) [akey :words]
> conj word)))
>

You could use a literal {:count 1 :words (list words)} instead of hash-map

I believe that (get anagrams akey) will give nil in case there is known.
If it is the case (or (get anagrams key) ...) will be more readable.

(update-in ...) =>

(-> anagrams
(update-in [akey :count] inc)
(update-in [akey :words] conj word))

I will have a look to the rest of the program later.

Best,

Nicolas.

Meikel Brandmeyer

unread,
Aug 19, 2010, 8:41:39 AM8/19/10
to Clojure
Hi,

here my turn. Comments inline. Hope this helps.

Sincerely
Meikel

(defn f-to-seq
[file]
(with-open [rdr (java.io.BufferedReader.
(java.io.FileReader. file))]
(doall (line-seq rdr))))
; Yes. doall is required here. Alternatively you can wrap the whole
thing
; in the with-open. Then you can process also larger files without
keeping
; all data in memory. YMMV.


(defn str-sort
[string]
(when string
(apply str (sort string))))
; One can also skip the when here. Then nil input returns an empty
string.
; Whether that is better depends on the usage of the return value.
YMMV.
; (if (nil? x) x ...) is usually written as (when x ...).

(defn str-to-lower
[string]
(when string
(.toLowerCase string)))

(defn anagram-add
[anagrams akey word]
(if (contains? anagrams akey)
(-> anagrams
(update-in [akey :count] inc)
(update-in [akey :words] conj word))
(assoc anagrams akey {:count 1 :words [word]})))
; I would prefer vectors over lists to store stuff. You get nice
; things for free, like O(1) appending, O(1) reverse, O(1) random
; access, ...

(defn word
[s]
(first s))
; or: (def word first)
; not needed for my solution

(def normalise (comp str-to-lower str-sort))

(defn build-anagrams
[words]
(reduce #(anagram-add %1 (normalise %2) %2) {} words))
; Looping with an accumulator, just returning the latter when the
; input is exhausted cries for reduce.

(defn print-anagram
[v]
(println (str (:count (second v)) " " (first v) ": " (:words (second
v)))))
; one could use destructuring to access things.

(defn print-anagram
[anagram]
(let [[normal-form {:keys [count words]}] anagram]
(println (str count " " normal-form ": " words))))

(defn print-anagrams
[ana]
(doseq [v ana]
(print-anagram v)))
; more descriptive names would be good, I guess

(defn anagram-key
[elem]
(- (:count (second elem))))
; the minus should take care of the reverse?

(def *words* (f-to-seq "/usr/share/dict/web2"))
(def *anagrams* (sort-by anagram-key (build-anagrams *words*)))
(print-anagrams (take 10 *anagrams*))

Justin Kramer

unread,
Aug 19, 2010, 10:10:19 AM8/19/10
to Clojure
Meikel's suggestions are all good and I would follow them.

There are a number of built-in functions you can take advantage of if
you're using 1.2 (they're also available in contrib for 1.1):

clojure.string/lower-case
clojure.java.io/reader -- obviates java.io.BufferedReader and friends:
(reader "/usr/share/dict/words")

As a point of comparison, I did a similar code kata recently. I've
posted it here, with a few lines added to show the top 10:

http://gist.github.com/537945

It runs in a couple seconds on my machine. For maximum speed,
transients might help.

Justin

Nicolas Oury

unread,
Aug 19, 2010, 4:14:46 PM8/19/10
to clojure
Damon reply to me and not the list, so I forward.

On Thu, Aug 19, 2010 at 9:09 PM, Damon Snyder <drsn...@gmail.com> wrote:
> Hi Nicolas,
> Thanks for the suggestions. Regarding the first one: ah, I see. That
> is a nice compact way to test to see if the str is nil. I added that

I reckon that Meikel's suggestion of using when is surely better in
this situation (for a and).
But the (or something? default) or (or known? recompute?) can be useful.

> in because I was getting null pointer exceptions when the string was
> null. What is the difference between {:count 1 :words (list words)}
> and a hash-map? I was under the impression that {} created a hash.
>
I just find it easier to read because it looks like a value and not a
function call.

Meikel Brandmeyer

unread,
Aug 19, 2010, 4:23:31 PM8/19/10
to clo...@googlegroups.com
Hi.

Am 19.08.2010 um 22:14 schrieb Nicolas Oury:

>> in because I was getting null pointer exceptions when the string was
>> null. What is the difference between {:count 1 :words (list words)}
>> and a hash-map? I was under the impression that {} created a hash.
>>
> I just find it easier to read because it looks like a value and not a
> function call.

Another difference is that in this case it creates not a hash map but an array map. Rule of thumb: up to 8 keys => array map, more than 8 => hash map. The array map is simply a short alist, not a full blown hash map. For such small maps this is faster than pulling at the hash cannon. So the difference is not limited to syntax alone.

Sincerely
Meikel

Damon Snyder

unread,
Aug 20, 2010, 12:39:44 AM8/20/10
to Clojure
Hi Meikel, Nicolas, and Justin,
Thank you for the great feedback! I learned a lot. I was puzzled about
(update-in (update-in)) and after doing that the -> operator makes a
lot of sense. The reduce is clever and fits nicely as well.

I dropped the function that read in the lines of the file and used
read-lines instead. Below is what I put together after the feedback.

Thanks,
Damon

PS- I was mistaken before. The first version was taking over a minute.
This version takes about 15s.

(ns clojure.example.anagrams
(:require clojure.contrib.duck-streams)
(:gen-class))

(defn str-sort[string]
(when string
(apply str (sort string))))


(defn str-to-lower[string]
(when string
(.toLowerCase string)))


(defn anagram-add[anagrams akey word]
(if (contains? anagrams akey)
(-> anagrams
(update-in [akey :count] inc)
(update-in [akey :words] conj word))
(assoc anagrams akey {:count 1 :words [word]})))

(def normalise (comp str-to-lower str-sort))

(defn build-anagrams[words]
(reduce #(anagram-add %1 (normalise %2) %2) {} words))


(defn print-anagram[v]
(println (str (:count (second v)) " " (first v) ": " (:words (second
v)))))

(defn print-anagrams[ana]
(doseq [v ana]
(print-anagram v)))

(defn anagram-key[elem]
(- (:count (second elem))))


;(def *words* (f-to-seq "/usr/share/dict/web2"))
;(def *anagrams* (sort-by anagram-key (build-anagrams *words*)))
;(print-anagrams (take 10 *anagrams*))

(defn -main[file]
(time (print-anagrams
(take 10
(sort-by anagram-key
(build-anagrams
(clojure.contrib.duck-streams/read-lines
file)))))))

Btsai

unread,
Aug 20, 2010, 12:14:07 PM8/20/10
to Clojure
I believe duck-streams is deprecated since clojure 1.2. You may want
to consider bringing back f-to-seq, which can be simplified slightly
using reader from clojure.java.io:

(ns clojure.example.anagrams
(:use [clojure.java.io :only (reader)])
(:gen-class))

(defn f-to-seq [file]
(with-open [rdr (reader file)]
(doall (line-seq rdr))))
Reply all
Reply to author
Forward
0 new messages