about summary refs log tree commit diff
diff options
context:
space:
mode:
authorErik Osheim <d_m@plastic-idolatry.com>2015-03-15 01:37:15 -0400
committerErik Osheim <d_m@plastic-idolatry.com>2015-03-15 01:37:15 -0400
commit573294ba752823d2fb6a708acf2533a6e14752c9 (patch)
tree0877789d068a84acd6845a8bb34d4c263310859e
parent08cadfb6d3bf39d43a05db6e14d6b43bcad9ee60 (diff)
Add code to disambiguate pronouns dynamically.
This commit improves the display code so that the
smallest unambiguous abbreviation is calculated
based on the contents of pronouns.tab.
-rw-r--r--src/pronouns/pages.clj12
-rw-r--r--src/pronouns/util.clj29
2 files changed, 34 insertions, 7 deletions
diff --git a/src/pronouns/pages.clj b/src/pronouns/pages.clj
index d7ef9fc..eb0253c 100644
--- a/src/pronouns/pages.clj
+++ b/src/pronouns/pages.clj
@@ -90,16 +90,14 @@
       (take 5 inputs)
       (u/table-lookup inputs pronouns-table))))
 
-;; we could choose to display the entire row for the label.
-;; currently the first two entries are enough to disambiguate the
-;; pronouns -- will that always be true?
-(defn make-link [row]
-  (let [link (str "/" (s/join "/" row))
-        label (str (first row) "/" (first (rest row)))]
+(defn make-link [pair]
+  (let [link (str "/" (s/join "/" (second pair)))
+        label (str (s/join "/" (first pair)))]
     [:li [:a {:href link} label]]))
 
 (defn front [pronouns-table]
-  (let [links (map make-link (sort pronouns-table))
+  (let [abbreviations (u/abbreviate (sort pronouns-table))
+        links (map make-link abbreviations)
         title "Pronoun Island"]
     (html
      [:html
diff --git a/src/pronouns/util.clj b/src/pronouns/util.clj
index 1269664..1d4c09a 100644
--- a/src/pronouns/util.clj
+++ b/src/pronouns/util.clj
@@ -14,3 +14,32 @@
 (defn tabfile-lookup
   [query-key tabfile]
   (table-lookup query-key (slurp-tabfile tabfile)))
+
+; given a row and its lexically-closest neighbors,
+; determine the smallest abbreviation which is still
+; distinct.
+(defn disambiguate
+  [prev row next]
+  (loop [n 1]
+    (let [row-n (take n row)]
+      (cond
+        (>= n 5) row
+        (= row-n (take n prev)) (recur (+ n 1))
+        (= row-n (take n next)) (recur (+ n 1))
+        :else row-n))))
+
+; given a list of pronoun rows, return a list of
+; pairs, where the first item is the abbreviation
+; and the second is the original pronoun row.
+(defn abbreviate
+  [sorted-table]
+  (loop [acc nil
+         prev nil
+         row (first sorted-table)
+         todo (rest sorted-table)]
+    (let [next (first todo)
+          abbrev (disambiguate prev row next)
+          pair (list abbrev row)
+          acc2 (conj acc pair)]
+      (if (empty? todo) (reverse acc2)
+          (recur acc2 row next (rest todo))))))