about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMorgan <m@morganastra.me>2015-03-15 02:07:06 -0700
committerMorgan <m@morganastra.me>2015-03-15 02:07:06 -0700
commit25bab46051adf02b9d4a82077ff8e69ed219c6aa (patch)
treecc46e5e261fffdd7648f5eb9a8651fa5bc49b72a
parent398d0e1958b1f357f650146a5fc1942adf033de6 (diff)
parent2be8d2cd5e6b29b75aa93108bf738dd2d3b0bb21 (diff)
Merge pull request #17 from non/topic/disambiguate
Add code to disambiguate pronouns dynamically.
-rw-r--r--src/pronouns/pages.clj10
-rw-r--r--src/pronouns/util.clj29
2 files changed, 33 insertions, 6 deletions
diff --git a/src/pronouns/pages.clj b/src/pronouns/pages.clj
index d7ef9fc..1992328 100644
--- a/src/pronouns/pages.clj
+++ b/src/pronouns/pages.clj
@@ -90,16 +90,14 @@
       (take 5 inputs)
       (u/table-lookup inputs pronouns-table))))
 
-;; we could choose to display the entire row for the label.
-;; currently the first two entries are enough to disambiguate the
-;; pronouns -- will that always be true?
-(defn make-link [row]
+(defn make-link [abbrev row]
   (let [link (str "/" (s/join "/" row))
-        label (str (first row) "/" (first (rest row)))]
+        label (s/join "/" abbrev)]
     [:li [:a {:href link} label]]))
 
 (defn front [pronouns-table]
-  (let [links (map make-link (sort pronouns-table))
+  (let [abbreviations (u/abbreviate (sort pronouns-table))
+        links (map (fn [entry] (make-link (first entry) (second entry))) abbreviations)
         title "Pronoun Island"]
     (html
      [:html
diff --git a/src/pronouns/util.clj b/src/pronouns/util.clj
index 1269664..8327568 100644
--- a/src/pronouns/util.clj
+++ b/src/pronouns/util.clj
@@ -14,3 +14,32 @@
 (defn tabfile-lookup
   [query-key tabfile]
   (table-lookup query-key (slurp-tabfile tabfile)))
+
+(defn disambiguate
+  "given a row and its lexically-closest neighbors,
+  determine the smallest abbreviation which is still
+  distinct."
+  [prev row next]
+  (loop [n 1]
+    (let [row-n (take n row)]
+      (cond
+        (>= n 5) row
+        (= row-n (take n prev)) (recur (+ n 1))
+        (= row-n (take n next)) (recur (+ n 1))
+        :else row-n))))
+
+(defn abbreviate
+  "given a list of pronoun rows, return a list of
+  pairs, where the first item is the abbreviation
+  and the second is the original pronoun row."
+  [sorted-table]
+  (loop [acc nil
+         prev nil
+         row (first sorted-table)
+         todo (rest sorted-table)]
+    (let [next (first todo)
+          abbrev (disambiguate prev row next)
+          pair (list abbrev row)
+          acc2 (conj acc pair)]
+      (if (empty? todo) (reverse acc2)
+          (recur acc2 row next (rest todo))))))