lotsa changes and inclusion of elpy
[emacs.git] / .emacs.d / elisp / fuzzy / fuzzy.el
1 ;;; fuzzy.el --- Fuzzy Matching
2
3 ;; Copyright (C) 2010, 2011, 2012 Tomohiro Matsuyama
4
5 ;; Author: Tomohiro Matsuyama <tomo@cx4a.org>
6 ;; Keywords: convenience
7 ;; Version: 20131025.2343
8 ;; X-Original-Version: 0.2
9
10 ;; This program is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
14
15 ;; This program is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with this program. If not, see <http://www.gnu.org/licenses/>.
22
23 ;;; Commentary:
24
25 ;;
26
27 ;;; Code:
28
29 (require 'cl)
30 (require 'regexp-opt)
31
32 (defgroup fuzzy nil
33 "Fuzzy Matching"
34 :group 'convenience
35 :prefix "fuzzy-")
36
37 \f
38
39 ;;; Utilities
40
41 (defun fuzzy-current-time-float ()
42 (let ((time (current-time)))
43 (+ (* (float (first time))
44 (lsh 2 16))
45 (float (second time))
46 (/ (float (third time))
47 1000000))))
48
49 (defmacro* fuzzy-with-stopwatch ((&optional (elapsed-name 'elapsed)) &body body)
50 (declare (indent 1))
51 (let ((start (gensym "START")))
52 `(let ((,start (fuzzy-current-time-float)))
53 (flet ((,elapsed-name () (- (fuzzy-current-time-float) ,start)))
54 ,@body))))
55
56 (defun* fuzzy-add-to-list-as-sorted (list-var value &key (test '<) (key 'identity))
57 (let ((list (symbol-value list-var)))
58 (if (or (null list)
59 (funcall test
60 (funcall key value)
61 (funcall key (car list))))
62 (set list-var (cons value list))
63 (while (and list
64 (cdr list)
65 (funcall test
66 (funcall key (cadr list))
67 (funcall key value)))
68 (setq list (cdr list)))
69 (setcdr list (cons value (cdr list))))))
70
71 (defmacro* fuzzy-with-timeout ((timeout &optional timeout-result (tick-name 'tick)) &body body)
72 (declare (indent 1))
73 (let ((elapsed (gensym "ELAPSED")))
74 `(catch 'timeout
75 (fuzzy-with-stopwatch (,elapsed)
76 (flet ((,tick-name ()
77 (when (and ,timeout (< ,timeout (,elapsed)))
78 (throw 'timeout ,timeout-result))))
79 ,@body)))))
80
81 (defun fuzzy-count-matches-in-string (regexp string &optional start end)
82 (setq start (or start 0)
83 end (or end (length string)))
84 (loop for start = start then (1+ matched)
85 for matched = (let ((case-fold-search nil))
86 (string-match regexp string start))
87 while (and matched (< (1+ matched) end))
88 count matched))
89
90 \f
91
92 ;;; Jaro-Winkler Distance
93
94 (defun fuzzy-jaro-winkler-distance (s1 s2)
95 "Compute Jaro-Winkler distance. See
96 http://en.wikipedia.org/wiki/Jaro-Winkler_distance."
97 (let* ((l1 (length s1))
98 (l2 (length s2))
99 (r (max 1 (1- (/ (max l1 l2) 2))))
100 (m 0)
101 (tr 0)
102 (p 0)
103 cs1 cs2)
104 (loop with seen = (make-vector l2 nil)
105 for i below l1
106 for c1 = (aref s1 i) do
107 (loop for j from (max 0 (- i r)) below (min l2 (+ i r))
108 for c2 = (aref s2 j)
109 if (and (char-equal c1 c2)
110 (null (aref seen j))) do
111 (push c1 cs1)
112 (aset seen j c2)
113 (incf m)
114 and return nil)
115 finally
116 (setq cs1 (nreverse cs1)
117 cs2 (loop for i below l2
118 for c = (aref seen i)
119 if c collect c)))
120 (loop for c1 in cs1
121 for c2 in cs2
122 if (not (char-equal c1 c2))
123 do (incf tr))
124 (loop for i below (min m 5)
125 for c1 across s1
126 for c2 across s2
127 while (char-equal c1 c2)
128 do (incf p))
129 (if (eq m 0)
130 0.0
131 (setq m (float m))
132 (let* ((dj (/ (+ (/ m l1) (/ m l2) (/ (- m (/ tr 2)) m)) 3))
133 (dw (+ dj (* p 0.1 (- 1 dj)))))
134 dw))))
135
136 ;; Make sure byte-compiled.
137 (eval-when (eval)
138 (byte-compile 'fuzzy-jaro-winkler-distance))
139
140 (defalias 'fuzzy-jaro-winkler-score 'fuzzy-jaro-winkler-distance)
141
142 \f
143
144 ;;; Fuzzy Matching
145
146 (defcustom fuzzy-match-score-function 'fuzzy-jaro-winkler-score
147 "Score function for fuzzy matching."
148 :type 'function
149 :group 'fuzzy)
150
151 (defcustom fuzzy-match-accept-error-rate 0.10
152 "Fuzzy matching error threshold."
153 :type 'number
154 :group 'fuzzy)
155
156 (defcustom fuzzy-match-accept-length-difference 2
157 "Fuzzy matching length difference threshold."
158 :type 'number
159 :group 'fuzzy)
160
161 (defvar fuzzy-match-score-cache
162 (make-hash-table :test 'equal :weakness t))
163
164 (defun fuzzy-match-score (s1 s2 function)
165 (let ((cache-key (list function s1 s2)))
166 (or (gethash cache-key fuzzy-match-score-cache)
167 (puthash cache-key
168 (funcall function s1 s2)
169 fuzzy-match-score-cache))))
170
171 (defun* fuzzy-match (s1 s2 &optional (function fuzzy-match-score-function))
172 "Return t if S1 and S2 are matched. FUNCTION is a function
173 scoring between S1 and S2. The score must be between 0.0 and
174 1.0."
175 (and (<= (abs (- (length s1) (length s2)))
176 fuzzy-match-accept-length-difference)
177 (>= (fuzzy-match-score s1 s2 function)
178 (- 1 fuzzy-match-accept-error-rate))))
179
180 \f
181
182 ;;; Fuzzy Completion
183
184 (defun fuzzy-all-completions (string collection)
185 "`all-completions' with fuzzy matching."
186 (loop with length = (length string)
187 for str in collection
188 for len = (min (length str) (+ length fuzzy-match-accept-length-difference))
189 if (fuzzy-match string (substring str 0 len))
190 collect str))
191
192 \f
193
194 ;;; Fuzzy Search
195
196 (defvar fuzzy-search-some-char-regexp
197 (format ".\\{0,%s\\}" fuzzy-match-accept-length-difference))
198
199 (defun fuzzy-search-regexp-compile (string)
200 (flet ((opt (n)
201 (regexp-opt-charset
202 (append (substring string
203 (max 0 (- n 1))
204 (min (length string) (+ n 2)))
205 nil))))
206 (concat
207 "\\("
208 (loop for i below (length string)
209 for c = (if (evenp i) (opt i) fuzzy-search-some-char-regexp)
210 concat c)
211 "\\|"
212 (loop for i below (length string)
213 for c = (if (oddp i) (opt i) fuzzy-search-some-char-regexp)
214 concat c)
215 "\\)")))
216
217 (defun fuzzy-search-forward (string &optional bound noerror count)
218 (let ((regexp (fuzzy-search-regexp-compile string))
219 match-data)
220 (save-excursion
221 (while (and (null match-data)
222 (re-search-forward regexp bound t))
223 (if (fuzzy-match string (match-string 1))
224 (setq match-data (match-data))
225 (goto-char (1+ (match-beginning 1))))))
226 (when match-data
227 (store-match-data match-data)
228 (goto-char (match-end 1)))))
229
230 (defun fuzzy-search-backward (string &optional bound noerror count)
231 (let* ((regexp (fuzzy-search-regexp-compile string))
232 match-data begin end)
233 (save-excursion
234 (while (and (null match-data)
235 (re-search-backward regexp bound t))
236 (setq begin (match-beginning 1)
237 end (match-end 1))
238 (store-match-data nil)
239 (goto-char (max (point-min) (- begin (* (length string) 2))))
240 (while (re-search-forward regexp end t)
241 (if (fuzzy-match string (match-string 1))
242 (setq match-data (match-data))
243 (goto-char (1+ (match-beginning 1)))))
244 (unless match-data
245 (goto-char begin)))
246 (if match-data
247 (progn
248 (store-match-data match-data)
249 (goto-char (match-beginning 1)))
250 (store-match-data nil)))))
251
252 \f
253
254 ;;; Fuzzy Incremental Search
255
256 (defvar fuzzy-isearch nil)
257 (defvar fuzzy-isearch-failed-count 0)
258 (defvar fuzzy-isearch-enabled 'on-failed)
259 (defvar fuzzy-isearch-original-search-fun nil)
260 (defvar fuzzy-isearch-message-prefix
261 (concat (propertize "[FUZZY]" 'face 'bold) " "))
262
263 (defun fuzzy-isearch-activate ()
264 (setq fuzzy-isearch t)
265 (setq fuzzy-isearch-failed-count 0))
266
267 (defun fuzzy-isearch-deactivate ()
268 (setq fuzzy-isearch nil)
269 (setq fuzzy-isearch-failed-count 0))
270
271 (defun fuzzy-isearch ()
272 (cond (isearch-word
273 (if isearch-forward 'word-search-forward 'word-search-backward))
274 (isearch-regexp
275 (if isearch-forward 're-search-forward 're-search-backward))
276 ((or fuzzy-isearch
277 (eq fuzzy-isearch-enabled 'always)
278 (and (eq fuzzy-isearch-enabled 'on-failed)
279 (null isearch-success)
280 isearch-wrapped
281 (> (incf fuzzy-isearch-failed-count) 1)))
282 (unless fuzzy-isearch
283 (fuzzy-isearch-activate))
284 (if isearch-forward 'fuzzy-search-forward 'fuzzy-search-backward))
285 (t
286 (if isearch-forward 'search-forward 'search-backward))))
287
288 (defun fuzzy-isearch-end-hook ()
289 (fuzzy-isearch-deactivate))
290
291 (defun turn-on-fuzzy-isearch ()
292 (interactive)
293 (setq fuzzy-isearch-original-search-fun isearch-search-fun-function)
294 (setq isearch-search-fun-function 'fuzzy-isearch)
295 (add-hook 'isearch-mode-end-hook 'fuzzy-isearch-end-hook))
296
297 (defun turn-off-fuzzy-isearch ()
298 (interactive)
299 (setq isearch-search-fun-function fuzzy-isearch-original-search-fun)
300 (remove-hook 'isearch-mode-end-hook 'fuzzy-isearch-end-hook))
301
302 (defadvice isearch-message-prefix (after fuzzy-isearch-message-prefix activate)
303 (if fuzzy-isearch
304 (setq ad-return-value (concat fuzzy-isearch-message-prefix ad-return-value))
305 ad-return-value))
306
307 \f
308
309 ;;; QuickSilver's Abbreviation Scoring
310
311 (defun fuzzy-quicksilver-make-abbrev-regexp (abbrev)
312 (concat "^"
313 (loop for char across (downcase abbrev) concat
314 (format ".*?\\(%s\\)"
315 (regexp-quote (string char))))))
316
317 (defun fuzzy-quicksilver-abbrev-penalty (string skip-start skip-end)
318 (let ((skipped (- skip-end skip-start)))
319 (cond
320 ((zerop skipped) 0)
321 ((string-match "[ \\t\\r\\n_-]+$" (substring string skip-start skip-end))
322 (let ((seps (- (match-end 0) (match-beginning 0))))
323 (+ seps (* (- skipped seps) 0.15))))
324 ((let ((case-fold-search nil))
325 (eq (string-match "[[:upper:]]" string skip-end) skip-end))
326 (let ((ups (let ((case-fold-search nil))
327 (fuzzy-count-matches-in-string
328 "[[:upper:]]" string skip-start skip-end))))
329 (+ ups (* (- skipped ups) 0.15))))
330 (t skipped))))
331
332 (defun fuzzy-quicksilver-abbrev-score-nocache (string abbrev)
333 (cond
334 ((zerop (length abbrev)) 0.9)
335 ((< (length string) (length abbrev)) 0.0)
336 ((let ((regexp (fuzzy-quicksilver-make-abbrev-regexp abbrev))
337 (case-fold-search t))
338 (string-match regexp string))
339 (loop with groups = (cddr (match-data))
340 while groups
341 for prev = 0 then end
342 for start = (pop groups)
343 for end = (pop groups)
344 for matched = (- end start)
345 for skipped = (- start prev)
346 for penalty = (fuzzy-quicksilver-abbrev-penalty string prev start)
347 sum (+ matched (- skipped penalty)) into point
348 finally return
349 (let* ((length (length string))
350 (rest (- length end)))
351 (/ (+ point (* rest 0.9)) (float length)))))
352 (t 0.0)))
353
354 ;; Make sure byte-compiled.
355 (eval-when (eval)
356 (byte-compile 'fuzzy-quicksilver-abbrev-score-nocache))
357
358 (defvar fuzzy-quicksilver-abbrev-score-cache
359 (make-hash-table :test 'equal :weakness t))
360
361 (defun fuzzy-quicksilver-abbrev-score (string abbrev)
362 (let ((cache-key (cons string abbrev)))
363 (or (gethash cache-key fuzzy-quicksilver-abbrev-score-cache)
364 (puthash cache-key
365 (fuzzy-quicksilver-abbrev-score-nocache string abbrev)
366 fuzzy-quicksilver-abbrev-score-cache))))
367
368 (defun* fuzzy-quicksilver-realtime-abbrev-score (list
369 abbrev
370 &key
371 limit
372 timeout
373 (quality 0.7)
374 &aux new-list)
375 (fuzzy-with-timeout (timeout (nreverse new-list))
376 (loop with length = 0
377 for string in list
378 for score = (fuzzy-quicksilver-abbrev-score string abbrev)
379 if (>= score quality) do
380 (fuzzy-add-to-list-as-sorted
381 'new-list (cons string score)
382 :test '<
383 :key 'cdr)
384 (incf length)
385 if (and limit (> length limit)) do
386 (pop new-list)
387 (setq length limit)
388 do (tick)
389 finally return (nreverse new-list))))
390
391 (provide 'fuzzy)
392 ;;; fuzzy.el ends here