From 8f6b83583fcec190b1bc6d083a8390a9ba3cc241 Mon Sep 17 00:00:00 2001 From: 813 <813gan@protonmail.com> Date: Thu, 23 Dec 2021 23:58:56 +0100 Subject: [PATCH 1/3] Add alternative way of language detection. --- auto-dictionary.el | 101 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 94 insertions(+), 7 deletions(-) diff --git a/auto-dictionary.el b/auto-dictionary.el index 6cdc7c6..6ff411a 100644 --- a/auto-dictionary.el +++ b/auto-dictionary.el @@ -176,6 +176,23 @@ Each pair's car corresponds to a value in `adict-language-list'" (defalias 'switch-language-hook 'adict-change-dictionary-hook) +(defvar-local adict-dictionaries-stats '()) + +(defcustom adict-operation-mode 'legacy + "Method used by adict to guess buffer language" + :group 'auto-dictionary + :type '(choice (const :tag "ispell" ispell) + (const :tag "legacy" legacy))) + +(defcustom adict-dictionary-list-ispell + '("en" "de" "fr" "es" "sv" "sl" "hu" "ro" "pt" "nb" + "da" "grc" "el" "hi" "nn" "ca" "eo" "sk" "ru" "uk") + "List of dictionaries to be checked when adict is in ispell-based guess mode." + :group 'auto-dictionary + :type '(repeat string)) + +(defvar adict-ignored-dictionaries-ispell '()) + ;;;###autoload (define-minor-mode auto-dictionary-mode "A minor mode that automatically sets `ispell-dictionary`." @@ -733,13 +750,26 @@ If IDLE-ONLY is set, abort when an input event occurs." 
pos)) (defun adict--evaluate-buffer-find-dictionary (idle-only) - (if (consp (car adict-dictionary-list)) - ;; current format - (cdr (assoc (adict--evaluate-buffer-find-lang idle-only) - adict-dictionary-list)) - ;; old format (<= 1.0.2) - (nth (adict--evaluate-buffer-find-max-index idle-only) - adict-dictionary-list))) + (cond + ((equal adict-operation-mode 'ispell) + (adict-evaluate-words-ispell-paragraph idle-only)) + ((equal adict-operation-mode 'legacy) + (if (consp (car adict-dictionary-list)) + ;; current format + (cdr (assoc (adict--evaluate-buffer-find-lang idle-only) + adict-dictionary-list)) + ;; old format (<= 1.0.2) + (nth (adict--evaluate-buffer-find-max-index idle-only) + adict-dictionary-list))) )) + +(defun adict-evaluate-words-ispell-paragraph (idle-only) + (let ((begin nil) + (end nil)) + (save-excursion + (setq begin (progn (backward-paragraph) (point)) + end (progn (forward-paragraph) (point))) ) + (adict-evaluate-words-ispell + (split-string (buffer-substring-no-properties begin end)) idle-only)) ) (defun adict--evaluate-buffer-find-lang (idle-only) (nth (adict--evaluate-buffer-find-max-index idle-only) @@ -818,6 +848,63 @@ You can use this, for instance, to localize the \" writes\" text in Gnus: ov)))))) +(defun adict-evaluate-words-ispell (word-list &optional idle-only) + (let* ((old-local-dict ispell-local-dictionary) + (poss nil) + (old-rank nil) + (valid-dicts nil) + (adict-dictionaries-stats-not-checked nil) + (results nil) + (old-rank-raw nil) + (word-list-filter + (remove-if-not (lambda (word) (string-match "^\\w+$" word)) word-list)) + (words (seq-take word-list-filter 200) )) + (setq valid-dicts (seq-filter + (lambda (d) (not (member d adict-ignored-dictionaries-ispell))) + adict-dictionary-list-ispell)) + (unless adict-dictionaries-stats + (setq adict-dictionaries-stats + (mapcar (lambda (d) (cons d 'nil)) valid-dicts))) + (setq adict-dictionaries-stats-not-checked + (mapcar 'car + (seq-filter + (lambda (d) (not (cdr d))) 
adict-dictionaries-stats))) + + (dolist (dict-name adict-dictionaries-stats-not-checked) + (when (and idle-only (input-pending-p)) + (message "Aborting lang analysis due to pending user input") + (return)) + (setq ispell-local-dictionary dict-name) + (setcdr (assoc dict-name adict-dictionaries-stats) 0) + ;; following code is copy-paste from `flyspell-word' + (if (eq 'error + (condition-case nil + (ispell-accept-buffer-local-defs) + (error 'error))) + (add-to-list 'adict-ignored-dictionaries-ispell dict-name) + (dolist (word words) + (ispell-send-string "%\n") + (ispell-send-string (concat "^" word "\n")) + (set-process-query-on-exit-flag ispell-process nil) + (while (progn + (accept-process-output ispell-process) + (not (string= "" (car ispell-filter))))) + (setq ispell-filter (cdr ispell-filter)) + (or ispell-filter + (setq ispell-filter '(*))) + (if (consp ispell-filter) + (setq poss (ispell-parse-output (car ispell-filter)))) + (when (eq poss 't) + (setq old-rank (alist-get dict-name adict-dictionaries-stats)) + (setcdr (assoc dict-name adict-dictionaries-stats) (1+ old-rank)) )))) + (setq ispell-local-dictionary old-local-dict) + + (setq results adict-dictionaries-stats) + (setq adict-dictionaries-stats nil) + + (caar (sort results (lambda (x y) (> (cdr x) (cdr y))))) + )) + ;;; Functions for 3rd Party Use ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (defun adict-guess-word-language (word) From ef1e79bbb18adde0b4f39d127b752897c5b827ae Mon Sep 17 00:00:00 2001 From: 813 <813gan@protonmail.com> Date: Sun, 11 Sep 2022 02:13:03 +0200 Subject: [PATCH 2/3] Deduplicate words in ispell test set --- auto-dictionary.el | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/auto-dictionary.el b/auto-dictionary.el index 6ff411a..9ae7c24 100644 --- a/auto-dictionary.el +++ b/auto-dictionary.el @@ -857,7 +857,8 @@ You can use this, for instance, to localize the \" writes\" text in Gnus: (results nil) (old-rank-raw nil) (word-list-filter - (remove-if-not 
(lambda (word) (string-match "^\\w+$" word)) word-list)) + (remove-if-not (lambda (word) (string-match "^\\w+$" word)) + (delete-dups word-list))) (words (seq-take word-list-filter 200) )) (setq valid-dicts (seq-filter (lambda (d) (not (member d adict-ignored-dictionaries-ispell))) From e57a8380047cec5b0c7126cfdc83b356fbf3fff9 Mon Sep 17 00:00:00 2001 From: 813 <813gan@protonmail.com> Date: Sun, 11 Sep 2022 02:24:08 +0200 Subject: [PATCH 3/3] Check fixed number of words instead of relying on paragraph detection --- auto-dictionary.el | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/auto-dictionary.el b/auto-dictionary.el index 9ae7c24..6fa426c 100644 --- a/auto-dictionary.el +++ b/auto-dictionary.el @@ -752,7 +752,7 @@ If IDLE-ONLY is set, abort when an input event occurs." (defun adict--evaluate-buffer-find-dictionary (idle-only) (cond ((equal adict-operation-mode 'ispell) - (adict-evaluate-words-ispell-paragraph idle-only)) + (adict-evaluate-words-ispell-chunk idle-only)) ((equal adict-operation-mode 'legacy) (if (consp (car adict-dictionary-list)) ;; current format @@ -762,12 +762,15 @@ If IDLE-ONLY is set, abort when an input event occurs." 
(nth (adict--evaluate-buffer-find-max-index idle-only) adict-dictionary-list))) )) -(defun adict-evaluate-words-ispell-paragraph (idle-only) +(defun adict-evaluate-words-ispell-chunk (idle-only) (let ((begin nil) (end nil)) (save-excursion - (setq begin (progn (backward-paragraph) (point)) - end (progn (forward-paragraph) (point))) ) + (dotimes (cnt 100) (backward-word) ) + (setq begin (point))) + (save-excursion + (dotimes (cnt 100) (forward-word) ) + (setq end (point))) (adict-evaluate-words-ispell (split-string (buffer-substring-no-properties begin end)) idle-only)) ) @@ -856,10 +859,9 @@ You can use this, for instance, to localize the \" writes\" text in Gnus: (adict-dictionaries-stats-not-checked nil) (results nil) (old-rank-raw nil) - (word-list-filter + (words (remove-if-not (lambda (word) (string-match "^\\w+$" word)) - (delete-dups word-list))) - (words (seq-take word-list-filter 200) )) + (delete-dups word-list))) ) (setq valid-dicts (seq-filter (lambda (d) (not (member d adict-ignored-dictionaries-ispell))) adict-dictionary-list-ispell))