-
Notifications
You must be signed in to change notification settings - Fork 2
/
yeis.el
236 lines (178 loc) · 7.67 KB
/
yeis.el
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
;;; yeis.el --- Yeis's Emacs' Input Switcher -*- lexical-binding: t; -*-
;; Copyright © 2020, 2021, 2022 André A. Gomes <[email protected]>
;; Version: 0.0.1
;; URL: https://github.com/aadcg/emacs-yeis
;; Yeis is NOT part of GNU Emacs.
;; Yeis is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3, or (at your option) any later
;; version.
;; Yeis is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
;; for more details.
;; For a full copy of the GNU General Public License see
;; <http://www.gnu.org/licenses/>.
;;; Commentary:
;; Yeis's Emacs' Input Switcher. It should be pronounced as yes.
;; It aims to:
;; 1) Transform text as if it had been inserted by any non-CJK input
;; method;
;; 2) Auto set the input method and auto transform text as it's typed
;; (yeis-mode);
;; Please note that IM stands for input method.
;;; Code:
(require 'robin)
(require 'ispell)
(require 's) ; due to s-trim
(defvar yeis-toggle-input-method-after-translation t
"Whether to toggle the IM (input method) after a translation.
When non-nil, `yeis-transform-previous-word' calls
`toggle-input-method' once the text has been transformed.
`yeis-rules' binds this variable to t while it runs.")
(defvar yeis-nonsense-word-regex "[]}{[<>`~]\\|[.,;:][^ \n]"
"Regex that matches a nonsense word in English.
It matches either a single bracket, brace, angle bracket,
backquote or tilde, or one of the characters . , ; : immediately
followed by a character that is neither a space nor a newline.
Used by `yeis-nonsense-word-p' and `yeis-prefix-p'.")
(defvar yeis-path-plain-word-list nil
"Path of the plain English word list.
The value is handed to `ispell-lookup-words' as its dictionary
argument by `yeis-prefix-p' and `yeis-word-p'.")
(defun yeis-last-whitespace (arg)
  "Return the position of the last whitespace before point.
The whitespace looked for is one that is immediately followed by
a non-whitespace character.  Return nil when the backward search
fails.  Point is restored before returning.
With prefix argument ARG, do it ARG times."
  (let ((whitespace-then-text "[\n[:space:]][^[:space:]\n]"))
    (save-excursion
      ;; A successful search leaves point on the whitespace itself.
      (when (re-search-backward whitespace-then-text nil t arg)
        (point)))))
(defun yeis-transform-previous-word (arg)
  "Transform the previous word to the other IM.
When `current-input-method' is non-nil the text is passed to
`robin-invert-region', otherwise to `robin-convert-region'.

Without an active region, the text transformed runs from the
position returned by `yeis-last-whitespace' (called with ARG), or
from the beginning of the accessible buffer when that returns
nil, up to point.  With prefix argument ARG, this therefore
covers ARG words counted back from the cursor position.

When the region is in use, the region is transformed instead.

Finally, `toggle-input-method' is called when
`yeis-toggle-input-method-after-translation' is non-nil."
  (interactive "p")
  (let* ((on-region (use-region-p))
         (start (cond (on-region (region-beginning))
                      ((yeis-last-whitespace arg))
                      (t (point-min))))
         (stop (if on-region (region-end) (point)))
         ;; An active IM means the text is already converted: invert it.
         (transform (if current-input-method
                        #'robin-invert-region
                      #'robin-convert-region)))
    (funcall transform start stop)
    (when yeis-toggle-input-method-after-translation
      (toggle-input-method))))
;; The above suffices to achieve goal number one.
;; Users might find it useful to bind `yeis-transform-previous-word' to "C-|".
;; (global-set-key (kbd "C-|") 'yeis-transform-previous-word)
;; If you want this functionality for a different IM, please read the
;; section "How to define conversion rules" of the `robin' package.
;; What follows is a hacky suggestion to achieve goal number two.
;; If the user makes a typo while inserting an English word, then it gets
;; transformed. That reflects the alpha state of what you'll find below.
(define-minor-mode yeis-mode
  "Toggle automatic IM selection (Yeis mode).
Turning the mode on adds `yeis-rules' to the buffer-local value
of `post-self-insert-hook'; turning it off removes it again."
  :lighter " Ye"
  :global nil
  (cond
   (yeis-mode
    (add-hook 'post-self-insert-hook #'yeis-rules nil t))
   (t
    (remove-hook 'post-self-insert-hook #'yeis-rules t))))
(defun yeis-rules ()
  "Transform the word at point and change IM automatically.
Conditions must be met to trigger `yeis-transform-previous-word'.
Namely, there are two kinds of rules.  When the character before
point is a newline (what RET inserts), a carriage return or a
space, the previous word is transformed if `yeis-l1-p' or
`yeis-word-p' returns non-nil.  Otherwise it is transformed if
`yeis-nonsense-word-p' or `yeis-prefix-p' returns non-nil.

`yeis-toggle-input-method-after-translation' is bound to t for
the duration of the call."
  (let ((yeis-toggle-input-method-after-translation t)
        ;; RET inserts a newline (10), not a carriage return (13), so
        ;; the newline has to be listed for the RET rules to ever run.
        ;; The carriage return is kept for backward compatibility.
        (inserted-whitespace-p (memq (char-before) '(?\n ?\r ?\s))))
    (when (if inserted-whitespace-p
              (or (yeis-l1-p) (yeis-word-p))
            (or (yeis-nonsense-word-p) (yeis-prefix-p)))
      (yeis-transform-previous-word 1))))
(defun yeis-nonsense-word-p ()
  "Return non-nil if the previous word is nonsense.
The check is only performed when no IM is selected; with an IM
selected the result is always nil.  Nonsense means that
`yeis-nonsense-word-regex' matches somewhere between the position
returned by `yeis-last-whitespace' and point.  That regex
basically checks for the presence of characters that don't
constitute a well-formed word in English.

Some examples in the case of the traditional йцукен keyboard:

  [jhjij <-> хорошо
  k.,k.  <-> люблю
  ;tcnm  <-> жесть"
  (and (not current-input-method)
       (let ((limit (yeis-last-whitespace 1)))
         (save-excursion
           ;; LIMIT may be nil, in which case the search is unbounded.
           (re-search-backward yeis-nonsense-word-regex limit t)))))
(defun yeis-l1-p ()
  "Return t if the previous word of length 1 requires translation.
English only has two words of length one - \"a\" and \"I\".  The
previous word, as returned by `yeis-previous-word', is downcased
before the comparison.

With an IM selected, translation is required when the word is one
of those two.  Without an IM selected, translation is required
when it is neither.

It could be argued that there are other length 1 words like \"w\"."
  (let* ((word (downcase (yeis-previous-word)))
         ;; "a" before "i" since it is more common in English
         (english-p (or (string-equal word "a") (string-equal word "i"))))
    (and (= (length word) 1)
         ;; True when exactly one of "an IM is selected" and "the word
         ;; is not English" holds.
         (eq (null current-input-method) (not english-p)))))
(defun yeis-prefix-p ()
  "Return non-nil if the previous prefix requires translation.
A prefix is a word, as returned by `yeis-previous-word', of
length between 3 and 4 that `yeis-nonsense-word-regex' does not
match.

The prefix is looked up with `ispell-lookup-words' in
`yeis-path-plain-word-list'.  With an IM selected, the lookup
result is returned as is.  Without an IM selected, t is returned
when the lookup finds nothing."
  (let ((inhibit-message t)
        (word (yeis-previous-word)))
    (when (and (<= 3 (length word) 4)
               (not (string-match yeis-nonsense-word-regex word)))
      (let ((hits (ispell-lookup-words word yeis-path-plain-word-list)))
        (if current-input-method
            hits
          (not hits))))))
(defun yeis-word-p ()
  "Return t if the previous word requires translation.
This is similar to `yeis-prefix-p', whereas in this function the
boolean reflects the existence of a full word match: the
downcased previous word must be equal to the first result that
`ispell-lookup-words' returns for it in
`yeis-path-plain-word-list'.

Words shorter than 2 characters never require translation here.
With an IM selected, translation is required when there is a full
match.  Without an IM selected, it is required when there is
none."
  (let ((inhibit-message t)
        (word (downcase (yeis-previous-word))))
    (when (>= (length word) 2)
      (let* ((first-hit
              (car (ispell-lookup-words word yeis-path-plain-word-list)))
             ;; `string-equal' accepts symbols and compares their names,
             ;; so an empty lookup (nil) would wrongly match the word
             ;; "nil".  Only compare against an actual string.
             (known-p (and (stringp first-hit)
                           (string-equal word first-hit))))
        (if current-input-method
            known-p
          (not known-p))))))
(defun yeis-previous-word ()
  "Return the previous word as string, as it is without an IM selected.
The text considered runs from the position returned by
`yeis-last-whitespace' (or the beginning of the accessible buffer
when that is nil) up to point.

When an IM is active, every character of that text is replaced by
its char-code property named after `robin-current-package-name'.
In short, the RULES of `robin-define-package' are regarded as a
bijection, and this provides the inverse function.

When no IM is active, the text is merely trimmed.

In either case, the result goes through `yeis-strip-punctuation'.

Recall that a word, in the context of yeis, is a text string
composed by any non-whitespace characters and delimited by
whitespaces (or borders like the beginning/end of a buffer).
Notice that this contrasts with the Emacs' definition of a word.

As an example of a word that qualifies as such in the context of
yeis, but not in the context of Emacs' definition, take
\".kz\" (юля).  Run `backward-word' with the cursor placed at the
end of the following line.

.kz"
  (let* ((start (or (yeis-last-whitespace 1) (point-min)))
         (raw (buffer-substring-no-properties start (point))))
    (yeis-strip-punctuation
     (if current-input-method
         (mapconcat
          (lambda (ch)
            (get-char-code-property ch (intern robin-current-package-name)))
          raw "")
       (s-trim raw)))))
(defun yeis-strip-punctuation (word)
  "Strip WORD from punctuation.
If the last character of WORD is one of . , ; : ! or ?, return
WORD without that character.  Otherwise return WORD unchanged.
At most one character is removed.
TODO extend this method to strip quotes"
  ;; Anchor at the very end of the string: `$' would also match right
  ;; before an embedded newline, and the last character would then be
  ;; dropped although it is not punctuation.  `string-match-p' is used
  ;; so that the caller's match data is left alone.
  (if (string-match-p "[.,;:!?]\\'" word)
      (substring word 0 -1)
    word))
(provide 'yeis)
;;; yeis.el ends here