From 606bc47eb5005b8e329ac3f2c63623369c184c1d Mon Sep 17 00:00:00 2001 From: Jan Moringen Date: Fri, 13 Dec 2024 17:03:32 +0100 Subject: [PATCH 01/15] System definition cleanup --- data/version-string.sexp | 1 + spell.asd | 18 ++++++++---------- 2 files changed, 9 insertions(+), 10 deletions(-) create mode 100644 data/version-string.sexp diff --git a/data/version-string.sexp b/data/version-string.sexp new file mode 100644 index 0000000..6e0c2f3 --- /dev/null +++ b/data/version-string.sexp @@ -0,0 +1 @@ +"0.0.1" diff --git a/spell.asd b/spell.asd index 83123cc..6fcd554 100644 --- a/spell.asd +++ b/spell.asd @@ -1,22 +1,20 @@ -;;;; spell.asd - -(asdf:defsystem #:spell +(defsystem "spell" :description "Spellchecking package for Common Lisp" - :author "Robert Strandh -Michał \"phoe\" Herda " + :author ("Robert Strandh " + "Michał \"phoe\" Herda ") :license "BSD" - :version "0.0.1" + :version (:read-file-form "data/version-string.sexp") :serial t :components ((:file "spell") (:file "word") (:file "english"))) -(asdf:defsystem #:spell/simple +(defsystem "spell/simple" :description "Spellchecking package for Common Lisp - simple version" - :author "Robert Strandh -Michał \"phoe\" Herda " + :author ("Robert Strandh " + "Michał \"phoe\" Herda ") :license "BSD" - :version "0.0.1" + :version (:read-file-form "data/version-string.sexp") :serial t :components ((:file "spell") (:file "simple") From 48a6bb31d7be078a544acf074d08568f0cbc86a9 Mon Sep 17 00:00:00 2001 From: Jan Moringen Date: Fri, 13 Dec 2024 17:21:33 +0100 Subject: [PATCH 02/15] Place functions with INLINE declamations before first uses --- english.lisp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/english.lisp b/english.lisp index 240b5f8..1d11bb8 100644 --- a/english.lisp +++ b/english.lisp @@ -10,18 +10,6 @@ (or (lookup word *english-dictionary*) (lookup decapitalized *english-dictionary*))))) -(defun english-check-paragraph (string) - ;; TODO optimize, mayhaps. We do not need to create subsequences of the - ;; paragraph, we can traverse the dictionary using offsets of that paragraph. - (loop with position = 0 - with length = (length string) - for word-start = (find-start string position) - for word-end = (find-end string word-start) - until (= word-start word-end length) - do (setf position word-end) - unless (english-lookup (subseq string word-start word-end)) - collect (cons word-start word-end))) - (declaim (inline english-text-char-p find-start find-end)) (defun english-text-char-p (character) (declare (character character)) @@ -41,3 +29,15 @@ when (or (= i length) (not (english-text-char-p (char string i)))) return i)) + +(defun english-check-paragraph (string) + ;; TODO optimize, mayhaps. We do not need to create subsequences of the + ;; paragraph, we can traverse the dictionary using offsets of that paragraph. + (loop with position = 0 + with length = (length string) + for word-start = (find-start string position) + for word-end = (find-end string word-start) + until (= word-start word-end length) + do (setf position word-end) + unless (english-lookup (subseq string word-start word-end)) + collect (cons word-start word-end))) From a3881ae1c1e0c51e87f38ccd85ed453b4306a1a9 Mon Sep 17 00:00:00 2001 From: Jan Moringen Date: Fri, 20 Dec 2024 09:12:40 +0100 Subject: [PATCH 03/15] Specialize all method parameters This is more uniform and avoids warnings in CCL. --- spell.lisp | 48 ++++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/spell.lisp b/spell.lisp index 274f601..b945fb0 100644 --- a/spell.lisp +++ b/spell.lisp @@ -34,21 +34,20 @@ (incf counter)))) (values *dictionary* counter)))) -(defmethod lookup (string (dictionary dictionary)) +(defmethod lookup ((string string) (dictionary dictionary)) (assert (plusp (length string))) (%lookup string (length string) (contents dictionary))) (defgeneric entries (node)) (defgeneric %lookup (string suffix node) - (:method (string suffix node) - (declare (ignore string suffix node)) + (:method ((string string) (suffix t) (node t)) '())) (defclass leaf-mixin () ((%entries :initform '() :initarg :entries :accessor entries))) -(defmethod %lookup (string (suffix (eql 0)) (node leaf-mixin)) +(defmethod %lookup ((string string) (suffix (eql 0)) (node leaf-mixin)) (entries node)) (defclass interior-mixin () @@ -56,12 +55,12 @@ (defclass interior-node (interior-mixin node) ()) -(defmethod %lookup (string (suffix (eql 0)) (node interior-node)) +(defmethod %lookup ((string string) (suffix (eql 0)) (node interior-node)) '()) -(defmethod %lookup (string suffix (node interior-mixin)) - (let ((child (find-child (aref string (- (length string) suffix)) - (children node)))) +(defmethod %lookup ((string string) (suffix integer) (node interior-mixin)) + (let* ((character (aref string (- (length string) suffix))) + (child (find-child character (children node)))) (if (null child) nil (%lookup string (1- suffix) child)))) @@ -71,26 +70,30 @@ (defgeneric %insert (object string suffix node)) -(defmethod %insert (object string (suffix (eql 0)) (node leaf-mixin)) +(defmethod %insert + ((object t) (string string) (suffix (eql 0)) (node leaf-mixin)) (push object (entries node))) -(defmethod %insert (object string (suffix (eql 0)) (node node)) +(defmethod %insert ((object t) (string string) (suffix (eql 0)) (node node)) (change-class node 'leaf-node) (%insert object string 0 node)) -(defmethod %insert (object string (suffix (eql 0)) (node interior-node)) +(defmethod %insert + ((object t) (string string) (suffix (eql 0)) (node interior-node)) (change-class node 'interior-leaf-node) (%insert object string 0 node)) -(defmethod %insert (object string suffix (node leaf-mixin)) +(defmethod %insert + ((object t) (string string) (suffix integer) (node leaf-mixin)) (change-class node 'interior-leaf-node) (%insert object string suffix node)) -(defmethod %insert (object string suffix (node node)) +(defmethod %insert ((object t) (string string) (suffix integer) (node node)) (change-class node 'interior-node) (%insert object string suffix node)) -(defmethod %insert (object string suffix (node interior-mixin)) +(defmethod %insert + ((object t) (string string) (suffix integer) (node interior-mixin)) (let ((child (find-child (aref string (- (length string) suffix)) (children node)))) (when (null child) @@ -101,22 +104,23 @@ (children node)))) (%insert object string (1- suffix) child))) -(defmethod insert (object string (dictionary dictionary)) +(defmethod insert ((object t) (string string) (dictionary dictionary)) (%insert object string (length string) (contents dictionary))) (defgeneric find-child (char entries)) -(defmethod find-child (char (entries list)) +(defmethod find-child ((char character) (entries list)) (cdr (assoc char entries))) -(defmethod find-child (char (entries vector)) - (aref entries (- (char-code char) #.(char-code #\a)))) +(defmethod find-child ((char character) (entries vector)) + (let ((index (- (char-code char) #.(char-code #\a)))) + (aref entries index))) (defgeneric add-child (node char entries)) -(defmethod add-child (node char (entries list)) +(defmethod add-child ((node t) (char character) (entries list)) (acons char node entries)) -(defmethod add-child (node char (entries vector)) - (setf (aref entries (- (char-code char) #.(char-code #\a))) - node)) +(defmethod add-child ((node t) (char character) (entries vector)) + (let ((index (- (char-code char) #.(char-code #\a)))) + (setf (aref entries index) node))) From 00519cc01741579da00eeeaf05889510f7d693d5 Mon Sep 17 00:00:00 2001 From: Jan Moringen Date: Fri, 20 Dec 2024 10:16:53 +0100 Subject: [PATCH 04/15] Remove unused slot MOOD from VERB class The file english.txt does not contain any :VERB entries with a :MOOD property. --- word.lisp | 1 - 1 file changed, 1 deletion(-) diff --git a/word.lisp b/word.lisp index 7a41028..88b74fb 100644 --- a/word.lisp +++ b/word.lisp @@ -34,7 +34,6 @@ ((%person :initform :any :initarg :person :reader person) (%number :initform :any :initarg :number :reader %number) (%tense :initarg :tense :reader tense) - (%mood :initarg :mood :reader mood) (%negative :initarg :negative :initform nil :reader negative) (%contraction :initarg :contraction :initform nil :reader contraction) (%strength :initarg :strength :initform :weak :reader strength) From 5b55205c12c3d51492f2b46c26384eed69eb4cca Mon Sep 17 00:00:00 2001 From: Jan Moringen Date: Fri, 20 Dec 2024 10:22:44 +0100 Subject: [PATCH 05/15] Remove mixin classes {NEGATIVE,CONTRACTION}-MIXIN * CONTRACTION-MIXIN was used only once * NEGATIVE-MIXIN was used twice in VERB and PRONOUN but the %NEGATIVE slot was repeated in VERB anyway --- word.lisp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/word.lisp b/word.lisp index 88b74fb..b869b3b 100644 --- a/word.lisp +++ b/word.lisp @@ -24,13 +24,7 @@ (defword proper-noun (noun) ()) -(defword negative-mixin () - ((%negative :initform nil :initarg :negative :reader negative))) - -(defword contraction-mixin () - ((%contraction :initform nil :initarg :contraction :reader contraction))) - -(defword verb (word negative-mixin contraction-mixin) +(defword verb (word) ((%person :initform :any :initarg :person :reader person) (%number :initform :any :initarg :number :reader %number) (%tense :initarg :tense :reader tense) @@ -46,11 +40,12 @@ (defword adverb (word) ()) -(defword pronoun (word negative-mixin) +(defword pronoun (word) ((%person :initarg :person :reader person) (%number :initarg :number :reader %number) (%gender :initarg :gender :reader gender) - (%case :initarg :case :initform :nominative :reader %case))) + (%case :initarg :case :initform :nominative :reader %case) + (%negative :initarg :negative :initform nil :reader negative))) (defword personal-pronoun (pronoun) ()) From e7b0f41e002d58b7086beb9df6cbb59fdf42a649 Mon Sep 17 00:00:00 2001 From: Jan Moringen Date: Fri, 20 Dec 2024 10:25:11 +0100 Subject: [PATCH 06/15] Loosen mutual dependencies between {word,simple}.lisp and spell.lisp In particular, {word,simple}.lisp no longer use *DICTIONARY*. --- simple.lisp | 6 +++--- spell.lisp | 10 +++++++--- word.lisp | 13 ++++--------- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/simple.lisp b/simple.lisp index 4d9bfe6..d42c52b 100644 --- a/simple.lisp +++ b/simple.lisp @@ -1,8 +1,8 @@ (in-package #:spell) -(defun word (&rest arguments &key spelling &allow-other-keys) - (declare (ignore arguments)) - (insert t spelling *dictionary*)) +(defun word (spelling type &rest initargs &key &allow-other-keys) + (declare (ignore spelling type initargs)) + t) (defmethod %insert (object string (suffix (eql 0)) (node leaf-mixin)) (setf (entries node) t)) diff --git a/spell.lisp b/spell.lisp index b945fb0..3ecead0 100644 --- a/spell.lisp +++ b/spell.lisp @@ -28,9 +28,13 @@ (read-line stream nil stream))) ((eq stream line)) (unless (eq #\; (aref line 0)) - (let* ((string (concatenate 'string "(" line ")")) - (args (read-from-string string))) - (apply #'word :spelling args) + (let ((string (concatenate 'string "(" line ")"))) + (destructuring-bind + (spelling &rest args &key type &allow-other-keys) + (read-from-string string) + (remf args :type) + (let ((word (apply #'word spelling type args))) + (insert word spelling *dictionary*))) (incf counter)))) (values *dictionary* counter)))) diff --git a/word.lisp b/word.lisp index b869b3b..d74ad0c 100644 --- a/word.lisp +++ b/word.lisp @@ -5,9 +5,6 @@ (defclass word () ((%base :initarg :base :reader base))) -(defmethod initialize-instance :after ((object word) &key spelling) - (declare (ignore spelling))) - (defmethod make-load-form ((object word) &optional environment) (make-load-form-saving-slots object :environment environment)) @@ -84,9 +81,7 @@ (defword verb-verb-contraction (verb) ()) -(defun word (&rest arguments &key type spelling &allow-other-keys) - (let ((arguments (copy-list arguments))) - (remf arguments :type) - (insert (apply #'make-instance (gethash type *word-types*) arguments) - spelling - *dictionary*))) +(defun word (spelling type &rest initargs &key &allow-other-keys) + (declare (ignore spelling)) + (let ((class (gethash type *word-types*))) + (apply #'make-instance class initargs))) From 16b42cc4156b9528d4439994184d6b091c6b0534 Mon Sep 17 00:00:00 2001 From: Jan Moringen Date: Fri, 20 Dec 2024 10:42:30 +0100 Subject: [PATCH 07/15] Move code and data to separate sub-directories --- english.lisp => code/english.lisp | 3 ++- simple.lisp => code/simple.lisp | 0 spell.lisp => code/spell.lisp | 0 traverse.lisp => code/traverse.lisp | 0 word.lisp => code/word.lisp | 0 english.txt => data/english.txt | 0 spell.asd | 18 ++++++++++-------- 7 files changed, 12 insertions(+), 9 deletions(-) rename english.lisp => code/english.lisp (94%) rename simple.lisp => code/simple.lisp (100%) rename spell.lisp => code/spell.lisp (100%) rename traverse.lisp => code/traverse.lisp (100%) rename word.lisp => code/word.lisp (100%) rename english.txt => data/english.txt (100%) diff --git a/english.lisp b/code/english.lisp similarity index 94% rename from english.lisp rename to code/english.lisp index 1d11bb8..51d186d 100644 --- a/english.lisp +++ b/code/english.lisp @@ -1,7 +1,8 @@ (in-package #:spell) (defparameter *english-dictionary* - #.(load-dictionary (asdf:system-relative-pathname :spell "english.txt"))) + #.(load-dictionary + (asdf:system-relative-pathname "spell" "data/english.txt"))) (defun english-lookup (word) (when (and word (string/= word "")) diff --git a/simple.lisp b/code/simple.lisp similarity index 100% rename from simple.lisp rename to code/simple.lisp diff --git a/spell.lisp b/code/spell.lisp similarity index 100% rename from spell.lisp rename to code/spell.lisp diff --git a/traverse.lisp b/code/traverse.lisp similarity index 100% rename from traverse.lisp rename to code/traverse.lisp diff --git a/word.lisp b/code/word.lisp similarity index 100% rename from word.lisp rename to code/word.lisp diff --git a/english.txt b/data/english.txt similarity index 100% rename from english.txt rename to data/english.txt diff --git a/spell.asd b/spell.asd index 6fcd554..6d67204 100644 --- a/spell.asd +++ b/spell.asd @@ -4,10 +4,11 @@ "Michał \"phoe\" Herda ") :license "BSD" :version (:read-file-form "data/version-string.sexp") - :serial t - :components ((:file "spell") - (:file "word") - (:file "english"))) + :components ((:module "code" + :serial t + :components ((:file "spell") + (:file "word") + (:file "english"))))) (defsystem "spell/simple" :description "Spellchecking package for Common Lisp - simple version" @@ -15,7 +16,8 @@ "Michał \"phoe\" Herda ") :license "BSD" :version (:read-file-form "data/version-string.sexp") - :serial t - :components ((:file "spell") - (:file "simple") - (:file "english"))) + :components ((:module "code" + :serial t + :components ((:file "spell") + (:file "simple") + (:file "english"))))) From 0cb09b4d67cbedb6b7da472c734e9f8c48b00592 Mon Sep 17 00:00:00 2001 From: Jan Moringen Date: Fri, 20 Dec 2024 10:43:58 +0100 Subject: [PATCH 08/15] Move package definition into a separate file This also allows fixing the dependency between word.lisp and spell.lisp. spell.lisp uses definitions from word.lisp. --- code/package.lisp | 7 +++++++ code/spell.lisp | 4 ---- spell.asd | 6 ++++-- 3 files changed, 11 insertions(+), 6 deletions(-) create mode 100644 code/package.lisp diff --git a/code/package.lisp b/code/package.lisp new file mode 100644 index 0000000..b3a3e4a --- /dev/null +++ b/code/package.lisp @@ -0,0 +1,7 @@ +(cl:defpackage #:spell + (:use + #:cl) + + (:export + #:english-lookup + #:english-check-paragraph)) diff --git a/code/spell.lisp b/code/spell.lisp index 3ecead0..a0c64ea 100644 --- a/code/spell.lisp +++ b/code/spell.lisp @@ -1,7 +1,3 @@ -(defpackage #:spell - (:use #:cl) - (:export #:english-lookup - #:english-check-paragraph)) (in-package #:spell) (defgeneric lookup (string dictionary)) diff --git a/spell.asd b/spell.asd index 6d67204..53a1902 100644 --- a/spell.asd +++ b/spell.asd @@ -6,8 +6,9 @@ :version (:read-file-form "data/version-string.sexp") :components ((:module "code" :serial t - :components ((:file "spell") + :components ((:file "package") (:file "word") + (:file "spell") (:file "english"))))) (defsystem "spell/simple" @@ -18,6 +19,7 @@ :version (:read-file-form "data/version-string.sexp") :components ((:module "code" :serial t - :components ((:file "spell") + :components ((:file "package") (:file "simple") + (:file "spell") (:file "english"))))) From 24b6a4c5cc5e416d7dbcb389a9a7a873ac841083 Mon Sep 17 00:00:00 2001 From: Jan Moringen Date: Fri, 20 Dec 2024 10:45:40 +0100 Subject: [PATCH 09/15] Move generic functions into separate file --- code/protocol.lisp | 15 +++++ code/spell.lisp | 148 ++++++++++++++++++++++----------------------- spell.asd | 2 + 3 files changed, 90 insertions(+), 75 deletions(-) create mode 100644 code/protocol.lisp diff --git a/code/protocol.lisp b/code/protocol.lisp new file mode 100644 index 0000000..d9b289c --- /dev/null +++ b/code/protocol.lisp @@ -0,0 +1,15 @@ +(in-package #:spell) + +;;; Dictionary protocol + +(defgeneric lookup (string dictionary)) + +(defgeneric insert (object string dictionary)) + +;;; Trie node protocol + +(defgeneric entries (node)) + +(defgeneric find-child (char entries)) + +(defgeneric add-child (node char entries)) diff --git a/code/spell.lisp b/code/spell.lisp index a0c64ea..9200320 100644 --- a/code/spell.lisp +++ b/code/spell.lisp @@ -1,48 +1,27 @@ (in-package #:spell) -(defgeneric lookup (string dictionary)) -(defgeneric insert (object string dictionary)) +;;; Internal protocols -(defclass node () ()) +(defgeneric %lookup (string suffix node) + (:method ((string string) (suffix t) (node t)) + '())) -(defmethod make-load-form ((object node) &optional environment) - (make-load-form-saving-slots object :environment environment)) +(defgeneric %insert (object string suffix node)) -(defvar *dictionary*) +;;; Node classes -(defclass dictionary () - ((%contents :initform (make-instance 'node) :accessor contents))) +(defclass node () ()) -(defmethod make-load-form ((object dictionary) &optional environment) +(defmethod make-load-form ((object node) &optional environment) (make-load-form-saving-slots object :environment environment)) -(defun load-dictionary (filename) - (with-open-file (stream filename) - (let* ((counter 0) - (*dictionary* (make-instance 'dictionary))) - (do ((line (read-line stream nil stream) - (read-line stream nil stream))) - ((eq stream line)) - (unless (eq #\; (aref line 0)) - (let ((string (concatenate 'string "(" line ")"))) - (destructuring-bind - (spelling &rest args &key type &allow-other-keys) - (read-from-string string) - (remf args :type) - (let ((word (apply #'word spelling type args))) - (insert word spelling *dictionary*))) - (incf counter)))) - (values *dictionary* counter)))) - -(defmethod lookup ((string string) (dictionary dictionary)) - (assert (plusp (length string))) - (%lookup string (length string) (contents dictionary))) - -(defgeneric entries (node)) +(defmethod %insert ((object t) (string string) (suffix (eql 0)) (node node)) + (change-class node 'leaf-node) + (%insert object string 0 node)) -(defgeneric %lookup (string suffix node) - (:method ((string string) (suffix t) (node t)) - '())) +(defmethod %insert ((object t) (string string) (suffix integer) (node node)) + (change-class node 'interior-node) + (%insert object string suffix node)) (defclass leaf-mixin () ((%entries :initform '() :initarg :entries :accessor entries))) @@ -50,13 +29,17 @@ (defmethod %lookup ((string string) (suffix (eql 0)) (node leaf-mixin)) (entries node)) -(defclass interior-mixin () - ((%children :initform '() :initarg :children :accessor children))) +(defmethod %insert + ((object t) (string string) (suffix (eql 0)) (node leaf-mixin)) + (push object (entries node))) -(defclass interior-node (interior-mixin node) ()) +(defmethod %insert + ((object t) (string string) (suffix integer) (node leaf-mixin)) + (change-class node 'interior-leaf-node) + (%insert object string suffix node)) -(defmethod %lookup ((string string) (suffix (eql 0)) (node interior-node)) - '()) +(defclass interior-mixin () + ((%children :initform '() :initarg :children :accessor children))) (defmethod %lookup ((string string) (suffix integer) (node interior-mixin)) (let* ((character (aref string (- (length string) suffix))) @@ -65,49 +48,31 @@ nil (%lookup string (1- suffix) child)))) -(defclass leaf-node (leaf-mixin node) ()) -(defclass interior-leaf-node (interior-mixin leaf-mixin node) ()) - -(defgeneric %insert (object string suffix node)) - (defmethod %insert - ((object t) (string string) (suffix (eql 0)) (node leaf-mixin)) - (push object (entries node))) + ((object t) (string string) (suffix integer) (node interior-mixin)) + (let* ((character (aref string (- (length string) suffix))) + (children (children node)) + (child (find-child character children))) + (when (null child) + (setf child (make-instance 'node) + (children node) (add-child child character children))) + (%insert object string (1- suffix) child))) -(defmethod %insert ((object t) (string string) (suffix (eql 0)) (node node)) - (change-class node 'leaf-node) - (%insert object string 0 node)) +(defclass interior-node (interior-mixin node) ()) + +(defmethod %lookup ((string string) (suffix (eql 0)) (node interior-node)) + '()) (defmethod %insert ((object t) (string string) (suffix (eql 0)) (node interior-node)) (change-class node 'interior-leaf-node) (%insert object string 0 node)) -(defmethod %insert - ((object t) (string string) (suffix integer) (node leaf-mixin)) - (change-class node 'interior-leaf-node) - (%insert object string suffix node)) - -(defmethod %insert ((object t) (string string) (suffix integer) (node node)) - (change-class node 'interior-node) - (%insert object string suffix node)) - -(defmethod %insert - ((object t) (string string) (suffix integer) (node interior-mixin)) - (let ((child (find-child (aref string (- (length string) suffix)) - (children node)))) - (when (null child) - (setf child (make-instance 'node)) - (setf (children node) - (add-child child - (aref string (- (length string) suffix)) - (children node)))) - (%insert object string (1- suffix) child))) +(defclass leaf-node (leaf-mixin node) ()) -(defmethod insert ((object t) (string string) (dictionary dictionary)) - (%insert object string (length string) (contents dictionary))) +(defclass interior-leaf-node (interior-mixin leaf-mixin node) ()) -(defgeneric find-child (char entries)) +;;; Child node access methods (defmethod find-child ((char character) (entries list)) (cdr (assoc char entries))) @@ -116,11 +81,44 @@ (let ((index (- (char-code char) #.(char-code #\a)))) (aref entries index))) -(defgeneric add-child (node char entries)) - (defmethod add-child ((node t) (char character) (entries list)) (acons char node entries)) (defmethod add-child ((node t) (char character) (entries vector)) (let ((index (- (char-code char) #.(char-code #\a)))) (setf (aref entries index) node))) + +;;; Dictionary + +(defclass dictionary () + ((%contents :initform (make-instance 'node) :accessor contents))) + +(defmethod make-load-form ((object dictionary) &optional environment) + (make-load-form-saving-slots object :environment environment)) + +(defmethod lookup ((string string) (dictionary dictionary)) + (assert (plusp (length string))) + (%lookup string (length string) (contents dictionary))) + +(defmethod insert ((object t) (string string) (dictionary dictionary)) + (%insert object string (length string) (contents dictionary))) + +(defvar *dictionary*) + +(defun load-dictionary (filename) + (with-open-file (stream filename) + (let* ((counter 0) + (*dictionary* (make-instance 'dictionary))) + (do ((line (read-line stream nil stream) + (read-line stream nil stream))) + ((eq stream line)) + (unless (eq #\; (aref line 0)) + (let ((string (concatenate 'string "(" line ")"))) + (destructuring-bind + (spelling &rest args &key type &allow-other-keys) + (read-from-string string) + (remf args :type) + (let ((word (apply #'word spelling type args))) + (insert word spelling *dictionary*))) + (incf counter)))) + (values *dictionary* counter)))) diff --git a/spell.asd b/spell.asd index 53a1902..535bd4b 100644 --- a/spell.asd +++ b/spell.asd @@ -7,6 +7,7 @@ :components ((:module "code" :serial t :components ((:file "package") + (:file "protocol") (:file "word") (:file "spell") (:file "english"))))) @@ -20,6 +21,7 @@ :components ((:module "code" :serial t :components ((:file "package") + (:file "protocol") (:file "simple") (:file "spell") (:file "english"))))) From ee7b5c4afa2d11168f8d2acf12290948c454738a Mon Sep 17 00:00:00 2001 From: Jan Moringen Date: Fri, 20 Dec 2024 10:47:13 +0100 Subject: [PATCH 10/15] Shadow CL:{CASE,NUMBER} and use the proper names for the readers --- code/package.lisp | 4 ++++ code/word.lisp | 14 +++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/code/package.lisp b/code/package.lisp index b3a3e4a..a06bde0 100644 --- a/code/package.lisp +++ b/code/package.lisp @@ -2,6 +2,10 @@ (:use #:cl) + (:shadow + #:case + #:number) + (:export #:english-lookup #:english-check-paragraph)) diff --git a/code/word.lisp b/code/word.lisp index d74ad0c..3100336 100644 --- a/code/word.lisp +++ b/code/word.lisp @@ -14,8 +14,8 @@ (defclass ,class-name ,@body)))) (defword noun (word) - ((%number :initarg :number :reader %number) - (%case :initarg :case :initform :nominative :reader %case) + ((%number :initarg :number :reader number) + (%case :initarg :case :initform :nominative :reader case) (%gender :initarg :gender :initform :any :reader gender) (%singular :initarg :singular :reader singular))) @@ -23,7 +23,7 @@ (defword verb (word) ((%person :initform :any :initarg :person :reader person) - (%number :initform :any :initarg :number :reader %number) + (%number :initform :any :initarg :number :reader number) (%tense :initarg :tense :reader tense) (%negative :initarg :negative :initform nil :reader negative) (%contraction :initarg :contraction :initform nil :reader contraction) @@ -39,9 +39,9 @@ (defword pronoun (word) ((%person :initarg :person :reader person) - (%number :initarg :number :reader %number) + (%number :initarg :number :reader number) (%gender :initarg :gender :reader gender) - (%case :initarg :case :initform :nominative :reader %case) + (%case :initarg :case :initform :nominative :reader case) (%negative :initarg :negative :initform nil :reader negative))) (defword personal-pronoun (pronoun) ()) @@ -60,10 +60,10 @@ (defword subordinate (conjunction) ()) (defword determiner (word) - ((%number :initform :any :initarg :number :reader %number))) + ((%number :initform :any :initarg :number :reader number))) (defword article (determiner) - ((%number :initarg :number :reader %number) + ((%number :initarg :number :reader number) (%determinate :initform nil :initarg :determinate :reader determinate))) (defword quantifier (determiner) ()) From d79579dc9dbec8dbf93dff1ce476639ad3376e4e Mon Sep 17 00:00:00 2001 From: Jan Moringen Date: Fri, 20 Dec 2024 10:49:28 +0100 Subject: [PATCH 11/15] Export names of readers for WORD subclasses --- code/package.lisp | 51 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/code/package.lisp b/code/package.lisp index a06bde0..316bc81 100644 --- a/code/package.lisp +++ b/code/package.lisp @@ -6,6 +6,57 @@ #:case #:number) + ;; Word protocol + (:export + #:base) + + ;; Noun protocol + (:export + #:number + #:case + #:gender + #:singular) + + ;; Verb protocol + (:export + #:person + #:tense + #:mood + #:negative + #:contraction + #:strength + #:infinitive) + + ;; Adjective protocol + (:export + #:degree) + + ;; Pronoun protocol + (:export + #:person + #:number + #:gender + #:case) + + ;; Possessive pronoun protocol + (:export + #:refnumber) + + ;; Determiner protocol + (:export + #:number) + + ;; Article protocol + (:export + #:number + #:determinate) + + ;; Possessive adjective protocol + (:export + #:gender + #:person + #:refnumber) + (:export #:english-lookup #:english-check-paragraph)) From 91d4e5bc51193f10d6add1dba3e4f94ae91d302b Mon Sep 17 00:00:00 2001 From: Jan Moringen Date: Fri, 20 Dec 2024 10:50:50 +0100 Subject: [PATCH 12/15] Improve case-folding in ENGLISH-LOOKUP * Do not destroy WORD when producing the de-capitalized version * Produce the de-capitalized version only if WORD itself is not in the dictionary and WORD starts with a capital letter * If all characters of WORD are in upper case, try changing all characters to lower case --- code/english.lisp | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/code/english.lisp b/code/english.lisp index 51d186d..a403140 100644 --- a/code/english.lisp +++ b/code/english.lisp @@ -6,10 +6,26 @@ (defun english-lookup (word) (when (and word (string/= word "")) - (let ((decapitalized (copy-seq word))) - (setf (aref word 0) (char-downcase (aref word 0))) - (or (lookup word *english-dictionary*) - (lookup decapitalized *english-dictionary*))))) + (let ((dictionary *english-dictionary*)) + (flet ((try (variant) + (let ((result (lookup variant dictionary))) + (when result + (return-from english-lookup result))))) + (try word) + (let* ((initial (aref word 0)) + (downcased (char-downcase initial))) + (unless (char= initial downcased) + ;; We change, for example, "Anti-Semitic" at the beginning + ;; of a sentence to "anti-Semitic" which is in the + ;; dictionary. + (let ((decapitalized (copy-seq word))) + (setf (aref decapitalized 0) downcased) + (try decapitalized)) + ;; We change, for example, "PARAMETER" (which is typical + ;; for some commenting styles) to "parameter" which is in + ;; the dictionary. + (when (every #'upper-case-p word) + (try (string-downcase word))))))))) (declaim (inline english-text-char-p find-start find-end)) (defun english-text-char-p (character) From 9679c5fc518676e6468fccb63c341ca9573d173d Mon Sep 17 00:00:00 2001 From: Jan Moringen Date: Fri, 20 Dec 2024 10:52:08 +0100 Subject: [PATCH 13/15] Add function LOAD-ENGLISH-DICTIONARY and some optional verbosity --- code/english.lisp | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/code/english.lisp b/code/english.lisp index a403140..46cec91 100644 --- a/code/english.lisp +++ b/code/english.lisp @@ -1,8 +1,23 @@ (in-package #:spell) -(defparameter *english-dictionary* - #.(load-dictionary - (asdf:system-relative-pathname "spell" "data/english.txt"))) +(eval-when (:compile-toplevel :load-toplevel :execute) + (defun load-english-dictionary (&key (verbose *compile-verbose*)) + (let ((filename (asdf:system-relative-pathname + "spell" "data/english.txt")) + dictionary entry-count) + (flet ((note (format-control &rest format-arguments) + (when verbose + (let ((stream *standard-output*)) + (fresh-line stream) + (pprint-logical-block (stream nil :per-line-prefix "; ") + (apply #'format stream format-control format-arguments)) + (force-output stream))))) + (note "Loading dictionary ~:_~S" filename) + (setf (values dictionary entry-count) (load-dictionary filename)) + (note "Will dump dictionary with ~:_~:D entr~:@P" entry-count)) + dictionary))) + +(defparameter *english-dictionary* #.(load-english-dictionary)) (defun english-lookup (word) (when (and word (string/= word "")) From a94e365b970e83fe9a68bbbc636acf0536e5139d Mon Sep 17 00:00:00 2001 From: Jan Moringen Date: Fri, 20 Dec 2024 11:34:21 +0100 Subject: [PATCH 14/15] Add myself to authors in spell.asd --- spell.asd | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/spell.asd b/spell.asd index 535bd4b..7f6d4d7 100644 --- a/spell.asd +++ b/spell.asd @@ -1,7 +1,8 @@ (defsystem "spell" :description "Spellchecking package for Common Lisp" :author ("Robert Strandh " - "Michał \"phoe\" Herda ") + "Michał \"phoe\" Herda " + "Jan Moringen ") :license "BSD" :version (:read-file-form "data/version-string.sexp") :components ((:module "code" @@ -15,7 +16,8 @@ (defsystem "spell/simple" :description "Spellchecking package for Common Lisp - simple version" :author ("Robert Strandh " - "Michał \"phoe\" Herda ") + "Michał \"phoe\" Herda " + "Jan Moringen ") :license "BSD" :version (:read-file-form "data/version-string.sexp") :components ((:module "code" From a82f8962dfd74649d8058bedaa84301c2fbb3ae7 Mon Sep 17 00:00:00 2001 From: Jan Moringen Date: Fri, 20 Dec 2024 11:35:32 +0100 Subject: [PATCH 15/15] Bump version from 0.0.1 to 0.0.2 in data/version-string.sexp --- data/version-string.sexp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/version-string.sexp b/data/version-string.sexp index 6e0c2f3..a582d77 100644 --- a/data/version-string.sexp +++ b/data/version-string.sexp @@ -1 +1 @@ -"0.0.1" +"0.0.2"