diff --git a/code/english.lisp b/code/english.lisp new file mode 100644 index 0000000..46cec91 --- /dev/null +++ b/code/english.lisp @@ -0,0 +1,75 @@ +(in-package #:spell) + +(eval-when (:compile-toplevel :load-toplevel :execute) + (defun load-english-dictionary (&key (verbose *compile-verbose*)) + (let ((filename (asdf:system-relative-pathname + "spell" "data/english.txt")) + dictionary entry-count) + (flet ((note (format-control &rest format-arguments) + (when verbose + (let ((stream *standard-output*)) + (fresh-line stream) + (pprint-logical-block (stream nil :per-line-prefix "; ") + (apply #'format stream format-control format-arguments)) + (force-output stream))))) + (note "Loading dictionary ~:_~S" filename) + (setf (values dictionary entry-count) (load-dictionary filename)) + (note "Will dump dictionary with ~:_~:D entr~:@P" entry-count)) + dictionary))) + +(defparameter *english-dictionary* #.(load-english-dictionary)) + +(defun english-lookup (word) + (when (and word (string/= word "")) + (let ((dictionary *english-dictionary*)) + (flet ((try (variant) + (let ((result (lookup variant dictionary))) + (when result + (return-from english-lookup result))))) + (try word) + (let* ((initial (aref word 0)) + (downcased (char-downcase initial))) + (unless (char= initial downcased) + ;; We change, for example, "Anti-Semitic" at the beginning + ;; of a sentence to "anti-Semitic" which is in the + ;; dictionary. + (let ((decapitalized (copy-seq word))) + (setf (aref decapitalized 0) downcased) + (try decapitalized)) + ;; We change, for example, "PARAMETER" (which is typical + ;; for some commenting styles) to "parameter" which is in + ;; the dictionary. + (when (every #'upper-case-p word) + (try (string-downcase word))))))))) + +(declaim (inline english-text-char-p find-start find-end)) +(defun english-text-char-p (character) + (declare (character character)) + (or (char= character #\') + (alpha-char-p character))) + +(defun find-start (string position) + (loop with length = (length string) + for i from position + when (or (= i length) + (english-text-char-p (char string i))) + return i)) + +(defun find-end (string position) + (loop with length = (length string) + for i from position + when (or (= i length) + (not (english-text-char-p (char string i)))) + return i)) + +(defun english-check-paragraph (string) + ;; TODO optimize, mayhaps. We do not need to create subsequences of the + ;; paragraph, we can traverse the dictionary using offsets of that paragraph. + (loop with position = 0 + with length = (length string) + for word-start = (find-start string position) + for word-end = (find-end string word-start) + until (= word-start word-end length) + do (setf position word-end) + unless (english-lookup (subseq string word-start word-end)) + collect (cons word-start word-end))) diff --git a/code/package.lisp b/code/package.lisp new file mode 100644 index 0000000..316bc81 --- /dev/null +++ b/code/package.lisp @@ -0,0 +1,62 @@ +(cl:defpackage #:spell + (:use + #:cl) + + (:shadow + #:case + #:number) + + ;; Word protocol + (:export + #:base) + + ;; Noun protocol + (:export + #:number + #:case + #:gender + #:singular) + + ;; Verb protocol + (:export + #:person + #:tense + #:mood + #:negative + #:contraction + #:strength + #:infinitive) + + ;; Adjective protocol + (:export + #:degree) + + ;; Pronoun protocol + (:export + #:person + #:number + #:gender + #:case) + + ;; Possessive pronoun protocol + (:export + #:refnumber) + + ;; Determiner protocol + (:export + #:number) + + ;; Article protocol + (:export + #:number + #:determinate) + + ;; Possessive adjective protocol + (:export + #:gender + #:person + #:refnumber) + + (:export + #:english-lookup + #:english-check-paragraph)) diff --git a/code/protocol.lisp b/code/protocol.lisp new file mode 100644 index 0000000..d9b289c --- /dev/null +++ b/code/protocol.lisp @@ -0,0 +1,15 @@ +(in-package #:spell) + +;;; Dictionary protocol + +(defgeneric lookup (string dictionary)) + +(defgeneric insert (object string dictionary)) + +;;; Trie node protocol + +(defgeneric entries (node)) + +(defgeneric find-child (char entries)) + +(defgeneric add-child (node char entries)) diff --git a/code/simple.lisp b/code/simple.lisp new file mode 100644 index 0000000..d42c52b --- /dev/null +++ b/code/simple.lisp @@ -0,0 +1,8 @@ +(in-package #:spell) + +(defun word (spelling type &rest initargs &key &allow-other-keys) + (declare (ignore spelling type initargs)) + t) + +(defmethod %insert (object string (suffix (eql 0)) (node leaf-mixin)) + (setf (entries node) t)) diff --git a/code/spell.lisp b/code/spell.lisp new file mode 100644 index 0000000..9200320 --- /dev/null +++ b/code/spell.lisp @@ -0,0 +1,124 @@ +(in-package #:spell) + +;;; Internal protocols + +(defgeneric %lookup (string suffix node) + (:method ((string string) (suffix t) (node t)) + '())) + +(defgeneric %insert (object string suffix node)) + +;;; Node classes + +(defclass node () ()) + +(defmethod make-load-form ((object node) &optional environment) + (make-load-form-saving-slots object :environment environment)) + +(defmethod %insert ((object t) (string string) (suffix (eql 0)) (node node)) + (change-class node 'leaf-node) + (%insert object string 0 node)) + +(defmethod %insert ((object t) (string string) (suffix integer) (node node)) + (change-class node 'interior-node) + (%insert object string suffix node)) + +(defclass leaf-mixin () + ((%entries :initform '() :initarg :entries :accessor entries))) + +(defmethod %lookup ((string string) (suffix (eql 0)) (node leaf-mixin)) + (entries node)) + +(defmethod %insert + ((object t) (string string) (suffix (eql 0)) (node leaf-mixin)) + (push object (entries node))) + +(defmethod %insert + ((object t) (string string) (suffix integer) (node leaf-mixin)) + (change-class node 'interior-leaf-node) + (%insert object string suffix node)) + +(defclass interior-mixin () + ((%children :initform '() :initarg :children :accessor children))) + +(defmethod %lookup ((string string) (suffix integer) (node interior-mixin)) + (let* ((character (aref string (- (length string) suffix))) + (child (find-child character (children node)))) + (if (null child) + nil + (%lookup string (1- suffix) child)))) + +(defmethod %insert + ((object t) (string string) (suffix integer) (node interior-mixin)) + (let* ((character (aref string (- (length string) suffix))) + (children (children node)) + (child (find-child character children))) + (when (null child) + (setf child (make-instance 'node) + (children node) (add-child child character children))) + (%insert object string (1- suffix) child))) + +(defclass interior-node (interior-mixin node) ()) + +(defmethod %lookup ((string string) (suffix (eql 0)) (node interior-node)) + '()) + +(defmethod %insert + ((object t) (string string) (suffix (eql 0)) (node interior-node)) + (change-class node 'interior-leaf-node) + (%insert object string 0 node)) + +(defclass leaf-node (leaf-mixin node) ()) + +(defclass interior-leaf-node (interior-mixin leaf-mixin node) ()) + +;;; Child node access methods + +(defmethod find-child ((char character) (entries list)) + (cdr (assoc char entries))) + +(defmethod find-child ((char character) (entries vector)) + (let ((index (- (char-code char) #.(char-code #\a)))) + (aref entries index))) + +(defmethod add-child ((node t) (char character) (entries list)) + (acons char node entries)) + +(defmethod add-child ((node t) (char character) (entries vector)) + (let ((index (- (char-code char) #.(char-code #\a)))) + (setf (aref entries index) node))) + +;;; Dictionary + +(defclass dictionary () + ((%contents :initform (make-instance 'node) :accessor contents))) + +(defmethod make-load-form ((object dictionary) &optional environment) + (make-load-form-saving-slots object :environment environment)) + +(defmethod lookup ((string string) (dictionary dictionary)) + (assert (plusp (length string))) + (%lookup string (length string) (contents dictionary))) + +(defmethod insert ((object t) (string string) (dictionary dictionary)) + (%insert object string (length string) (contents dictionary))) + +(defvar *dictionary*) + +(defun load-dictionary (filename) + (with-open-file (stream filename) + (let* ((counter 0) + (*dictionary* (make-instance 'dictionary))) + (do ((line (read-line stream nil stream) + (read-line stream nil stream))) + ((eq stream line)) + (unless (eq #\; (aref line 0)) + (let ((string (concatenate 'string "(" line ")"))) + (destructuring-bind + (spelling &rest args &key type &allow-other-keys) + (read-from-string string) + (remf args :type) + (let ((word (apply #'word spelling type args))) + (insert word spelling *dictionary*))) + (incf counter)))) + (values *dictionary* counter)))) diff --git a/traverse.lisp b/code/traverse.lisp similarity index 100% rename from traverse.lisp rename to code/traverse.lisp diff --git a/word.lisp b/code/word.lisp similarity index 66% rename from word.lisp rename to code/word.lisp index 7a41028..3100336 100644 --- a/word.lisp +++ b/code/word.lisp @@ -5,9 +5,6 @@ (defclass word () ((%base :initarg :base :reader base))) -(defmethod initialize-instance :after ((object word) &key spelling) - (declare (ignore spelling))) - (defmethod make-load-form ((object word) &optional environment) (make-load-form-saving-slots object :environment environment)) @@ -17,24 +14,17 @@ (defclass ,class-name ,@body)))) (defword noun (word) - ((%number :initarg :number :reader %number) - (%case :initarg :case :initform :nominative :reader %case) + ((%number :initarg :number :reader number) + (%case :initarg :case :initform :nominative :reader case) (%gender :initarg :gender :initform :any :reader gender) (%singular :initarg :singular :reader singular))) (defword proper-noun (noun) ()) -(defword negative-mixin () - ((%negative :initform nil :initarg :negative :reader negative))) - -(defword contraction-mixin () - ((%contraction :initform nil :initarg :contraction :reader contraction))) - -(defword verb (word negative-mixin contraction-mixin) +(defword verb (word) ((%person :initform :any :initarg :person :reader person) - (%number :initform :any :initarg :number :reader %number) + (%number :initform :any :initarg :number :reader number) (%tense :initarg :tense :reader tense) - (%mood :initarg :mood :reader mood) (%negative :initarg :negative :initform nil :reader negative) (%contraction :initarg :contraction :initform nil :reader contraction) (%strength :initarg :strength :initform :weak :reader strength) @@ -47,11 +37,12 @@ (defword adverb (word) ()) -(defword pronoun (word negative-mixin) +(defword pronoun (word) ((%person :initarg :person :reader person) - (%number :initarg :number :reader %number) + (%number :initarg :number :reader number) (%gender :initarg :gender :reader gender) - (%case :initarg :case :initform :nominative :reader %case))) + (%case :initarg :case :initform :nominative :reader case) + (%negative :initarg :negative :initform nil :reader negative))) (defword personal-pronoun (pronoun) ()) @@ -69,10 +60,10 @@ (defword subordinate (conjunction) ()) (defword determiner (word) - ((%number :initform :any :initarg :number :reader %number))) + ((%number :initform :any :initarg :number :reader number))) (defword article (determiner) - ((%number :initarg :number :reader %number) + ((%number :initarg :number :reader number) (%determinate :initform nil :initarg :determinate :reader determinate))) (defword quantifier (determiner) ()) @@ -90,9 +81,7 @@ (defword verb-verb-contraction (verb) ()) -(defun word (&rest arguments &key type spelling &allow-other-keys) - (let ((arguments (copy-list arguments))) - (remf arguments :type) - (insert (apply #'make-instance (gethash type *word-types*) arguments) - spelling - *dictionary*))) +(defun word (spelling type &rest initargs &key &allow-other-keys) + (declare (ignore spelling)) + (let ((class (gethash type *word-types*))) + (apply #'make-instance class initargs))) diff --git a/english.txt b/data/english.txt similarity index 100% rename from english.txt rename to data/english.txt diff --git a/data/version-string.sexp b/data/version-string.sexp new file mode 100644 index 0000000..a582d77 --- /dev/null +++ b/data/version-string.sexp @@ -0,0 +1 @@ +"0.0.2" diff --git a/english.lisp b/english.lisp deleted file mode 100644 index 240b5f8..0000000 --- a/english.lisp +++ /dev/null @@ -1,43 +0,0 @@ -(in-package #:spell) - -(defparameter *english-dictionary* - #.(load-dictionary (asdf:system-relative-pathname :spell "english.txt"))) - -(defun english-lookup (word) - (when (and word (string/= word "")) - (let ((decapitalized (copy-seq word))) - (setf (aref word 0) (char-downcase (aref word 0))) - (or (lookup word *english-dictionary*) - (lookup decapitalized *english-dictionary*))))) - -(defun english-check-paragraph (string) - ;; TODO optimize, mayhaps. We do not need to create subsequences of the - ;; paragraph, we can traverse the dictionary using offsets of that paragraph. - (loop with position = 0 - with length = (length string) - for word-start = (find-start string position) - for word-end = (find-end string word-start) - until (= word-start word-end length) - do (setf position word-end) - unless (english-lookup (subseq string word-start word-end)) - collect (cons word-start word-end))) - -(declaim (inline english-text-char-p find-start find-end)) -(defun english-text-char-p (character) - (declare (character character)) - (or (char= character #\') - (alpha-char-p character))) - -(defun find-start (string position) - (loop with length = (length string) - for i from position - when (or (= i length) - (english-text-char-p (char string i))) - return i)) - -(defun find-end (string position) - (loop with length = (length string) - for i from position - when (or (= i length) - (not (english-text-char-p (char string i)))) - return i)) diff --git a/simple.lisp b/simple.lisp deleted file mode 100644 index 4d9bfe6..0000000 --- a/simple.lisp +++ /dev/null @@ -1,8 +0,0 @@ -(in-package #:spell) - -(defun word (&rest arguments &key spelling &allow-other-keys) - (declare (ignore arguments)) - (insert t spelling *dictionary*)) - -(defmethod %insert (object string (suffix (eql 0)) (node leaf-mixin)) - (setf (entries node) t)) diff --git a/spell.asd b/spell.asd index 83123cc..7f6d4d7 100644 --- a/spell.asd +++ b/spell.asd @@ -1,23 +1,29 @@ -;;;; spell.asd - -(asdf:defsystem #:spell +(defsystem "spell" :description "Spellchecking package for Common Lisp" - :author "Robert Strandh -Michał \"phoe\" Herda " + :author ("Robert Strandh " + "Michał \"phoe\" Herda " + "Jan Moringen ") :license "BSD" - :version "0.0.1" - :serial t - :components ((:file "spell") - (:file "word") - (:file "english"))) + :version (:read-file-form "data/version-string.sexp") + :components ((:module "code" + :serial t + :components ((:file "package") + (:file "protocol") + (:file "word") + (:file "spell") + (:file "english"))))) -(asdf:defsystem #:spell/simple +(defsystem "spell/simple" :description "Spellchecking package for Common Lisp - simple version" - :author "Robert Strandh -Michał \"phoe\" Herda " + :author ("Robert Strandh " + "Michał \"phoe\" Herda " + "Jan Moringen ") :license "BSD" - :version "0.0.1" - :serial t - :components ((:file "spell") - (:file "simple") - (:file "english"))) + :version (:read-file-form "data/version-string.sexp") + :components ((:module "code" + :serial t + :components ((:file "package") + (:file "protocol") + (:file "simple") + (:file "spell") + (:file "english"))))) diff --git a/spell.lisp b/spell.lisp deleted file mode 100644 index 274f601..0000000 --- a/spell.lisp +++ /dev/null @@ -1,122 +0,0 @@ -(defpackage #:spell - (:use #:cl) - (:export #:english-lookup - #:english-check-paragraph)) -(in-package #:spell) - -(defgeneric lookup (string dictionary)) -(defgeneric insert (object string dictionary)) - -(defclass node () ()) - -(defmethod make-load-form ((object node) &optional environment) - (make-load-form-saving-slots object :environment environment)) - -(defvar *dictionary*) - -(defclass dictionary () - ((%contents :initform (make-instance 'node) :accessor contents))) - -(defmethod make-load-form ((object dictionary) &optional environment) - (make-load-form-saving-slots object :environment environment)) - -(defun load-dictionary (filename) - (with-open-file (stream filename) - (let* ((counter 0) - (*dictionary* (make-instance 'dictionary))) - (do ((line (read-line stream nil stream) - (read-line stream nil stream))) - ((eq stream line)) - (unless (eq #\; (aref line 0)) - (let* ((string (concatenate 'string "(" line ")")) - (args (read-from-string string))) - (apply #'word :spelling args) - (incf counter)))) - (values *dictionary* counter)))) - -(defmethod lookup (string (dictionary dictionary)) - (assert (plusp (length string))) - (%lookup string (length string) (contents dictionary))) - -(defgeneric entries (node)) - -(defgeneric %lookup (string suffix node) - (:method (string suffix node) - (declare (ignore string suffix node)) - '())) - -(defclass leaf-mixin () - ((%entries :initform '() :initarg :entries :accessor entries))) - -(defmethod %lookup (string (suffix (eql 0)) (node leaf-mixin)) - (entries node)) - -(defclass interior-mixin () - ((%children :initform '() :initarg :children :accessor children))) - -(defclass interior-node (interior-mixin node) ()) - -(defmethod %lookup (string (suffix (eql 0)) (node interior-node)) - '()) - -(defmethod %lookup (string suffix (node interior-mixin)) - (let ((child (find-child (aref string (- (length string) suffix)) - (children node)))) - (if (null child) - nil - (%lookup string (1- suffix) child)))) - -(defclass leaf-node (leaf-mixin node) ()) -(defclass interior-leaf-node (interior-mixin leaf-mixin node) ()) - -(defgeneric %insert (object string suffix node)) - -(defmethod %insert (object string (suffix (eql 0)) (node leaf-mixin)) - (push object (entries node))) - -(defmethod %insert (object string (suffix (eql 0)) (node node)) - (change-class node 'leaf-node) - (%insert object string 0 node)) - -(defmethod %insert (object string (suffix (eql 0)) (node interior-node)) - (change-class node 'interior-leaf-node) - (%insert object string 0 node)) - -(defmethod %insert (object string suffix (node leaf-mixin)) - (change-class node 'interior-leaf-node) - (%insert object string suffix node)) - -(defmethod %insert (object string suffix (node node)) - (change-class node 'interior-node) - (%insert object string suffix node)) - -(defmethod %insert (object string suffix (node interior-mixin)) - (let ((child (find-child (aref string (- (length string) suffix)) - (children node)))) - (when (null child) - (setf child (make-instance 'node)) - (setf (children node) - (add-child child - (aref string (- (length string) suffix)) - (children node)))) - (%insert object string (1- suffix) child))) - -(defmethod insert (object string (dictionary dictionary)) - (%insert object string (length string) (contents dictionary))) - -(defgeneric find-child (char entries)) - -(defmethod find-child (char (entries list)) - (cdr (assoc char entries))) - -(defmethod find-child (char (entries vector)) - (aref entries (- (char-code char) #.(char-code #\a)))) - -(defgeneric add-child (node char entries)) - -(defmethod add-child (node char (entries list)) - (acons char node entries)) - -(defmethod add-child (node char (entries vector)) - (setf (aref entries (- (char-code char) #.(char-code #\a))) - node))