Skip to content

Commit

Permalink
Merge pull request #4 from s-expressionists/baseline-improvements
Browse files Browse the repository at this point in the history
Baseline Improvements
  • Loading branch information
scymtym authored Dec 20, 2024
2 parents d2ae948 + a82f896 commit a42be41
Show file tree
Hide file tree
Showing 13 changed files with 323 additions and 216 deletions.
75 changes: 75 additions & 0 deletions code/english.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
(in-package #:spell)

(eval-when (:compile-toplevel :load-toplevel :execute)
(defun load-english-dictionary (&key (verbose *compile-verbose*))
(let ((filename (asdf:system-relative-pathname
"spell" "data/english.txt"))
dictionary entry-count)
(flet ((note (format-control &rest format-arguments)
(when verbose
(let ((stream *standard-output*))
(fresh-line stream)
(pprint-logical-block (stream nil :per-line-prefix "; ")
(apply #'format stream format-control format-arguments))
(force-output stream)))))
(note "Loading dictionary ~:_~S" filename)
(setf (values dictionary entry-count) (load-dictionary filename))
(note "Will dump dictionary with ~:_~:D entr~:@P" entry-count))
dictionary)))

(defparameter *english-dictionary* #.(load-english-dictionary))

(defun english-lookup (word)
(when (and word (string/= word ""))
(let ((dictionary *english-dictionary*))
(flet ((try (variant)
(let ((result (lookup variant dictionary)))
(when result
(return-from english-lookup result)))))
(try word)
(let* ((initial (aref word 0))
(downcased (char-downcase initial)))
(unless (char= initial downcased)
;; We change, for example, "Anti-Semitic" at the beginning
;; of a sentence to "anti-Semitic" which is in the
;; dictionary.
(let ((decapitalized (copy-seq word)))
(setf (aref decapitalized 0) downcased)
(try decapitalized))
;; We change, for example, "PARAMETER" (which is typical
;; for some commenting styles) to "parameter" which is in
;; the dictionary.
(when (every #'upper-case-p word)
(try (string-downcase word)))))))))

(declaim (inline english-text-char-p find-start find-end))
(defun english-text-char-p (character)
(declare (character character))
(or (char= character #\')
(alpha-char-p character)))

(defun find-start (string position)
(loop with length = (length string)
for i from position
when (or (= i length)
(english-text-char-p (char string i)))
return i))

(defun find-end (string position)
(loop with length = (length string)
for i from position
when (or (= i length)
(not (english-text-char-p (char string i))))
return i))

(defun english-check-paragraph (string)
;; TODO optimize, mayhaps. We do not need to create subsequences of the
;; paragraph, we can traverse the dictionary using offsets of that paragraph.
(loop with position = 0
with length = (length string)
for word-start = (find-start string position)
for word-end = (find-end string word-start)
until (= word-start word-end length)
do (setf position word-end)
unless (english-lookup (subseq string word-start word-end))
collect (cons word-start word-end)))
62 changes: 62 additions & 0 deletions code/package.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
(cl:defpackage #:spell
(:use
#:cl)

(:shadow
#:case
#:number)

;; Word protocol
(:export
#:base)

;; Noun protocol
(:export
#:number
#:case
#:gender
#:singular)

;; Verb protocol
(:export
#:person
#:tense
#:mood
#:negative
#:contraction
#:strength
#:infinitive)

;; Adjective protocol
(:export
#:degree)

;; Pronoun protocol
(:export
#:person
#:number
#:gender
#:case)

;; Possessive pronoun protocol
(:export
#:refnumber)

;; Determiner protocol
(:export
#:number)

;; Article protocol
(:export
#:number
#:determinate)

;; Possessive adjective protocol
(:export
#:gender
#:person
#:refnumber)

(:export
#:english-lookup
#:english-check-paragraph))
15 changes: 15 additions & 0 deletions code/protocol.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
(in-package #:spell)

;;; Dictionary protocol

(defgeneric lookup (string dictionary))

(defgeneric insert (object string dictionary))

;;; Trie node protocol

(defgeneric entries (node))

(defgeneric find-child (char entries))

(defgeneric add-child (node char entries))
8 changes: 8 additions & 0 deletions code/simple.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
(in-package #:spell)

(defun word (spelling type &rest initargs &key &allow-other-keys)
(declare (ignore spelling type initargs))
t)

(defmethod %insert (object string (suffix (eql 0)) (node leaf-mixin))
(setf (entries node) t))
124 changes: 124 additions & 0 deletions code/spell.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
(in-package #:spell)

;;; Internal protocols

(defgeneric %lookup (string suffix node)
(:method ((string string) (suffix t) (node t))
'()))

(defgeneric %insert (object string suffix node))

;;; Node classes

(defclass node () ())

(defmethod make-load-form ((object node) &optional environment)
(make-load-form-saving-slots object :environment environment))

(defmethod %insert ((object t) (string string) (suffix (eql 0)) (node node))
(change-class node 'leaf-node)
(%insert object string 0 node))

(defmethod %insert ((object t) (string string) (suffix integer) (node node))
(change-class node 'interior-node)
(%insert object string suffix node))

(defclass leaf-mixin ()
((%entries :initform '() :initarg :entries :accessor entries)))

(defmethod %lookup ((string string) (suffix (eql 0)) (node leaf-mixin))
(entries node))

(defmethod %insert
((object t) (string string) (suffix (eql 0)) (node leaf-mixin))
(push object (entries node)))

(defmethod %insert
((object t) (string string) (suffix integer) (node leaf-mixin))
(change-class node 'interior-leaf-node)
(%insert object string suffix node))

(defclass interior-mixin ()
((%children :initform '() :initarg :children :accessor children)))

(defmethod %lookup ((string string) (suffix integer) (node interior-mixin))
(let* ((character (aref string (- (length string) suffix)))
(child (find-child character (children node))))
(if (null child)
nil
(%lookup string (1- suffix) child))))

(defmethod %insert
((object t) (string string) (suffix integer) (node interior-mixin))
(let* ((character (aref string (- (length string) suffix)))
(children (children node))
(child (find-child character children)))
(when (null child)
(setf child (make-instance 'node)
(children node) (add-child child character children)))
(%insert object string (1- suffix) child)))

(defclass interior-node (interior-mixin node) ())

(defmethod %lookup ((string string) (suffix (eql 0)) (node interior-node))
'())

(defmethod %insert
((object t) (string string) (suffix (eql 0)) (node interior-node))
(change-class node 'interior-leaf-node)
(%insert object string 0 node))

(defclass leaf-node (leaf-mixin node) ())

(defclass interior-leaf-node (interior-mixin leaf-mixin node) ())

;;; Child node access methods

(defmethod find-child ((char character) (entries list))
(cdr (assoc char entries)))

(defmethod find-child ((char character) (entries vector))
(let ((index (- (char-code char) #.(char-code #\a))))
(aref entries index)))

(defmethod add-child ((node t) (char character) (entries list))
(acons char node entries))

(defmethod add-child ((node t) (char character) (entries vector))
(let ((index (- (char-code char) #.(char-code #\a))))
(setf (aref entries index) node)))

;;; Dictionary

(defclass dictionary ()
((%contents :initform (make-instance 'node) :accessor contents)))

(defmethod make-load-form ((object dictionary) &optional environment)
(make-load-form-saving-slots object :environment environment))

(defmethod lookup ((string string) (dictionary dictionary))
(assert (plusp (length string)))
(%lookup string (length string) (contents dictionary)))

(defmethod insert ((object t) (string string) (dictionary dictionary))
(%insert object string (length string) (contents dictionary)))

(defvar *dictionary*)

(defun load-dictionary (filename)
(with-open-file (stream filename)
(let* ((counter 0)
(*dictionary* (make-instance 'dictionary)))
(do ((line (read-line stream nil stream)
(read-line stream nil stream)))
((eq stream line))
(unless (eq #\; (aref line 0))
(let ((string (concatenate 'string "(" line ")")))
(destructuring-bind
(spelling &rest args &key type &allow-other-keys)
(read-from-string string)
(remf args :type)
(let ((word (apply #'word spelling type args)))
(insert word spelling *dictionary*)))
(incf counter))))
(values *dictionary* counter))))
File renamed without changes.
39 changes: 14 additions & 25 deletions word.lisp → code/word.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@
(defclass word ()
((%base :initarg :base :reader base)))

(defmethod initialize-instance :after ((object word) &key spelling)
(declare (ignore spelling)))

(defmethod make-load-form ((object word) &optional environment)
(make-load-form-saving-slots object :environment environment))

Expand All @@ -17,24 +14,17 @@
(defclass ,class-name ,@body))))

(defword noun (word)
((%number :initarg :number :reader %number)
(%case :initarg :case :initform :nominative :reader %case)
((%number :initarg :number :reader number)
(%case :initarg :case :initform :nominative :reader case)
(%gender :initarg :gender :initform :any :reader gender)
(%singular :initarg :singular :reader singular)))

(defword proper-noun (noun) ())

(defword negative-mixin ()
((%negative :initform nil :initarg :negative :reader negative)))

(defword contraction-mixin ()
((%contraction :initform nil :initarg :contraction :reader contraction)))

(defword verb (word negative-mixin contraction-mixin)
(defword verb (word)
((%person :initform :any :initarg :person :reader person)
(%number :initform :any :initarg :number :reader %number)
(%number :initform :any :initarg :number :reader number)
(%tense :initarg :tense :reader tense)
(%mood :initarg :mood :reader mood)
(%negative :initarg :negative :initform nil :reader negative)
(%contraction :initarg :contraction :initform nil :reader contraction)
(%strength :initarg :strength :initform :weak :reader strength)
Expand All @@ -47,11 +37,12 @@

(defword adverb (word) ())

(defword pronoun (word negative-mixin)
(defword pronoun (word)
((%person :initarg :person :reader person)
(%number :initarg :number :reader %number)
(%number :initarg :number :reader number)
(%gender :initarg :gender :reader gender)
(%case :initarg :case :initform :nominative :reader %case)))
(%case :initarg :case :initform :nominative :reader case)
(%negative :initarg :negative :initform nil :reader negative)))

(defword personal-pronoun (pronoun) ())

Expand All @@ -69,10 +60,10 @@
(defword subordinate (conjunction) ())

(defword determiner (word)
((%number :initform :any :initarg :number :reader %number)))
((%number :initform :any :initarg :number :reader number)))

(defword article (determiner)
((%number :initarg :number :reader %number)
((%number :initarg :number :reader number)
(%determinate :initform nil :initarg :determinate :reader determinate)))

(defword quantifier (determiner) ())
Expand All @@ -90,9 +81,7 @@

(defword verb-verb-contraction (verb) ())

(defun word (&rest arguments &key type spelling &allow-other-keys)
(let ((arguments (copy-list arguments)))
(remf arguments :type)
(insert (apply #'make-instance (gethash type *word-types*) arguments)
spelling
*dictionary*)))
(defun word (spelling type &rest initargs &key &allow-other-keys)
(declare (ignore spelling))
(let ((class (gethash type *word-types*)))
(apply #'make-instance class initargs)))
File renamed without changes.
1 change: 1 addition & 0 deletions data/version-string.sexp
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"0.0.2"
Loading

0 comments on commit a42be41

Please sign in to comment.