-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4 from s-expressionists/baseline-improvements
Baseline Improvements
- Loading branch information
Showing
13 changed files
with
323 additions
and
216 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
(in-package #:spell) | ||
|
||
(eval-when (:compile-toplevel :load-toplevel :execute) | ||
(defun load-english-dictionary (&key (verbose *compile-verbose*)) | ||
(let ((filename (asdf:system-relative-pathname | ||
"spell" "data/english.txt")) | ||
dictionary entry-count) | ||
(flet ((note (format-control &rest format-arguments) | ||
(when verbose | ||
(let ((stream *standard-output*)) | ||
(fresh-line stream) | ||
(pprint-logical-block (stream nil :per-line-prefix "; ") | ||
(apply #'format stream format-control format-arguments)) | ||
(force-output stream))))) | ||
(note "Loading dictionary ~:_~S" filename) | ||
(setf (values dictionary entry-count) (load-dictionary filename)) | ||
(note "Will dump dictionary with ~:_~:D entr~:@P" entry-count)) | ||
dictionary))) | ||
|
||
(defparameter *english-dictionary* #.(load-english-dictionary)) | ||
|
||
(defun english-lookup (word) | ||
(when (and word (string/= word "")) | ||
(let ((dictionary *english-dictionary*)) | ||
(flet ((try (variant) | ||
(let ((result (lookup variant dictionary))) | ||
(when result | ||
(return-from english-lookup result))))) | ||
(try word) | ||
(let* ((initial (aref word 0)) | ||
(downcased (char-downcase initial))) | ||
(unless (char= initial downcased) | ||
;; We change, for example, "Anti-Semitic" at the beginning | ||
;; of a sentence to "anti-Semitic" which is in the | ||
;; dictionary. | ||
(let ((decapitalized (copy-seq word))) | ||
(setf (aref decapitalized 0) downcased) | ||
(try decapitalized)) | ||
;; We change, for example, "PARAMETER" (which is typical | ||
;; for some commenting styles) to "parameter" which is in | ||
;; the dictionary. | ||
(when (every #'upper-case-p word) | ||
(try (string-downcase word))))))))) | ||
|
||
(declaim (inline english-text-char-p find-start find-end)) | ||
(defun english-text-char-p (character) | ||
(declare (character character)) | ||
(or (char= character #\') | ||
(alpha-char-p character))) | ||
|
||
(defun find-start (string position) | ||
(loop with length = (length string) | ||
for i from position | ||
when (or (= i length) | ||
(english-text-char-p (char string i))) | ||
return i)) | ||
|
||
(defun find-end (string position) | ||
(loop with length = (length string) | ||
for i from position | ||
when (or (= i length) | ||
(not (english-text-char-p (char string i)))) | ||
return i)) | ||
|
||
(defun english-check-paragraph (string) | ||
;; TODO optimize, mayhaps. We do not need to create subsequences of the | ||
;; paragraph, we can traverse the dictionary using offsets of that paragraph. | ||
(loop with position = 0 | ||
with length = (length string) | ||
for word-start = (find-start string position) | ||
for word-end = (find-end string word-start) | ||
until (= word-start word-end length) | ||
do (setf position word-end) | ||
unless (english-lookup (subseq string word-start word-end)) | ||
collect (cons word-start word-end))) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
(cl:defpackage #:spell | ||
(:use | ||
#:cl) | ||
|
||
(:shadow | ||
#:case | ||
#:number) | ||
|
||
;; Word protocol | ||
(:export | ||
#:base) | ||
|
||
;; Noun protocol | ||
(:export | ||
#:number | ||
#:case | ||
#:gender | ||
#:singular) | ||
|
||
;; Verb protocol | ||
(:export | ||
#:person | ||
#:tense | ||
#:mood | ||
#:negative | ||
#:contraction | ||
#:strength | ||
#:infinitive) | ||
|
||
;; Adjective protocol | ||
(:export | ||
#:degree) | ||
|
||
;; Pronoun protocol | ||
(:export | ||
#:person | ||
#:number | ||
#:gender | ||
#:case) | ||
|
||
;; Possessive pronoun protocol | ||
(:export | ||
#:refnumber) | ||
|
||
;; Determiner protocol | ||
(:export | ||
#:number) | ||
|
||
;; Article protocol | ||
(:export | ||
#:number | ||
#:determinate) | ||
|
||
;; Possessive adjective protocol | ||
(:export | ||
#:gender | ||
#:person | ||
#:refnumber) | ||
|
||
(:export | ||
#:english-lookup | ||
#:english-check-paragraph)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
(in-package #:spell) | ||
|
||
;;; Dictionary protocol | ||
|
||
(defgeneric lookup (string dictionary)) | ||
|
||
(defgeneric insert (object string dictionary)) | ||
|
||
;;; Trie node protocol | ||
|
||
(defgeneric entries (node)) | ||
|
||
(defgeneric find-child (char entries)) | ||
|
||
(defgeneric add-child (node char entries)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
(in-package #:spell) | ||
|
||
(defun word (spelling type &rest initargs &key &allow-other-keys) | ||
(declare (ignore spelling type initargs)) | ||
t) | ||
|
||
(defmethod %insert (object string (suffix (eql 0)) (node leaf-mixin)) | ||
(setf (entries node) t)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
(in-package #:spell) | ||
|
||
;;; Internal protocols | ||
|
||
(defgeneric %lookup (string suffix node) | ||
(:method ((string string) (suffix t) (node t)) | ||
'())) | ||
|
||
(defgeneric %insert (object string suffix node)) | ||
|
||
;;; Node classes | ||
|
||
(defclass node () ()) | ||
|
||
(defmethod make-load-form ((object node) &optional environment) | ||
(make-load-form-saving-slots object :environment environment)) | ||
|
||
(defmethod %insert ((object t) (string string) (suffix (eql 0)) (node node)) | ||
(change-class node 'leaf-node) | ||
(%insert object string 0 node)) | ||
|
||
(defmethod %insert ((object t) (string string) (suffix integer) (node node)) | ||
(change-class node 'interior-node) | ||
(%insert object string suffix node)) | ||
|
||
(defclass leaf-mixin () | ||
((%entries :initform '() :initarg :entries :accessor entries))) | ||
|
||
(defmethod %lookup ((string string) (suffix (eql 0)) (node leaf-mixin)) | ||
(entries node)) | ||
|
||
(defmethod %insert | ||
((object t) (string string) (suffix (eql 0)) (node leaf-mixin)) | ||
(push object (entries node))) | ||
|
||
(defmethod %insert | ||
((object t) (string string) (suffix integer) (node leaf-mixin)) | ||
(change-class node 'interior-leaf-node) | ||
(%insert object string suffix node)) | ||
|
||
(defclass interior-mixin () | ||
((%children :initform '() :initarg :children :accessor children))) | ||
|
||
(defmethod %lookup ((string string) (suffix integer) (node interior-mixin)) | ||
(let* ((character (aref string (- (length string) suffix))) | ||
(child (find-child character (children node)))) | ||
(if (null child) | ||
nil | ||
(%lookup string (1- suffix) child)))) | ||
|
||
(defmethod %insert | ||
((object t) (string string) (suffix integer) (node interior-mixin)) | ||
(let* ((character (aref string (- (length string) suffix))) | ||
(children (children node)) | ||
(child (find-child character children))) | ||
(when (null child) | ||
(setf child (make-instance 'node) | ||
(children node) (add-child child character children))) | ||
(%insert object string (1- suffix) child))) | ||
|
||
(defclass interior-node (interior-mixin node) ()) | ||
|
||
(defmethod %lookup ((string string) (suffix (eql 0)) (node interior-node)) | ||
'()) | ||
|
||
(defmethod %insert | ||
((object t) (string string) (suffix (eql 0)) (node interior-node)) | ||
(change-class node 'interior-leaf-node) | ||
(%insert object string 0 node)) | ||
|
||
(defclass leaf-node (leaf-mixin node) ()) | ||
|
||
(defclass interior-leaf-node (interior-mixin leaf-mixin node) ()) | ||
|
||
;;; Child node access methods | ||
|
||
(defmethod find-child ((char character) (entries list)) | ||
(cdr (assoc char entries))) | ||
|
||
(defmethod find-child ((char character) (entries vector)) | ||
(let ((index (- (char-code char) #.(char-code #\a)))) | ||
(aref entries index))) | ||
|
||
(defmethod add-child ((node t) (char character) (entries list)) | ||
(acons char node entries)) | ||
|
||
(defmethod add-child ((node t) (char character) (entries vector)) | ||
(let ((index (- (char-code char) #.(char-code #\a)))) | ||
(setf (aref entries index) node))) | ||
|
||
;;; Dictionary | ||
|
||
(defclass dictionary () | ||
((%contents :initform (make-instance 'node) :accessor contents))) | ||
|
||
(defmethod make-load-form ((object dictionary) &optional environment) | ||
(make-load-form-saving-slots object :environment environment)) | ||
|
||
(defmethod lookup ((string string) (dictionary dictionary)) | ||
(assert (plusp (length string))) | ||
(%lookup string (length string) (contents dictionary))) | ||
|
||
(defmethod insert ((object t) (string string) (dictionary dictionary)) | ||
(%insert object string (length string) (contents dictionary))) | ||
|
||
(defvar *dictionary*) | ||
|
||
(defun load-dictionary (filename) | ||
(with-open-file (stream filename) | ||
(let* ((counter 0) | ||
(*dictionary* (make-instance 'dictionary))) | ||
(do ((line (read-line stream nil stream) | ||
(read-line stream nil stream))) | ||
((eq stream line)) | ||
(unless (eq #\; (aref line 0)) | ||
(let ((string (concatenate 'string "(" line ")"))) | ||
(destructuring-bind | ||
(spelling &rest args &key type &allow-other-keys) | ||
(read-from-string string) | ||
(remf args :type) | ||
(let ((word (apply #'word spelling type args))) | ||
(insert word spelling *dictionary*))) | ||
(incf counter)))) | ||
(values *dictionary* counter)))) |
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
"0.0.2" |
Oops, something went wrong.