thisandagain · nsantini · Sep 12, 2017 · Sep 12, 2017 · Sep 12, 2017 · Sep 12, 2017
diff --git a/lib/distance.js b/lib/distance.js
@@ -0,0 +1,33 @@
+var lev = require('levenshtein');
+var tokenize = require('./tokenize');
+
+/**
+ * Picks the candidate word nearest to the input by Levenshtein distance. A
+ * string `words` is tokenized first. Only a distance strictly below the
+ * length of `words.toString()` can win; otherwise '' is returned.
+ * @param  {string} string Input string
+ * @param  {string|Array} words Candidate words (string or list of strings)
+ * @return {string} The closest word in the list, or '' if none qualifies
+ */
+module.exports = function(string, words) {
+
+    var shortest = words.toString().length;
+    var bestFit  = '';
+
+    if (typeof words === 'string') {
+        words = tokenize(words);
+    }
+
+    words.forEach(function(word) {
+
+        var distance = lev(string, word);
+
+        if (distance < shortest) {
+            bestFit  = word;
+            shortest = distance;
+        }
+
+    });
+
+    return bestFit;
+};
diff --git a/lib/index.js b/lib/index.js
@@ -1,4 +1,6 @@
 var tokenize = require('./tokenize');
+var spelling = require('./spelling');
+var negated = require('./negation');
 var languageProcessor = require('./language-processor');
 
 /**
@@ -63,12 +65,17 @@ Sentiment.prototype.analyze = function (phrase, opts, callback) {
     // Iterate over tokens
     var i = tokens.length;
     while (i--) {
-        var obj = tokens[i];
+        var obj = spelling.getSpellCheckedAfinnWord(labels, tokens[i]);
         if (!labels.hasOwnProperty(obj)) continue;
+
+        // Check for negation
+        var tokenScore = labels[obj];
+        if (i > 0 && negated(labels, tokens, i)) {
+            tokenScore = -tokenScore;
+        }
         words.push(obj);
 
         // Apply scoring strategy
-        var tokenScore = labels[obj];
         // eslint-disable-next-line max-len
         tokenScore = languageProcessor.applyScoringStrategy(languageCode, tokens, i, tokenScore);
         if (tokenScore > 0) positive.push(obj);

diff --git a/lib/negation.js b/lib/negation.js
@@ -0,0 +1,43 @@
+var spelling = require('./spelling');
+
+/**
+ * Lookup of negator tokens, with and without apostrophes; a truthy entry
+ * marks a word that "flips" the sentiment of a word that comes after it.
+ */
+var negators = {
+    'cant': 1,
+    'can\'t': 1,
+    'dont': 1,
+    'don\'t': 1,
+    'doesnt': 1,
+    'doesn\'t': 1,
+    'not': 1,
+    'non': 1,
+    'wont': 1,
+    'won\'t': 1,
+    'isnt': 1,
+    'isn\'t': 1
+};
+
+/**
+ * Evaluates whether the current token is negated by a previous token. Scans
+ * backwards from pos - 1 and stops at the first negator (true) or at the
+ * first spell-checked token that is an own key of afinn (false).
+ * @param {object} afinn AFINN word map; only own-key membership is checked
+ * @param {array} tokens list of tokens being evaluated
+ * @param {int} pos position of the current word in the tokens list
+ * @return {boolean} true if the token at pos is negated, false otherwise
+ */
+module.exports = function negated(afinn, tokens, pos) {
+    while (pos--) {
+        if (negators[tokens[pos]]) {
+            return true;
+        }
+        var word = spelling.getSpellCheckedWord(tokens[pos]);
+        if (negators[word]) {
+            return true;
+        } else if (afinn.hasOwnProperty(word)) {
+            return false;
+        }
+    }
+    return false;
+};
diff --git a/lib/spelling.js b/lib/spelling.js
@@ -0,0 +1,39 @@
+var spellChecker = require ('spellchecker');
+var distance = require('./distance');
+
+/**
+ * Spell-correct a word, choosing among the checker's suggestions via the
+ * ./distance helper. Both return the input word when no correction applies.
+ * getSpellCheckedAfinnWord only accepts a correction that is an Afinn key.
+ */
+module.exports = {
+    getSpellCheckedAfinnWord: function (afinn, word) {
+        if (!afinn.hasOwnProperty(word) && spellChecker.isMisspelled(word)) {
+            var checked = spellChecker.getCorrectionsForMisspelling(word);
+            if (checked.length === 0) {
+                return word;
+            } else {
+                var closest = distance(word, checked);
+                if (closest && afinn.hasOwnProperty(closest)) {
+                    return closest;
+                }
+            }
+        }
+        return word;
+    },
+
+    getSpellCheckedWord: function (word) {
+        if (spellChecker.isMisspelled(word)) {
+            var checked = spellChecker.getCorrectionsForMisspelling(word);
+            if (checked.length === 0) {
+                return word;
+            } else {
+                var closest = distance(word, checked);
+                if (closest) {
+                    return closest;
+                }
+            }
+        }
+        return word;
+    }
+};
diff --git a/package.json b/package.json
@@ -36,5 +36,9 @@
   },
   "engines": {
     "node": ">=8.0"
+  },
+  "dependencies": {
+    "levenshtein": "^1.0.5",
+    "spellchecker": "^3.4.3"
   }
 }
diff --git a/test/integration/sync_corpus.js b/test/integration/sync_corpus.js
@@ -8,7 +8,7 @@ var result = sentiment.analyze(dataset);
 
 test('synchronous corpus', function (t) {
     t.type(result, 'object');
-    t.equal(result.score, -3);
+    t.equal(result.score, -19);
     t.equal(result.tokens.length, 1416);
     t.equal(result.words.length, 73);
     t.end();