-
Notifications
You must be signed in to change notification settings - Fork 310
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
#116 Allow token processing "middleware" #144
base: master
Are you sure you want to change the base?
Changes from 15 commits
588de8d
90417ad
131e4b8
9d5218f
6a0521f
08409bf
7e7dc9e
58b1a70
52e62cf
d08dfc2
aaacd29
ecc5de2
6a298f7
1f03b7d
ef9ecda
d22e26f
9d25b4b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
var read = require('fs').readFileSync; | ||
var dictiaonary = null; | ||
|
||
module.exports = function() { | ||
if (dictiaonary === null) { | ||
var base = require.resolve('dictionary-en-us'); | ||
dictiaonary = { | ||
'aff': read(base.replace('.js', '.aff'), 'utf-8'), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you explain why we need to perform such gymnastics when loading the dictionary, instead of simply requiring it and passing it over to `nspell`? Also, there is a typo in the spelling of the "dictionary" variable. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fixed the typo. Regarding the "gymnastics": the dictionary can be loaded in an async fashion, but the whole sentiment library works synchronously. So I decided not to pollute the whole library with callbacks or promises, and given that I couldn't use async/await to do it (eslint complained about it), I decided to load it this way. |
||
'dic': read(base.replace('.js', '.dic'), 'utf-8') | ||
}; | ||
} | ||
return dictiaonary; | ||
}; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
module.exports = { | ||
labels: require('./labels.json'), | ||
scoringStrategy: require('./scoring-strategy') | ||
scoringStrategy: require('./scoring-strategy'), | ||
getDictionary: require('./dictionary') | ||
}; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
/** | ||
* Spellchecking library | ||
*/ | ||
var spelling = require('../../lib/spelling'); | ||
|
||
/** | ||
* These words "flip" the sentiment of the following word. | ||
*/ | ||
var negators = require('./negators.json'); | ||
|
||
/** | ||
* Language labels and scores | ||
*/ | ||
var labels = require('./labels.json'); | ||
|
||
/** | ||
* Evaluates wether the current token is negated by a previous token | ||
* | ||
* @param {array} tokens list of tokens being evaluated | ||
* @param {int} pos position of the current word in the tokens list | ||
* | ||
* @return {boolean} true if the current pos is being negaed, false otherwise | ||
*/ | ||
module.exports = function negated(tokens, pos, spellCheck) { | ||
while (pos--) { | ||
if (negators[tokens[pos]]) { | ||
return true; | ||
} | ||
var word = spellCheck ? | ||
spelling.getSpellCheckedWord(tokens[pos]) : | ||
tokens[pos]; | ||
if (negators[word]) { | ||
return true; | ||
} else if (labels.hasOwnProperty(word)) { | ||
return false; | ||
} | ||
} | ||
return false; | ||
}; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
var lev = require('levenshtein'); | ||
|
||
/** | ||
* Finds the closest match between a statement and a body of words using | ||
* Levenshtein Distance | ||
* | ||
* @param {string} string Input string | ||
* @param {string/array} words List of strings to find closest | ||
* @return {string} The closest word in the list | ||
*/ | ||
module.exports = function(string, words) { | ||
|
||
var shortest = words.toString().length; | ||
var bestFit = ''; | ||
|
||
words.forEach(function(word) { | ||
|
||
var distance = lev(string, word); | ||
|
||
if (distance < shortest) { | ||
bestFit = word; | ||
shortest = distance; | ||
} | ||
|
||
}); | ||
|
||
return bestFit; | ||
}; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
var nspell = require('nspell'); | ||
var distance = require('./distance'); | ||
var spellChecker = null; | ||
|
||
/** | ||
* These two functions atempt to spell check and correct a given word, using | ||
* Levenshtein Distance to choose the most appropriate correction. | ||
* getSpellCheckedAfinnWord also looks for the word to be present on Afinn | ||
*/ | ||
module.exports = { | ||
setUp: function(dictionaray) { | ||
spellChecker = nspell(dictionaray); | ||
}, | ||
getSpellCheckedAfinnWord: function (afinn, word) { | ||
if (!afinn.hasOwnProperty(word) && !spellChecker.correct(word)) { | ||
var checked = spellChecker.suggest(word); | ||
if (checked.length === 0) { | ||
return word; | ||
} else { | ||
var closest = distance(word, checked); | ||
if (closest && afinn.hasOwnProperty(closest)) { | ||
return closest; | ||
} | ||
} | ||
} | ||
return word; | ||
}, | ||
|
||
getSpellCheckedWord: function (word) { | ||
if (!spellChecker.correct(word)) { | ||
var checked = spellChecker.suggest(word); | ||
if (checked.length === 0) { | ||
return word; | ||
} else { | ||
var closest = distance(word, checked); | ||
if (closest) { | ||
return closest; | ||
} | ||
} | ||
} | ||
return word; | ||
} | ||
}; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
var test = require('tap').test; | ||
var Sentiment = require('../../lib/index'); | ||
var sentiment = new Sentiment(); | ||
|
||
// Fixture: a five-token phrase in which exactly one word is expected to be
// scored, with a final score of 3.
var phrase = 'this is not very bad';
var analysis = sentiment.analyze(phrase);

test('synchronous negation', function (check) {
    check.type(analysis, 'object');

    // Score and its per-token average (3 / 5 tokens).
    check.equal(analysis.score, 3);
    check.equal(analysis.comparative, 0.6);

    // Token and scored-word counts.
    check.equal(analysis.tokens.length, 5);
    check.equal(analysis.words.length, 1);

    check.end();
});
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
var test = require('tap').test; | ||
var Sentiment = require('../../lib/index'); | ||
var sentiment = new Sentiment(); | ||
|
||
// Misspelled input shared by the first two tests.
var input = 'I hatee you';

// Each test computes its own result inside its callback. A single shared
// variable reassigned between test() calls would make every assertion depend
// on the runner invoking the callback before the next reassignment.

test('synchronous spell checking active', function (t) {
    var result = sentiment.analyze(input, { spellCheck: true });

    t.type(result, 'object');
    t.equal(result.score, -3);
    t.equal(result.comparative, -1);
    t.equal(result.tokens.length, 3);
    t.equal(result.words.length, 1);
    t.end();
});

test('synchronous spell checking inactive', function (t) {
    var result = sentiment.analyze(input);

    t.type(result, 'object');
    t.equal(result.score, 0);
    t.equal(result.comparative, 0);
    t.equal(result.tokens.length, 3);
    t.equal(result.words.length, 0);
    t.end();
});

test('synchronous spell checking active with negation', function (t) {
    var result = sentiment.analyze('I dontt hate you', { spellCheck: true });

    t.type(result, 'object');
    t.equal(result.score, 3);
    t.equal(result.comparative, 0.75);
    t.equal(result.tokens.length, 4);
    t.equal(result.words.length, 1);
    t.end();
});
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What do these `aff` and `dic` properties correspond to? Could you give an example?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Explained more in the README file
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doesn't seem to be the case, are you sure that you have pushed your changes?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
new commit, it references the explanation I gave in the "Spell checked example" section