Skip to content

Commit

Permalink
add flags for string normalization
Browse files Browse the repository at this point in the history
  • Loading branch information
komed3 committed Oct 23, 2023
1 parent 00e1cc2 commit 5b70b9c
Showing 1 changed file with 68 additions and 29 deletions.
97 changes: 68 additions & 29 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* lightweight npm package to calculate string similarity
*
* @author komed3 (Paul Köhler)
* @version 1.0.1
* @version 1.0.2
* @license MIT
*/

Expand All @@ -17,11 +17,40 @@
/**
 * convert a value to a string and apply optional normalization flags
 *
 * supported flags (matched case-insensitively, applied in the order given):
 *   i   lower-case the string (case insensitive comparison)
 *   s   remove all whitespace
 * any other flag character is ignored
 *
 * @param {String} str string
 * @param {Null|String} flags options
 * @returns normalized string
 */
const normalize = ( str, flags = null ) => {

    let result = str.toString();

    /* a missing / empty flags value means "no normalization" */

    for( const f of ( flags || '' ).toString().split( '' ) ) {

        switch( f.toLowerCase() ) {

            case 'i':
                result = result.toLowerCase();
                break;

            case 's':
                /* \s+ is the direct spelling of the former [^\S]+ */
                result = result.replace( /\s+/g, '' );
                break;

            default:
                /* unknown flag: do nothing */
                break;

        }

    }

    return result;

};

Expand Down Expand Up @@ -51,17 +80,18 @@ const str2bigrams = ( str ) => {
* @param {String} algo algorithm to use
* @param {String} a string 1
* @param {String} b string 2
* @param {Null|String} flags options
* @returns similarity
*/
const cpmByAlgo = ( algo, a, b ) => {
const cpmByAlgo = ( algo, a, b, flags ) => {

switch( algo ) {

case 'levenshtein':
return levenshtein( a, b );
return levenshtein( a, b, flags );

case 'diceCoefficient':
return diceCoefficient( a, b );
return diceCoefficient( a, b, flags );

default:
return 0;
Expand All @@ -75,19 +105,20 @@ const cpmByAlgo = ( algo, a, b ) => {
* @param {String} algo algorithm to use
* @param {String} test test string
* @param {Array} arr targets to test
* @param {Null|String} flags options
* @returns closest target
*/
const findClosest = ( algo, test, arr ) => {
const findClosest = ( algo, test, arr, flags ) => {

let best = -Infinity,
idx = 0,
pct;

/* search for closest element in arr */

arr.forEach( ( str, i ) => {
[ ...arr ].forEach( ( str, i ) => {

pct = cpmByAlgo( algo, test, str );
pct = cpmByAlgo( algo, test, str, flags );

if( pct > best ) {

Expand All @@ -111,18 +142,19 @@ const findClosest = ( algo, test, arr ) => {
* @param {String} algo algorithm to use
* @param {String} test test string
* @param {Array} arr targets to test
* @param {Null|String} flags options
* @returns sorted matches
*/
const bestMatch = ( algo, test, arr ) => {
const bestMatch = ( algo, test, arr, flags = null ) => {

let matches = [],
pct;

/* calculate similarity for each item in arr */

arr.forEach( ( str ) => {
[ ...arr ].forEach( ( str ) => {

pct = cpmByAlgo( algo, test, str );
pct = cpmByAlgo( algo, test, str, flags );

matches.push( {
target: str,
Expand Down Expand Up @@ -152,14 +184,15 @@ const bestMatch = ( algo, test, arr ) => {
* calculate levenshtein similarity (as a fraction, 0..1)
* @param {String} a string 1
* @param {String} b string 2
* @param {Null|String} flags options
* @returns similarity 0..1
*/
const levenshtein = ( a, b ) => {
const levenshtein = ( a, b, flags = null ) => {

/* normalize string */

a = normalize( a );
b = normalize( b );
a = normalize( a, flags );
b = normalize( b, flags );

if( a == b ) {

Expand Down Expand Up @@ -196,14 +229,15 @@ const levenshtein = ( a, b ) => {
* get levenshtein distance
* @param {String} a string 1
* @param {String} b string 2
* @param {Null|String} flags options
* @returns distance
*/
const levenshteinDistance = ( a, b ) => {
const levenshteinDistance = ( a, b, flags = null ) => {

/* normalize string */

a = normalize( a );
b = normalize( b );
a = normalize( a, flags );
b = normalize( b, flags );

if( a == b ) {

Expand Down Expand Up @@ -281,38 +315,41 @@ const levenshteinDistance = ( a, b ) => {
* search for closest target to test string
* @param {String} test test string
* @param {Array} arr targets to test
* @param {Null|String} flags options
* @returns closest target
*/
const levenshteinClosest = ( test, arr, flags = null ) => {

    /* pass the normalization flags on so findClosest receives them */

    return findClosest( 'levenshtein', test, arr, flags );

};

/**
 * sort best matches to test string (levenshtein algorithm)
 *
 * thin wrapper: delegates to bestMatch with the 'levenshtein' algorithm
 * and forwards the normalization flags unchanged
 *
 * @param {String} test test string
 * @param {Array} arr targets to test
 * @param {Null|String} flags options
 * @returns sorted matches
 */
const levenshteinMatch = ( test, arr, flags = null ) => {

    return bestMatch( 'levenshtein', test, arr, flags );

};

/**
* calculate dice coefficient
* @param {String} a string 1
* @param {String} b string 2
* @param {Null|String} flags options
* @returns dice coefficient
*/
const diceCoefficient = ( a, b ) => {
const diceCoefficient = ( a, b, flags = null ) => {

/* normalize string */

a = normalize( a );
b = normalize( b );
a = normalize( a, flags );
b = normalize( b, flags );

if( a == b ) {

Expand Down Expand Up @@ -352,23 +389,25 @@ const diceCoefficient = ( a, b ) => {
* search for closest target to test string
* @param {String} test test string
* @param {Array} arr targets to test
* @param {Null|String} flags options
* @returns closest target
*/
const diceClosest = ( test, arr, flags = null ) => {

    /* pass the normalization flags on so findClosest receives them */

    return findClosest( 'diceCoefficient', test, arr, flags );

};

/**
 * sort best matches to test string (dice coefficient algorithm)
 *
 * thin wrapper: delegates to bestMatch with the 'diceCoefficient' algorithm
 * and forwards the normalization flags unchanged
 *
 * @param {String} test test string
 * @param {Array} arr targets to test
 * @param {Null|String} flags options
 * @returns sorted matches
 */
const diceMatch = ( test, arr, flags = null ) => {

    return bestMatch( 'diceCoefficient', test, arr, flags );

};

Expand Down

0 comments on commit 5b70b9c

Please sign in to comment.