From 3fa369c1cd2b0d6d0a7964638fb35ace5010e8b8 Mon Sep 17 00:00:00 2001 From: komed3 Date: Wed, 27 Dec 2023 11:21:22 +0100 Subject: [PATCH] add threshold to specify the minimum required similarity --- README.md | 18 +++++++++++------- index.js | 33 ++++++++++++++++++++------------- 2 files changed, 31 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 7c2acc5..5615637 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ This lightweight npm package can be used to __calculate the similarity of string ## Install -Using Node.js install the package using shell command: +Using __Node.js__, install the package with the following shell command: ```sh npm install cmpstr @@ -63,7 +63,7 @@ let matches = cmpstr.levenshteinMatch( 'best', [ Using JavaScript load this package by embed this file via [jsDelivr](https://www.jsdelivr.com/package/npm/cmpstr): ```js -import cmpstr from "https://cdn.jsdelivr.net/npm/cmpstr@1.0.2/+esm"; +import cmpstr from "https://cdn.jsdelivr.net/npm/cmpstr@1.0.3/+esm"; ``` Remember: To use ``import`` you need to load your JavaScript file as ``type="module"``. @@ -91,9 +91,9 @@ Returns the match percentage of two strings ``a`` and ``b``. The output value is Returns the best match of the string ``str`` against the array ``arr`` of passed strings. The function returns the most closely matched string found in the array. -#### ``levenshteinMatch( str, arr [, flags = null ] )`` +#### ``levenshteinMatch( str, arr [, flags = null [, threshold = 0 ] ] )`` -Calculates the similarity of all strings contained in the array ``arr`` according to Levenshtein compared to ``str`` and returns an array of all samples sorted by matching in descending order. +Calculates the similarity of all strings contained in the array ``arr`` according to Levenshtein compared to ``str`` and returns an array of all samples sorted by matching in descending order. The ``threshold`` specifies the minimum required similarity; samples that score below it are omitted from the result.
### Sørensen-Dice coefficient @@ -105,9 +105,9 @@ This function evaluates the similarity of two given strings ``a`` and ``b`` as p As another way to find the best match between the string ``str`` and a given array ``arr`` of samples, this function uses the Sørensen-Dice coefficient. It returns the most matching string as well. -#### ``diceMatch( str, arr [, flags = null ] )`` +#### ``diceMatch( str, arr [, flags = null [, threshold = 0 ] ] )`` -Calculates the similarity of all strings contained in the array ``arr`` according to Sørensen-Dice coefficient compared to ``str`` and returns an array of all samples sorted by matching in descending order. +Calculates the similarity of all strings contained in the array ``arr`` according to Sørensen-Dice coefficient compared to ``str`` and returns an array of all samples sorted by matching in descending order. The ``threshold`` specifies the minimum required similarity; samples that score below it are omitted from the result. ### Flags @@ -120,6 +120,10 @@ Each method can be passed the ``flags`` options listed below: ## Patch notes +### 1.0.3 + +* Add ``threshold`` to specify the minimum required similarity + ### 1.0.2 * Add normalize options ``i`` and ``s`` @@ -131,4 +135,4 @@ Each method can be passed the ``flags`` options listed below: ### 1.0.0 -* Initial release +* Initial release \ No newline at end of file diff --git a/index.js b/index.js index 5f912f0..b5a4169 100644 --- a/index.js +++ b/index.js @@ -3,11 +3,11 @@ * lightweight npm package to calculate string similarity * * @author komed3 (Paul Köhler) - * @version 1.0.2 + * @version 1.0.3 * @license MIT */ -'use strict' +'use strict'; /** * basic functions @@ -83,7 +83,7 @@ const str2bigrams = ( str ) => { * @param {Null|String} flags options * @returns similarity */ -const cpmByAlgo = ( algo, a, b, flags ) => { +const cpmByAlgo = ( algo, a, b, flags = null ) => { switch( algo ) { @@ -108,7 +108,7 @@ const cpmByAlgo = ( algo, a, b, flags ) => { * @param {Null|String} flags options * @returns closest target */ -const
findClosest = ( algo, test, arr, flags ) => { +const findClosest = ( algo, test, arr, flags = null ) => { let best = -Infinity, idx = 0, @@ -143,9 +143,10 @@ const findClosest = ( algo, test, arr, flags ) => { * @param {String} test test string * @param {Array} arr targets to test * @param {Null|String} flags options + * @param {Number} threshold minimum required similarity * @returns sorted matches */ -const bestMatch = ( algo, test, arr, flags = null ) => { +const bestMatch = ( algo, test, arr, flags = null, threshold = 0 ) => { let matches = [], pct; @@ -156,10 +157,14 @@ const bestMatch = ( algo, test, arr, flags = null ) => { pct = cpmByAlgo( algo, test, str, flags ); - matches.push( { - target: str, - match: pct - } ); + if( pct >= threshold ) { + + matches.push( { + target: str, + match: pct + } ); + + } } ); @@ -329,11 +334,12 @@ const levenshteinClosest = ( test, arr, flags = null ) => { * @param {String} test test string * @param {Array} arr targets to test * @param {Null|String} flags options + * @param {Number} threshold minimum required similarity * @returns sorted matches */ -const levenshteinMatch = ( test, arr, flags = null ) => { +const levenshteinMatch = ( test, arr, flags = null, threshold = 0 ) => { - return bestMatch( 'levenshtein', test, arr, flags ); + return bestMatch( 'levenshtein', test, arr, flags, threshold ); }; @@ -403,11 +409,12 @@ const diceClosest = ( test, arr, flags = null ) => { * @param {String} test test string * @param {Array} arr targets to test * @param {Null|String} flags options + * @param {Number} threshold minimum required similarity * @returns sorted matches */ -const diceMatch = ( test, arr, flags = null ) => { +const diceMatch = ( test, arr, flags = null, threshold = 0 ) => { - return bestMatch( 'diceCoefficient', test, arr, flags ); + return bestMatch( 'diceCoefficient', test, arr, flags, threshold ); };