diff --git a/docusaurus-search-local/src/client/utils/smartQueries.ts b/docusaurus-search-local/src/client/utils/smartQueries.ts index 3a10d1cb..29fe8d7b 100644 --- a/docusaurus-search-local/src/client/utils/smartQueries.ts +++ b/docusaurus-search-local/src/client/utils/smartQueries.ts @@ -81,17 +81,6 @@ export function smartQueries( refinedTerms = terms.slice(); } - const MAX_TERMS = 10; - if (refinedTerms.length > MAX_TERMS) { - // Sort terms by length in ascending order., - // And keep the top 10 terms. - refinedTerms.sort((a, b) => a.length - b.length); - refinedTerms.splice(MAX_TERMS, refinedTerms.length - MAX_TERMS); - - terms.sort((a, b) => a.length - b.length); - terms.splice(MAX_TERMS, terms.length - MAX_TERMS); - } - // Also try to add extra terms which miss one of the searched tokens, // when the term contains 3 or more tokens, // to improve the search precision. diff --git a/docusaurus-search-local/src/client/utils/smartTerms.ts b/docusaurus-search-local/src/client/utils/smartTerms.ts index f340894b..93c23e46 100644 --- a/docusaurus-search-local/src/client/utils/smartTerms.ts +++ b/docusaurus-search-local/src/client/utils/smartTerms.ts @@ -1,6 +1,9 @@ import { SmartTerm } from "../../shared/interfaces"; import { cutZhWords } from "./cutZhWords"; +const MAX_TERMS = 12; +const HALF_MAX_TERMS = MAX_TERMS / 2; + /** * Get all possible terms for a list of tokens consists of words mixed in Chinese and non-Chinese, * by a Chinese words dictionary. @@ -14,19 +17,53 @@ export function smartTerms( tokens: string[], zhDictionary: string[] ): SmartTerm[] { - const tokenTerms = tokens.map((token) => { - if (/\p{Unified_Ideograph}/u.test(token)) { - return cutZhWords(token, zhDictionary); + const tokenTerms = tokens + .map((token) => { + if (/\p{Unified_Ideograph}/u.test(token)) { + return cutZhWords(token, zhDictionary); + } else { + return [{ value: token }]; + } + }) + .slice(0, MAX_TERMS); + + const tokenTermsThatAreMultiple = tokenTerms.filter( + (tokenTerm) => tokenTerm.length > 1 + ); + + let termsProduct = 1; + let overflowed = false; + + for (const tokenTerm of tokenTermsThatAreMultiple) { + if (overflowed) { + tokenTerm.splice(1, tokenTerm.length - 1); } else { - return [{ value: token }]; + if (tokenTerm.length > HALF_MAX_TERMS) { + tokenTerm.splice(HALF_MAX_TERMS, tokenTerm.length - HALF_MAX_TERMS); + } + const product = termsProduct * tokenTerm.length; + if (product >= MAX_TERMS) { + if (product > MAX_TERMS) { + const max = Math.floor(MAX_TERMS / termsProduct); + tokenTerm.splice(max, tokenTerm.length - max); + termsProduct = max * termsProduct; + } else { + termsProduct = product; + } + if (termsProduct > HALF_MAX_TERMS) { + overflowed = true; + } + } else { + termsProduct = product; + } } - }); + } // Get all possible combinations of terms. const terms: SmartTerm[] = []; function combine(index: number, carry: SmartTerm): void { - if (index === tokenTerms.length) { - terms.push(carry); + if (index === tokenTerms.length || carry.length >= MAX_TERMS) { + terms.push(carry.slice(0, MAX_TERMS)); return; } for (const term of tokenTerms[index]) {