diff --git a/README.md b/README.md index df61f230..96379ff6 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![CI Status](https://github.com/easyops-cn/docusaurus-search-local/workflows/CI/badge.svg?event=push)](https://github.com/easyops-cn/docusaurus-search-local/actions?query=workflow%3ACI) [![Coverage Status](https://coveralls.io/repos/github/easyops-cn/docusaurus-search-local/badge.svg?branch=master)](https://coveralls.io/github/easyops-cn/docusaurus-search-local?branch=master) -An offline/local search plugin/theme for [Docusaurus v2](https://v2.docusaurus.io/), which supports multiple languages, especially optimized for language of zh. +An offline/local search plugin/theme for [Docusaurus v2](https://docusaurus.io/), which supports multiple languages, especially optimized for language of zh. > Originally forked from [cmfcmf/docusaurus-search-local](https://github.com/cmfcmf/docusaurus-search-local). > diff --git a/docusaurus-search-local/src/client/utils/highlightStemmed.spec.ts b/docusaurus-search-local/src/client/utils/highlightStemmed.spec.ts index 2f6e4be8..43997e53 100644 --- a/docusaurus-search-local/src/client/utils/highlightStemmed.spec.ts +++ b/docusaurus-search-local/src/client/utils/highlightStemmed.spec.ts @@ -1,8 +1,4 @@ -import { - MetadataPosition, - ChunkIndexRef, - HighlightChunk, -} from "../../shared/interfaces"; +import { MetadataPosition, HighlightChunk } from "../../shared/interfaces"; import { highlightStemmed, splitIntoChunks } from "./highlightStemmed"; jest.mock("./proxiedGenerated"); @@ -160,11 +156,10 @@ describe("splitIntoChunks", () => { ])( "splitIntoChunks('%s', %j, %j, 0, 0) should return %j", (text, positions, tokens, chunks, chunkIndex) => { - const chunkIndexRef = {} as ChunkIndexRef; - expect( - splitIntoChunks(text, positions, tokens, 0, 0, chunkIndexRef) - ).toEqual(chunks); - expect(chunkIndexRef.chunkIndex).toBe(chunkIndex); + expect(splitIntoChunks(text, positions, tokens)).toEqual({ + chunkIndex, + chunks, + }); } ); }); diff --git a/docusaurus-search-local/src/client/utils/highlightStemmed.ts b/docusaurus-search-local/src/client/utils/highlightStemmed.ts index 7312622e..b0158058 100644 --- a/docusaurus-search-local/src/client/utils/highlightStemmed.ts +++ b/docusaurus-search-local/src/client/utils/highlightStemmed.ts @@ -1,8 +1,4 @@ -import { - HighlightChunk, - MetadataPosition, - ChunkIndexRef, -} from "../../shared/interfaces"; +import { HighlightChunk, MetadataPosition } from "../../shared/interfaces"; import { escapeHtml } from "./escapeHtml"; import { highlight } from "./highlight"; import { looseTokenize } from "./looseTokenize"; @@ -14,22 +10,12 @@ export function highlightStemmed( tokens: string[], maxLength = searchResultContextMaxLength ): string { - const chunkIndexRef: ChunkIndexRef = { - chunkIndex: -1, - }; - const chunks = splitIntoChunks( - content, - positions, - tokens, - 0, - 0, - chunkIndexRef - ); + const { chunkIndex, chunks } = splitIntoChunks(content, positions, tokens); - const leadingChunks = chunks.slice(0, chunkIndexRef.chunkIndex); - const firstChunk = chunks[chunkIndexRef.chunkIndex]; + const leadingChunks = chunks.slice(0, chunkIndex); + const firstChunk = chunks[chunkIndex]; const html: string[] = [firstChunk.html]; - const trailingChunks = chunks.slice(chunkIndexRef.chunkIndex + 1); + const trailingChunks = chunks.slice(chunkIndex + 1); let currentLength = firstChunk.textLength; let leftPadding = 0; @@ -80,58 +66,59 @@ export function highlightStemmed( export function splitIntoChunks( content: string, positions: MetadataPosition[], - tokens: string[], - positionIndex: number, - cursor: number, - chunkIndexRef?: ChunkIndexRef -): HighlightChunk[] { + tokens: string[] +): { + chunkIndex: number; + chunks: HighlightChunk[]; +} { const chunks: HighlightChunk[] = []; - const [start, length] = positions[positionIndex]; - if (start < cursor) { + let positionIndex = 0; + let cursor = 0; + let chunkIndex = -1; + while (positionIndex < positions.length) { + const [start, length] = positions[positionIndex]; positionIndex += 1; - if (positionIndex < positions.length) { - chunks.push( - ...splitIntoChunks(content, positions, tokens, positionIndex, cursor) - ); + if (start < cursor) { + continue; } - } else { + if (start > cursor) { - chunks.push( - ...looseTokenize(content.substring(cursor, start)).map((token) => ({ + const leadingChunks = looseTokenize(content.substring(cursor, start)).map( + (token) => ({ html: escapeHtml(token), textLength: token.length, - })) + }) ); + for (const item of leadingChunks) { + chunks.push(item); + } } - if (chunkIndexRef) { - chunkIndexRef.chunkIndex = chunks.length; + + if (chunkIndex === -1) { + chunkIndex = chunks.length; } + + cursor = start + length; chunks.push({ - html: highlight(content.substr(start, length), tokens, true), + html: highlight(content.substring(start, cursor), tokens, true), textLength: length, }); - const nextCursor = start + length; - positionIndex += 1; - if (positionIndex < positions.length) { - chunks.push( - ...splitIntoChunks( - content, - positions, - tokens, - positionIndex, - nextCursor - ) - ); - } else { - if (nextCursor < content.length) { - chunks.push( - ...looseTokenize(content.substr(nextCursor)).map((token) => ({ - html: escapeHtml(token), - textLength: token.length, - })) - ); - } + } + + if (cursor < content.length) { + const trailingChunks = looseTokenize(content.substring(cursor)).map( + (token) => ({ + html: escapeHtml(token), + textLength: token.length, + }) + ); + for (const item of trailingChunks) { + chunks.push(item); } } - return chunks; + + return { + chunkIndex, + chunks, + }; } diff --git a/docusaurus-search-local/src/client/utils/looseTokenize.ts b/docusaurus-search-local/src/client/utils/looseTokenize.ts index 8041b595..48b95357 100644 --- a/docusaurus-search-local/src/client/utils/looseTokenize.ts +++ b/docusaurus-search-local/src/client/utils/looseTokenize.ts @@ -12,11 +12,11 @@ export function looseTokenize(content: string): string[] { break; } if ((match.index as number) > 0) { - tokens.push(text.substr(0, match.index)); + tokens.push(text.substring(0, match.index)); } tokens.push(match[0]); start += (match.index as number) + match[0].length; - text = content.substr(start); + text = content.substring(start); } return tokens; } diff --git a/docusaurus-search-local/src/server/utils/getCondensedText.spec.ts b/docusaurus-search-local/src/server/utils/getCondensedText.spec.ts index ea0179f4..a5682a5a 100644 --- a/docusaurus-search-local/src/server/utils/getCondensedText.spec.ts +++ b/docusaurus-search-local/src/server/utils/getCondensedText.spec.ts @@ -12,13 +12,17 @@ describe("getCondensedText", () => {
  • Pear
  • Goodbye fruits. + + Hello
    + World +
    `; const $ = cheerio.load(html); test.each<[string, string]>([ ["#fruits", "Apple Orange Pear"], - ["#root", "Hello fruits. Apple Orange Pear Goodbye fruits."], + ["#root", "Hello fruits. Apple Orange Pear Goodbye fruits. Hello World"], ])("getCondensedText($('%s'), $) should return '%s'", (selector, text) => { expect(getCondensedText($(selector).get(), $)).toBe(text); }); diff --git a/docusaurus-search-local/src/server/utils/getCondensedText.ts b/docusaurus-search-local/src/server/utils/getCondensedText.ts index 81bcb649..5399ed8c 100644 --- a/docusaurus-search-local/src/server/utils/getCondensedText.ts +++ b/docusaurus-search-local/src/server/utils/getCondensedText.ts @@ -51,6 +51,9 @@ export function getCondensedText( return element.data as string; } if (element.type === "tag") { + if (element.name === "br") { + return " "; + } const content = getText($(element).contents().get()); if (BLOCK_TAGS.has(element.name)) { return " " + content + " "; diff --git a/docusaurus-search-local/src/shared/interfaces.ts b/docusaurus-search-local/src/shared/interfaces.ts index c9f3f5f5..a1bcb706 100644 --- a/docusaurus-search-local/src/shared/interfaces.ts +++ b/docusaurus-search-local/src/shared/interfaces.ts @@ -42,10 +42,6 @@ export interface HighlightChunk { textLength: number; } -export interface ChunkIndexRef { - chunkIndex: number; -} - /** * properties of document is shorten for smaller serialized search index. */