Skip to content

Commit

Permalink
fix: fix call stack overflow with large code blocks
Browse files Browse the repository at this point in the history
closes #164
  • Loading branch information
weareoutman committed Jul 22, 2022
1 parent c60d173 commit e0b2cfd
Show file tree
Hide file tree
Showing 7 changed files with 62 additions and 77 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
[![CI Status](https://github.com/easyops-cn/docusaurus-search-local/workflows/CI/badge.svg?event=push)](https://github.com/easyops-cn/docusaurus-search-local/actions?query=workflow%3ACI)
[![Coverage Status](https://coveralls.io/repos/github/easyops-cn/docusaurus-search-local/badge.svg?branch=master)](https://coveralls.io/github/easyops-cn/docusaurus-search-local?branch=master)

An offline/local search plugin/theme for [Docusaurus v2](https://v2.docusaurus.io/), which supports multiple languages and is especially optimized for Chinese (zh).
An offline/local search plugin/theme for [Docusaurus v2](https://docusaurus.io/), which supports multiple languages and is especially optimized for Chinese (zh).

> Originally forked from [cmfcmf/docusaurus-search-local](https://github.com/cmfcmf/docusaurus-search-local).
>
Expand Down
15 changes: 5 additions & 10 deletions docusaurus-search-local/src/client/utils/highlightStemmed.spec.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
import {
MetadataPosition,
ChunkIndexRef,
HighlightChunk,
} from "../../shared/interfaces";
import { MetadataPosition, HighlightChunk } from "../../shared/interfaces";
import { highlightStemmed, splitIntoChunks } from "./highlightStemmed";

jest.mock("./proxiedGenerated");
Expand Down Expand Up @@ -160,11 +156,10 @@ describe("splitIntoChunks", () => {
])(
"splitIntoChunks('%s', %j, %j, 0, 0) should return %j",
(text, positions, tokens, chunks, chunkIndex) => {
const chunkIndexRef = {} as ChunkIndexRef;
expect(
splitIntoChunks(text, positions, tokens, 0, 0, chunkIndexRef)
).toEqual(chunks);
expect(chunkIndexRef.chunkIndex).toBe(chunkIndex);
expect(splitIntoChunks(text, positions, tokens)).toEqual({
chunkIndex,
chunks,
});
}
);
});
105 changes: 46 additions & 59 deletions docusaurus-search-local/src/client/utils/highlightStemmed.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
import {
HighlightChunk,
MetadataPosition,
ChunkIndexRef,
} from "../../shared/interfaces";
import { HighlightChunk, MetadataPosition } from "../../shared/interfaces";
import { escapeHtml } from "./escapeHtml";
import { highlight } from "./highlight";
import { looseTokenize } from "./looseTokenize";
Expand All @@ -14,22 +10,12 @@ export function highlightStemmed(
tokens: string[],
maxLength = searchResultContextMaxLength
): string {
const chunkIndexRef: ChunkIndexRef = {
chunkIndex: -1,
};
const chunks = splitIntoChunks(
content,
positions,
tokens,
0,
0,
chunkIndexRef
);
const { chunkIndex, chunks } = splitIntoChunks(content, positions, tokens);

const leadingChunks = chunks.slice(0, chunkIndexRef.chunkIndex);
const firstChunk = chunks[chunkIndexRef.chunkIndex];
const leadingChunks = chunks.slice(0, chunkIndex);
const firstChunk = chunks[chunkIndex];
const html: string[] = [firstChunk.html];
const trailingChunks = chunks.slice(chunkIndexRef.chunkIndex + 1);
const trailingChunks = chunks.slice(chunkIndex + 1);

let currentLength = firstChunk.textLength;
let leftPadding = 0;
Expand Down Expand Up @@ -80,58 +66,59 @@ export function highlightStemmed(
export function splitIntoChunks(
content: string,
positions: MetadataPosition[],
tokens: string[],
positionIndex: number,
cursor: number,
chunkIndexRef?: ChunkIndexRef
): HighlightChunk[] {
tokens: string[]
): {
chunkIndex: number;
chunks: HighlightChunk[];
} {
const chunks: HighlightChunk[] = [];
const [start, length] = positions[positionIndex];
if (start < cursor) {
let positionIndex = 0;
let cursor = 0;
let chunkIndex = -1;
while (positionIndex < positions.length) {
const [start, length] = positions[positionIndex];
positionIndex += 1;
if (positionIndex < positions.length) {
chunks.push(
...splitIntoChunks(content, positions, tokens, positionIndex, cursor)
);
if (start < cursor) {
continue;
}
} else {

if (start > cursor) {
chunks.push(
...looseTokenize(content.substring(cursor, start)).map((token) => ({
const leadingChunks = looseTokenize(content.substring(cursor, start)).map(
(token) => ({
html: escapeHtml(token),
textLength: token.length,
}))
})
);
for (const item of leadingChunks) {
chunks.push(item);
}
}
if (chunkIndexRef) {
chunkIndexRef.chunkIndex = chunks.length;

if (chunkIndex === -1) {
chunkIndex = chunks.length;
}

cursor = start + length;
chunks.push({
html: highlight(content.substr(start, length), tokens, true),
html: highlight(content.substring(start, cursor), tokens, true),
textLength: length,
});
const nextCursor = start + length;
positionIndex += 1;
if (positionIndex < positions.length) {
chunks.push(
...splitIntoChunks(
content,
positions,
tokens,
positionIndex,
nextCursor
)
);
} else {
if (nextCursor < content.length) {
chunks.push(
...looseTokenize(content.substr(nextCursor)).map((token) => ({
html: escapeHtml(token),
textLength: token.length,
}))
);
}
}

if (cursor < content.length) {
const trailingChunks = looseTokenize(content.substring(cursor)).map(
(token) => ({
html: escapeHtml(token),
textLength: token.length,
})
);
for (const item of trailingChunks) {
chunks.push(item);
}
}
return chunks;

return {
chunkIndex,
chunks,
};
}
4 changes: 2 additions & 2 deletions docusaurus-search-local/src/client/utils/looseTokenize.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ export function looseTokenize(content: string): string[] {
break;
}
if ((match.index as number) > 0) {
tokens.push(text.substr(0, match.index));
tokens.push(text.substring(0, match.index));
}
tokens.push(match[0]);
start += (match.index as number) + match[0].length;
text = content.substr(start);
text = content.substring(start);
}
return tokens;
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,17 @@ describe("getCondensedText", () => {
<li class="pear">Pear</li>
</ul>
<span>Good</span><span>bye</span> fruits.
<code>
<span>Hello</span><br>
<span>World</span>
</code>
<div>
`;
const $ = cheerio.load(html);

test.each<[string, string]>([
["#fruits", "Apple Orange Pear"],
["#root", "Hello fruits. Apple Orange Pear Goodbye fruits."],
["#root", "Hello fruits. Apple Orange Pear Goodbye fruits. Hello World"],
])("getCondensedText($('%s'), $) should return '%s'", (selector, text) => {
expect(getCondensedText($(selector).get(), $)).toBe(text);
});
Expand Down
3 changes: 3 additions & 0 deletions docusaurus-search-local/src/server/utils/getCondensedText.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ export function getCondensedText(
return element.data as string;
}
if (element.type === "tag") {
if (element.name === "br") {
return " ";
}
const content = getText($(element).contents().get());
if (BLOCK_TAGS.has(element.name)) {
return " " + content + " ";
Expand Down
4 changes: 0 additions & 4 deletions docusaurus-search-local/src/shared/interfaces.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,6 @@ export interface HighlightChunk {
textLength: number;
}

export interface ChunkIndexRef {
chunkIndex: number;
}

/**
* Property names of a document are shortened to produce a smaller serialized search index.
*/
Expand Down

0 comments on commit e0b2cfd

Please sign in to comment.