From b200173266e489f8c4b343f484b8d591f7609216 Mon Sep 17 00:00:00 2001 From: Jannick Kremer Date: Sat, 18 Jun 2022 15:39:23 +0200 Subject: [PATCH] readingTime: compute from CJK and non-CJK parts separately. Fixes #10031 --- .../en/getting-started/configuration.md | 2 +- hugolib/page__per_output.go | 35 +++++++------- hugolib/page_test.go | 46 +++++++++++-------- 3 files changed, 45 insertions(+), 38 deletions(-) diff --git a/docs/content/en/getting-started/configuration.md b/docs/content/en/getting-started/configuration.md index 9393e45347e..859e8660769 100644 --- a/docs/content/en/getting-started/configuration.md +++ b/docs/content/en/getting-started/configuration.md @@ -261,7 +261,7 @@ Google Analytics tracking ID. **Default value:** false -If true, auto-detect Chinese/Japanese/Korean Languages in the content. This will make `.Summary` and `.WordCount` behave correctly for CJK languages. +If true, auto-detect Chinese/Japanese/Korean Languages in the content. This will make `.Summary` behave correctly for CJK languages. 
### imaging diff --git a/hugolib/page__per_output.go b/hugolib/page__per_output.go index de70047c9f7..68cc9f3f446 100644 --- a/hugolib/page__per_output.go +++ b/hugolib/page__per_output.go @@ -18,6 +18,7 @@ import ( "context" "fmt" "html/template" + "math" "runtime/debug" "strings" "sync" @@ -204,7 +205,7 @@ func newPageContentOutput(p *pageState, po *pageOutput) (*pageContentOutput, err cp.initPlain = cp.initMain.Branch(func() (any, error) { cp.plain = tpl.StripHTML(string(cp.content)) cp.plainWords = strings.Fields(cp.plain) - cp.setWordCounts(p.m.isCJKLanguage) + cp.setWordCounts() if err := cp.setAutoSummary(); err != nil { return err, nil @@ -656,31 +657,29 @@ func (cp *pageContentOutput) renderContentWithConverter(c converter.Converter, c return r, err } -func (p *pageContentOutput) setWordCounts(isCJKLanguage bool) { - if isCJKLanguage { - p.wordCount = 0 - for _, word := range p.plainWords { - runeCount := utf8.RuneCountInString(word) - if len(word) == runeCount { - p.wordCount++ - } else { - p.wordCount += runeCount - } +func (p *pageContentOutput) setWordCounts() { + cjkWordCount := 0 + nonCjkWordCount := 0 + for _, word := range p.plainWords { + runeCount := utf8.RuneCountInString(word) + if len(word) == runeCount { + nonCjkWordCount++ + } else { + cjkWordCount += runeCount } - } else { - p.wordCount = helpers.TotalWords(p.plain) } + p.wordCount = nonCjkWordCount + cjkWordCount + // TODO(bep) is set in a test. Fix that. 
if p.fuzzyWordCount == 0 { p.fuzzyWordCount = (p.wordCount + 100) / 100 * 100 } - if isCJKLanguage { - p.readingTime = (p.wordCount + 500) / 501 - } else { - p.readingTime = (p.wordCount + 212) / 213 - } + cjkReadingTime := float64(cjkWordCount) / 501 + nonCjkReadingTime := float64(nonCjkWordCount) / 213 + + p.readingTime = int(math.Ceil(cjkReadingTime + nonCjkReadingTime)) } // A callback to signal that we have inserted a placeholder into the rendered diff --git a/hugolib/page_test.go b/hugolib/page_test.go index 1d9e3e348b2..221ab31c9e0 100644 --- a/hugolib/page_test.go +++ b/hugolib/page_test.go @@ -1117,18 +1117,6 @@ Content } } -func TestWordCountWithAllCJKRunesWithoutHasCJKLanguage(t *testing.T) { - t.Parallel() - assertFunc := func(t *testing.T, ext string, pages page.Pages) { - p := pages[0] - if p.WordCount() != 8 { - t.Fatalf("[%s] incorrect word count. expected %v, got %v", ext, 8, p.WordCount()) - } - } - - testAllMarkdownEnginesForPages(t, assertFunc, nil, simplePageWithAllCJKRunes) -} - func TestWordCountWithAllCJKRunesHasCJKLanguage(t *testing.T) { t.Parallel() settings := map[string]any{"hasCJKLanguage": true} @@ -1161,7 +1149,31 @@ func TestWordCountWithMainEnglishWithCJKRunes(t *testing.T) { testAllMarkdownEnginesForPages(t, assertFunc, settings, simplePageWithMainEnglishWithCJKRunes) } -func TestWordCountWithIsCJKLanguageFalse(t *testing.T) { +func TestReadingTimeCJKMixed(t *testing.T) { + t.Parallel() + + simplePage := fmt.Sprintf(`--- +title: Simple +--- + +%s + +%s + +`, strings.Repeat("hello 你好 ", 2130), strings.Repeat("好", 750)) + // CJK runes: 2130 * 2 + 750 = 10 * 501 -> 10 minutes; non-CJK words: 2130 = 10 * 213 -> 10 minutes; 20 minutes of reading in total + + assertFunc := func(t *testing.T, ext string, pages page.Pages) { + p := pages[0] + if p.ReadingTime() != 20 { + t.Fatalf("[%s] incorrect min read. 
expected %v, got %v", ext, 20, p.ReadingTime()) + } + } + + testAllMarkdownEnginesForPages(t, assertFunc, nil, simplePage) +} + +func TestSummaryWithIsCJKLanguageFalse(t *testing.T) { t.Parallel() settings := map[string]any{ "hasCJKLanguage": true, @@ -1169,10 +1181,6 @@ func TestWordCountWithIsCJKLanguageFalse(t *testing.T) { assertFunc := func(t *testing.T, ext string, pages page.Pages) { p := pages[0] - if p.WordCount() != 75 { - t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.Plain(), 74, p.WordCount()) - } - if p.Summary() != simplePageWithIsCJKLanguageFalseSummary { t.Fatalf("[%s] incorrect Summary for content '%s'. expected %v, got %v", ext, p.Plain(), simplePageWithIsCJKLanguageFalseSummary, p.Summary()) @@ -1781,9 +1789,9 @@ Summary: In Chinese, 好 means good. b.AssertFileContent("public/p2/index.html", "WordCount: 314\nFuzzyWordCount: 400\nReadingTime: 2\nLen Plain: 1569\nLen PlainWords: 314\nTruncated: true\nLen Summary: 25\nLen Content: 1582") b.AssertFileContent("public/p3/index.html", "WordCount: 206\nFuzzyWordCount: 300\nReadingTime: 1\nLen Plain: 638\nLen PlainWords: 7\nTruncated: true\nLen Summary: 43\nLen Content: 651") - b.AssertFileContent("public/p4/index.html", "WordCount: 7\nFuzzyWordCount: 100\nReadingTime: 1\nLen Plain: 638\nLen PlainWords: 7\nTruncated: true\nLen Summary: 43\nLen Content: 651") + b.AssertFileContent("public/p4/index.html", "WordCount: 206\nFuzzyWordCount: 300\nReadingTime: 1\nLen Plain: 638\nLen PlainWords: 7\nTruncated: true\nLen Summary: 43\nLen Content: 651") b.AssertFileContent("public/p5/index.html", "WordCount: 206\nFuzzyWordCount: 300\nReadingTime: 1\nLen Plain: 638\nLen PlainWords: 7\nTruncated: true\nLen Summary: 229\nLen Content: 652") - b.AssertFileContent("public/p6/index.html", "WordCount: 7\nFuzzyWordCount: 100\nReadingTime: 1\nLen Plain: 638\nLen PlainWords: 7\nTruncated: false\nLen Summary: 637\nLen Content: 652") + b.AssertFileContent("public/p6/index.html", "WordCount: 
206\nFuzzyWordCount: 300\nReadingTime: 1\nLen Plain: 638\nLen PlainWords: 7\nTruncated: false\nLen Summary: 637\nLen Content: 652") } func TestScratch(t *testing.T) {