mirror of
https://github.com/gohugoio/hugo.git
synced 2025-04-28 14:40:43 +03:00
Add CJK language support to WordCount and Summary
* Add a global `hasCJKLanguage` flag; when true, CJK content is auto-detected for each page.
* Add an `isCJKLanguage` front matter field that explicitly marks a page as CJK or non-CJK, overriding auto-detection.
* For .Summary: if isCJKLanguage is true, truncate based on runes; otherwise behave as before.
* For WordCount: if isCJKLanguage is true, count based on runes; otherwise behave as before.
* Unexport RuneCount

Fixes #1377
This commit is contained in:
parent
2c045ac449
commit
823334875d
5 changed files with 247 additions and 93 deletions
|
@ -168,6 +168,7 @@ func LoadDefaultSettings() {
|
||||||
viper.SetDefault("RSSUri", "index.xml")
|
viper.SetDefault("RSSUri", "index.xml")
|
||||||
viper.SetDefault("SectionPagesMenu", "")
|
viper.SetDefault("SectionPagesMenu", "")
|
||||||
viper.SetDefault("DisablePathToLower", false)
|
viper.SetDefault("DisablePathToLower", false)
|
||||||
|
viper.SetDefault("HasCJKLanguage", false)
|
||||||
}
|
}
|
||||||
|
|
||||||
// InitializeConfig initializes a config file with sensible default configuration flags.
|
// InitializeConfig initializes a config file with sensible default configuration flags.
|
||||||
|
|
|
@ -19,9 +19,9 @@ package helpers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"unicode/utf8"
|
|
||||||
"html/template"
|
"html/template"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
"github.com/miekg/mmark"
|
"github.com/miekg/mmark"
|
||||||
"github.com/russross/blackfriday"
|
"github.com/russross/blackfriday"
|
||||||
|
@ -178,7 +178,6 @@ func GetHTMLRenderer(defaultFlags int, ctx *RenderingContext) blackfriday.Render
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func getMarkdownExtensions(ctx *RenderingContext) int {
|
func getMarkdownExtensions(ctx *RenderingContext) int {
|
||||||
flags := 0 | blackfriday.EXTENSION_NO_INTRA_EMPHASIS |
|
flags := 0 | blackfriday.EXTENSION_NO_INTRA_EMPHASIS |
|
||||||
blackfriday.EXTENSION_TABLES | blackfriday.EXTENSION_FENCED_CODE |
|
blackfriday.EXTENSION_TABLES | blackfriday.EXTENSION_FENCED_CODE |
|
||||||
|
@ -385,61 +384,51 @@ func TruncateWords(s string, max int) string {
|
||||||
return strings.Join(words[:max], " ")
|
return strings.Join(words[:max], " ")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TruncateWordsByRune joins words with single spaces, stopping once max
// counting units have been consumed, and reports whether anything was cut off.
//
// Counting is per word: a word made up solely of single-byte runes counts as
// one unit, while a word containing any multi-byte rune counts one unit per
// rune. A multi-byte word that does not fit in the remaining budget is cut on
// a rune boundary, so the result never contains a partial rune.
//
// The words slice is never modified.
func TruncateWordsByRune(words []string, max int) (string, bool) {
	count := 0
	for index, word := range words {
		if count >= max {
			return strings.Join(words[:index], " "), true
		}

		runeCount := utf8.RuneCountInString(word)
		switch {
		case len(word) == runeCount:
			// Byte length equals rune count: every rune is a single byte.
			count++
		case count+runeCount < max:
			count += runeCount
		default:
			// The word reaches or exceeds the budget; consume it rune by rune.
			for ri := range word {
				if count >= max {
					// Build the result in a fresh slice. Appending to
					// words[:index] would write into the caller's backing
					// array and overwrite words[index].
					kept := make([]string, 0, index+1)
					kept = append(kept, words[:index]...)
					kept = append(kept, word[:ri])
					return strings.Join(kept, " "), true
				}
				count++
			}
		}
	}

	return strings.Join(words, " "), false
}
|
||||||
|
|
||||||
// TruncateWordsToWholeSentence takes content and an int
|
// TruncateWordsToWholeSentence takes content and an int
|
||||||
// and returns entire sentences from content, delimited by the int
|
// and returns entire sentences from content, delimited by the int
|
||||||
// and whether it's truncated or not.
|
// and whether it's truncated or not.
|
||||||
func TruncateWordsToWholeSentence(words []string, max int) (string, bool) {
|
func TruncateWordsToWholeSentence(words []string, max int) (string, bool) {
|
||||||
count := 0
|
if max >= len(words) {
|
||||||
index, word := 0, ""
|
return strings.Join(words, " "), false
|
||||||
truncated := false
|
}
|
||||||
|
|
||||||
for index, word = range words {
|
for counter, word := range words[max:] {
|
||||||
runeCount := utf8.RuneCountInString(word)
|
if strings.HasSuffix(word, ".") ||
|
||||||
if len(word) == runeCount {
|
strings.HasSuffix(word, "?") ||
|
||||||
count++;
|
strings.HasSuffix(word, ".\"") ||
|
||||||
} else {
|
strings.HasSuffix(word, "!") {
|
||||||
if count + runeCount <= max {
|
upper := max + counter + 1
|
||||||
count += runeCount
|
return strings.Join(words[:upper], " "), (upper < len(words))
|
||||||
} else {
|
|
||||||
offset := 0
|
|
||||||
for count < max {
|
|
||||||
_, width := utf8.DecodeRuneInString(word[offset:])
|
|
||||||
offset += width
|
|
||||||
count++
|
|
||||||
}
|
|
||||||
words[index] = word[:offset]
|
|
||||||
truncated = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if count >= max {
|
|
||||||
if index < len(words) - 1 {
|
|
||||||
truncated = true
|
|
||||||
}
|
|
||||||
break
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
index += 1
|
return strings.Join(words[:max], " "), true
|
||||||
|
|
||||||
if index < len(words) {
|
|
||||||
for counter, word := range words[index:] {
|
|
||||||
if len(word) != utf8.RuneCountInString(word) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
if strings.HasSuffix(word, ".") ||
|
|
||||||
strings.HasSuffix(word, "?") ||
|
|
||||||
strings.HasSuffix(word, ".\"") ||
|
|
||||||
strings.HasSuffix(word, "!") {
|
|
||||||
upper := index + counter + 1
|
|
||||||
return strings.Join(words[:upper], " "), (upper < len(words))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if index > len(words) {
|
|
||||||
return strings.Join(words, " "), truncated
|
|
||||||
}
|
|
||||||
|
|
||||||
return strings.Join(words[:index], " "), truncated
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetAsciidocContent calls asciidoctor or asciidoc as an external helper
|
// GetAsciidocContent calls asciidoctor or asciidoc as an external helper
|
||||||
|
|
|
@ -1,10 +1,11 @@
|
||||||
package helpers
|
package helpers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"html/template"
|
"html/template"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
const tstHTMLContent = "<!DOCTYPE html><html><head><script src=\"http://two/foobar.js\"></script></head><body><nav><ul><li hugo-nav=\"section_0\"></li><li hugo-nav=\"section_1\"></li></ul></nav><article>content <a href=\"http://two/foobar\">foobar</a>. Follow up</article><p>This is some text.<br>And some more.</p></body></html>"
|
const tstHTMLContent = "<!DOCTYPE html><html><head><script src=\"http://two/foobar.js\"></script></head><body><nav><ul><li hugo-nav=\"section_0\"></li><li hugo-nav=\"section_1\"></li></ul></nav><article>content <a href=\"http://two/foobar\">foobar</a>. Follow up</article><p>This is some text.<br>And some more.</p></body></html>"
|
||||||
|
@ -54,8 +55,6 @@ func TestTruncateWordsToWholeSentence(t *testing.T) {
|
||||||
{"a b c", "a b c", 12, false},
|
{"a b c", "a b c", 12, false},
|
||||||
{"a b c", "a b c", 3, false},
|
{"a b c", "a b c", 3, false},
|
||||||
{"a", "a", 1, false},
|
{"a", "a", 1, false},
|
||||||
{"Hello 中国", "Hello 中", 2, true},
|
|
||||||
{"Hello 中国", "Hello 中国", 3, false},
|
|
||||||
{"This is a sentence.", "This is a sentence.", 5, false},
|
{"This is a sentence.", "This is a sentence.", 5, false},
|
||||||
{"This is also a sentence!", "This is also a sentence!", 1, false},
|
{"This is also a sentence!", "This is also a sentence!", 1, false},
|
||||||
{"To be. Or not to be. That's the question.", "To be.", 1, true},
|
{"To be. Or not to be. That's the question.", "To be.", 1, true},
|
||||||
|
@ -72,3 +71,36 @@ func TestTruncateWordsToWholeSentence(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestTruncateWordsByRune(t *testing.T) {
|
||||||
|
type test struct {
|
||||||
|
input, expected string
|
||||||
|
max int
|
||||||
|
truncated bool
|
||||||
|
}
|
||||||
|
data := []test{
|
||||||
|
{"", "", 1, false},
|
||||||
|
{"a b c", "a b c", 12, false},
|
||||||
|
{"a b c", "a b c", 3, false},
|
||||||
|
{"a", "a", 1, false},
|
||||||
|
{"Hello 中国", "", 0, true},
|
||||||
|
{"这是中文,全中文。", "这是中文,", 5, true},
|
||||||
|
{"Hello 中国", "Hello 中", 2, true},
|
||||||
|
{"Hello 中国", "Hello 中国", 3, false},
|
||||||
|
{"Hello中国 Good 好的", "Hello中国 Good 好", 9, true},
|
||||||
|
{"This is a sentence.", "This is", 2, true},
|
||||||
|
{"This is also a sentence!", "This", 1, true},
|
||||||
|
{"To be. Or not to be. That's the question.", "To be. Or not", 4, true},
|
||||||
|
{" \nThis is not a sentence\n ", "This is not", 3, true},
|
||||||
|
}
|
||||||
|
for i, d := range data {
|
||||||
|
output, truncated := TruncateWordsByRune(strings.Fields(d.input), d.max)
|
||||||
|
if d.expected != output {
|
||||||
|
t.Errorf("Test %d failed. Expected %q got %q", i, d.expected, output)
|
||||||
|
}
|
||||||
|
|
||||||
|
if d.truncated != truncated {
|
||||||
|
t.Errorf("Test %d failed. Expected truncated=%t got %t", i, d.truncated, truncated)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -28,6 +28,7 @@ import (
|
||||||
"net/url"
|
"net/url"
|
||||||
"path"
|
"path"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
@ -42,6 +43,10 @@ import (
|
||||||
"github.com/spf13/viper"
|
"github.com/spf13/viper"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
cjk = regexp.MustCompile(`\p{Han}|\p{Hangul}|\p{Hiragana}|\p{Katakana}`)
|
||||||
|
)
|
||||||
|
|
||||||
type Page struct {
|
type Page struct {
|
||||||
Params map[string]interface{}
|
Params map[string]interface{}
|
||||||
Content template.HTML
|
Content template.HTML
|
||||||
|
@ -67,7 +72,6 @@ type Page struct {
|
||||||
contentShortCodes map[string]string
|
contentShortCodes map[string]string
|
||||||
plain string // TODO should be []byte
|
plain string // TODO should be []byte
|
||||||
plainWords []string
|
plainWords []string
|
||||||
plainRuneCount int
|
|
||||||
plainInit sync.Once
|
plainInit sync.Once
|
||||||
plainSecondaryInit sync.Once
|
plainSecondaryInit sync.Once
|
||||||
renderingConfig *helpers.Blackfriday
|
renderingConfig *helpers.Blackfriday
|
||||||
|
@ -78,6 +82,7 @@ type Page struct {
|
||||||
Node
|
Node
|
||||||
pageMenus PageMenus
|
pageMenus PageMenus
|
||||||
pageMenusInit sync.Once
|
pageMenusInit sync.Once
|
||||||
|
isCJKLanguage bool
|
||||||
}
|
}
|
||||||
|
|
||||||
type Source struct {
|
type Source struct {
|
||||||
|
@ -111,12 +116,6 @@ func (p *Page) PlainWords() []string {
|
||||||
return p.plainWords
|
return p.plainWords
|
||||||
}
|
}
|
||||||
|
|
||||||
// RuneCount returns the rune count, excluding any whitespace, of the plain content.
|
|
||||||
func (p *Page) RuneCount() int {
|
|
||||||
p.initPlainSecondary()
|
|
||||||
return p.plainRuneCount
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *Page) initPlain() {
|
func (p *Page) initPlain() {
|
||||||
p.plainInit.Do(func() {
|
p.plainInit.Do(func() {
|
||||||
p.plain = helpers.StripHTML(string(p.Content))
|
p.plain = helpers.StripHTML(string(p.Content))
|
||||||
|
@ -125,20 +124,6 @@ func (p *Page) initPlain() {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Page) initPlainSecondary() {
|
|
||||||
p.plainSecondaryInit.Do(func() {
|
|
||||||
p.initPlain()
|
|
||||||
runeCount := 0
|
|
||||||
for _, r := range p.plain {
|
|
||||||
if !helpers.IsWhitespace(r) {
|
|
||||||
runeCount++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
p.plainRuneCount = runeCount
|
|
||||||
return
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *Page) IsNode() bool {
|
func (p *Page) IsNode() bool {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
@ -218,7 +203,13 @@ func (p *Page) setSummary() {
|
||||||
} else {
|
} else {
|
||||||
// If hugo defines split:
|
// If hugo defines split:
|
||||||
// render, strip html, then split
|
// render, strip html, then split
|
||||||
summary, truncated := helpers.TruncateWordsToWholeSentence(p.PlainWords(), helpers.SummaryLength)
|
var summary string
|
||||||
|
var truncated bool
|
||||||
|
if p.isCJKLanguage {
|
||||||
|
summary, truncated = helpers.TruncateWordsByRune(p.PlainWords(), helpers.SummaryLength)
|
||||||
|
} else {
|
||||||
|
summary, truncated = helpers.TruncateWordsToWholeSentence(p.PlainWords(), helpers.SummaryLength)
|
||||||
|
}
|
||||||
p.Summary = template.HTML(summary)
|
p.Summary = template.HTML(summary)
|
||||||
p.Truncated = truncated
|
p.Truncated = truncated
|
||||||
|
|
||||||
|
@ -363,18 +354,27 @@ func (p *Page) ReadFrom(buf io.Reader) (int64, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Page) analyzePage() {
|
func (p *Page) analyzePage() {
|
||||||
p.WordCount = 0
|
if p.isCJKLanguage {
|
||||||
for _, word := range p.PlainWords() {
|
p.WordCount = 0
|
||||||
runeCount := utf8.RuneCountInString(word)
|
for _, word := range p.PlainWords() {
|
||||||
if len(word) == runeCount {
|
runeCount := utf8.RuneCountInString(word)
|
||||||
p.WordCount++
|
if len(word) == runeCount {
|
||||||
} else {
|
p.WordCount++
|
||||||
p.WordCount += runeCount
|
} else {
|
||||||
|
p.WordCount += runeCount
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
p.WordCount = len(p.PlainWords())
|
||||||
}
|
}
|
||||||
|
|
||||||
p.FuzzyWordCount = int((p.WordCount+100)/100) * 100
|
p.FuzzyWordCount = int((p.WordCount+100)/100) * 100
|
||||||
p.ReadingTime = int((p.WordCount + 212) / 213)
|
|
||||||
|
if p.isCJKLanguage {
|
||||||
|
p.ReadingTime = int((p.WordCount + 500) / 501)
|
||||||
|
} else {
|
||||||
|
p.ReadingTime = int((p.WordCount + 212) / 213)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Page) permalink() (*url.URL, error) {
|
func (p *Page) permalink() (*url.URL, error) {
|
||||||
|
@ -481,7 +481,7 @@ func (p *Page) update(f interface{}) error {
|
||||||
}
|
}
|
||||||
m := f.(map[string]interface{})
|
m := f.(map[string]interface{})
|
||||||
var err error
|
var err error
|
||||||
var draft, published *bool
|
var draft, published, isCJKLanguage *bool
|
||||||
for k, v := range m {
|
for k, v := range m {
|
||||||
loki := strings.ToLower(k)
|
loki := strings.ToLower(k)
|
||||||
switch loki {
|
switch loki {
|
||||||
|
@ -542,6 +542,9 @@ func (p *Page) update(f interface{}) error {
|
||||||
p.Status = cast.ToString(v)
|
p.Status = cast.ToString(v)
|
||||||
case "sitemap":
|
case "sitemap":
|
||||||
p.Sitemap = parseSitemap(cast.ToStringMap(v))
|
p.Sitemap = parseSitemap(cast.ToStringMap(v))
|
||||||
|
case "iscjklanguage":
|
||||||
|
isCJKLanguage = new(bool)
|
||||||
|
*isCJKLanguage = cast.ToBool(v)
|
||||||
default:
|
default:
|
||||||
// If not one of the explicit values, store in Params
|
// If not one of the explicit values, store in Params
|
||||||
switch vv := v.(type) {
|
switch vv := v.(type) {
|
||||||
|
@ -596,6 +599,16 @@ func (p *Page) update(f interface{}) error {
|
||||||
p.Lastmod = p.Date
|
p.Lastmod = p.Date
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if isCJKLanguage != nil {
|
||||||
|
p.isCJKLanguage = *isCJKLanguage
|
||||||
|
} else if viper.GetBool("HasCJKLanguage") {
|
||||||
|
if cjk.Match(p.rawContent) {
|
||||||
|
p.isCJKLanguage = true
|
||||||
|
} else {
|
||||||
|
p.isCJKLanguage = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -766,6 +779,8 @@ func (p *Page) parse(reader io.Reader) error {
|
||||||
|
|
||||||
p.renderable = psr.IsRenderable()
|
p.renderable = psr.IsRenderable()
|
||||||
p.frontmatter = psr.FrontMatter()
|
p.frontmatter = psr.FrontMatter()
|
||||||
|
p.rawContent = psr.Content()
|
||||||
|
|
||||||
meta, err := psr.Metadata()
|
meta, err := psr.Metadata()
|
||||||
if meta != nil {
|
if meta != nil {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -778,8 +793,6 @@ func (p *Page) parse(reader io.Reader) error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
p.rawContent = psr.Content()
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -146,16 +146,67 @@ Summary Same Line<!--more-->
|
||||||
Some more text
|
Some more text
|
||||||
`
|
`
|
||||||
|
|
||||||
SIMPLE_PAGE_WITH_FIVE_MULTIBYTE_UFT8_RUNES = `---
|
SIMPLE_PAGE_WITH_ALL_CJK_RUNES = `---
|
||||||
title: Simple
|
title: Simple
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
||||||
€ € € € €
|
€ € € € €
|
||||||
|
你好
|
||||||
|
도형이
|
||||||
|
カテゴリー
|
||||||
|
|
||||||
|
|
||||||
`
|
`
|
||||||
|
|
||||||
|
SIMPLE_PAGE_WITH_MAIN_ENGLISH_WITH_CJK_RUNES = `---
|
||||||
|
title: Simple
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
In Chinese, 好 means good. In Chinese, 好 means good.
|
||||||
|
In Chinese, 好 means good. In Chinese, 好 means good.
|
||||||
|
In Chinese, 好 means good. In Chinese, 好 means good.
|
||||||
|
In Chinese, 好 means good. In Chinese, 好 means good.
|
||||||
|
In Chinese, 好 means good. In Chinese, 好 means good.
|
||||||
|
In Chinese, 好 means good. In Chinese, 好 means good.
|
||||||
|
In Chinese, 好 means good. In Chinese, 好 means good.
|
||||||
|
More then 70 words.
|
||||||
|
|
||||||
|
|
||||||
|
`
|
||||||
|
SIMPLE_PAGE_WITH_MAIN_ENGLISH_WITH_CJK_RUNES_SUMMARY = "In Chinese, 好 means good. In Chinese, 好 means good. " +
|
||||||
|
"In Chinese, 好 means good. In Chinese, 好 means good. " +
|
||||||
|
"In Chinese, 好 means good. In Chinese, 好 means good. " +
|
||||||
|
"In Chinese, 好 means good. In Chinese, 好 means good. " +
|
||||||
|
"In Chinese, 好 means good. In Chinese, 好 means good. " +
|
||||||
|
"In Chinese, 好 means good. In Chinese, 好 means good. " +
|
||||||
|
"In Chinese, 好 means good. In Chinese, 好 means good."
|
||||||
|
|
||||||
|
SIMPLE_PAGE_WITH_ISCJKLANGUAGE_FALSE = `---
|
||||||
|
title: Simple
|
||||||
|
isCJKLanguage: false
|
||||||
|
---
|
||||||
|
|
||||||
|
In Chinese, 好的啊 means good. In Chinese, 好的呀 means good.
|
||||||
|
In Chinese, 好的啊 means good. In Chinese, 好的呀 means good.
|
||||||
|
In Chinese, 好的啊 means good. In Chinese, 好的呀 means good.
|
||||||
|
In Chinese, 好的啊 means good. In Chinese, 好的呀 means good.
|
||||||
|
In Chinese, 好的啊 means good. In Chinese, 好的呀 means good.
|
||||||
|
In Chinese, 好的啊 means good. In Chinese, 好的呀 means good.
|
||||||
|
In Chinese, 好的啊 means good. In Chinese, 好的呀呀 means good enough.
|
||||||
|
More then 70 words.
|
||||||
|
|
||||||
|
|
||||||
|
`
|
||||||
|
SIMPLE_PAGE_WITH_ISCJKLANGUAGE_FALSE_SUMMARY = "In Chinese, 好的啊 means good. In Chinese, 好的呀 means good. " +
|
||||||
|
"In Chinese, 好的啊 means good. In Chinese, 好的呀 means good. " +
|
||||||
|
"In Chinese, 好的啊 means good. In Chinese, 好的呀 means good. " +
|
||||||
|
"In Chinese, 好的啊 means good. In Chinese, 好的呀 means good. " +
|
||||||
|
"In Chinese, 好的啊 means good. In Chinese, 好的呀 means good. " +
|
||||||
|
"In Chinese, 好的啊 means good. In Chinese, 好的呀 means good. " +
|
||||||
|
"In Chinese, 好的啊 means good. In Chinese, 好的呀呀 means good enough."
|
||||||
|
|
||||||
SIMPLE_PAGE_WITH_LONG_CONTENT = `---
|
SIMPLE_PAGE_WITH_LONG_CONTENT = `---
|
||||||
title: Simple
|
title: Simple
|
||||||
---
|
---
|
||||||
|
@ -584,18 +635,86 @@ func TestPageWithDate(t *testing.T) {
|
||||||
checkPageDate(t, p, d)
|
checkPageDate(t, p, d)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRuneCount(t *testing.T) {
|
func TestWordCountWithAllCJKRunesWithoutHasCJKLanguage(t *testing.T) {
|
||||||
|
viper.Reset()
|
||||||
|
|
||||||
p, _ := NewPage("simple.md")
|
p, _ := NewPage("simple.md")
|
||||||
_, err := p.ReadFrom(strings.NewReader(SIMPLE_PAGE_WITH_FIVE_MULTIBYTE_UFT8_RUNES))
|
_, err := p.ReadFrom(strings.NewReader(SIMPLE_PAGE_WITH_ALL_CJK_RUNES))
|
||||||
p.Convert()
|
p.Convert()
|
||||||
p.analyzePage()
|
p.analyzePage()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Unable to create a page with frontmatter and body content: %s", err)
|
t.Fatalf("Unable to create a page with frontmatter and body content: %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if p.RuneCount() != 5 {
|
if p.WordCount != 8 {
|
||||||
t.Fatalf("incorrect rune count for content '%s'. expected %v, got %v", p.plain, 5, p.RuneCount())
|
t.Fatalf("incorrect word count for content '%s'. expected %v, got %v", p.plain, 8, p.WordCount)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWordCountWithAllCJKRunesHasCJKLanguage(t *testing.T) {
|
||||||
|
viper.Reset()
|
||||||
|
defer viper.Reset()
|
||||||
|
|
||||||
|
viper.Set("HasCJKLanguage", true)
|
||||||
|
|
||||||
|
p, _ := NewPage("simple.md")
|
||||||
|
_, err := p.ReadFrom(strings.NewReader(SIMPLE_PAGE_WITH_ALL_CJK_RUNES))
|
||||||
|
p.Convert()
|
||||||
|
p.analyzePage()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Unable to create a page with frontmatter and body content: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.WordCount != 15 {
|
||||||
|
t.Fatalf("incorrect word count for content '%s'. expected %v, got %v", p.plain, 15, p.WordCount)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWordCountWithMainEnglishWithCJKRunes(t *testing.T) {
|
||||||
|
viper.Reset()
|
||||||
|
defer viper.Reset()
|
||||||
|
|
||||||
|
viper.Set("HasCJKLanguage", true)
|
||||||
|
|
||||||
|
p, _ := NewPage("simple.md")
|
||||||
|
_, err := p.ReadFrom(strings.NewReader(SIMPLE_PAGE_WITH_MAIN_ENGLISH_WITH_CJK_RUNES))
|
||||||
|
p.Convert()
|
||||||
|
p.analyzePage()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Unable to create a page with frontmatter and body content: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.WordCount != 74 {
|
||||||
|
t.Fatalf("incorrect word count for content '%s'. expected %v, got %v", p.plain, 74, p.WordCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.Summary != SIMPLE_PAGE_WITH_MAIN_ENGLISH_WITH_CJK_RUNES_SUMMARY {
|
||||||
|
t.Fatalf("incorrect Summary for content '%s'. expected %v, got %v", p.plain,
|
||||||
|
SIMPLE_PAGE_WITH_MAIN_ENGLISH_WITH_CJK_RUNES_SUMMARY, p.Summary)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWordCountWithIsCJKLanguageFalse(t *testing.T) {
|
||||||
|
viper.Reset()
|
||||||
|
defer viper.Reset()
|
||||||
|
|
||||||
|
viper.Set("HasCJKLanguage", true)
|
||||||
|
|
||||||
|
p, _ := NewPage("simple.md")
|
||||||
|
_, err := p.ReadFrom(strings.NewReader(SIMPLE_PAGE_WITH_ISCJKLANGUAGE_FALSE))
|
||||||
|
p.Convert()
|
||||||
|
p.analyzePage()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Unable to create a page with frontmatter and body content: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.WordCount != 75 {
|
||||||
|
t.Fatalf("incorrect word count for content '%s'. expected %v, got %v", p.plain, 75, p.WordCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.Summary != SIMPLE_PAGE_WITH_ISCJKLANGUAGE_FALSE_SUMMARY {
|
||||||
|
t.Fatalf("incorrect Summary for content '%s'. expected %v, got %v", p.plain,
|
||||||
|
SIMPLE_PAGE_WITH_ISCJKLANGUAGE_FALSE_SUMMARY, p.Summary)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue