Chomp Unicode BOM if present

Useful when using or sharing files with users whose editors
prepend a Unicode byte order mark (BOM), such as Windows Notepad.

This will still assume files are UTF-8 encoded.

Closes #2075
This commit is contained in:
Mattias Wadman 2016-04-13 00:14:00 +02:00 committed by Bjørn Erik Pedersen
parent d48b986c45
commit 5d50c46482
2 changed files with 32 additions and 0 deletions

View file

@ -50,6 +50,8 @@ const (
HTMLCommentStart = "<!--"
// HTMLCommentEnd identifies the end of HTML comment.
HTMLCommentEnd = "-->"
// BOM is the Unicode byte order mark (U+FEFF).
BOM = '\ufeff'
)
var (
@ -101,6 +103,10 @@ func (p *page) Metadata() (meta interface{}, err error) {
func ReadFrom(r io.Reader) (p Page, err error) {
reader := bufio.NewReader(r)
// chomp BOM and assume UTF-8
if err = chompBOM(reader); err != nil && err != io.EOF {
return
}
if err = chompWhitespace(reader); err != nil && err != io.EOF {
return
}
@ -135,6 +141,19 @@ func ReadFrom(r io.Reader) (p Page, err error) {
return newp, nil
}
// chompBOM consumes any leading Unicode byte order marks (BOM) from r and
// leaves the scanner positioned at the first rune that is not a BOM.
//
// It returns the error reported by r.ReadRune when no non-BOM rune could be
// read (for example io.EOF on empty or BOM-only input), or the error reported
// by r.UnreadRune if the first non-BOM rune could not be pushed back. It
// returns nil once a non-BOM rune has been read and successfully unread.
func chompBOM(r io.RuneScanner) error {
	for {
		c, _, err := r.ReadRune()
		if err != nil {
			return err
		}
		if c == BOM {
			// Keep discarding; repeated marks are all dropped.
			continue
		}
		// The rune belongs to the content: push it back for the next reader,
		// and report a failed push-back instead of silently losing the rune.
		return r.UnreadRune()
	}
}
func chompWhitespace(r io.RuneScanner) (err error) {
for {
c, _, err := r.ReadRune()