hugo/parser/metadecoders/decoder.go
Ville Vesilehto f34cdc382a
parser/metadecoder: Improve errors for non-map XML root values
Previously, the XML decoder would panic when encountering a root element with
a non-map value due to an unsafe type assertion.

The fix adds proper type checking before the map conversion and provides
clear error messages to help users identify and fix invalid XML structures.

Example error for invalid XML like:
<root>just text</root>

Will now return:
"XML root element 'root' must be a map/object, got string"
2025-03-22 18:48:23 +01:00

334 lines
8.4 KiB
Go

// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metadecoders
import (
"bytes"
"encoding/csv"
"encoding/json"
"fmt"
"log"
"regexp"
"strconv"
"strings"
"github.com/gohugoio/hugo/common/herrors"
"github.com/gohugoio/hugo/common/maps"
"github.com/niklasfasching/go-org/org"
xml "github.com/clbanning/mxj/v2"
toml "github.com/pelletier/go-toml/v2"
"github.com/spf13/afero"
"github.com/spf13/cast"
yaml "gopkg.in/yaml.v2"
)
// Decoder provides some configuration options for the decoders.
type Decoder struct {
// Delimiter is the field delimiter used in the CSV decoder. It defaults to ','.
Delimiter rune
// Comment, if not 0, is the comment character used in the CSV decoder. Lines beginning with the
// Comment character without preceding whitespace are ignored.
Comment rune
// If true, a quote may appear in an unquoted field and a non-doubled quote
// may appear in a quoted field. It defaults to false.
LazyQuotes bool
}
// OptionsKey is used in cache keys.
func (d Decoder) OptionsKey() string {
var sb strings.Builder
sb.WriteRune(d.Delimiter)
sb.WriteRune(d.Comment)
sb.WriteString(strconv.FormatBool(d.LazyQuotes))
return sb.String()
}
// Default is a Decoder in its default configuration.
var Default = Decoder{
Delimiter: ',',
}
// UnmarshalToMap will unmarshall data in format f into a new map. This is
// what's needed for Hugo's front matter decoding.
func (d Decoder) UnmarshalToMap(data []byte, f Format) (map[string]any, error) {
m := make(map[string]any)
if data == nil {
return m, nil
}
err := d.UnmarshalTo(data, f, &m)
return m, err
}
// UnmarshalFileToMap is the same as UnmarshalToMap, but reads the data from
// the given filename.
func (d Decoder) UnmarshalFileToMap(fs afero.Fs, filename string) (map[string]any, error) {
format := FormatFromString(filename)
if format == "" {
return nil, fmt.Errorf("%q is not a valid configuration format", filename)
}
data, err := afero.ReadFile(fs, filename)
if err != nil {
return nil, err
}
return d.UnmarshalToMap(data, format)
}
// UnmarshalStringTo tries to unmarshal data to a new instance of type typ.
func (d Decoder) UnmarshalStringTo(data string, typ any) (any, error) {
data = strings.TrimSpace(data)
// We only check for the possible types in YAML, JSON and TOML.
switch typ.(type) {
case string:
return data, nil
case map[string]any, maps.Params:
format := d.FormatFromContentString(data)
return d.UnmarshalToMap([]byte(data), format)
case []any:
// A standalone slice. Let YAML handle it.
return d.Unmarshal([]byte(data), YAML)
case bool:
return cast.ToBoolE(data)
case int:
return cast.ToIntE(data)
case int64:
return cast.ToInt64E(data)
case float64:
return cast.ToFloat64E(data)
default:
return nil, fmt.Errorf("unmarshal: %T not supported", typ)
}
}
// Unmarshal will unmarshall data in format f into an interface{}.
// This is what's needed for Hugo's /data handling.
func (d Decoder) Unmarshal(data []byte, f Format) (any, error) {
if len(data) == 0 {
switch f {
case CSV:
return make([][]string, 0), nil
default:
return make(map[string]any), nil
}
}
var v any
err := d.UnmarshalTo(data, f, &v)
return v, err
}
// UnmarshalTo unmarshals data in format f into v.
func (d Decoder) UnmarshalTo(data []byte, f Format, v any) error {
var err error
switch f {
case ORG:
err = d.unmarshalORG(data, v)
case JSON:
err = json.Unmarshal(data, v)
case XML:
var xmlRoot xml.Map
xmlRoot, err = xml.NewMapXml(data)
var xmlValue map[string]any
if err == nil {
xmlRootName, err := xmlRoot.Root()
if err != nil {
return toFileError(f, data, fmt.Errorf("failed to unmarshal XML: %w", err))
}
// Get the root value and verify it's a map
rootValue := xmlRoot[xmlRootName]
if rootValue == nil {
return toFileError(f, data, fmt.Errorf("XML root element '%s' has no value", xmlRootName))
}
// Type check before conversion
mapValue, ok := rootValue.(map[string]any)
if !ok {
return toFileError(f, data, fmt.Errorf("XML root element '%s' must be a map/object, got %T", xmlRootName, rootValue))
}
xmlValue = mapValue
}
switch v := v.(type) {
case *map[string]any:
*v = xmlValue
case *any:
*v = xmlValue
}
case TOML:
err = toml.Unmarshal(data, v)
case YAML:
err = yaml.Unmarshal(data, v)
if err != nil {
return toFileError(f, data, fmt.Errorf("failed to unmarshal YAML: %w", err))
}
// To support boolean keys, the YAML package unmarshals maps to
// map[interface{}]interface{}. Here we recurse through the result
// and change all maps to map[string]interface{} like we would've
// gotten from `json`.
var ptr any
switch vv := v.(type) {
case *map[string]any:
ptr = *vv
case *any:
ptr = *vv
default:
// Not a map.
}
if ptr != nil {
if mm, changed := stringifyMapKeys(ptr); changed {
switch vv := v.(type) {
case *map[string]any:
*vv = mm.(map[string]any)
case *any:
*vv = mm
}
}
}
case CSV:
return d.unmarshalCSV(data, v)
default:
return fmt.Errorf("unmarshal of format %q is not supported", f)
}
if err == nil {
return nil
}
return toFileError(f, data, fmt.Errorf("unmarshal failed: %w", err))
}
func (d Decoder) unmarshalCSV(data []byte, v any) error {
r := csv.NewReader(bytes.NewReader(data))
r.Comma = d.Delimiter
r.Comment = d.Comment
r.LazyQuotes = d.LazyQuotes
records, err := r.ReadAll()
if err != nil {
return err
}
switch vv := v.(type) {
case *any:
*vv = records
default:
return fmt.Errorf("CSV cannot be unmarshaled into %T", v)
}
return nil
}
func parseORGDate(s string) string {
r := regexp.MustCompile(`[<\[](\d{4}-\d{2}-\d{2}) .*[>\]]`)
if m := r.FindStringSubmatch(s); m != nil {
return m[1]
}
return s
}
func (d Decoder) unmarshalORG(data []byte, v any) error {
config := org.New()
config.Log = log.Default() // TODO(bep)
document := config.Parse(bytes.NewReader(data), "")
if document.Error != nil {
return document.Error
}
frontMatter := make(map[string]any, len(document.BufferSettings))
for k, v := range document.BufferSettings {
k = strings.ToLower(k)
if strings.HasSuffix(k, "[]") {
frontMatter[k[:len(k)-2]] = strings.Fields(v)
} else if strings.Contains(v, "\n") {
frontMatter[k] = strings.Split(v, "\n")
} else if k == "filetags" {
trimmed := strings.TrimPrefix(v, ":")
trimmed = strings.TrimSuffix(trimmed, ":")
frontMatter[k] = strings.Split(trimmed, ":")
} else if k == "date" || k == "lastmod" || k == "publishdate" || k == "expirydate" {
frontMatter[k] = parseORGDate(v)
} else {
frontMatter[k] = v
}
}
switch vv := v.(type) {
case *map[string]any:
*vv = frontMatter
case *any:
*vv = frontMatter
}
return nil
}
func toFileError(f Format, data []byte, err error) error {
return herrors.NewFileErrorFromName(err, fmt.Sprintf("_stream.%s", f)).UpdateContent(bytes.NewReader(data), nil)
}
// stringifyMapKeys recurses into in and changes all instances of
// map[interface{}]interface{} to map[string]interface{}. This is useful to
// work around the impedance mismatch between JSON and YAML unmarshaling that's
// described here: https://github.com/go-yaml/yaml/issues/139
//
// Inspired by https://github.com/stripe/stripe-mock, MIT licensed
func stringifyMapKeys(in any) (any, bool) {
switch in := in.(type) {
case []any:
for i, v := range in {
if vv, replaced := stringifyMapKeys(v); replaced {
in[i] = vv
}
}
case map[string]any:
for k, v := range in {
if vv, changed := stringifyMapKeys(v); changed {
in[k] = vv
}
}
case map[any]any:
res := make(map[string]any)
var (
ok bool
err error
)
for k, v := range in {
var ks string
if ks, ok = k.(string); !ok {
ks, err = cast.ToStringE(k)
if err != nil {
ks = fmt.Sprintf("%v", k)
}
}
if vv, replaced := stringifyMapKeys(v); replaced {
res[ks] = vv
} else {
res[ks] = v
}
}
return res, true
}
return nil, false
}