From db72a1f075275dfa16a124acac9fc9e74a633637 Mon Sep 17 00:00:00 2001 From: Joe Mooring Date: Mon, 21 Apr 2025 10:33:20 -0700 Subject: [PATCH] parser/metadecoders: Add CSV targetType (map or slice) option to transform.Unmarshal Closes #8859 --- parser/metadecoders/decoder.go | 59 ++++++++-- tpl/transform/transform_integration_test.go | 116 ++++++++++++++++++++ 2 files changed, 166 insertions(+), 9 deletions(-) diff --git a/parser/metadecoders/decoder.go b/parser/metadecoders/decoder.go index 1655ea513..419fbf4d2 100644 --- a/parser/metadecoders/decoder.go +++ b/parser/metadecoders/decoder.go @@ -36,16 +36,22 @@ import ( // Decoder provides some configuration options for the decoders. type Decoder struct { - // Delimiter is the field delimiter used in the CSV decoder. It defaults to ','. + // Delimiter is the field delimiter. Used in the CSV decoder. Default is + // ','. Delimiter rune - // Comment, if not 0, is the comment character used in the CSV decoder. Lines beginning with the - // Comment character without preceding whitespace are ignored. + // Comment, if not 0, is the comment character. Lines beginning with the + // Comment character without preceding whitespace are ignored. Used in the + // CSV decoder. Comment rune // If true, a quote may appear in an unquoted field and a non-doubled quote - // may appear in a quoted field. It defaults to false. + // may appear in a quoted field. Used in the CSV decoder. Default is false. LazyQuotes bool + + // The target data type, either slice or map. Used in the CSV decoder. + // Default is slice. + TargetType string } // OptionsKey is used in cache keys. @@ -54,12 +60,14 @@ func (d Decoder) OptionsKey() string { sb.WriteRune(d.Delimiter) sb.WriteRune(d.Comment) sb.WriteString(strconv.FormatBool(d.LazyQuotes)) + sb.WriteString(d.TargetType) return sb.String() } // Default is a Decoder in its default configuration. var Default = Decoder{ - Delimiter: ',', + Delimiter: ',', + TargetType: "slice", } // UnmarshalToMap will unmarshall data in format f into a new map. This is @@ -122,7 +130,14 @@ func (d Decoder) Unmarshal(data []byte, f Format) (any, error) { if len(data) == 0 { switch f { case CSV: - return make([][]string, 0), nil + switch d.TargetType { + case "map": + return make(map[string]any), nil + case "slice": + return make([][]string, 0), nil + default: + return nil, fmt.Errorf("invalid targetType: expected either slice or map, received %s", d.TargetType) + } default: return make(map[string]any), nil } @@ -232,10 +247,36 @@ func (d Decoder) unmarshalCSV(data []byte, v any) error { switch vv := v.(type) { case *any: - *vv = records - default: - return fmt.Errorf("CSV cannot be unmarshaled into %T", v) + switch d.TargetType { + case "map": + if len(records) < 2 { + return fmt.Errorf("cannot unmarshal CSV into %T: expected at least a header row and one data row", v) + } + seen := make(map[string]bool, len(records[0])) + for _, fieldName := range records[0] { + if seen[fieldName] { + return fmt.Errorf("cannot unmarshal CSV into %T: header row contains duplicate field names", v) + } + seen[fieldName] = true + } + + sm := make([]map[string]string, len(records)-1) + for i, record := range records[1:] { + m := make(map[string]string, len(records[0])) + for j, col := range record { + m[records[0][j]] = col + } + sm[i] = m + } + *vv = sm + case "slice": + *vv = records + default: + return fmt.Errorf("cannot unmarshal CSV into %T: invalid targetType: expected either slice or map, received %s", v, d.TargetType) + } + default: + return fmt.Errorf("cannot unmarshal CSV into %T", v) } return nil diff --git a/tpl/transform/transform_integration_test.go b/tpl/transform/transform_integration_test.go index ceb80309b..2b3c7d40e 100644 --- a/tpl/transform/transform_integration_test.go +++ b/tpl/transform/transform_integration_test.go @@ -379,3 +379,119 @@ Markdown: {{ $markdown }}| b.AssertFileContent("public/index.html", "Markdown: ## Heading 2\n|") } + +func TestUnmarshalCSV(t *testing.T) { + t.Parallel() + + files := ` +-- hugo.toml -- +disableKinds = ['page','rss','section','sitemap','taxonomy','term'] +-- layouts/all.html -- +{{ $opts := OPTS }} +{{ with resources.Get "pets.csv" | transform.Unmarshal $opts }} + {{ jsonify . }} +{{ end }} +-- assets/pets.csv -- +DATA +` + + // targetType = map + f := strings.ReplaceAll(files, "OPTS", `dict "targetType" "map"`) + f = strings.ReplaceAll(f, "DATA", + "name,type,breed,age\nSpot,dog,Collie,3\nFelix,cat,Malicious,7", + ) + b := hugolib.Test(t, f) + b.AssertFileContent("public/index.html", + `[{"age":"3","breed":"Collie","name":"Spot","type":"dog"},{"age":"7","breed":"Malicious","name":"Felix","type":"cat"}]`, + ) + + // targetType = map (no data) + f = strings.ReplaceAll(files, "OPTS", `dict "targetType" "map"`) + f = strings.ReplaceAll(f, "DATA", "") + b = hugolib.Test(t, f) + b.AssertFileContent("public/index.html", "") + + // targetType = slice + f = strings.ReplaceAll(files, "OPTS", `dict "targetType" "slice"`) + f = strings.ReplaceAll(f, "DATA", + "name,type,breed,age\nSpot,dog,Collie,3\nFelix,cat,Malicious,7", + ) + b = hugolib.Test(t, f) + b.AssertFileContent("public/index.html", + `[["name","type","breed","age"],["Spot","dog","Collie","3"],["Felix","cat","Malicious","7"]]`, + ) + + // targetType = slice (no data) + f = strings.ReplaceAll(files, "OPTS", `dict "targetType" "slice"`) + f = strings.ReplaceAll(f, "DATA", "") + b = hugolib.Test(t, f) + b.AssertFileContent("public/index.html", "") + + // targetType not specified + f = strings.ReplaceAll(files, "OPTS", "dict") + f = strings.ReplaceAll(f, "DATA", + "name,type,breed,age\nSpot,dog,Collie,3\nFelix,cat,Malicious,7", + ) + b = hugolib.Test(t, f) + b.AssertFileContent("public/index.html", + `[["name","type","breed","age"],["Spot","dog","Collie","3"],["Felix","cat","Malicious","7"]]`, + ) + + // targetType not specified (no data) + f = strings.ReplaceAll(files, "OPTS", "dict") + f = strings.ReplaceAll(f, "DATA", "") + b = hugolib.Test(t, f) + b.AssertFileContent("public/index.html", "") + + // targetType = foo + f = strings.ReplaceAll(files, "OPTS", `dict "targetType" "foo"`) + _, err := hugolib.TestE(t, f) + if err == nil { + t.Errorf("expected error") + } else { + if !strings.Contains(err.Error(), `invalid targetType: expected either slice or map, received foo`) { + t.Log(err.Error()) + t.Errorf("error message does not match expected error message") + } + } + + // targetType = foo (no data) + f = strings.ReplaceAll(files, "OPTS", `dict "targetType" "foo"`) + f = strings.ReplaceAll(f, "DATA", "") + _, err = hugolib.TestE(t, f) + if err == nil { + t.Errorf("expected error") + } else { + if !strings.Contains(err.Error(), `invalid targetType: expected either slice or map, received foo`) { + t.Log(err.Error()) + t.Errorf("error message does not match expected error message") + } + } + + // targetType = map (error: expected at least a header row and one data row) + f = strings.ReplaceAll(files, "OPTS", `dict "targetType" "map"`) + _, err = hugolib.TestE(t, f) + if err == nil { + t.Errorf("expected error") + } else { + if !strings.Contains(err.Error(), `expected at least a header row and one data row`) { + t.Log(err.Error()) + t.Errorf("error message does not match expected error message") + } + } + + // targetType = map (error: header row contains duplicate field names) + f = strings.ReplaceAll(files, "OPTS", `dict "targetType" "map"`) + f = strings.ReplaceAll(f, "DATA", + "name,name,breed,age\nSpot,dog,Collie,3\nFelix,cat,Malicious,7", + ) + _, err = hugolib.TestE(t, f) + if err == nil { + t.Errorf("expected error") + } else { + if !strings.Contains(err.Error(), `header row contains duplicate field names`) { + t.Log(err.Error()) + t.Errorf("error message does not match expected error message") + } + } +}