From db72a1f075275dfa16a124acac9fc9e74a633637 Mon Sep 17 00:00:00 2001
From: Joe Mooring <joe@mooring.com>
Date: Mon, 21 Apr 2025 10:33:20 -0700
Subject: [PATCH] parser/metadecoders: Add CSV targetType (map or slice) option
 to transform.Unmarshal

Closes #8859
---
 parser/metadecoders/decoder.go              |  59 ++++++++--
 tpl/transform/transform_integration_test.go | 116 ++++++++++++++++++++
 2 files changed, 166 insertions(+), 9 deletions(-)

diff --git a/parser/metadecoders/decoder.go b/parser/metadecoders/decoder.go
index 1655ea513..419fbf4d2 100644
--- a/parser/metadecoders/decoder.go
+++ b/parser/metadecoders/decoder.go
@@ -36,16 +36,22 @@ import (
 
 // Decoder provides some configuration options for the decoders.
 type Decoder struct {
-	// Delimiter is the field delimiter used in the CSV decoder. It defaults to ','.
+	// Delimiter is the field delimiter. Used in the CSV decoder. Default is
+	// ','.
 	Delimiter rune
 
-	// Comment, if not 0, is the comment character used in the CSV decoder. Lines beginning with the
-	// Comment character without preceding whitespace are ignored.
+	// Comment, if not 0, is the comment character. Lines beginning with the
+	// Comment character without preceding whitespace are ignored. Used in the
+	// CSV decoder.
 	Comment rune
 
 	// If true, a quote may appear in an unquoted field and a non-doubled quote
-	// may appear in a quoted field. It defaults to false.
+	// may appear in a quoted field. Used in the CSV decoder. Default is false.
 	LazyQuotes bool
+
+	// TargetType is the target data type, either "slice" or "map"; the CSV
+	// decoder returns an error for any other value. Default is "slice".
+	TargetType string
 }
 
 // OptionsKey is used in cache keys.
@@ -54,12 +60,14 @@ func (d Decoder) OptionsKey() string {
 	sb.WriteRune(d.Delimiter)
 	sb.WriteRune(d.Comment)
 	sb.WriteString(strconv.FormatBool(d.LazyQuotes))
+	sb.WriteString(d.TargetType)
 	return sb.String()
 }
 
 // Default is a Decoder in its default configuration.
 var Default = Decoder{
-	Delimiter: ',',
+	Delimiter:  ',',
+	TargetType: "slice", // CSV decodes to a slice of records rather than maps
 }
 
 // UnmarshalToMap will unmarshall data in format f into a new map. This is
@@ -122,7 +130,14 @@ func (d Decoder) Unmarshal(data []byte, f Format) (any, error) {
 	if len(data) == 0 {
 		switch f {
 		case CSV:
-			return make([][]string, 0), nil
+			switch d.TargetType {
+			case "map":
+				return make(map[string]any), nil
+			case "slice":
+				return make([][]string, 0), nil
+			default:
+				return nil, fmt.Errorf("invalid targetType: expected either slice or map, received %s", d.TargetType)
+			}
 		default:
 			return make(map[string]any), nil
 		}
@@ -232,10 +247,36 @@ func (d Decoder) unmarshalCSV(data []byte, v any) error {
 
 	switch vv := v.(type) {
 	case *any:
-		*vv = records
-	default:
-		return fmt.Errorf("CSV cannot be unmarshaled into %T", v)
+		switch d.TargetType {
+		case "map":
+			if len(records) < 2 {
+				return fmt.Errorf("cannot unmarshal CSV into %T: expected at least a header row and one data row", v)
+			}
 
+			seen := make(map[string]bool, len(records[0]))
+			for _, fieldName := range records[0] {
+				if seen[fieldName] {
+					return fmt.Errorf("cannot unmarshal CSV into %T: header row contains duplicate field names", v)
+				}
+				seen[fieldName] = true
+			}
+
+			sm := make([]map[string]string, len(records)-1)
+			for i, record := range records[1:] {
+				m := make(map[string]string, len(records[0]))
+				for j, col := range record {
+					m[records[0][j]] = col
+				}
+				sm[i] = m
+			}
+			*vv = sm
+		case "slice":
+			*vv = records
+		default:
+			return fmt.Errorf("cannot unmarshal CSV into %T: invalid targetType: expected either slice or map, received %s", v, d.TargetType)
+		}
+	default:
+		return fmt.Errorf("cannot unmarshal CSV into %T", v)
 	}
 
 	return nil
diff --git a/tpl/transform/transform_integration_test.go b/tpl/transform/transform_integration_test.go
index ceb80309b..2b3c7d40e 100644
--- a/tpl/transform/transform_integration_test.go
+++ b/tpl/transform/transform_integration_test.go
@@ -379,3 +379,119 @@ Markdown: {{ $markdown }}|
 
 	b.AssertFileContent("public/index.html", "Markdown: ## Heading 2\n|")
 }
+
+func TestUnmarshalCSV(t *testing.T) {
+	t.Parallel()
+
+	files := `
+-- hugo.toml --
+disableKinds = ['page','rss','section','sitemap','taxonomy','term']
+-- layouts/all.html --
+{{ $opts := OPTS }}
+{{ with resources.Get "pets.csv" | transform.Unmarshal $opts }}
+  {{ jsonify . }}
+{{ end }}
+-- assets/pets.csv --
+DATA
+`
+
+	// targetType = map
+	f := strings.ReplaceAll(files, "OPTS", `dict "targetType" "map"`)
+	f = strings.ReplaceAll(f, "DATA",
+		"name,type,breed,age\nSpot,dog,Collie,3\nFelix,cat,Malicious,7",
+	)
+	b := hugolib.Test(t, f)
+	b.AssertFileContent("public/index.html",
+		`[{"age":"3","breed":"Collie","name":"Spot","type":"dog"},{"age":"7","breed":"Malicious","name":"Felix","type":"cat"}]`,
+	)
+
+	// targetType = map (no data)
+	f = strings.ReplaceAll(files, "OPTS", `dict "targetType" "map"`)
+	f = strings.ReplaceAll(f, "DATA", "")
+	b = hugolib.Test(t, f)
+	b.AssertFileContent("public/index.html", "")
+
+	// targetType = slice
+	f = strings.ReplaceAll(files, "OPTS", `dict "targetType" "slice"`)
+	f = strings.ReplaceAll(f, "DATA",
+		"name,type,breed,age\nSpot,dog,Collie,3\nFelix,cat,Malicious,7",
+	)
+	b = hugolib.Test(t, f)
+	b.AssertFileContent("public/index.html",
+		`[["name","type","breed","age"],["Spot","dog","Collie","3"],["Felix","cat","Malicious","7"]]`,
+	)
+
+	// targetType = slice (no data)
+	f = strings.ReplaceAll(files, "OPTS", `dict "targetType" "slice"`)
+	f = strings.ReplaceAll(f, "DATA", "")
+	b = hugolib.Test(t, f)
+	b.AssertFileContent("public/index.html", "")
+
+	// targetType not specified
+	f = strings.ReplaceAll(files, "OPTS", "dict")
+	f = strings.ReplaceAll(f, "DATA",
+		"name,type,breed,age\nSpot,dog,Collie,3\nFelix,cat,Malicious,7",
+	)
+	b = hugolib.Test(t, f)
+	b.AssertFileContent("public/index.html",
+		`[["name","type","breed","age"],["Spot","dog","Collie","3"],["Felix","cat","Malicious","7"]]`,
+	)
+
+	// targetType not specified (no data)
+	f = strings.ReplaceAll(files, "OPTS", "dict")
+	f = strings.ReplaceAll(f, "DATA", "")
+	b = hugolib.Test(t, f)
+	b.AssertFileContent("public/index.html", "")
+
+	// targetType = foo (the DATA placeholder is not replaced, so the CSV is the literal text DATA)
+	f = strings.ReplaceAll(files, "OPTS", `dict "targetType" "foo"`)
+	_, err := hugolib.TestE(t, f)
+	if err == nil {
+		t.Errorf("expected error")
+	} else {
+		if !strings.Contains(err.Error(), `invalid targetType: expected either slice or map, received foo`) {
+			t.Log(err.Error())
+			t.Errorf("error message does not match expected error message")
+		}
+	}
+
+	// targetType = foo (no data)
+	f = strings.ReplaceAll(files, "OPTS", `dict "targetType" "foo"`)
+	f = strings.ReplaceAll(f, "DATA", "")
+	_, err = hugolib.TestE(t, f)
+	if err == nil {
+		t.Errorf("expected error")
+	} else {
+		if !strings.Contains(err.Error(), `invalid targetType: expected either slice or map, received foo`) {
+			t.Log(err.Error())
+			t.Errorf("error message does not match expected error message")
+		}
+	}
+
+	// targetType = map (error: expected at least a header row and one data row); the DATA placeholder is not replaced, so the CSV has a single row
+	f = strings.ReplaceAll(files, "OPTS", `dict "targetType" "map"`)
+	_, err = hugolib.TestE(t, f)
+	if err == nil {
+		t.Errorf("expected error")
+	} else {
+		if !strings.Contains(err.Error(), `expected at least a header row and one data row`) {
+			t.Log(err.Error())
+			t.Errorf("error message does not match expected error message")
+		}
+	}
+
+	// targetType = map (error: header row contains duplicate field names)
+	f = strings.ReplaceAll(files, "OPTS", `dict "targetType" "map"`)
+	f = strings.ReplaceAll(f, "DATA",
+		"name,name,breed,age\nSpot,dog,Collie,3\nFelix,cat,Malicious,7",
+	)
+	_, err = hugolib.TestE(t, f)
+	if err == nil {
+		t.Errorf("expected error")
+	} else {
+		if !strings.Contains(err.Error(), `header row contains duplicate field names`) {
+			t.Log(err.Error())
+			t.Errorf("error message does not match expected error message")
+		}
+	}
+}