Consolidate all hashing to the common/hashing package

And remove now unused hashing funcs.
This commit is contained in:
Bjørn Erik Pedersen 2024-07-30 15:47:34 +02:00
parent d5eda13cb2
commit e67886c038
125 changed files with 177 additions and 368 deletions

View file

@ -15,8 +15,6 @@ package helpers
import (
"bytes"
"crypto/md5"
"encoding/hex"
"fmt"
"io"
"net"
@ -257,66 +255,6 @@ func SliceToLower(s []string) []string {
return l
}
// XXHashFromReader creates a xxHash hash from the given reader.
// MD5String returns the lowercase hex-encoded MD5 digest of the string f.
func MD5String(f string) string {
	digest := md5.Sum([]byte(f))
	return hex.EncodeToString(digest[:])
}
// MD5FromReaderFast computes an MD5 hash from a small sample of r rather than
// from its full content, trading accuracy for speed. Do not use it to tell
// apart inputs that differ only subtly.
//
// It returns the hex-encoded hash and the size of r in bytes. It does not
// close r, and it leaves r positioned at its end.
//
// Sampling details, which determine the resulting hash and must stay stable:
//   - the first chunk is read from r's current position;
//   - every later chunk is read after seeking to the absolute offset seek
//     from the start of r, so those chunks all start at the same position;
//   - the whole peekSize buffer is hashed for each chunk, even when the read
//     came up short, in which case the tail holds whatever the buffer
//     contained before the read.
func MD5FromReaderFast(r io.ReadSeeker) (string, int64, error) {
	const (
		// Do not change once set in stone!
		maxChunks = 8
		peekSize  = 64
		seek      = 2048
	)

	hasher := md5.New()
	chunk := make([]byte, peekSize)

	for n := 0; n < maxChunks; n++ {
		if n > 0 {
			if _, err := r.Seek(seek, io.SeekStart); err != nil {
				if err == io.EOF {
					break
				}
				return "", 0, err
			}
		}

		_, err := io.ReadAtLeast(r, chunk, peekSize)
		exhausted := err == io.EOF || err == io.ErrUnexpectedEOF
		if err != nil && !exhausted {
			return "", 0, err
		}

		// The full buffer is hashed, also for a short or empty read.
		hasher.Write(chunk)
		if exhausted {
			break
		}
	}

	// The error from this seek is deliberately discarded, as before; on
	// failure the reported size is whatever Seek returned.
	size, _ := r.Seek(0, io.SeekEnd)

	return hex.EncodeToString(hasher.Sum(nil)), size, nil
}
// MD5FromReader reads r until EOF and returns the lowercase hex-encoded MD5
// hash of everything read. If reading fails, it returns an empty string and
// the read error.
func MD5FromReader(r io.Reader) (string, error) {
	h := md5.New()
	if _, err := io.Copy(h, r); err != nil {
		// Propagate the failure instead of reporting an empty hash as success.
		return "", err
	}
	return hex.EncodeToString(h.Sum(nil)), nil
}
// IsWhitespace determines if the given rune is whitespace.
func IsWhitespace(r rune) bool {
return r == ' ' || r == '\t' || r == '\n' || r == '\r'

View file

@ -14,7 +14,6 @@
package helpers_test
import (
"fmt"
"reflect"
"strings"
"testing"
@ -22,7 +21,6 @@ import (
"github.com/gohugoio/hugo/helpers"
qt "github.com/frankban/quicktest"
"github.com/spf13/afero"
)
func TestResolveMarkup(t *testing.T) {
@ -256,93 +254,6 @@ func TestUniqueStringsSorted(t *testing.T) {
c.Assert(helpers.UniqueStringsSorted(nil), qt.IsNil)
}
func TestFastMD5FromFile(t *testing.T) {
fs := afero.NewMemMapFs()
if err := afero.WriteFile(fs, "small.txt", []byte("abc"), 0o777); err != nil {
t.Fatal(err)
}
if err := afero.WriteFile(fs, "small2.txt", []byte("abd"), 0o777); err != nil {
t.Fatal(err)
}
if err := afero.WriteFile(fs, "bigger.txt", []byte(strings.Repeat("a bc d e", 100)), 0o777); err != nil {
t.Fatal(err)
}
if err := afero.WriteFile(fs, "bigger2.txt", []byte(strings.Repeat("c d e f g", 100)), 0o777); err != nil {
t.Fatal(err)
}
c := qt.New(t)
sf1, err := fs.Open("small.txt")
c.Assert(err, qt.IsNil)
sf2, err := fs.Open("small2.txt")
c.Assert(err, qt.IsNil)
bf1, err := fs.Open("bigger.txt")
c.Assert(err, qt.IsNil)
bf2, err := fs.Open("bigger2.txt")
c.Assert(err, qt.IsNil)
defer sf1.Close()
defer sf2.Close()
defer bf1.Close()
defer bf2.Close()
m1, _, err := helpers.MD5FromReaderFast(sf1)
c.Assert(err, qt.IsNil)
c.Assert(m1, qt.Equals, "e9c8989b64b71a88b4efb66ad05eea96")
m2, _, err := helpers.MD5FromReaderFast(sf2)
c.Assert(err, qt.IsNil)
c.Assert(m2, qt.Not(qt.Equals), m1)
m3, _, err := helpers.MD5FromReaderFast(bf1)
c.Assert(err, qt.IsNil)
c.Assert(m3, qt.Not(qt.Equals), m2)
m4, _, err := helpers.MD5FromReaderFast(bf2)
c.Assert(err, qt.IsNil)
c.Assert(m4, qt.Not(qt.Equals), m3)
m5, err := helpers.MD5FromReader(bf2)
c.Assert(err, qt.IsNil)
c.Assert(m5, qt.Not(qt.Equals), m4)
}
// BenchmarkMD5FromFileFast compares the sampled MD5FromReaderFast ("full=false")
// with the full-content MD5FromReader ("full=true") on a 20000-byte in-memory
// file. Writing and opening the file happens with the timer stopped.
func BenchmarkMD5FromFileFast(b *testing.B) {
	fs := afero.NewMemMapFs()
	payload := []byte(strings.Repeat("1234567890", 2000))

	for _, full := range []bool{false, true} {
		b.Run(fmt.Sprintf("full=%t", full), func(b *testing.B) {
			for n := 0; n < b.N; n++ {
				// Fixture setup is excluded from the measurement.
				b.StopTimer()
				if err := afero.WriteFile(fs, "file.txt", payload, 0o777); err != nil {
					b.Fatal(err)
				}
				f, err := fs.Open("file.txt")
				if err != nil {
					b.Fatal(err)
				}
				b.StartTimer()

				var hashErr error
				if full {
					_, hashErr = helpers.MD5FromReader(f)
				} else {
					_, _, hashErr = helpers.MD5FromReaderFast(f)
				}
				if hashErr != nil {
					b.Fatal(hashErr)
				}
				f.Close()
			}
		})
	}
}
func BenchmarkUniqueStrings(b *testing.B) {
input := []string{"a", "b", "d", "e", "d", "h", "a", "i"}