// Compare reports whether a precedes b in natural order.
//
// Both strings are walked byte by byte. Whenever the current byte of each
// string is an ASCII digit, the two digit runs are compared as unsigned
// integers of arbitrary size: leading zeros are ignored, the longer run is the
// larger number, and runs of equal length are compared lexically (which equals
// numeric order for digit strings of the same length). All other bytes are
// compared by value. When one string is a prefix of the other, the shorter one
// comes first.
//
// Compare returns false when neither string precedes the other, which is the
// case for identical strings and for strings that differ only in the number of
// leading zeros of a numeric run.
func Compare(a, b string) bool {
	i, j := 0, 0

	for i < len(a) && j < len(b) {
		ca, cb := a[i], b[j]

		if isASCIIDigit(ca) && isASCIIDigit(cb) {
			startA, endA := digitRun(a, i)
			startB, endB := digitRun(b, j)

			// With leading zeros stripped, more digits means a bigger number.
			if lenA, lenB := endA-startA, endB-startB; lenA != lenB {
				return lenA < lenB
			}

			// Same number of significant digits: byte order is numeric order.
			if c := strings.Compare(a[startA:endA], b[startB:endB]); c != 0 {
				return c < 0
			}

			// The numbers are equal; resume right after both runs.
			i, j = endA, endB
			continue
		}

		if ca != cb {
			return ca < cb
		}

		i++
		j++
	}

	// At least one string is exhausted here. a precedes b only when a ran out
	// first, i.e. b still has bytes left.
	return i >= len(a) && j < len(b)
}

// isASCIIDigit reports whether c is one of the bytes '0' through '9'.
func isASCIIDigit(c byte) bool {
	return c >= '0' && c <= '9'
}

// digitRun locates the run of ASCII digits in s that begins at pos, where
// s[pos] must already be a digit. It returns the index of the first
// significant digit (leading zeros skipped, but always keeping at least one
// digit so that "0" and "000" both yield a one-byte run) and the index just
// past the last digit of the run.
func digitRun(s string, pos int) (start, end int) {
	end = pos
	for end < len(s) && isASCIIDigit(s[end]) {
		end++
	}

	// Only strip a zero when another digit of the same run follows it; a lone
	// zero followed by a non-digit is the number 0, not padding.
	start = pos
	for start < end-1 && s[start] == '0' {
		start++
	}

	return start, end
}