cutil: add SplitBuffer and SplitSparseBuffer helper functions

Add SplitBuffer and SplitSparseBuffer functions for extracting a list of strings from a single Go buffer, typically one filled in by C code. The SplitBuffer variant will return empty strings if multiple nulls are found in sequence, assuming that the C code packs the data between single null bytes (except the final byte). The SplitSparseBuffer variant assumes that the C code may not tightly pack the data with single null bytes and thus will never return empty strings (an input buffer that is empty or contains only nulls yields an empty list). Most of the code in the go-ceph codebase is doing the latter but probably should have been doing the former. Thus both approaches are provided. Signed-off-by: John Mulligan <jmulligan@redhat.com>
2025-01-09 07:19:58 +00:00 · 2020-06-19 13:38:28 -04:00 · 2020-06-19 13:38:28 -04:00 · e5d1a53060
commit e5d1a53060
parent 97732ac885
2 changed files with 119 additions and 0 deletions
--- a/internal/cutil/splitbuf.go
+++ b/internal/cutil/splitbuf.go
@ -0,0 +1,49 @@
+package cutil
+
+import "C"
+
+import (
+	"bytes"
+)
+
+// SplitBuffer splits a byte-slice buffer, typically returned from C code,
+// into a slice of strings.
+// The contents of the buffer are assumed to be null-byte separated.
+// If the buffer contains a sequence of null-bytes it will assume that the
+// "space" between the bytes are meant to be empty strings.
+func SplitBuffer(b []byte) []string {
+	return splitBufStrings(b, true)
+}
+
+// SplitSparseBuffer splits a byte-slice buffer, typically returned from C code,
+// into a slice of strings.
+// The contents of the buffer are assumed to be null-byte separated.
+// This function assumes that buffer to be "sparse" such that only non-null-byte
+// strings will be returned, and no "empty" strings exist if null-bytes
+// are found adjacent to each other.
+func SplitSparseBuffer(b []byte) []string {
+	return splitBufStrings(b, false)
+}
+
+// splitBufStrings splits b on null bytes and returns the pieces as strings.
+// A single trailing null byte is taken to be the C terminator and is removed
+// before splitting, so it never produces a trailing empty string.
+// If keepEmpty is true, empty substrings (the gaps between adjacent null
+// bytes, or before a leading null byte) are returned; otherwise they are
+// excluded from the results.
+// The returned slice is never nil; an empty or single-null buffer yields an
+// empty slice.
+func splitBufStrings(b []byte, keepEmpty bool) []string {
+	// the final null byte should be the terminating null in C
+	// we never want to preserve the empty string after it
+	if n := len(b); n > 0 && b[n-1] == 0 {
+		b = b[:n-1]
+	}
+	values := []string{}
+	if len(b) == 0 {
+		return values
+	}
+	if keepEmpty {
+		// every separator adds exactly one field, so the final length is
+		// known up front; when empties are skipped it is not, so let
+		// append grow the slice instead of over-allocating
+		values = make([]string, 0, bytes.Count(b, []byte{0})+1)
+	}
+	// walk the buffer once, cutting at each null byte, rather than
+	// materializing an intermediate [][]byte with bytes.Split
+	for {
+		i := bytes.IndexByte(b, 0)
+		if i < 0 {
+			break
+		}
+		if keepEmpty || i > 0 {
+			values = append(values, string(b[:i]))
+		}
+		b = b[i+1:]
+	}
+	// whatever follows the last separator is the final field
+	if keepEmpty || len(b) > 0 {
+		values = append(values, string(b))
+	}
+	return values
+}
--- a/internal/cutil/splitbuf_test.go
+++ b/internal/cutil/splitbuf_test.go
@ -0,0 +1,70 @@
+package cutil
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+var tbl = []struct { // shared cases for TestSplitBufStrings
+	val  []byte   // raw input buffer passed to both split functions
+	res1 []string // expected result of SplitSparseBuffer(val)
+	res2 []string // expected result of SplitBuffer(val)
+}{
+	// simple inputs: single null separators plus a terminating null
+	{
+		val:  []byte("foo\x00bar\x00baz\x00"),
+		res1: []string{"foo", "bar", "baz"},
+		res2: []string{"foo", "bar", "baz"},
+	},
+	// no null bytes at all: the whole buffer is one string
+	{
+		val:  []byte("meow mix"),
+		res1: []string{"meow mix"},
+		res2: []string{"meow mix"},
+	},
+	// one item followed by its terminating null
+	{
+		val:  []byte("fancy feast\x00"),
+		res1: []string{"fancy feast"},
+		res2: []string{"fancy feast"},
+	},
+	// empty input: nothing to split, expect an empty (non-nil) slice
+	{
+		val:  []byte(""),
+		res1: []string{},
+		res2: []string{},
+	},
+	// only the terminating null byte: also expect an empty slice
+	{
+		val:  []byte("\x00"),
+		res1: []string{},
+		res2: []string{},
+	},
+	// adjacent nulls: skipped by the sparse variant, kept as empty strings otherwise
+	{
+		val:  []byte("kibbles\x00\x00and\x00bits"),
+		res1: []string{"kibbles", "and", "bits"},
+		res2: []string{"kibbles", "", "and", "bits"},
+	},
+	{ // two trailing nulls: only the last one is dropped as the terminator
+		val:  []byte("dinki\x00\x00\x00di\x00\x00"),
+		res1: []string{"dinki", "di"},
+		res2: []string{"dinki", "", "", "di", ""},
+	},
+	// starting with a null: yields a leading empty string only in the non-sparse result
+	{
+		val:  []byte("\x00caesar\x00"),
+		res1: []string{"caesar"},
+		res2: []string{"", "caesar"},
+	},
+}
+
+// TestSplitBufStrings runs every entry of tbl through SplitSparseBuffer
+// (checked against res1) and then through SplitBuffer (checked against res2).
+func TestSplitBufStrings(t *testing.T) {
+	// first pass: the sparse variant, which never yields empty strings
+	for i := range tbl {
+		tc := tbl[i]
+		got := SplitSparseBuffer(tc.val)
+		assert.Equal(t, tc.res1, got)
+	}
+	// second pass: the variant that preserves empty strings
+	for i := range tbl {
+		tc := tbl[i]
+		got := SplitBuffer(tc.val)
+		assert.Equal(t, tc.res2, got)
+	}
+}