cutil: add SplitBuffer and SplitSparseBuffer helper functions

Add SplitBuffer and SplitSparseBuffer functions for extracting a list
of strings from a single Go buffer, typically one filled in by C code.
The SplitBuffer variant will return empty strings
if multiple nulls are found in sequence, assuming that the C code
packs data with a single null byte between items (except the final byte).
The SplitSparseBuffer variant assumes that the C code may not
tightly pack the data with single null bytes and thus will not
return any empty strings (unless the input buffer is empty
or only contains nulls).
Most of the code in the go-ceph codebase is doing the latter but
probably should have been doing the former. Thus both approaches
are provided.

Signed-off-by: John Mulligan <jmulligan@redhat.com>
This commit is contained in:
John Mulligan 2020-06-19 13:38:28 -04:00 committed by John Mulligan
parent 97732ac885
commit e5d1a53060
2 changed files with 119 additions and 0 deletions

View File

@ -0,0 +1,49 @@
package cutil
import "C"
import (
"bytes"
)
// SplitBuffer converts a byte-slice buffer, such as one filled in by C code,
// into a slice of strings.
// The buffer contents are treated as null-byte separated values.
// Consecutive null-bytes are interpreted as delimiting empty strings, so
// those empty strings are included in the result.
func SplitBuffer(b []byte) []string {
	const keepEmpty = true
	return splitBufStrings(b, keepEmpty)
}
// SplitSparseBuffer converts a byte-slice buffer, such as one filled in by C
// code, into a slice of strings.
// The buffer contents are treated as null-byte separated values.
// The buffer is assumed to be "sparse": adjacent null-bytes are not taken
// to delimit empty strings, so only non-empty strings are returned.
func SplitSparseBuffer(b []byte) []string {
	const keepEmpty = false
	return splitBufStrings(b, keepEmpty)
}
// splitBufStrings breaks a null-byte separated buffer into strings.
// A single trailing null byte, if present, is treated as the C string
// terminator and is removed before splitting, so it never yields an extra
// empty string. When keepEmpty is true, zero-length substrings are kept in
// the result; otherwise they are dropped.
// The returned slice is always non-nil, even when it has no elements.
// This implementation favors simplicity over speed (notably for
// keepEmpty=true); optimizing it is left for later.
func splitBufStrings(b []byte, keepEmpty bool) []string {
	nul := []byte{0}
	// remove at most one trailing null: the C terminator
	trimmed := bytes.TrimSuffix(b, nul)
	out := []string{}
	if len(trimmed) == 0 {
		// empty input, or nothing but the terminator
		return out
	}
	for _, part := range bytes.Split(trimmed, nul) {
		if keepEmpty || len(part) > 0 {
			out = append(out, string(part))
		}
	}
	return out
}

View File

@ -0,0 +1,70 @@
package cutil
import (
"testing"
"github.com/stretchr/testify/assert"
)
// tbl is the table of test cases shared by the buffer-splitting tests.
var tbl = []struct {
// val is the raw input buffer handed to the split functions
val []byte
// res1 is the expected result of SplitSparseBuffer(val)
res1 []string
// res2 is the expected result of SplitBuffer(val)
res2 []string
}{
// simple inputs
{
val: []byte("foo\x00bar\x00baz\x00"),
res1: []string{"foo", "bar", "baz"},
res2: []string{"foo", "bar", "baz"},
},
// no trailing null bytes
{
val: []byte("meow mix"),
res1: []string{"meow mix"},
res2: []string{"meow mix"},
},
// one item
{
val: []byte("fancy feast\x00"),
res1: []string{"fancy feast"},
res2: []string{"fancy feast"},
},
// empty input
{
val: []byte(""),
res1: []string{},
res2: []string{},
},
// input consisting of a single null byte
{
val: []byte("\x00"),
res1: []string{},
res2: []string{},
},
// how multiple adjacent nulls are handled
{
val: []byte("kibbles\x00\x00and\x00bits"),
res1: []string{"kibbles", "and", "bits"},
res2: []string{"kibbles", "", "and", "bits"},
},
// runs of adjacent nulls, including one at the end of the buffer
{
val: []byte("dinki\x00\x00\x00di\x00\x00"),
res1: []string{"dinki", "di"},
res2: []string{"dinki", "", "", "di", ""},
},
// starting with a null
{
val: []byte("\x00caesar\x00"),
res1: []string{"caesar"},
res2: []string{"", "caesar"},
},
}
// TestSplitBufStrings runs every entry of tbl through both exported split
// functions: first SplitSparseBuffer is checked against res1 for all
// entries, then SplitBuffer is checked against res2 for all entries.
func TestSplitBufStrings(t *testing.T) {
	// first pass: the sparse variant
	for i := range tbl {
		tc := tbl[i]
		assert.Equal(t, tc.res1, SplitSparseBuffer(tc.val))
	}
	// second pass: the variant that keeps empty strings
	for i := range tbl {
		tc := tbl[i]
		assert.Equal(t, tc.res2, SplitBuffer(tc.val))
	}
}