textfile collector: Ensure that only UTF8 textfiles are parsed

Signed-off-by: Dave Henderson <dhenderson@gmail.com>
This commit is contained in:
Dave Henderson 2018-11-29 11:31:48 -05:00
parent f27fdbbbf5
commit a70c57ffd1
No known key found for this signature in database
GPG Key ID: 765A97405DCE5AFA
2 changed files with 55 additions and 5 deletions

View File

@ -237,7 +237,12 @@ fileLoop:
continue
}
var parser expfmt.TextParser
r := utfbom.SkipOnly(carriageReturnFilteringReader{r: file})
r, encoding := utfbom.Skip(carriageReturnFilteringReader{r: file})
if err := checkBOM(encoding); err != nil {
log.Errorf("Invalid file encoding detected in %s: %s - file must be UTF8", path, err.Error())
error = 1.0
continue
}
parsedFamilies, err := parser.TextToMetricFamilies(r)
file.Close()
if err != nil {
@ -281,3 +286,21 @@ fileLoop:
)
return nil
}
// checkBOM decides whether a textfile whose byte order mark was detected as
// encoding may be handed to the text parser.
//
// utfbom.Unknown and utfbom.UTF8 are accepted and yield a nil error. Every
// other encoding yields a non-nil error whose message names the offending
// encoding, so callers can include it in their log output.
func checkBOM(encoding utfbom.Encoding) error {
	var bom string
	switch encoding {
	case utfbom.Unknown, utfbom.UTF8:
		return nil
	case utfbom.UTF16BigEndian:
		bom = "UTF16BigEndian"
	case utfbom.UTF16LittleEndian:
		bom = "UTF16LittleEndian"
	case utfbom.UTF32BigEndian:
		bom = "UTF32BigEndian"
	case utfbom.UTF32LittleEndian:
		bom = "UTF32LittleEndian"
	default:
		// A value outside the constants handled above must still produce
		// a descriptive error rather than one with an empty message.
		bom = fmt.Sprintf("unrecognized encoding (%v)", encoding)
	}
	// Pass the name as an argument, not as the format string, so it is never
	// interpreted as formatting directives (go vet flags non-constant formats).
	return fmt.Errorf("%s", bom)
}

View File

@ -1,14 +1,15 @@
package collector
import (
"testing"
"strings"
"github.com/dimchansky/utfbom"
"io/ioutil"
"strings"
"testing"
)
func TestCRFilter(t *testing.T) {
sr := strings.NewReader("line 1\r\nline 2")
cr := carriageReturnFilteringReader{ r: sr }
cr := carriageReturnFilteringReader{r: sr}
b, err := ioutil.ReadAll(cr)
if err != nil {
t.Error(err)
@ -17,4 +18,30 @@ func TestCRFilter(t *testing.T) {
if string(b) != "line 1\nline 2" {
t.Errorf("Unexpected output %q", b)
}
}
}
// TestCheckBOM runs checkBOM over each utfbom encoding constant listed below
// and checks the outcome: an empty expected string means no error may be
// returned, a non-empty one means an error containing that text is required.
func TestCheckBOM(t *testing.T) {
	type bomCase struct {
		encoding utfbom.Encoding
		err      string
	}
	cases := []bomCase{
		{encoding: utfbom.Unknown, err: ""},
		{encoding: utfbom.UTF8, err: ""},
		{encoding: utfbom.UTF16BigEndian, err: "UTF16BigEndian"},
		{encoding: utfbom.UTF16LittleEndian, err: "UTF16LittleEndian"},
		{encoding: utfbom.UTF32BigEndian, err: "UTF32BigEndian"},
		{encoding: utfbom.UTF32LittleEndian, err: "UTF32LittleEndian"},
	}

	for _, tc := range cases {
		got := checkBOM(tc.encoding)
		wantErr := tc.err != ""

		// The three failure conditions are mutually exclusive, so at most
		// one of them is reported per case.
		switch {
		case got != nil && !wantErr:
			t.Error(got)
		case got == nil && wantErr:
			t.Errorf("Missing expected error %s", tc.err)
		case got != nil && !strings.Contains(got.Error(), tc.err):
			t.Error(got)
		}
	}
}