diff --git a/collector/textfile.go b/collector/textfile.go
index 02cb7a40..b1bd33c0 100644
--- a/collector/textfile.go
+++ b/collector/textfile.go
@@ -237,7 +237,14 @@ fileLoop:
 			continue
 		}
 		var parser expfmt.TextParser
-		r := utfbom.SkipOnly(carriageReturnFilteringReader{r: file})
+		r, encoding := utfbom.Skip(carriageReturnFilteringReader{r: file})
+		if err := checkBOM(encoding); err != nil {
+			log.Errorf("Invalid file encoding detected in %s: %s - file must be UTF8", path, err.Error())
+			// Close here: this early continue skips the file.Close() below.
+			file.Close()
+			error = 1.0
+			continue
+		}
 		parsedFamilies, err := parser.TextToMetricFamilies(r)
 		file.Close()
 		if err != nil {
@@ -281,3 +288,29 @@ fileLoop:
 	)
 	return nil
 }
+
+// checkBOM validates the encoding reported by utfbom.Skip for a textfile.
+// It returns nil for utfbom.Unknown and utfbom.UTF8; for any other value it
+// returns an error whose message names the detected encoding.
+func checkBOM(encoding utfbom.Encoding) error {
+	var bom string
+	switch encoding {
+	case utfbom.Unknown, utfbom.UTF8:
+		return nil
+	case utfbom.UTF16BigEndian:
+		bom = "UTF16BigEndian"
+	case utfbom.UTF16LittleEndian:
+		bom = "UTF16LittleEndian"
+	case utfbom.UTF32BigEndian:
+		bom = "UTF32BigEndian"
+	case utfbom.UTF32LittleEndian:
+		bom = "UTF32LittleEndian"
+	default:
+		// Avoid an error with an empty message for values not listed above.
+		bom = fmt.Sprintf("unrecognized encoding %d", encoding)
+	}
+
+	// Use a constant format string: go vet flags fmt.Errorf(bom), and a '%'
+	// in the message would be misinterpreted as a verb.
+	return fmt.Errorf("%s", bom)
+}
diff --git a/collector/textfile_test.go b/collector/textfile_test.go
index 231aefff..d3717146 100644
--- a/collector/textfile_test.go
+++ b/collector/textfile_test.go
@@ -1,14 +1,16 @@
 package collector
 
 import (
-	"testing"
-	"strings"
 	"io/ioutil"
+	"strings"
+	"testing"
+
+	"github.com/dimchansky/utfbom"
 )
 
 func TestCRFilter(t *testing.T) {
 	sr := strings.NewReader("line 1\r\nline 2")
-	cr := carriageReturnFilteringReader{ r: sr }
+	cr := carriageReturnFilteringReader{r: sr}
 	b, err := ioutil.ReadAll(cr)
 	if err != nil {
 		t.Error(err)
@@ -17,4 +19,34 @@ func TestCRFilter(t *testing.T) {
 	if string(b) != "line 1\nline 2" {
 		t.Errorf("Unexpected output %q", b)
 	}
-}
\ No newline at end of file
+}
+
+// TestCheckBOM checks that checkBOM returns nil for utfbom.Unknown and
+// utfbom.UTF8, and an error naming the encoding for the UTF16/UTF32 values.
+func TestCheckBOM(t *testing.T) {
+	testdata := []struct {
+		encoding utfbom.Encoding
+		err      string
+	}{
+		{utfbom.Unknown, ""},
+		{utfbom.UTF8, ""},
+		{utfbom.UTF16BigEndian, "UTF16BigEndian"},
+		{utfbom.UTF16LittleEndian, "UTF16LittleEndian"},
+		{utfbom.UTF32BigEndian, "UTF32BigEndian"},
+		{utfbom.UTF32LittleEndian, "UTF32LittleEndian"},
+	}
+	for _, d := range testdata {
+		err := checkBOM(d.encoding)
+		switch {
+		case d.err == "":
+			// Accepted encodings must not produce an error.
+			if err != nil {
+				t.Errorf("Unexpected error for encoding %d: %s", d.encoding, err)
+			}
+		case err == nil:
+			t.Errorf("Missing expected error %s", d.err)
+		case !strings.Contains(err.Error(), d.err):
+			t.Errorf("Error %q does not mention %s", err, d.err)
+		}
+	}
+}