textfile collector: Ensure that only UTF8 textfiles are parsed
Signed-off-by: Dave Henderson <dhenderson@gmail.com>
This commit is contained in:
parent
f27fdbbbf5
commit
a70c57ffd1
|
@ -237,7 +237,12 @@ fileLoop:
|
|||
continue
|
||||
}
|
||||
var parser expfmt.TextParser
|
||||
r := utfbom.SkipOnly(carriageReturnFilteringReader{r: file})
|
||||
r, encoding := utfbom.Skip(carriageReturnFilteringReader{r: file})
|
||||
if err := checkBOM(encoding); err != nil {
|
||||
log.Errorf("Invalid file encoding detected in %s: %s - file must be UTF8", path, err.Error())
|
||||
error = 1.0
|
||||
continue
|
||||
}
|
||||
parsedFamilies, err := parser.TextToMetricFamilies(r)
|
||||
file.Close()
|
||||
if err != nil {
|
||||
|
@ -281,3 +286,21 @@ fileLoop:
|
|||
)
|
||||
return nil
|
||||
}
|
||||
|
||||
func checkBOM(encoding utfbom.Encoding) error {
|
||||
bom := ""
|
||||
switch encoding {
|
||||
case utfbom.Unknown, utfbom.UTF8:
|
||||
return nil
|
||||
case utfbom.UTF16BigEndian:
|
||||
bom = "UTF16BigEndian"
|
||||
case utfbom.UTF16LittleEndian:
|
||||
bom = "UTF16LittleEndian"
|
||||
case utfbom.UTF32BigEndian:
|
||||
bom = "UTF32BigEndian"
|
||||
case utfbom.UTF32LittleEndian:
|
||||
bom = "UTF32LittleEndian"
|
||||
}
|
||||
|
||||
return fmt.Errorf(bom)
|
||||
}
|
||||
|
|
|
@ -1,14 +1,15 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"strings"
|
||||
"github.com/dimchansky/utfbom"
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestCRFilter(t *testing.T) {
|
||||
sr := strings.NewReader("line 1\r\nline 2")
|
||||
cr := carriageReturnFilteringReader{ r: sr }
|
||||
cr := carriageReturnFilteringReader{r: sr}
|
||||
b, err := ioutil.ReadAll(cr)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
|
@ -17,4 +18,30 @@ func TestCRFilter(t *testing.T) {
|
|||
if string(b) != "line 1\nline 2" {
|
||||
t.Errorf("Unexpected output %q", b)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckBOM(t *testing.T) {
|
||||
testdata := []struct {
|
||||
encoding utfbom.Encoding
|
||||
err string
|
||||
}{
|
||||
{utfbom.Unknown, ""},
|
||||
{utfbom.UTF8, ""},
|
||||
{utfbom.UTF16BigEndian, "UTF16BigEndian"},
|
||||
{utfbom.UTF16LittleEndian, "UTF16LittleEndian"},
|
||||
{utfbom.UTF32BigEndian, "UTF32BigEndian"},
|
||||
{utfbom.UTF32LittleEndian, "UTF32LittleEndian"},
|
||||
}
|
||||
for _, d := range testdata {
|
||||
err := checkBOM(d.encoding)
|
||||
if d.err == "" && err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if d.err != "" && err == nil {
|
||||
t.Errorf("Missing expected error %s", d.err)
|
||||
}
|
||||
if err != nil && !strings.Contains(err.Error(), d.err) {
|
||||
t.Error(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue