From ddea626de4cf8397cd84c1db84579d01c296de05 Mon Sep 17 00:00:00 2001 From: Thierry FOURNIER Date: Thu, 28 May 2015 16:00:28 +0200 Subject: [PATCH] MINOR: common: escape CSV strings This function checks a string for using it in a CSV output format. If the string contains one of the following four chars <">, <,>, CR or LF, the string is encapsulated between <"> and each <"> is escaped by a <""> sequence. Surrounding the string with <"> is optional: it can be disabled, forced, or chosen automatically by the function. --- include/common/standard.h | 27 +++++++++++++++++ src/standard.c | 64 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/include/common/standard.h b/include/common/standard.h index bb63535f72..759d0a11ca 100644 --- a/include/common/standard.h +++ b/include/common/standard.h @@ -377,6 +377,33 @@ char *encode_chunk(char *start, char *stop, const struct chunk *chunk); +/* Check a string for using it in a CSV output format. If the string contains + * one of the following four chars <">, <,>, CR or LF, the string is + * encapsulated between <"> and each <"> is escaped by a <""> sequence. + * <str> is the input string to be escaped. The function assumes that + * the input string is null-terminated. + * + * If <quote> is 0, the result is returned escaped but without double quotes. + * It is useful if the escaped string is used between double quotes in the + * format. + * + * printf("..., \"%s\", ...\r\n", csv_enc(str, 0, output)); + * + * If <quote> is 1, the converter adds the quotes only if one of the four + * chars above is found. If <quote> is 2, the converter always adds the quotes. + * + * <output> is a struct chunk used for storing the output string. The input + * is always copied into it, even when nothing has to be escaped. + * + * The function returns the converted string, stored in <output>. If an error + * occurs, the function returns an empty string. This type of output is useful + * for using the function directly as printf() argument.
+ * + * If the output buffer is too short to conatin the input string, the result + * is truncated. + */ +const char *csv_enc(const char *str, int quote, struct chunk *output); + /* Decode an URL-encoded string in-place. The resulting string might * be shorter. If some forbidden characters are found, the conversion is * aborted, the string is truncated before the issue and non-zero is returned, diff --git a/src/standard.c b/src/standard.c index c7060db47d..709db8b942 100644 --- a/src/standard.c +++ b/src/standard.c @@ -1310,6 +1310,70 @@ char *encode_chunk(char *start, char *stop, return start; } +/* Check a string for using it in a CSV output format. If the string contains + * one of the following four chars <">, <,>, CR or LF, the string is + * encapsulated between <"> and each <"> is escaped by a <""> sequence. + * <str> is the input string to be escaped. The function assumes that + * the input string is null-terminated. + * + * If <quote> is 0, the result is returned escaped but without double quotes. + * It is useful if the escaped string is used between double quotes in the + * format. + * + * printf("..., \"%s\", ...\r\n", csv_enc(str, 0, output)); + * + * If <quote> is 1, the converter adds the quotes only if one of the four + * chars above is found. If <quote> is 2, the converter always adds the quotes. + * + * <output> is a struct chunk used for storing the output string. The input + * is always copied into it, even when nothing has to be escaped. + * + * The function returns the converted string, stored in <output>. An empty + * string on error was documented, but no error path is visible below. This + * output is useful for using the function directly as printf() argument. + * + * If the output buffer is too short to contain the input string, the result + * is truncated. NOTE(review): <output>->size below 3 is not checked here. + */ +const char *csv_enc(const char *str, int quote, struct chunk *output) +{ + char *end = output->str + output->size; + char *out = output->str + 1; /* +1 for reserving space for a first <"> */ + + while (*str && out < end - 2) { /* -2 for reserving space for <"> and \0. A <"> whose escaping <"> does not fit is dropped. 
*/ + *out = *str; + if (*str == '"') { + if (quote == 1) + quote = 2; + out++; + if (out >= end - 2) { + out--; + break; + } + *out = '"'; /* escape the double quote by doubling it */ + } + if (quote == 1 && ( *str == '\r' || *str == '\n' || *str == ',') ) + quote = 2; /* CR, LF or comma found: quoting is needed */ + out++; + str++; + } + + if (quote == 1) + quote = 0; /* automatic mode and nothing special was found */ + + if (!quote) { + *out = '\0'; + return output->str + 1; /* skip the byte reserved for the opening quote */ + } + + /* else quote == 2 */ + *output->str = '"'; + *out = '"'; + out++; + *out = '\0'; + return output->str; +} + /* Decode an URL-encoded string in-place. The resulting string might * be shorter. If some forbidden characters are found, the conversion is * aborted, the string is truncated before the issue and a negative value is