diff --git a/scp.c b/scp.c
index c67cd71df..b4db85198 100644
--- a/scp.c
+++ b/scp.c
@@ -379,7 +379,7 @@ main(int argc, char **argv)
 	/* Ensure that fds 0, 1 and 2 are open or directed to /dev/null */
 	sanitise_stdfd();
 
-	setlocale(LC_CTYPE, "");
+	msetlocale();
 
 	/* Copy argv, because we modify it */
 	newargv = xcalloc(MAXIMUM(argc + 1, 1), sizeof(*newargv));
diff --git a/sftp.c b/sftp.c
index af6e3a69a..2b8fdabfb 100644
--- a/sftp.c
+++ b/sftp.c
@@ -2272,7 +2272,7 @@ main(int argc, char **argv)
 	ssh_malloc_init();	/* must be called before any mallocs */
 	/* Ensure that fds 0, 1 and 2 are open or directed to /dev/null */
 	sanitise_stdfd();
-	setlocale(LC_CTYPE, "");
+	msetlocale();
 
 	__progname = ssh_get_progname(argv[0]);
 	memset(&args, '\0', sizeof(args));
diff --git a/ssh.c b/ssh.c
index 8aa8daae4..ee0b16dc2 100644
--- a/ssh.c
+++ b/ssh.c
@@ -109,6 +109,7 @@
 #include "version.h"
 #include "ssherr.h"
 #include "myproposal.h"
+#include "utf8.h"
 
 #ifdef ENABLE_PKCS11
 #include "ssh-pkcs11.h"
@@ -589,7 +590,7 @@ main(int ac, char **av)
 	 */
 	umask(022);
 
-	setlocale(LC_CTYPE, "");
+	msetlocale();
 
 	/*
 	 * Initialize option structure to indicate that no values have been
diff --git a/utf8.c b/utf8.c
index f563d3738..87fa9e89a 100644
--- a/utf8.c
+++ b/utf8.c
@@ -27,6 +27,7 @@
 # include <langinfo.h>
 #endif
 #include <limits.h>
+#include <locale.h>
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -288,3 +289,48 @@ mprintf(const char *fmt, ...)
 	va_end(ap);
 	return ret;
 }
+
+/*
+ * Set up libc for multibyte output in the user's chosen locale.
+ *
+ * LC_ALL, LC_CTYPE and LANG are consulted in that order; the first one
+ * that is set to a non-empty value decides which locale is selected.
+ *
+ * XXX: we are known to have problems with Turkish (i/I confusion) so we
+ * deliberately fall back to the C locale for now. Longer term we should
+ * always prefer to select C.[encoding] if possible, but there's no
+ * standardisation in locales between systems, so we'll need to survey
+ * what's out there first.
+ */
+void
+msetlocale(void)
+{
+	const char *vars[] = { "LC_ALL", "LC_CTYPE", "LANG", NULL };
+	char *cp;
+	int i;
+
+	/*
+	 * We can't yet cope with dotless/dotted I in Turkish locales,
+	 * so fall back to the C locale for these.
+	 */
+	for (i = 0; vars[i] != NULL; i++) {
+		/* POSIX treats an empty variable the same as an unset one */
+		if ((cp = getenv(vars[i])) == NULL || *cp == '\0')
+			continue;
+		if (strncasecmp(cp, "TR", 2) != 0)
+			break;
+		/*
+		 * If we're in a UTF-8 locale then prefer to use
+		 * the C.UTF-8 locale (or equivalent) if it exists.
+		 */
+		if ((strcasestr(cp, "UTF-8") != NULL ||
+		    strcasestr(cp, "UTF8") != NULL) &&
+		    (setlocale(LC_CTYPE, "C.UTF-8") != NULL ||
+		    setlocale(LC_CTYPE, "POSIX.UTF-8") != NULL))
+			return;
+		setlocale(LC_CTYPE, "C");
+		return;
+	}
+	/* We can handle this locale */
+	setlocale(LC_CTYPE, "");
+}
diff --git a/utf8.h b/utf8.h
index 43ce1d55d..88c5a34a3 100644
--- a/utf8.h
+++ b/utf8.h
@@ -22,3 +22,4 @@ int	 fmprintf(FILE *, const char *, ...)
 int	 vfmprintf(FILE *, const char *, va_list);
 int	 snmprintf(char *, size_t, int *, const char *, ...)
 	     __attribute__((format(printf, 4, 5)));
+void	 msetlocale(void);