implement uchar.h (C11 UTF-16/32 conversion) interfaces

This commit is contained in:
Rich Felker 2014-10-13 20:59:42 -04:00
parent 0054840839
commit ab9672ae73
7 changed files with 109 additions and 5 deletions

View File

@ -58,6 +58,8 @@ TYPEDEF struct { unsigned __attr[2]; } pthread_rwlockattr_t;
TYPEDEF struct _IO_FILE FILE;
TYPEDEF struct __mbstate_t { unsigned __opaque1, __opaque2; } mbstate_t;
TYPEDEF struct __locale_struct * locale_t;
TYPEDEF struct __sigset_t { unsigned long __bits[128/sizeof(long)]; } sigset_t;

27
include/uchar.h Normal file
View File

@ -0,0 +1,27 @@
#ifndef _UCHAR_H
#define _UCHAR_H

#ifdef __cplusplus
extern "C" {
#endif

/* C++11 and later provide char16_t and char32_t as built-in types, so the
 * typedefs must be omitted there. Plain C and pre-C++11 C++ both need them,
 * otherwise the prototypes below do not compile. */
#if !defined(__cplusplus) || __cplusplus < 201103L
typedef unsigned short char16_t;
typedef unsigned char32_t;
#endif

/* Request only the types this header is required to define. */
#define __NEED_mbstate_t
#define __NEED_size_t

#include <features.h>
#include <bits/alltypes.h>

/* Restartable conversions between multibyte characters and UTF-16 code units. */
size_t c16rtomb(char *__restrict, char16_t, mbstate_t *__restrict);
size_t mbrtoc16(char16_t *__restrict, const char *__restrict, size_t, mbstate_t *__restrict);

/* Restartable conversions between multibyte characters and UTF-32 code units. */
size_t c32rtomb(char *__restrict, char32_t, mbstate_t *__restrict);
size_t mbrtoc32(char32_t *__restrict, const char *__restrict, size_t, mbstate_t *__restrict);

#ifdef __cplusplus
}
#endif

#endif

View File

@ -12,6 +12,7 @@ extern "C" {
#define __NEED_size_t
#define __NEED_wchar_t
#define __NEED_wint_t
#define __NEED_mbstate_t
#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
|| defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
@ -42,11 +43,6 @@ extern "C" {
#undef WEOF
#define WEOF 0xffffffffU
typedef struct __mbstate_t
{
unsigned __opaque1, __opaque2;
} mbstate_t;
wchar_t *wcscpy (wchar_t *__restrict, const wchar_t *__restrict);
wchar_t *wcsncpy (wchar_t *__restrict, const wchar_t *__restrict, size_t);

33
src/multibyte/c16rtomb.c Normal file
View File

@ -0,0 +1,33 @@
#include <uchar.h>
#include <errno.h>
#include <wchar.h>
/*
 * Convert one UTF-16 code unit to its multibyte representation.
 *
 *   s    destination buffer; if null, nothing is stored and only the
 *        conversion state is checked
 *   c16  UTF-16 code unit to convert
 *   ps   conversion state; if null, a state object private to this
 *        function is used
 *
 * The first unsigned word of the state object holds the partially
 * assembled code point while a high surrogate is pending, and is zero
 * otherwise.
 *
 * Returns 0 when c16 is a high surrogate that was only recorded in the
 * state, 1 when s is null and no surrogate is pending, otherwise the
 * result of wcrtomb for the completed character. Returns (size_t)-1 with
 * errno set to EILSEQ (and the state cleared) when a pending high
 * surrogate is not followed by a low surrogate.
 */
size_t c16rtomb(char *restrict s, char16_t c16, mbstate_t *restrict ps)
{
	static mbstate_t internal_state;
	if (!ps) ps = &internal_state;

	unsigned *x = (unsigned *)ps;
	wchar_t wc;

	if (!s) {
		if (*x) goto ilseq;
		return 1;
	}

	/* High surrogate with nothing pending: stash the upper bits of the
	 * code point (0x10000 + (high - 0xd800) * 0x400) and emit nothing. */
	if (!*x && c16 - 0xd800u < 0x400) {
		*x = (c16 - 0xd7c0) << 10;
		return 0;
	}

	if (*x) {
		/* A pending high surrogate must be completed by a low one. */
		if (c16 - 0xdc00u >= 0x400) goto ilseq;
		wc = *x + c16 - 0xdc00;
		*x = 0;
	} else {
		wc = c16;
	}
	return wcrtomb(s, wc, 0);

ilseq:
	*x = 0;
	errno = EILSEQ;
	return -1;
}

7
src/multibyte/c32rtomb.c Normal file
View File

@ -0,0 +1,7 @@
#include <uchar.h>
#include <wchar.h>
/*
 * Convert one UTF-32 code unit to its multibyte representation.
 *
 * The code unit is converted to wchar_t and handed, together with s and
 * ps, to wcrtomb; the return value is whatever wcrtomb returns.
 */
size_t c32rtomb(char *restrict s, char32_t c32, mbstate_t *restrict ps)
{
	wchar_t wide = c32;
	size_t written = wcrtomb(s, wide, ps);
	return written;
}

28
src/multibyte/mbrtoc16.c Normal file
View File

@ -0,0 +1,28 @@
#include <uchar.h>
#include <wchar.h>
/*
 * Convert the next multibyte character to UTF-16, one code unit per call.
 *
 *   pc16  where to store the code unit; may be null to discard it
 *   s     input bytes; if null, behaves as mbrtoc16(0, "", 1, ps)
 *   n     maximum number of bytes of s to examine
 *   ps    conversion state; if null, a state object private to this
 *         function is used
 *
 * Returns (size_t)-3 when a previously stored low surrogate is delivered
 * without consuming input; otherwise returns what mbrtowc returned. For a
 * character at or above 0x10000 the high surrogate is stored now and the
 * low surrogate is kept in the state for the next call.
 */
size_t mbrtoc16(char16_t *restrict pc16, const char *restrict s, size_t n, mbstate_t *restrict ps)
{
	static mbstate_t internal_state;
	if (!ps) ps = &internal_state;

	unsigned *pending = (unsigned *)ps;

	if (!s) return mbrtoc16(0, "", 1, ps);

	/* mbrtowc states for partial UTF-8 characters have the high bit set;
	 * we use nonzero states without high bit for pending surrogates. */
	if ((int)*pending > 0) {
		if (pc16) *pc16 = *pending;
		*pending = 0;
		return -3;
	}

	wchar_t wc;
	size_t ret = mbrtowc(&wc, s, n, ps);
	if (ret <= 4) {
		if (wc >= 0x10000) {
			/* Split into a surrogate pair: remember the low half,
			 * hand out the high half now. */
			*pending = (wc & 0x3ff) + 0xdc00;
			wc = 0xd7c0 + (wc >> 10);
		}
		if (pc16) *pc16 = wc;
	}
	return ret;
}

11
src/multibyte/mbrtoc32.c Normal file
View File

@ -0,0 +1,11 @@
#include <uchar.h>
#include <wchar.h>
/*
 * Convert the next multibyte character to a UTF-32 code unit.
 *
 *   pc32  where to store the code unit; may be null to discard it
 *   s     input bytes; if null, behaves as mbrtoc32(0, "", 1, ps)
 *   n     maximum number of bytes of s to examine
 *   ps    conversion state; if null, a state object private to this
 *         function is used rather than mbrtowc's own internal one
 *
 * Returns what mbrtowc returns for the same input and state.
 */
size_t mbrtoc32(char32_t *restrict pc32, const char *restrict s, size_t n, mbstate_t *restrict ps)
{
	/* Each restartable conversion function has its own internal state;
	 * passing a null ps through would share mbrtowc's instead. */
	static mbstate_t internal_state;
	if (!ps) ps = &internal_state;

	if (!s) return mbrtoc32(0, "", 1, ps);

	wchar_t wc;
	size_t ret = mbrtowc(&wc, s, n, ps);
	/* Only results that denote a completed character carry a value. */
	if (ret <= 4 && pc32) *pc32 = wc;
	return ret;
}