mirror of
https://github.com/ceph/ceph
synced 2025-02-28 21:42:27 +00:00
json_spirit: use utf8 internally when parsing \uHHHH
When the Python CLI is given non-ASCII characters, it converts them to \uHHHH escapes in JSON. json_spirit parses these internally into 16-bit characters, which could only work if json_spirit were built to use std::wstring, which it isn't; it's using std::string, so the high byte ends up being zeroed, leaving the low byte, which is effectively garbage. This hack^H^H^H^H change makes json_spirit convert to UTF-8 internally instead, which can be stored just fine inside a std::string. Note that this implementation still assumes \uHHHH escapes are four hex digits, so it'll only cope with characters in the Basic Multilingual Plane. Still, that's rather a lot more characters than it could cope with before ;) (For characters outside the BMP, Python seems to generate escapes in the form \uHHHHHHHH, i.e. 8 hex digits, which the current implementation doesn't expect to see.) Fixes: #7387 Signed-off-by: Tim Serong <tserong@suse.com>
This commit is contained in:
parent
89262abc53
commit
8add15b86e
src
@ -13,6 +13,8 @@
|
||||
#include "json_spirit_value.h"
|
||||
#include "json_spirit_error_position.h"
|
||||
|
||||
#include "common/utf8.h"
|
||||
|
||||
#define BOOST_SPIRIT_THREADSAFE // uncomment for multithreaded use, requires linking to boost.thread
|
||||
|
||||
#include <boost/bind.hpp>
|
||||
@ -71,18 +73,30 @@ namespace json_spirit
|
||||
return ( hex_to_num( c1 ) << 4 ) + hex_to_num( c2 );
|
||||
}
|
||||
|
||||
template< class Char_type, class Iter_type >
|
||||
Char_type unicode_str_to_char( Iter_type& begin )
|
||||
template< class String_type, class Iter_type >
|
||||
String_type unicode_str_to_utf8( Iter_type& begin );
|
||||
|
||||
template<>
|
||||
std::string unicode_str_to_utf8( std::string::const_iterator & begin )
|
||||
{
|
||||
typedef typename std::string::value_type Char_type;
|
||||
|
||||
const Char_type c1( *( ++begin ) );
|
||||
const Char_type c2( *( ++begin ) );
|
||||
const Char_type c3( *( ++begin ) );
|
||||
const Char_type c4( *( ++begin ) );
|
||||
|
||||
return ( hex_to_num( c1 ) << 12 ) +
|
||||
( hex_to_num( c2 ) << 8 ) +
|
||||
( hex_to_num( c3 ) << 4 ) +
|
||||
hex_to_num( c4 );
|
||||
unsigned long uc = ( hex_to_num( c1 ) << 12 ) +
|
||||
( hex_to_num( c2 ) << 8 ) +
|
||||
( hex_to_num( c3 ) << 4 ) +
|
||||
hex_to_num( c4 );
|
||||
|
||||
unsigned char buf[7]; // MAX_UTF8_SZ is 6 (see src/common/utf8.c)
|
||||
int r = encode_utf8(uc, buf);
|
||||
if (r >= 0) {
|
||||
return std::string(reinterpret_cast<char *>(buf), r);
|
||||
}
|
||||
return std::string("_");
|
||||
}
|
||||
|
||||
template< class String_type >
|
||||
@ -116,7 +130,7 @@ namespace json_spirit
|
||||
{
|
||||
if( end - begin >= 5 ) // expecting "uHHHH..."
|
||||
{
|
||||
s += unicode_str_to_char< Char_type >( begin );
|
||||
s += unicode_str_to_utf8< String_type >( begin );
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -178,11 +192,15 @@ namespace json_spirit
|
||||
return get_str_< std::string >( begin, end );
|
||||
}
|
||||
|
||||
// Need this guard else it tries to instantiate unicode_str_to_utf8 with a
|
||||
// std::wstring, which isn't presently implemented
|
||||
#if defined( JSON_SPIRIT_WMVALUE_ENABLED ) && !defined( BOOST_NO_STD_WSTRING )
|
||||
inline std::wstring get_str( std::wstring::const_iterator begin, std::wstring::const_iterator end )
|
||||
{
|
||||
return get_str_< std::wstring >( begin, end );
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
template< class String_type, class Iter_type >
|
||||
String_type get_str( Iter_type begin, Iter_type end )
|
||||
{
|
||||
|
@ -236,6 +236,21 @@ function TEST_no_pool_delete() {
|
||||
./ceph osd pool delete foo foo --yes-i-really-really-mean-it
|
||||
}
|
||||
|
||||
function TEST_utf8_cli() {
|
||||
local dir=$1
|
||||
run_mon $dir a --public-addr $CEPH_MON
|
||||
# Hopefully it's safe to include literal UTF-8 characters to test
|
||||
# the fix for http://tracker.ceph.com/issues/7387. If it turns out
|
||||
# to not be OK (when is the default encoding *not* UTF-8?), maybe
|
||||
# the character '黄' can be replaced with the escape $'\xe9\xbb\x84'
|
||||
./ceph osd pool create 黄 1024 2>&1 | \
|
||||
grep "pool '黄' created" || return 1
|
||||
./ceph osd lspools 2>&1 | \
|
||||
grep "黄" || return 1
|
||||
./ceph -f json-pretty osd dump | \
|
||||
python -c "import json; import sys; json.load(sys.stdin)" || return 1
|
||||
./ceph osd pool delete 黄 黄 --yes-i-really-really-mean-it
|
||||
}
|
||||
|
||||
main osd-pool-create
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user