26 static const unsigned kCodepointRanges[] = {
242 #define UTF8_ACCEPT 0u 244 static const unsigned char utf8d[] = {
247 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
248 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
249 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
250 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
251 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
252 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
253 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
254 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
258 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
259 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
260 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
261 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
262 12,36,12,12,12,12,12,12,12,12,12,12,
265 static unsigned inline decode(
unsigned*
state,
unsigned* codep,
unsigned byte) {
266 unsigned type = utf8d[byte];
269 (byte & 0x3fu) | (*codep << 6) :
270 (0xffu >> type) & (byte);
287 for (
const unsigned* range = kCodepointRanges; *range != 0xFFFFFFFF; range += 2) {
288 for (
unsigned codepoint = range[0]; codepoint <= range[1]; ++codepoint) {
295 unsigned decodedCodepoint = 0;
298 unsigned decodedCount = 0;
299 for (
const char* s = encodedStr; *s; ++s)
300 if (!decode(&
state, &decodedCodepoint, static_cast<unsigned char>(*s))) {
311 std::cout << std::hex << codepoint <<
" " << decodedCodepoint << std::endl;
317 unsigned decodedCodepoint;
321 if (!result || codepoint != decodedCodepoint)
322 std::cout << std::hex << codepoint <<
" " << decodedCodepoint << std::endl;
340 for (
const unsigned* range = kCodepointRanges; *range != 0xFFFFFFFF; range += 2) {
341 for (
unsigned codepoint = range[0]; codepoint <= range[1]; ++codepoint) {
354 unsigned decodedCodepoint = 0;
357 for (
const char* s = utf8os.
GetString(); *s; ++s) {
358 if (!decode(&
state, &decodedCodepoint, static_cast<unsigned char>(*s)))
362 if (codepoint <= 0xFFFF)
366 *p++ =
static_cast<UTF16<>::Ch>(0xD7C0 + (decodedCodepoint >> 10));
367 *p++ =
static_cast<UTF16<>::Ch>(0xDC00 + (decodedCodepoint & 0x3FF));
377 unsigned decodedCodepoint;
381 if (!result || codepoint != decodedCodepoint)
382 std::cout << std::hex << codepoint <<
" " << decodedCodepoint << std::endl;
399 for (
const unsigned* range = kCodepointRanges; *range != 0xFFFFFFFF; range += 2) {
400 for (
unsigned codepoint = range[0]; codepoint <= range[1]; ++codepoint) {
408 unsigned decodedCodepoint;
412 if (!result || codepoint != decodedCodepoint)
413 std::cout << std::hex << codepoint <<
" " << decodedCodepoint << std::endl;
430 for (
unsigned codepoint = 0; codepoint < 128; codepoint++) {
436 unsigned decodedCodepoint;
438 if (!result || codepoint != decodedCodepoint)
439 std::cout << std::hex << codepoint <<
" " << decodedCodepoint << std::endl;
static void Encode(OutputStream &os, unsigned codepoint)
static void Encode(OutputStream &os, unsigned codepoint)
static bool Decode(InputStream &is, unsigned *codepoint)
#define EXPECT_TRUE(condition)
static bool Validate(InputStream &is, OutputStream &os)
static bool Validate(InputStream &is, OutputStream &os)
static bool Validate(InputStream &is, OutputStream &os)
TEST(EncodingsTest, UTF8)
static void Encode(OutputStream &os, unsigned codepoint)
static void Encode(OutputStream &os, unsigned codepoint)
int StrCmp(const Ch *s1, const Ch *s2)
static bool Validate(InputStream &is, OutputStream &os)
const Ch * GetString() const
static bool Decode(InputStream &is, unsigned *codepoint)
static bool Decode(InputStream &is, unsigned *codepoint)
#define EXPECT_EQ(val1, val2)
static bool Decode(InputStream &is, unsigned *codepoint)