Fix #7: toml_utf8_to_ucs() returns incorrect results
This commit is contained in:
parent
624013252b
commit
56c42b7aed
3 changed files with 77 additions and 7 deletions
16
toml.c
16
toml.c
|
@ -71,9 +71,11 @@ int toml_utf8_to_ucs(const char* orig, int len, int64_t* ret)
|
||||||
if (0x6 == (i >> 5)) {
|
if (0x6 == (i >> 5)) {
|
||||||
if (len < 2) return -1;
|
if (len < 2) return -1;
|
||||||
v = i & 0x1f;
|
v = i & 0x1f;
|
||||||
i = *(++buf);
|
for (int j = 0; j < 1; j++) {
|
||||||
if (0x2 != (i >> 6)) return -1;
|
i = *buf++;
|
||||||
v = (v << 6) | (i & 0x3f);
|
if (0x2 != (i >> 6)) return -1;
|
||||||
|
v = (v << 6) | (i & 0x3f);
|
||||||
|
}
|
||||||
return *ret = v, (const char*) buf - orig;
|
return *ret = v, (const char*) buf - orig;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -84,7 +86,7 @@ int toml_utf8_to_ucs(const char* orig, int len, int64_t* ret)
|
||||||
if (len < 3) return -1;
|
if (len < 3) return -1;
|
||||||
v = i & 0x0F;
|
v = i & 0x0F;
|
||||||
for (int j = 0; j < 2; j++) {
|
for (int j = 0; j < 2; j++) {
|
||||||
i = *(++buf);
|
i = *buf++;
|
||||||
if (0x2 != (i >> 6)) return -1;
|
if (0x2 != (i >> 6)) return -1;
|
||||||
v = (v << 6) | (i & 0x3f);
|
v = (v << 6) | (i & 0x3f);
|
||||||
}
|
}
|
||||||
|
@ -98,7 +100,7 @@ int toml_utf8_to_ucs(const char* orig, int len, int64_t* ret)
|
||||||
if (len < 4) return -1;
|
if (len < 4) return -1;
|
||||||
v = i & 0x07;
|
v = i & 0x07;
|
||||||
for (int j = 0; j < 3; j++) {
|
for (int j = 0; j < 3; j++) {
|
||||||
i = *(++buf);
|
i = *buf++;
|
||||||
if (0x2 != (i >> 6)) return -1;
|
if (0x2 != (i >> 6)) return -1;
|
||||||
v = (v << 6) | (i & 0x3f);
|
v = (v << 6) | (i & 0x3f);
|
||||||
}
|
}
|
||||||
|
@ -112,7 +114,7 @@ int toml_utf8_to_ucs(const char* orig, int len, int64_t* ret)
|
||||||
if (len < 5) return -1;
|
if (len < 5) return -1;
|
||||||
v = i & 0x03;
|
v = i & 0x03;
|
||||||
for (int j = 0; j < 4; j++) {
|
for (int j = 0; j < 4; j++) {
|
||||||
i = *(++buf);
|
i = *buf++;
|
||||||
if (0x2 != (i >> 6)) return -1;
|
if (0x2 != (i >> 6)) return -1;
|
||||||
v = (v << 6) | (i & 0x3f);
|
v = (v << 6) | (i & 0x3f);
|
||||||
}
|
}
|
||||||
|
@ -126,7 +128,7 @@ int toml_utf8_to_ucs(const char* orig, int len, int64_t* ret)
|
||||||
if (len < 6) return -1;
|
if (len < 6) return -1;
|
||||||
v = i & 0x01;
|
v = i & 0x01;
|
||||||
for (int j = 0; j < 5; j++) {
|
for (int j = 0; j < 5; j++) {
|
||||||
i = *(++buf);
|
i = *buf++;
|
||||||
if (0x2 != (i >> 6)) return -1;
|
if (0x2 != (i >> 6)) return -1;
|
||||||
v = (v << 6) | (i & 0x3f);
|
v = (v << 6) | (i & 0x3f);
|
||||||
}
|
}
|
||||||
|
|
11
unittest/Makefile
Normal file
11
unittest/Makefile
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
# Build the unit tests for the toml library that lives one directory up.
#
#   make         build every test listed in TESTS
#   make clean   remove the built test binaries

CFLAGS = -g -I..

TESTS = t1

# 'all' and 'clean' are commands, not files; declaring them phony keeps a
# stray file with one of those names from silently disabling the target.
.PHONY: all clean

all: $(TESTS)

# Spell out the compile/link step instead of leaning on a make-specific
# implicit rule, so both translation units are always compiled and linked.
# Depending on ../toml.h rebuilds the test when the public header changes.
t1: t1.c ../toml.c ../toml.h
	$(CC) $(CFLAGS) -o $@ t1.c ../toml.c

clean:
	rm -f $(TESTS)
|
||||||
|
|
57
unittest/t1.c
Normal file
57
unittest/t1.c
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "../toml.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Round-trip test for toml_ucs_to_utf8() and toml_utf8_to_ucs().
 *
 * For the smallest and largest code point of every multi-byte encoding
 * length exercised here (2 to 6 bytes), check that
 *   - encoding the code point yields exactly the expected byte sequence, and
 *   - decoding that byte sequence yields the code point and consumes
 *     exactly the expected number of bytes.
 *
 * The calls under test are made outside of assert() so the checks still run
 * when the file is compiled with -DNDEBUG, and every failing case is
 * reported instead of aborting on the first one.
 *
 * Returns 0 when every case passes, 1 otherwise.
 */
int main(int argc, const char* argv[])
{
    /* code point, encoded length in bytes, expected encoding */
    static const struct {
        int64_t     code;
        int         size;
        const char* utf8;
    } cases[] = {
        { 0x80,       2, "\xc2\x80" },
        { 0x7ff,      2, "\xdf\xbf" },
        { 0x800,      3, "\xe0\xa0\x80" },
        { 0xfffd,     3, "\xef\xbf\xbd" },
        { 0x10000,    4, "\xf0\x90\x80\x80" },
        { 0x1fffff,   4, "\xf7\xbf\xbf\xbf" },
        { 0x200000,   5, "\xf8\x88\x80\x80\x80" },
        { 0x3ffffff,  5, "\xfb\xbf\xbf\xbf\xbf" },
        { 0x4000000,  6, "\xfc\x84\x80\x80\x80\x80" },
        { 0x7fffffff, 6, "\xfd\xbf\xbf\xbf\xbf\xbf" },
    };
    const size_t ncases = sizeof cases / sizeof cases[0];
    int failures = 0;

    (void) argc;
    (void) argv;

    for (size_t k = 0; k < ncases; k++) {
        char buf[6];
        int64_t code = -1;
        int n;

        /* Encode: returned length and produced bytes must both match. */
        memset(buf, 0, sizeof buf);
        n = toml_ucs_to_utf8(cases[k].code, buf);
        if (n != cases[k].size
            || 0 != memcmp(buf, cases[k].utf8, (size_t) cases[k].size)) {
            fprintf(stderr,
                    "FAIL: toml_ucs_to_utf8(0x%llx): returned %d, expected %d bytes\n",
                    (long long) cases[k].code, n, cases[k].size);
            failures++;
        }

        /* Decode the reference bytes, so this check does not depend on the
         * encoder having produced correct output above. */
        n = toml_utf8_to_ucs(cases[k].utf8, cases[k].size, &code);
        if (n != cases[k].size || code != cases[k].code) {
            fprintf(stderr,
                    "FAIL: toml_utf8_to_ucs(0x%llx): returned %d with code 0x%llx, expected %d\n",
                    (long long) cases[k].code, n, (long long) code,
                    cases[k].size);
            failures++;
        }
    }

    return failures ? 1 : 0;
}
|
Loading…
Reference in a new issue