Commit f5e47cae authored by Chris Müller

test: add testcases for low-level utf8 routines.

parent 3698a439
......@@ -7,6 +7,7 @@ set(SOURCES
red_black_tree.c
list.c
structures.c
utf8.c
)
......
......@@ -6,6 +6,7 @@ void cry_test_arrays();
void cry_test_red_black_trees();
void cry_test_maps();
void cry_test_lists();
void cry_test_utf8();
int
main(int argc, char** argv)
......@@ -17,6 +18,7 @@ main(int argc, char** argv)
cry_test_red_black_trees();
cry_test_lists();
cry_test_maps();
cry_test_utf8();
cry_unittest_finalize();
......
#include "unittest.h"
#include "utf8.h"
#include <assert.h>
#include <stdio.h>
// Test fixtures: UTF-8 encoded strings plus, where needed, the code points
// they are expected to decode to. "length" counts code points, not bytes.

// plain ASCII string (length = 10, 10 bytes)
static const byte* ascii = "teststring";
// German text; the umlauts U+00E4 and U+00FC take two bytes each
// (length = 16, 18 bytes). The literal is split after \xBC because the
// following 'b' would otherwise be read as part of the hex escape.
static const byte* latin1 = "Ger\xC3\xA4t\xC3\xBC""berhitzung";
// alpha beta gamma pi = U+03B1 U+03B2 U+03B3 U+03C0 (length = 4, 8 bytes)
// note: pi is CF 80; CE 80 would decode to the unassigned U+0380.
static const byte* greek = "\xCE\xB1\xCE\xB2\xCE\xB3\xCF\x80";
// hiragana: ze yo vu ba, three bytes each (length = 4, 12 bytes)
static const byte* hiragana = "\xE3\x81\x9C\xE3\x82\x88\xE3\x82\x94\xE3\x81\xB0";
// code points encoded by `hiragana`, in order
static const unicode uc_hiragana[] = { 0x305C, 0x3088, 0x3094, 0x3070 };
// mix of 1-, 2-, 3- and 4-byte sequences (length = 9, 17 bytes)
static const byte* mixed = "tz\xC3\xA4 \xE4\xBB\x90 \xF0\x90\xB9\xA0 \xEF\xBB\x85";
// code points encoded by `mixed`, in order
static const unicode uc_mixed[] = { 't', 'z', 0x00E4, ' ', 0x4ED0, ' ', 0x10E60, ' ', 0xFEC5 };
// Asserts that cry_utf8_validate reports CRY_OKAY for every fixture string
// and CRY_FAIL for a string with a lead byte that has no continuation bytes.
static void test_utf8_validate(const_pointer data)
{
    const byte* const well_formed[] = { ascii, latin1, greek, hiragana, mixed };
    size_t i;

    (void)data; // the fixtures are file-scope; the argument is not needed

    for (i = 0; i < sizeof well_formed / sizeof well_formed[0]; ++i) {
        assert(cry_utf8_validate(well_formed[i]) == CRY_OKAY);
    }

    // 0xE3 opens a three-byte sequence but is directly followed by a space
    assert(cry_utf8_validate("Non Valid \xE3 utf8string") == CRY_FAIL);
}
// Checks, fixture by fixture, the code point count (cry_utf8_strlen) and the
// encoded byte count (cry_utf8_strsize).
static void test_utf8_length(const_pointer data)
{
    // the fixtures have to be well-formed before they are measured
    test_utf8_validate(data);

    // ASCII only: one byte per code point
    assert(cry_utf8_strlen(ascii) == 10);
    assert(cry_utf8_strsize(ascii) == 10);

    // two umlauts add one extra byte each
    assert(cry_utf8_strlen(latin1) == 16);
    assert(cry_utf8_strsize(latin1) == 18);

    // four two-byte sequences
    assert(cry_utf8_strlen(greek) == 4);
    assert(cry_utf8_strsize(greek) == 8);

    // four three-byte sequences
    assert(cry_utf8_strlen(hiragana) == 4);
    assert(cry_utf8_strsize(hiragana) == 12);

    // 1 + 1 + 2 + 1 + 3 + 1 + 4 + 1 + 3 bytes
    assert(cry_utf8_strlen(mixed) == 9);
    assert(cry_utf8_strsize(mixed) == 17);
}
// Walks the fixtures one code point at a time and checks what cry_utf8_get
// and cry_utf8_codepoints report at each position; cry_utf8_next is used to
// advance. Each loop is bounded by the expected code point count, and the
// final assertion confirms that exactly that many positions were visited.
static void test_utf8_codepoints(const_pointer data)
{
    const byte* str;
    size_t index;

    test_utf8_validate(data);

    // ASCII: the decoded value equals the byte itself
    str = ascii;
    for (index = 0; *str != 0 && index < 10; ++index) {
        assert(cry_utf8_get(str) == *str);
        assert(cry_utf8_codepoints(str) == 1);
        str = cry_utf8_next(str);
    }
    assert(index == 10);

    // hiragana: compare against the expected code points.
    // The counter is advanced outside of assert() so the loop behaves the
    // same when assertions are compiled out (NDEBUG).
    str = hiragana;
    for (index = 0; *str != 0 && index < 4; ++index) {
        assert(cry_utf8_get(str) == uc_hiragana[index]);
        assert(cry_utf8_codepoints(str) == 3);
        str = cry_utf8_next(str);
    }
    assert(index == 4);

    // mixed widths: only the decoded values are compared
    str = mixed;
    for (index = 0; *str != 0 && index < 9; ++index) {
        assert(cry_utf8_get(str) == uc_mixed[index]);
        str = cry_utf8_next(str);
    }
    assert(index == 9);
}
// Checks the positions returned by cry_utf8_strchr (first match) and
// cry_utf8_strrchr (last match), including the no-match case.
//
// Byte offsets inside `mixed`:
//   't'=0  'z'=1  U+00E4=2  ' '=4  U+4ED0=5  ' '=8  U+10E60=9  ' '=13  U+FEC5=14
static void test_utf8_search(const_pointer data)
{
    test_utf8_validate(data);

    // forward search: "teststring" has 's' at offsets 2 and 4
    assert(cry_utf8_strchr(ascii, 's') == &ascii[2]);
    assert(cry_utf8_strchr(ascii, 'g') == &ascii[9]);
    assert(cry_utf8_strchr(mixed, 'z') == &mixed[1]);
    assert(cry_utf8_strchr(mixed, 0x00E4) == &mixed[2]);
    assert(cry_utf8_strchr(mixed, 0x4ED0) == &mixed[5]);
    assert(cry_utf8_strchr(mixed, 0xFEC5) == &mixed[14]);
    // a character that does not occur yields a null pointer
    assert(cry_utf8_strchr(ascii, 'P') == NULL);

    // reverse search: the last 's' wins
    assert(cry_utf8_strrchr(ascii, 's') == &ascii[4]);
    assert(cry_utf8_strrchr(latin1, 'G') == &latin1[0]);
    assert(cry_utf8_strrchr(hiragana, 0x305C) == &hiragana[0]);
    assert(cry_utf8_strrchr(mixed, 0x00E4) == &mixed[2]);
    assert(cry_utf8_strrchr(ascii, 'P') == NULL);
}
// Entry point of the UTF-8 test group: hands each test function to
// cry_unittest_run under its "utf8.*" name.
// NOTE(review): the trailing arguments (0, 10) are passed through as before;
// presumably user data and a repeat count - confirm against unittest.h.
void
cry_test_utf8(void)
{
    static const struct {
        const char* label;
        void (*body)(const_pointer);
    } suite[] = {
        { "utf8.length", test_utf8_length },
        { "utf8.codepoints", test_utf8_codepoints },
        { "utf8.search", test_utf8_search },
    };
    size_t i;

    for (i = 0; i < sizeof suite / sizeof suite[0]; ++i) {
        cry_unittest_run(suite[i].label, suite[i].body, 0, 10);
    }
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment