forked from Mirrors/openclonk
Search for GTest and add a unit testing project if available
Currently this only tests correct UTF-8 handling.
parent
1ea7b1a1ce
commit
97f699a005
|
@ -1179,6 +1179,16 @@ if(NOT HAVE_FMOD AND NOT USE_OPEN_AL OR USE_SDL_MAINLOOP AND NOT USE_OPEN_AL)
|
|||
endif()
|
||||
endif()
|
||||
|
||||
# GTest
# Optional dependency: the unit testing project is only defined when GTest
# is found. find_package() is the supported entry point for find modules.
find_package(GTest)
if(GTEST_FOUND)
	include_directories(${GTEST_INCLUDE_DIRS})
	# EXCLUDE_FROM_ALL keeps the test executable out of the default build;
	# it has to be requested explicitly by building the "tests" target.
	add_executable(tests EXCLUDE_FROM_ALL
		tests/UnicodeHandlingTest.cpp
		tests/main.cpp
	)
	target_link_libraries(tests ${GTEST_LIBRARIES})
endif()
|
||||
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config.h)
|
||||
|
||||
|
|
|
@ -0,0 +1,164 @@
|
|||
/*
|
||||
* OpenClonk, http://www.openclonk.org
|
||||
*
|
||||
* Copyright (c) 2011 Nicolas Hake
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
* See isc_license.txt for full license and disclaimer.
|
||||
*
|
||||
* "Clonk" is a registered trademark of Matthes Bender.
|
||||
* See clonk_trademark_license.txt for full license.
|
||||
*/
|
||||
|
||||
/* Verify correct behavior of UTF-8 handling code. */
|
||||
|
||||
#include "lib/Standard.h"
|
||||
#include "lib/Standard.cpp"
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
TEST(UnicodeHandlingTest, AcceptsEmptyString)
{
	// An empty string has to be accepted no matter how its end is found.

	// Case 1: length taken from the terminator, nothing follows it.
	const char *const empty = "";
	EXPECT_TRUE(::IsValidUtf8(empty));

	// Case 2: length taken from the terminator; the literal starts with
	// NUL and continues with 0xFF bytes behind it.
	const char *const empty_then_garbage = "\0\xFF\xFF\xFF\xFF";
	EXPECT_TRUE(::IsValidUtf8(empty_then_garbage));

	// Case 3: the literal holds only 0xFF bytes, but the explicit length
	// of zero declares the string empty.
	const char *const garbage_only = "\xFF\xFF\xFF\xFF";
	EXPECT_TRUE(::IsValidUtf8(garbage_only, 0));
}
|
||||
|
||||
TEST(UnicodeHandlingTest, AcceptsValidSingleByteUtf8)
{
	// Check acceptance of valid UTF-8 single-byte sequences.
	// Parts 1 and 2 together cover every code point in U+0000..U+007F;
	// Part 1 groups them by Unicode general category.

	// Part 1: Automatic length detection
	// Test gc=Lu: General category: Letter, uppercase
	EXPECT_TRUE(::IsValidUtf8("ABCDEFGHIJKLMNOPQRSTUVWXYZ"));
	// Test gc=Ll: General category: Letter, lowercase
	EXPECT_TRUE(::IsValidUtf8("abcdefghijklmnopqrstuvwxyz"));
	// Test gc=Nd: General category: Number, decimal digit
	EXPECT_TRUE(::IsValidUtf8("0123456789"));
	// Test gc=Zs: General category: Separator, space
	EXPECT_TRUE(::IsValidUtf8(" "));
	// Test gc=Po: General category: Punctuation, other
	EXPECT_TRUE(::IsValidUtf8(
		"!"
		"\x22" // U+0022 QUOTATION MARK
		"#%&'*,./:;?@"
		"\x5C" // U+005C REVERSE SOLIDUS (aka BACKSLASH)
		));
	// Test gc=Sc: General category: Symbol, currency
	EXPECT_TRUE(::IsValidUtf8("$"));
	// Test gc=Ps: General category: Punctuation, open
	EXPECT_TRUE(::IsValidUtf8("([{"));
	// Test gc=Pe: General category: Punctuation, close
	EXPECT_TRUE(::IsValidUtf8(")]}"));
	// Test gc=Sm: General category: Symbol, math
	EXPECT_TRUE(::IsValidUtf8("+<=>|~"));
	// Test gc=Pd: General category: Punctuation, dash
	EXPECT_TRUE(::IsValidUtf8("-"));
	// Test gc=Pc: General category: Punctuation, connector
	EXPECT_TRUE(::IsValidUtf8("_")); // U+005F LOW LINE
	// Test gc=Sk: General category: Symbol, modifier
	EXPECT_TRUE(::IsValidUtf8(
		"^"
		"\x60" // U+0060 GRAVE ACCENT
		));
	// Test gc=Cc: General category: Other, control
	// NB: This omits U+0000 NULL due to it being the C string terminator
	EXPECT_TRUE(::IsValidUtf8(
		"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
		"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
		"\x7F"));

	// Part 2: Interspersed U+0000 NULL characters
	// The explicit length of 20 spans the 19 written characters plus the
	// literal's own terminating NUL.
	EXPECT_TRUE(::IsValidUtf8("A\0BC\0DEF\0GHIJ\0KLMNO", 20));

	// Part 3: Valid UTF-8 with trailing garbage, manual length override
	// Only the four leading 'A' characters lie within the given length.
	EXPECT_TRUE(::IsValidUtf8("AAAA\x80\xF0\xFF", 4));
}
|
||||
|
||||
TEST(UnicodeHandlingTest, RejectsInvalidSingleByteUtf8)
{
	// Check rejection of invalid UTF-8 single-byte sequences.
	// Every candidate byte is tested on its own, followed only by the
	// string terminator. The three ranges below do not overlap and
	// together cover 0x80..0xFF.
	// The explicit casts to char are needed because converting an int to
	// char inside a braced initializer is a narrowing conversion.

	// Part 1: Range 0x80..0xBF (orphaned continuation bytes)
	for (int i = 0x80; i <= 0xBF; ++i)
	{
		const char buffer[] = { static_cast<char>(i), 0 };
		EXPECT_FALSE(::IsValidUtf8(buffer)) << "Orphaned continuation byte not rejected:" << std::hex
			<< " 0x" << i;
	}
	// Part 2: Range 0xC0..0xF4 (orphaned start bytes)
	for (int i = 0xC0; i <= 0xF4; ++i)
	{
		const char buffer[] = { static_cast<char>(i), 0 };
		EXPECT_FALSE(::IsValidUtf8(buffer)) << "Orphaned start byte not rejected:" << std::hex
			<< " 0x" << i;
	}
	// Part 3: Range 0xF5..0xFF (invalid bytes)
	for (int i = 0xF5; i <= 0xFF; ++i)
	{
		const char buffer[] = { static_cast<char>(i), 0 };
		EXPECT_FALSE(::IsValidUtf8(buffer)) << "Invalid byte not rejected:" << std::hex
			<< " 0x" << i;
	}
}
|
||||
|
||||
TEST(UnicodeHandlingTest, AcceptsValidMultiByteUtf8)
{
	// Check acceptance of valid UTF-8 multi-byte sequences.
	// Each code point is encoded by hand into a NUL-terminated buffer.
	// The explicit casts to char are needed because converting an int to
	// char inside a braced initializer is a narrowing conversion.

	// Part 1: Generate all valid two-byte sequences (U+0080..U+07FF)
	for (int i = 0x80; i < 0x800; ++i)
	{
		const char buffer[] = {
			static_cast<char>(0xC0 | (i >> 6)),
			static_cast<char>(0x80 | (i & 0x3F)),
			0
		};
		EXPECT_TRUE(::IsValidUtf8(buffer)) << "Valid UTF-8 character not recognized:" << std::hex
			<< " 0x" << static_cast<uint32_t>(static_cast<uint8_t>(buffer[0]))
			<< " 0x" << static_cast<uint32_t>(static_cast<uint8_t>(buffer[1]))
			<< " (0x" << i << ")";
	}
	// Part 2: Generate all valid three-byte sequences (U+0800..U+FFFF)
	for (int i = 0x800; i < 0x10000; ++i)
	{
		// Skip invalid surrogate halves: jump from U+D800 straight to U+E000
		if (i == 0xD800) i = 0xE000;
		const char buffer[] = {
			static_cast<char>(0xE0 | (i >> 12)),
			static_cast<char>(0x80 | ((i >> 6) & 0x3F)),
			static_cast<char>(0x80 | (i & 0x3F)),
			0
		};
		EXPECT_TRUE(::IsValidUtf8(buffer)) << "Valid UTF-8 character not recognized:" << std::hex
			<< " 0x" << static_cast<uint32_t>(static_cast<uint8_t>(buffer[0]))
			<< " 0x" << static_cast<uint32_t>(static_cast<uint8_t>(buffer[1]))
			<< " 0x" << static_cast<uint32_t>(static_cast<uint8_t>(buffer[2]))
			<< " (0x" << i << ")";
	}
	// Part 3: Generate all valid four-byte sequences (U+10000..U+10FFFF)
	// The upper bound is inclusive: U+10FFFF is the last valid code point.
	for (int i = 0x10000; i <= 0x10FFFF; ++i)
	{
		const char buffer[] = {
			static_cast<char>(0xF0 | (i >> 18)),
			static_cast<char>(0x80 | ((i >> 12) & 0x3F)),
			static_cast<char>(0x80 | ((i >> 6) & 0x3F)),
			static_cast<char>(0x80 | (i & 0x3F)),
			0
		};
		EXPECT_TRUE(::IsValidUtf8(buffer)) << "Valid UTF-8 character not recognized:" << std::hex
			<< " 0x" << static_cast<uint32_t>(static_cast<uint8_t>(buffer[0]))
			<< " 0x" << static_cast<uint32_t>(static_cast<uint8_t>(buffer[1]))
			<< " 0x" << static_cast<uint32_t>(static_cast<uint8_t>(buffer[2]))
			<< " 0x" << static_cast<uint32_t>(static_cast<uint8_t>(buffer[3]))
			<< " (0x" << i << ")";
	}
}
|
||||
|
||||
TEST(UnicodeHandlingTest, RejectsInvalidMultiByteUtf8)
{
	// Check rejection of invalid UTF-8 multi-byte sequences.

	// Part 1: Overlong sequences (a code point encoded in more bytes than necessary)
	// 1.1: U+0000 NULL encoding
	EXPECT_FALSE(::IsValidUtf8("\xC0\x80")); // Two-byte representation of U+0000 NULL
	EXPECT_FALSE(::IsValidUtf8("\xE0\x80\x80")); // Three-byte representation of U+0000 NULL
	EXPECT_FALSE(::IsValidUtf8("\xF0\x80\x80\x80")); // Four-byte representation of U+0000 NULL
	EXPECT_FALSE(::IsValidUtf8("\xF8\x80\x80\x80\x80")); // Five-byte representation of U+0000 NULL
	EXPECT_FALSE(::IsValidUtf8("\xFC\x80\x80\x80\x80\x80")); // Six-byte representation of U+0000 NULL
	// 1.2: U+0080 <control> encoding
	EXPECT_FALSE(::IsValidUtf8("\xE0\x82\x80")); // Three-byte representation of U+0080 <control>
	EXPECT_FALSE(::IsValidUtf8("\xF0\x80\x82\x80")); // Four-byte representation of U+0080 <control>
	// 1.3: U+0800 SAMARITAN LETTER ALAF encoding
	EXPECT_FALSE(::IsValidUtf8("\xF0\x80\xA0\x80")); // Four-byte representation of U+0800 SAMARITAN LETTER ALAF

	// Part 2: Incorrectly encoded surrogate halves (U+D800..U+DFFF as three-byte sequences)
	// The explicit casts to char are needed because converting an int to
	// char inside a braced initializer is a narrowing conversion.
	for (int i = 0xD800; i <= 0xDFFF; ++i)
	{
		const char buffer[] = {
			static_cast<char>(0xE0 | (i >> 12)),
			static_cast<char>(0x80 | ((i >> 6) & 0x3F)),
			static_cast<char>(0x80 | (i & 0x3F)),
			0
		};
		EXPECT_FALSE(::IsValidUtf8(buffer)) << "Invalid surrogate half not recognized: " << std::hex
			<< " 0x" << static_cast<uint32_t>(static_cast<uint8_t>(buffer[0]))
			<< " 0x" << static_cast<uint32_t>(static_cast<uint8_t>(buffer[1]))
			<< " 0x" << static_cast<uint32_t>(static_cast<uint8_t>(buffer[2]))
			<< " (0x" << i << ")";
	}

	// Part 3: Sequences encoding codepoints beyond the unicode range
	EXPECT_FALSE(::IsValidUtf8("\xF4\x90\x80\x80")); // Representation of invalid codepoint U+110000

	// Part 4: Incomplete multibyte sequences
	// Cut short either by an explicit length or by the string terminator.
	EXPECT_FALSE(::IsValidUtf8("\xC3\xA6", 1)); // U+00E6 LATIN SMALL LETTER AE
	EXPECT_FALSE(::IsValidUtf8("\xE2\x84\x95", 2)); // U+2115 DOUBLE-STRUCK CAPITAL N
	EXPECT_FALSE(::IsValidUtf8("\xE2\x84")); // Same prefix, ended by the terminator instead
	EXPECT_FALSE(::IsValidUtf8("\xF0\x9F\x94\x87", 3)); // U+1F507 SPEAKER WITH CANCELLATION STROKE
}
|
|
@ -0,0 +1,19 @@
|
|||
/*
|
||||
* OpenClonk, http://www.openclonk.org
|
||||
*
|
||||
* This file is ineligible for copyright and therefore in the public domain,
|
||||
* because it does not reach the required threshold of originality.
|
||||
*
|
||||
* "Clonk" is a registered trademark of Matthes Bender.
|
||||
* See clonk_trademark_license.txt for full license.
|
||||
*/
|
||||
|
||||
/* Runs all available tests. */
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
Loading…
Reference in New Issue