Search for GTest and add a unit testing project if available

Currently this only tests correct UTF-8 handling.
2011-05-17 18:33:28 +02:00 · 2011-05-17 18:33:28 +02:00 · 97f699a005
parent 1ea7b1a1ce
commit 97f699a005
3 changed files with 193 additions and 0 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1179,6 +1179,16 @@ if(NOT HAVE_FMOD AND NOT USE_OPEN_AL OR USE_SDL_MAINLOOP AND NOT USE_OPEN_AL)
 	endif()
 endif()

+# GTest: build the optional "tests" executable only when Google Test is found
+find_package(GTest) # not REQUIRED, so configuring still succeeds without GTest
+if(GTEST_FOUND)
+	include_directories(${GTEST_INCLUDE_DIRS})
+	add_executable(tests EXCLUDE_FROM_ALL
+		tests/UnicodeHandlingTest.cpp
+		tests/main.cpp
+	)
+	target_link_libraries(tests ${GTEST_LIBRARIES})
+endif()

 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config.h)

--- a/tests/UnicodeHandlingTest.cpp
+++ b/tests/UnicodeHandlingTest.cpp
@@ -0,0 +1,164 @@
+/*
+ * OpenClonk, http://www.openclonk.org
+ *
+ * Copyright (c) 2011  Nicolas Hake
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * See isc_license.txt for full license and disclaimer.
+ *
+ * "Clonk" is a registered trademark of Matthes Bender.
+ * See clonk_trademark_license.txt for full license.
+ */
+
+/* Verify correct behavior of UTF-8 handling code. */
+
+#include "lib/Standard.h"
+#include "lib/Standard.cpp"
+#include <gtest/gtest.h>
+
+TEST(UnicodeHandlingTest, AcceptsEmptyString)
+{
+	// Check that an empty string is accepted, however its length is specified.
+	// Part 1: Automatic length detection (no length argument is passed)
+	EXPECT_TRUE(::IsValidUtf8(""));
+	// Part 2: Automatic length detection with trailing garbage (the leading NUL already ends the C string)
+	EXPECT_TRUE(::IsValidUtf8("\0\xFF\xFF\xFF\xFF"));
+	// Part 3: Manual length override with trailing garbage (length 0, so no 0xFF byte is part of the input)
+	EXPECT_TRUE(::IsValidUtf8("\xFF\xFF\xFF\xFF", 0));
+}
+
+TEST(UnicodeHandlingTest, AcceptsValidSingleByteUtf8)
+{
+	// Check acceptance of valid UTF-8 single-byte sequences.
+	// This test is exhaustive over U+0000..U+007F (U+0000 itself is covered by Part 2).
+	// Part 1: Automatic length detection
+	// Test gc=Lu: General category: Letter, uppercase
+	EXPECT_TRUE(::IsValidUtf8("ABCDEFGHIJKLMNOPQRSTUVWXYZ"));
+	// Test gc=Ll: General category: Letter, lowercase
+	EXPECT_TRUE(::IsValidUtf8("abcdefghijklmnopqrstuvwxyz"));
+	// Test gc=Nd: General category: Number, decimal digit
+	EXPECT_TRUE(::IsValidUtf8("0123456789"));
+	// Test gc=Zs: General category: Separator, space
+	EXPECT_TRUE(::IsValidUtf8(" "));
+	// Test gc=Po: General category: Punctuation, other
+	EXPECT_TRUE(::IsValidUtf8(
+		"!"
+		"\x22" // U+0022 QUOTATION MARK
+		"#%&'*,./:;?@"
+		"\x5C")); // U+005C REVERSE SOLIDUS (aka BACKSLASH)
+	// Test gc=Sc: General category: Symbol, currency
+	EXPECT_TRUE(::IsValidUtf8("$"));
+	// Test gc=Ps: General category: Punctuation, open
+	EXPECT_TRUE(::IsValidUtf8("([{"));
+	// Test gc=Pe: General category: Punctuation, close
+	EXPECT_TRUE(::IsValidUtf8(")]}"));
+	// Test gc=Sm: General category: Symbol, math
+	EXPECT_TRUE(::IsValidUtf8("+<=>|~"));
+	// Test gc=Pd: General category: Punctuation, dash
+	EXPECT_TRUE(::IsValidUtf8("-"));
+	// Test gc=Pc: General category: Punctuation, connector (U+005F LOW LINE)
+	EXPECT_TRUE(::IsValidUtf8("_"));
+	// Test gc=Sk: General category: Symbol, modifier (U+005E CIRCUMFLEX ACCENT, U+0060 GRAVE ACCENT)
+	EXPECT_TRUE(::IsValidUtf8("^`"));
+	// Test gc=Cc: General category: Other, control
+	// NB: This omits U+0000 NULL due to it being the C string terminator
+	EXPECT_TRUE(::IsValidUtf8(
+		    "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
+		"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F" "\x7F"));
+	
+	// Part 2: Interspersed U+0000 NULL characters (length 20 = 19 written characters + the literal's terminator)
+	EXPECT_TRUE(::IsValidUtf8("A\0BC\0DEF\0GHIJ\0KLMNO", 20));
+	
+	// Part 3: Valid UTF-8 with trailing garbage, manual length override (only the four 'A's are in range)
+	EXPECT_TRUE(::IsValidUtf8("AAAA\x80\xF0\xFF", 4));
+}
+
+TEST(UnicodeHandlingTest, RejectsInvalidSingleByteUtf8)
+{
+	// Check rejection of invalid UTF-8 single-byte sequences: each byte >= 0x80 on its own.
+	// Part 1: Range 0x80..0xBF (orphaned continuation bytes)
+	for (int i = 0x80; i <= 0xBF; ++i)
+	{
+		char buffer[] = { static_cast<char>(i), 0 }; // explicit cast: int -> char narrows in a braced list
+		EXPECT_FALSE(::IsValidUtf8(buffer)) << "Orphaned continuation byte accepted: 0x" << std::hex << i;
+	}
+	// Part 2: Range 0xC0..0xF4 (orphaned start bytes)
+	for (int i = 0xC0; i <= 0xF4; ++i)
+	{
+		char buffer[] = { static_cast<char>(i), 0 };
+		EXPECT_FALSE(::IsValidUtf8(buffer)) << "Orphaned start byte accepted: 0x" << std::hex << i;
+	}
+	// Part 3: Range 0xF5..0xFF (invalid bytes)
+	for (int i = 0xF5; i <= 0xFF; ++i)
+	{
+		char buffer[] = { static_cast<char>(i), 0 };
+		EXPECT_FALSE(::IsValidUtf8(buffer)) << "Invalid byte accepted: 0x" << std::hex << i;
+	}
+}
+
+TEST(UnicodeHandlingTest, AcceptsValidMultiByteUtf8)
+{
+	// Check acceptance of valid UTF-8 multi-byte sequences by encoding every code point of each length.
+	// Part 1: Generate all valid two-byte sequences
+	for (int i = 0x80; i < 0x800; ++i)
+	{
+		// Layout 110xxxxx 10xxxxxx; explicit casts because int -> char narrows in a braced list
+		char buffer[] = { static_cast<char>(0xC0 | (i >> 6)), static_cast<char>(0x80 | (i & 0x3F)), 0 };
+		EXPECT_TRUE(::IsValidUtf8(buffer)) << "Valid UTF-8 character not recognized:" << std::hex
+			<< " 0x" << (uint32_t)(uint8_t)buffer[0] << " 0x" << (uint32_t)(uint8_t)buffer[1] << " (0x" << i << ")";
+	}
+	// Part 2: Generate all valid three-byte sequences
+	for (int i = 0x800; i < 0x10000; ++i)
+	{
+		if (i == 0xD800) i = 0xE000; // Skip invalid surrogate halves
+		// Layout 1110xxxx 10xxxxxx 10xxxxxx
+		char buffer[] = { static_cast<char>(0xE0 | (i >> 12)), static_cast<char>(0x80 | ((i >> 6) & 0x3F)), static_cast<char>(0x80 | (i & 0x3F)), 0 };
+		EXPECT_TRUE(::IsValidUtf8(buffer)) << "Valid UTF-8 character not recognized:" << std::hex
+			<< " 0x" << (uint32_t)(uint8_t)buffer[0] << " 0x" << (uint32_t)(uint8_t)buffer[1] << " 0x" << (uint32_t)(uint8_t)buffer[2]
+			<< " (0x" << i << ")";
+	}
+	// Part 3: Generate all valid four-byte sequences
+	for (int i = 0x10000; i <= 0x10FFFF; ++i)
+	{
+		// Layout 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx; the bound is inclusive so that U+10FFFF is tested too
+		char buffer[] = { static_cast<char>(0xF0 | (i >> 18)), static_cast<char>(0x80 | ((i >> 12) & 0x3F)), static_cast<char>(0x80 | ((i >> 6) & 0x3F)), static_cast<char>(0x80 | (i & 0x3F)), 0 };
+		EXPECT_TRUE(::IsValidUtf8(buffer)) << "Valid UTF-8 character not recognized:" << std::hex
+			<< " 0x" << (uint32_t)(uint8_t)buffer[0] << " 0x" << (uint32_t)(uint8_t)buffer[1]
+			<< " 0x" << (uint32_t)(uint8_t)buffer[2] << " 0x" << (uint32_t)(uint8_t)buffer[3] << " (0x" << i << ")";
+	}
+}
+
+TEST(UnicodeHandlingTest, RejectsInvalidMultiByteUtf8)
+{
+	// Check rejection of invalid UTF-8 multi-byte sequences.
+	// Part 1: Overlong sequences (more bytes than the code point needs)
+	//  1.1: U+0000 NULL encoding
+	EXPECT_FALSE(::IsValidUtf8("\xC0\x80")); // Two-byte representation of U+0000 NULL
+	EXPECT_FALSE(::IsValidUtf8("\xE0\x80\x80")); // Three-byte representation of U+0000 NULL
+	EXPECT_FALSE(::IsValidUtf8("\xF0\x80\x80\x80")); // Four-byte representation of U+0000 NULL
+	EXPECT_FALSE(::IsValidUtf8("\xF8\x80\x80\x80\x80")); // Five-byte representation of U+0000 NULL
+	EXPECT_FALSE(::IsValidUtf8("\xFC\x80\x80\x80\x80\x80")); // Six-byte representation of U+0000 NULL
+	//  1.2: U+0080 <control> encoding
+	EXPECT_FALSE(::IsValidUtf8("\xE0\x82\x80")); // Three-byte representation of U+0080 <control>
+	EXPECT_FALSE(::IsValidUtf8("\xF0\x80\x82\x80")); // Four-byte representation of U+0080 <control>
+	//  1.3: U+0800 SAMARITAN LETTER ALAF encoding
+	EXPECT_FALSE(::IsValidUtf8("\xF0\x80\xA0\x80")); // Four-byte representation of U+0800 SAMARITAN LETTER ALAF
+	// Part 2: Incorrectly encoded surrogate halves (U+D800..U+DFFF written as three-byte sequences)
+	for (int i = 0xD800; i <= 0xDFFF; ++i)
+	{
+		char buffer[] = { static_cast<char>(0xE0 | (i >> 12)), static_cast<char>(0x80 | ((i >> 6) & 0x3F)), static_cast<char>(0x80 | (i & 0x3F)), 0 };
+		EXPECT_FALSE(::IsValidUtf8(buffer)) << "Invalid surrogate half not recognized: " << std::hex
+			<< " 0x" << (uint32_t)(uint8_t)buffer[0] << " 0x" << (uint32_t)(uint8_t)buffer[1]
+			<< " 0x" << (uint32_t)(uint8_t)buffer[2]
+			<< " (0x" << i << ")";
+	}
+	// Part 3: Sequences encoding codepoints beyond the unicode range
+	EXPECT_FALSE(::IsValidUtf8("\xF4\x90\x80\x80")); // Representation of invalid codepoint U+110000
+	// Part 4: Incomplete multibyte sequences (cut short by the length argument or by the terminator)
+	EXPECT_FALSE(::IsValidUtf8("\xC3\xA6", 1)); // U+00E6 LATIN SMALL LETTER AE
+	EXPECT_FALSE(::IsValidUtf8("\xE2\x84\x95", 2)); // U+2115 DOUBLE-STRUCK CAPITAL N
+	EXPECT_FALSE(::IsValidUtf8("\xE2\x84"));
+	EXPECT_FALSE(::IsValidUtf8("\xF0\x9F\x94\x87", 3)); // U+1F507 SPEAKER WITH CANCELLATION STROKE
+}
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -0,0 +1,19 @@
+/*
+ * OpenClonk, http://www.openclonk.org
+ *
+ * This file is ineligible for copyright and therefore in the public domain,
+ * because it does not reach the required threshold of originality.
+ *
+ * "Clonk" is a registered trademark of Matthes Bender.
+ * See clonk_trademark_license.txt for full license.
+ */
+
+/* Runs all available tests. */
+
+#include <gtest/gtest.h>
+
+int main(int argc, char **argv)
+{
+	::testing::InitGoogleTest(&argc, argv); // hand the command line to Google Test before running anything
+	return RUN_ALL_TESTS(); // the result of the test run becomes the process exit status
+}