diff --git a/src/c4group/C4ComponentHost.cpp b/src/c4group/C4ComponentHost.cpp index 82f4be57d..91866e98a 100644 --- a/src/c4group/C4ComponentHost.cpp +++ b/src/c4group/C4ComponentHost.cpp @@ -98,16 +98,20 @@ bool C4ComponentHost::Load(C4GroupSet &hGroupSet, void C4ComponentHost::FinishLoad(const StdStrBuf & name, C4Group &hGroup) { - Data.EnsureUnicode(); + // Store actual filename first: the encoding warning below logs FilePath (presumably set by CopyFilePathFromGroup) + hGroup.FindEntry(name.getData(), &Filename); + CopyFilePathFromGroup(hGroup); + + if (Data.EnsureUnicode()) + { + LogF("WARNING: File is not encoded as UTF-8 (%s)", FilePath.getData()); + } // Skip those stupid "zero width no-break spaces" (also known as Byte Order Marks) if (Data[0] == '\xEF' && Data[1] == '\xBB' && Data[2] == '\xBF') { Data.Move(3,Data.getSize()-3); Data.Shrink(3); } - // Store actual filename - hGroup.FindEntry(name.getData(), &Filename); - CopyFilePathFromGroup(hGroup); // Notify OnLoad(); } diff --git a/src/lib/StdBuf.cpp b/src/lib/StdBuf.cpp index 8073b9336..44369245b 100644 --- a/src/lib/StdBuf.cpp +++ b/src/lib/StdBuf.cpp @@ -433,7 +433,8 @@ void StdStrBuf::AppendCharacter(uint32_t unicodechar) else /* not an unicode code point, ignore */ {} } -void StdStrBuf::EnsureUnicode() +// Returns true if the content was not valid UTF-8 and has been converted (assuming windows-1252); returns false if it was already valid UTF-8 and left untouched. +bool StdStrBuf::EnsureUnicode() { // assume that it's windows-1252 and convert to utf-8 if (!IsValidUtf8(getData(), getLength())) @@ -477,7 +478,9 @@ void StdStrBuf::EnsureUnicode() } buf.SetLength(j); Take(std::move(buf)); + return true; } + return false; } bool StdStrBuf::TrimSpaces() diff --git a/src/lib/StdBuf.h b/src/lib/StdBuf.h index 56a29b6bf..bfa43ef3c 100644 --- a/src/lib/StdBuf.h +++ b/src/lib/StdBuf.h @@ -659,8 +659,8 @@ public: // get an indexed section from the string like Section1;Section2;Section3 bool GetSection(size_t idx, StdStrBuf *psOutSection, char cSeparator=';') const; - // Checks whether the content is valid UTF-8, and if not, convert it from windows-1252 to UTF-8. 
- void EnsureUnicode(); + // Checks whether the content is valid UTF-8; if not, converts it from windows-1252 to UTF-8. Returns true if a conversion was performed, false if the content was already valid UTF-8. + bool EnsureUnicode(); // convert to lower case void ToLowerCase();