Add warning for non-UTF-8 files

ipv6
Lukas Werling 2016-12-13 22:40:51 +01:00
parent 68ddeb3bed
commit d84c9b174f
3 changed files with 14 additions and 7 deletions

View File

@@ -98,16 +98,20 @@ bool C4ComponentHost::Load(C4GroupSet &hGroupSet,
void C4ComponentHost::FinishLoad(const StdStrBuf & name, C4Group &hGroup) void C4ComponentHost::FinishLoad(const StdStrBuf & name, C4Group &hGroup)
{ {
Data.EnsureUnicode(); // Store actual filename
hGroup.FindEntry(name.getData(), &Filename);
CopyFilePathFromGroup(hGroup);
if (Data.EnsureUnicode())
{
LogF("WARNING: File is not encoded as UTF-8 (%s)", FilePath.getData());
}
// Skip those stupid "zero width no-break spaces" (also known as Byte Order Marks) // Skip those stupid "zero width no-break spaces" (also known as Byte Order Marks)
if (Data[0] == '\xEF' && Data[1] == '\xBB' && Data[2] == '\xBF') if (Data[0] == '\xEF' && Data[1] == '\xBB' && Data[2] == '\xBF')
{ {
Data.Move(3,Data.getSize()-3); Data.Move(3,Data.getSize()-3);
Data.Shrink(3); Data.Shrink(3);
} }
// Store actual filename
hGroup.FindEntry(name.getData(), &Filename);
CopyFilePathFromGroup(hGroup);
// Notify // Notify
OnLoad(); OnLoad();
} }

View File

@@ -433,7 +433,8 @@ void StdStrBuf::AppendCharacter(uint32_t unicodechar)
else /* not an unicode code point, ignore */ {} else /* not an unicode code point, ignore */ {}
} }
void StdStrBuf::EnsureUnicode() // Returns true if charset was converted.
bool StdStrBuf::EnsureUnicode()
{ {
// assume that it's windows-1252 and convert to utf-8 // assume that it's windows-1252 and convert to utf-8
if (!IsValidUtf8(getData(), getLength())) if (!IsValidUtf8(getData(), getLength()))
@@ -477,7 +478,9 @@ void StdStrBuf::EnsureUnicode()
} }
buf.SetLength(j); buf.SetLength(j);
Take(std::move(buf)); Take(std::move(buf));
return true;
} }
return false;
} }
bool StdStrBuf::TrimSpaces() bool StdStrBuf::TrimSpaces()

View File

@@ -659,8 +659,8 @@ public:
// get an indexed section from the string like Section1;Section2;Section3 // get an indexed section from the string like Section1;Section2;Section3
bool GetSection(size_t idx, StdStrBuf *psOutSection, char cSeparator=';') const; bool GetSection(size_t idx, StdStrBuf *psOutSection, char cSeparator=';') const;
// Checks whether the content is valid UTF-8, and if not, convert it from windows-1252 to UTF-8. // Checks whether the content is valid UTF-8, and if not, convert it from windows-1252 to UTF-8 and return true.
void EnsureUnicode(); bool EnsureUnicode();
// convert to lower case // convert to lower case
void ToLowerCase(); void ToLowerCase();