Add warning for non-UTF-8 files

ipv6
Lukas Werling 2016-12-13 22:40:51 +01:00
parent 68ddeb3bed
commit d84c9b174f
3 changed files with 14 additions and 7 deletions

View File

@ -98,16 +98,20 @@ bool C4ComponentHost::Load(C4GroupSet &hGroupSet,
// Finalizes a component after its data has been read from a group:
// stores the entry's filename and path, makes sure the text is UTF-8
// (logging a warning when a conversion was necessary), strips a leading
// UTF-8 byte order mark and finally calls OnLoad().
//   name   - entry name that is looked up in hGroup
//   hGroup - group the component data came from
void C4ComponentHost::FinishLoad(const StdStrBuf & name, C4Group &hGroup)
{
    // Store actual filename. This happens before the encoding check because
    // the warning below prints FilePath (presumably filled in by
    // CopyFilePathFromGroup - verify against its definition).
    hGroup.FindEntry(name.getData(), &Filename);
    CopyFilePathFromGroup(hGroup);
    // EnsureUnicode() reports whether it had to convert the data. It must be
    // called exactly once: after a conversion the data is valid UTF-8, so any
    // second call would return false and the warning would never be logged.
    if (Data.EnsureUnicode())
    {
        LogF("WARNING: File is not encoded as UTF-8 (%s)", FilePath.getData());
    }
    // Skip those stupid "zero width no-break spaces" (also known as Byte Order Marks).
    // The length check keeps the three index accesses inside the string for
    // empty or very short files.
    if (Data.getLength() >= 3 && Data[0] == '\xEF' && Data[1] == '\xBB' && Data[2] == '\xBF')
    {
        Data.Move(3,Data.getSize()-3);
        Data.Shrink(3);
    }
    // Notify
    OnLoad();
}

View File

@ -433,7 +433,8 @@ void StdStrBuf::AppendCharacter(uint32_t unicodechar)
else /* not an unicode code point, ignore */ {}
}
void StdStrBuf::EnsureUnicode()
// Returns true if charset was converted.
bool StdStrBuf::EnsureUnicode()
{
// assume that it's windows-1252 and convert to utf-8
if (!IsValidUtf8(getData(), getLength()))
@ -477,7 +478,9 @@ void StdStrBuf::EnsureUnicode()
}
buf.SetLength(j);
Take(std::move(buf));
return true;
}
return false;
}
bool StdStrBuf::TrimSpaces()

View File

@ -659,8 +659,8 @@ public:
// get an indexed section from the string like Section1;Section2;Section3
bool GetSection(size_t idx, StdStrBuf *psOutSection, char cSeparator=';') const;
// Checks whether the content is valid UTF-8, and if not, convert it from windows-1252 to UTF-8.
void EnsureUnicode();
// Checks whether the content is valid UTF-8; if not, converts it from windows-1252 to UTF-8 and returns true. Returns false if no conversion was needed.
bool EnsureUnicode();
// convert to lower case
void ToLowerCase();