openclonk/src/lib/StdBuf.cpp

516 lines
14 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

/*
* OpenClonk, http://www.openclonk.org
*
* Copyright (c) 2001-2009, RedWolf Design GmbH, http://www.clonk.de/
* Copyright (c) 2009-2016, The OpenClonk Team and contributors
*
* Distributed under the terms of the ISC license; see accompanying file
* "COPYING" for details.
*
* "Clonk" is a registered trademark of Matthes Bender, used with permission.
* See accompanying file "TRADEMARK" for details.
*
* To redistribute this file separately, substitute the full license texts
* for the above references.
*/
#include "C4Include.h"
#include "lib/StdBuf.h"
#include "lib/StdCompiler.h"
#include "lib/StdAdaptors.h"
#ifdef _WIN32
#include "platform/C4windowswrapper.h"
#else
#define O_BINARY 0
#define O_SEQUENTIAL 0
#endif
#include <sys/stat.h>
// *** StdBuf
// Replaces the buffer contents with the contents of the file szFile.
// The buffer is re-created with the size reported by FileSize() before reading.
// Returns true only if the file could be opened and exactly that many bytes
// were read; returns false otherwise.
bool StdBuf::LoadFromFile(const char *szFile)
{
	// Open file
#ifdef _WIN32
	int fh = _wopen(::GetWideChar(szFile), O_BINARY | O_RDONLY | O_SEQUENTIAL, S_IREAD | S_IWRITE);
#else
	int fh = open(szFile, O_BINARY | O_CLOEXEC | O_RDONLY | O_SEQUENTIAL, S_IREAD | S_IWRITE);
#endif
	if (fh < 0) return false;
	// Create buf
	New(FileSize(fh));
	// Read. The expected byte count is compared as ssize_t (as StdStrBuf::LoadFromFile
	// does); a cast to int would truncate sizes of 2 GiB and more.
	const bool fSuccess = (read(fh, getMData(), getSize()) == static_cast<ssize_t>(getSize()));
	// The handle is closed on both the success and the failure path
	close(fh);
	return fSuccess;
}
// Writes the buffer contents to the file szFile, creating the file if necessary
// and truncating any previous contents.
// Returns true only if the file could be opened and all getSize() bytes were written.
bool StdBuf::SaveToFile(const char *szFile) const
{
	// Open file
#ifdef _WIN32
	int fh = _wopen(::GetWideChar(szFile), O_BINARY | O_CREAT | O_WRONLY | O_SEQUENTIAL | O_TRUNC, S_IREAD | S_IWRITE);
#else
	int fh = open(szFile, O_BINARY | O_CLOEXEC | O_CREAT | O_WRONLY | O_SEQUENTIAL | O_TRUNC, S_IREAD | S_IWRITE);
#endif
	if (fh < 0) return false;
	// Write data. The expected byte count is compared as ssize_t (as StdStrBuf::SaveToFile
	// does); a cast to int would truncate sizes of 2 GiB and more.
	const bool fSuccess = (write(fh, getData(), getSize()) == static_cast<ssize_t>(getSize()));
	// The handle is closed on both the success and the failure path
	close(fh);
	return fSuccess;
}
// Replaces the string with the contents of the file szFile.
// The string length is set to the size reported by FileSize() before reading.
// Returns false if the file cannot be opened or the read does not deliver
// exactly that many bytes.
bool StdStrBuf::LoadFromFile(const char *szFile)
{
#ifdef _WIN32
	const int hFile = _wopen(::GetWideChar(szFile), O_BINARY | O_RDONLY | O_SEQUENTIAL, S_IREAD | S_IWRITE);
#else
	const int hFile = open(szFile, O_BINARY | O_CLOEXEC | O_RDONLY | O_SEQUENTIAL, S_IREAD | S_IWRITE);
#endif
	// Could not open
	if (hFile < 0)
		return false;
	// Make room for the whole file
	SetLength(FileSize(hFile));
	// A single read must deliver everything
	const bool fComplete = (read(hFile, getMData(), getLength()) == (ssize_t) getLength());
	// Done with the file, regardless of the outcome
	close(hFile);
	return fComplete;
}
// Writes the string (getLength() bytes, i.e. without a terminator) to the file szFile,
// creating the file if necessary and truncating any previous contents.
// Returns false if the file cannot be opened or not all bytes could be written.
bool StdStrBuf::SaveToFile(const char *szFile) const
{
#ifdef _WIN32
	const int hFile = _wopen(::GetWideChar(szFile), O_BINARY | O_CREAT | O_WRONLY | O_SEQUENTIAL | O_TRUNC, S_IREAD | S_IWRITE);
#else
	const int hFile = open(szFile, O_BINARY | O_CLOEXEC | O_CREAT | O_WRONLY | O_SEQUENTIAL | O_TRUNC, S_IREAD | S_IWRITE);
#endif
	// Could not open
	if (hFile < 0)
		return false;
	// A single write must take everything
	const bool fComplete = (write(hFile, getData(), getLength()) == (ssize_t) getLength());
	// Done with the file, regardless of the outcome
	close(hFile);
	return fComplete;
}
// Passes the buffer through the compiler pComp: first the size, then a separator,
// then the raw bytes. iType is handed on as StdCompiler::RawCompileType.
// When pComp is a deserializer, the buffer is re-created with the size just read
// before the raw data is filled in.
void StdBuf::CompileFunc(StdCompiler *pComp, int iType)
{
	// The size travels through a 32 bit temporary wrapped in an int pack adaptor
	// (guess it is a small value most of the time - if it's big, an extra byte won't hurt anyway)
	uint32_t iPackedSize = iSize;
	pComp->Value(mkIntPackAdapt(iPackedSize));
	iSize = iPackedSize;
	pComp->Separator(StdCompiler::SEP_PART2);
	// Pick the memory the raw data is read from / written to
	void *pRaw;
	if (pComp->isDeserializer())
	{
		New(iSize);
		pRaw = getMData();
	}
	else
	{
		pRaw = const_cast<void *>(getData());
	}
	pComp->Raw(pRaw, iSize, StdCompiler::RawCompileType(iType));
}
// *** StdStrBuf
#ifdef _WIN32
// Constructs the string from the UTF-16 string utf16 by converting it to UTF-8.
StdStrBuf::StdStrBuf(const wchar_t * utf16)
{
	// Query pass: no output buffer is given, only the required size is returned
	const int iUtf8Size = WideCharToMultiByte(CP_UTF8, 0, utf16, -1, nullptr, 0, nullptr, nullptr);
	SetSize(iUtf8Size);
	// Conversion pass, straight into the resized buffer
	WideCharToMultiByte(CP_UTF8, 0, utf16, -1, getMData(), getSize(), nullptr, nullptr);
}
// Returns the string converted from UTF-8 to wide characters in a freshly
// allocated array wrapped in a wchar_t_holder. An empty buffer yields a holder
// around nullptr.
StdStrBuf::wchar_t_holder StdStrBuf::GetWideChar() const
{
	// Nothing stored: nothing to convert
	if (getSize() == 0)
		return StdStrBuf::wchar_t_holder(nullptr);
	// Query pass for the number of wide characters needed for the whole buffer
	const int iWideCount = MultiByteToWideChar(CP_UTF8, 0, getData(), getSize(), nullptr, 0);
	wchar_t * pWide = new wchar_t[iWideCount];
	// Conversion pass
	MultiByteToWideChar(CP_UTF8, 0, getData(), getSize(), pWide, iWideCount);
	return StdStrBuf::wchar_t_holder(pWide);
}
// Returns the string converted from UTF-8 to wide characters, stored in a StdBuf.
StdBuf StdStrBuf::GetWideCharBuf()
{
	// Query pass for the number of wide characters needed for the whole buffer
	const int iWideCount = MultiByteToWideChar(CP_UTF8, 0, getData(), getSize(), nullptr, 0);
	// Result buffer sized in bytes
	StdBuf Result;
	Result.SetSize(iWideCount * sizeof(wchar_t));
	// Conversion pass
	MultiByteToWideChar(CP_UTF8, 0, getData(), getSize(), getMBufPtr<wchar_t>(Result), iWideCount);
	return Result;
}
// Converts the UTF-8 string utf8 to wide characters in a freshly allocated array
// wrapped in a wchar_t_holder. With double_null_terminate set, one extra wide
// character is allocated at the end and set to zero.
StdStrBuf::wchar_t_holder GetWideChar(const char * utf8, bool double_null_terminate)
{
	// Query pass for the number of wide characters needed
	int iWideCount = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, nullptr, 0);
	// Reserve room for the additional terminator
	if (double_null_terminate)
		++iWideCount;
	wchar_t * pWide = new wchar_t[iWideCount];
	// Conversion pass
	MultiByteToWideChar(CP_UTF8, 0, utf8, -1, pWide, iWideCount);
	// Fill the additional slot
	if (double_null_terminate)
		pWide[iWideCount - 1] = wchar_t(0);
	return StdStrBuf::wchar_t_holder(pWide);
}
// Converts the UTF-8 string utf8 to wide characters, stored in a StdBuf.
StdBuf GetWideCharBuf(const char * utf8)
{
	// Query pass for the number of wide characters needed
	const int iWideCount = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, nullptr, 0);
	// Result buffer sized in bytes
	StdBuf Result;
	Result.SetSize(iWideCount * sizeof(wchar_t));
	// Conversion pass
	MultiByteToWideChar(CP_UTF8, 0, utf8, -1, getMBufPtr<wchar_t>(Result), iWideCount);
	return Result;
}
#endif
// Replaces the string with printf-style formatted output.
// szFmt is the format string; the variadic arguments supply its values.
void StdStrBuf::Format(const char *szFmt, ...)
{
	// Create argument list
	va_list args;
	va_start(args, szFmt);
	// Format
	FormatV(szFmt, args);
	// Every va_start must be paired with a va_end before the function returns
	va_end(args);
}
// va_list variant of Format: replaces the string with printf-style formatted output.
void StdStrBuf::FormatV(const char *szFmt, va_list args)
{
	// Start from an empty string ...
	Clear();
	// ... so that appending the formatted text amounts to a replacement
	AppendFormatV(szFmt, args);
}
// Appends printf-style formatted output to the string.
// szFmt is the format string; the variadic arguments supply its values.
void StdStrBuf::AppendFormat(const char *szFmt, ...)
{
	// Create argument list
	va_list args;
	va_start(args, szFmt);
	// Format
	AppendFormatV(szFmt, args);
	// Every va_start must be paired with a va_end before the function returns
	va_end(args);
}
// Appends printf-style formatted output (format szFmt, arguments args) to the string.
// One of three implementations is chosen at compile time:
//  - HAVE_VASPRINTF: let vasprintf allocate the formatted string, then take or append it
//  - HAVE_VSCPRINTF: measure the output first, grow once, then format in place
//  - otherwise: grow in steps of 512 and retry vsnprintf until the output fits
void StdStrBuf::AppendFormatV(const char *szFmt, va_list args)
{
#ifdef HAVE_VASPRINTF
// Format (vasprintf allocates the result and stores the pointer in pStr)
char *pStr; int iBytes = vasprintf(&pStr, szFmt, args);
// Formatting failed: leave the string untouched
if (iBytes < 0 || !pStr) return;
// Append
// If nothing is stored yet, pStr is handed to Take(); it is not freed here,
// so Take() presumably adopts the allocation - verify against StdStrBuf::Take
if (isNull())
Take(pStr, iBytes);
else
{
// Otherwise the text is appended and the temporary string released
Append(pStr, iBytes);
free(pStr);
}
#elif defined(HAVE_VSCPRINTF)
// Save append start
int iStart = getLength();
// Calculate size, allocate
int iLength = vscprintf(szFmt, args);
Grow(iLength);
// Format
// NOTE(review): pPos is computed but never used below - looks like a leftover; confirm before removing
char *pPos = getMElem<char>(*this, iSize - iLength - 1);
// NOTE(review): args was already traversed by vscprintf above and is reused here without
// va_copy - confirm this is valid on every platform that defines HAVE_VSCPRINTF
vsprintf(getMPtr(iStart), szFmt, args);
#else
// Save append start
int iStart = getLength(), iBytes;
do
{
// Grow by another 512 for every attempt
Grow(512);
// Try output on a copy of the argument list, because another attempt may follow
va_list args_copy;
#ifdef va_copy
va_copy(args_copy, args);
#else
// No va_copy available: fall back to plain assignment
args_copy = args;
#endif
iBytes = vsnprintf(getMPtr(iStart), getLength() - iStart, szFmt, args_copy);
#ifdef va_copy
va_end(args_copy);
#endif
}
// Retry while vsnprintf reported an error or the output did not fit behind iStart
while (iBytes < 0 || (unsigned int)(iBytes) >= getLength() - iStart);
// Calculate real length, if vsnprintf didn't return anything of value
iBytes = strlen(getMPtr(iStart));
// Shrink to fit (formatted text plus one for the terminator)
SetSize(iStart + iBytes + 1);
#endif
}
// Appends DirectorySeparator to the string unless the string already ends with it.
// An empty string always gets the separator appended.
void StdStrBuf::AppendBackslash()
{
	const auto iLen = getLength();
	const bool fEndsWithSeparator = iLen && *getPtr(iLen - 1) == DirectorySeparator;
	if (!fEndsWithSeparator)
		AppendChar(DirectorySeparator);
}
// Passes the string through the compiler pComp as a string value; iRawType is
// handed on as StdCompiler::RawCompileType.
// When deserializing, the string delivered by the compiler is taken over.
// When serializing, a null buffer is written as the empty string.
void StdStrBuf::CompileFunc(StdCompiler *pComp, int iRawType)
{
	if (!pComp->isDeserializer())
	{
		// Serialize: the compiler wants a non-null char pointer
		const char *szCurrent = getData();
		char *pData = const_cast<char *>(szCurrent ? szCurrent : "");
		pComp->String(&pData, StdCompiler::RawCompileType(iRawType));
		return;
	}
	// Deserialize: the compiler stores the string pointer in pnData
	char *pnData;
	pComp->String(&pnData, StdCompiler::RawCompileType(iRawType));
	Take(pnData);
}
// Returns a new string holding printf-style formatted output.
// szFmt is the format string; the variadic arguments supply its values.
StdStrBuf FormatString(const char *szFmt, ...)
{
	va_list args;
	va_start(args, szFmt);
	StdStrBuf Buf = FormatStringV(szFmt, args);
	// Every va_start must be paired with a va_end before the function returns,
	// hence the result is stored first instead of being returned directly
	va_end(args);
	return Buf;
}
// va_list variant of FormatString: returns a new string holding printf-style
// formatted output.
StdStrBuf FormatStringV(const char *szFmt, va_list args)
{
	StdStrBuf sFormatted;
	sFormatted.FormatV(szFmt, args);
	return sFormatted;
}
// Replace all occurrences of szOld with szNew, searching from offset iStartSearch.
// A null szNew is treated as the empty string. Returns the number of replacements;
// a null buffer or a null szOld yields 0.
// SSearch is used for matching; as the code steps back by the length of szOld,
// it evidently yields the position directly behind a match.
int StdStrBuf::Replace(const char *szOld, const char *szNew, size_t iStartSearch)
{
	if (!getPtr(0) || !szOld) return 0;
	if (!szNew) szNew = "";
	int iReplaced = 0;
	const size_t iOldLen = strlen(szOld), iNewLen = strlen(szNew);
	if (iOldLen == iNewLen)
	{
		// Same length: overwrite every match in place
		char *pBehindMatch = getMPtr(iStartSearch);
		for (;;)
		{
			pBehindMatch = const_cast<char *>(SSearch(pBehindMatch, szOld));
			if (!pBehindMatch) break;
			memcpy(pBehindMatch - iOldLen, szNew, iOldLen * sizeof(char));
			++iReplaced;
		}
		return iReplaced;
	}
	// Different length: first pass counts the matches to find the resulting length
	size_t iResultLen = getLength();
	for (const char *szScan = SSearch(getPtr(iStartSearch), szOld); szScan; szScan = SSearch(szScan, szOld))
	{
		iResultLen += iNewLen - iOldLen;
		++iReplaced;
	}
	// Nothing found: keep the string as it is
	if (!iReplaced) return 0;
	// Second pass assembles the result in a new buffer
	StdStrBuf sResult;
	sResult.New(iResultLen+1);
	const char *szRead = getPtr(0);
	char *szWrite = sResult.getMPtr(0);
	// The part in front of the search start is copied verbatim
	if (iStartSearch)
	{
		memcpy(szWrite, szRead, iStartSearch * sizeof(char));
		szRead += iStartSearch;
		szWrite += iStartSearch;
	}
	for (const char *szBehindMatch = SSearch(szRead, szOld); szBehindMatch; szBehindMatch = SSearch(szRead, szOld))
	{
		// Text between the previous match and this one
		const size_t iKeep = szBehindMatch - szRead - iOldLen;
		memcpy(szWrite, szRead, iKeep * sizeof(char));
		szWrite += iKeep;
		// Replacement text
		memcpy(szWrite, szNew, iNewLen * sizeof(char));
		szWrite += iNewLen;
		// Continue reading behind the match
		szRead = szBehindMatch;
	}
	// Remainder behind the last match, including the terminator
	strcpy(szWrite, szRead);
	Take(std::move(sResult));
	return iReplaced;
}
// Replaces every character cOld in the string with cNew and returns the number
// of replacements. A zero cNew is substituted by '_'. A null buffer or a zero
// cOld yields 0.
int StdStrBuf::ReplaceChar(char cOld, char cNew)
{
	if (isNull()) return 0;
	char *szScan = getMPtr(0);
	if (!cOld) return 0;
	// Never write a terminator into the middle of the string
	if (!cNew) cNew = '_';
	int iReplaced = 0;
	for (; *szScan; ++szScan)
	{
		if (*szScan != cOld) continue;
		*szScan = cNew;
		++iReplaced;
	}
	return iReplaced;
}
// Replaces everything from position iPos to the end of the string with szNewEnd.
// iPos must not lie behind the end of the string; this is asserted, and the call
// is ignored otherwise.
void StdStrBuf::ReplaceEnd(size_t iPos, const char *szNewEnd)
{
	const size_t iOldLen = getLength();
	assert(iPos <= iOldLen);
	if (iPos > iOldLen) return;
	const size_t iEndLen = strlen(szNewEnd);
	// Adjust the length only if the new end differs in size from the old one
	const size_t iNewLen = iPos + iEndLen;
	if (iNewLen != iOldLen)
		SetLength(iNewLen);
	// Copy the new end without its terminator
	memcpy(getMPtr(iPos), szNewEnd, iEndLen * sizeof(char));
}
// Checks that the string consists of permitted characters only: the first
// character must occur in szInitialChars, every further one in szMidChars.
// Returns true for an empty string.
bool StdStrBuf::ValidateChars(const char *szInitialChars, const char *szMidChars)
{
	const size_t iLen = getLength();
	const char *szChars = getData();
	for (size_t iIndex = 0; iIndex < iLen; ++iIndex)
	{
		const char *szAllowed = (iIndex == 0) ? szInitialChars : szMidChars;
		if (strchr(szAllowed, szChars[iIndex]) == nullptr)
			return false;
	}
	return true;
}
// Extracts the section with zero-based index idx from the string, where sections
// are delimited by cSeparator, and stores it in *psOutSection.
// *psOutSection is cleared first and stays cleared for an empty section.
// Returns false if the buffer is null or the string has no section with that index.
bool StdStrBuf::GetSection(size_t idx, StdStrBuf *psOutSection, char cSeparator) const
{
	assert(psOutSection);
	psOutSection->Clear();
	const char *szSectionStart = getData();
	if (!szSectionStart) return false; // invalid argument
	// Skip idx separators; szSectionEnd always points to the next separator (if any)
	const char *szSectionEnd = strchr(szSectionStart, cSeparator);
	while (szSectionEnd && idx)
	{
		szSectionStart = szSectionEnd + 1;
		--idx;
		szSectionEnd = strchr(szSectionStart, cSeparator);
	}
	// Ran out of separators before reaching the requested section
	if (idx) return false;
	// The last section extends to the end of the string
	if (!szSectionEnd)
		szSectionEnd = getData() + getLength();
	// Fill output buffer with section, if not empty
	if (szSectionEnd != szSectionStart)
		psOutSection->Copy(szSectionStart, szSectionEnd - szSectionStart);
	// An empty section still counts as found, because it obviously exists
	// (to enable loops like while (buf.GetSection(i++, &sect)) if (sect) ...)
	return true;
}
// Converts every character of the string to lower case in place, using tolower.
// Does nothing on a null buffer.
void StdStrBuf::ToLowerCase()
{
	if (isNull()) return;
	for (char *szPos = getMPtr(0); *szPos; ++szPos)
	{
		// tolower requires a value representable as unsigned char (or EOF); passing a
		// negative char - any non-ASCII byte where char is signed - is undefined behavior
		*szPos = static_cast<char>(tolower(static_cast<unsigned char>(*szPos)));
	}
}
// Appends the code point unicodechar to the string, encoded as UTF-8 (one to four
// bytes). Values of 0x110000 and above are not code points and are ignored.
void StdStrBuf::AppendCharacter(uint32_t unicodechar)
{
	// One byte: goes through the ordinary character append
	if (unicodechar < 0x80)
	{
		AppendChar(unicodechar);
		return;
	}
	// Assemble the multi-byte sequence: a lead byte carrying the top bits,
	// followed by continuation bytes carrying six bits each
	char szSequence[4];
	size_t iSequenceLen;
	if (unicodechar < 0x800)
	{
		iSequenceLen = 2;
		szSequence[0] = static_cast<char>(0xC0 | (unicodechar >> 6));
		szSequence[1] = static_cast<char>(0x80 | (unicodechar & 0x3F));
	}
	else if (unicodechar < 0x10000)
	{
		iSequenceLen = 3;
		szSequence[0] = static_cast<char>(0xE0 | (unicodechar >> 12));
		szSequence[1] = static_cast<char>(0x80 | ((unicodechar >> 6) & 0x3F));
		szSequence[2] = static_cast<char>(0x80 | (unicodechar & 0x3F));
	}
	else if (unicodechar < 0x110000)
	{
		iSequenceLen = 4;
		szSequence[0] = static_cast<char>(0xF0 | (unicodechar >> 18));
		szSequence[1] = static_cast<char>(0x80 | ((unicodechar >> 12) & 0x3F));
		szSequence[2] = static_cast<char>(0x80 | ((unicodechar >> 6) & 0x3F));
		szSequence[3] = static_cast<char>(0x80 | (unicodechar & 0x3F));
	}
	else
	{
		// not an unicode code point, ignore
		return;
	}
	// Make room at the end and store the sequence there
	Grow(iSequenceLen);
	memcpy(getMPtr(getLength() - iSequenceLen), szSequence, iSequenceLen);
}
// Makes sure the string is valid UTF-8. If IsValidUtf8 rejects the current contents,
// they are assumed to be windows-1252 and are re-encoded as UTF-8.
// Returns true if charset was converted.
bool StdStrBuf::EnsureUnicode()
{
	// Already valid: nothing to do
	if (IsValidUtf8(getData(), getLength())) return false;
	// assume that it's windows-1252 and convert to utf-8
	size_t j = 0; // number of bytes written to buf so far
	StdStrBuf buf;
	buf.Grow(getLength());
	// totally unfounded statistic: most texts have less than 20 umlauts.
	enum { GROWSIZE = 20 };
	// Convert the characters only (getLength()), not the whole buffer (getSize()):
	// the latter also covers the terminator, which would then be copied as content
	// and leave the result one character too long.
	for (size_t i = 0; i < getLength(); ++i)
	{
		unsigned char c = *getPtr(i);
		// ASCII
		if (c < 0x80)
		{
			if (j >= buf.getLength())
				buf.Grow(GROWSIZE);
			*buf.getMPtr(j++) = c;
			continue;
		}
		// Is c one of the control characters only in ISO/IEC_8859-1 or part of the common subset with windows-1252?
		// These map to the code point of the same value, i.e. a two byte sequence.
		if (c == 0x81 || c == 0x8D || c == 0x8F || c == 0x90 || c == 0x9D || c >= 0xA0)
		{
			if (j + 1 >= buf.getLength())
				buf.Grow(GROWSIZE);
			*buf.getMPtr(j++) = (0xC0 | (c >> 6));
			*buf.getMPtr(j++) = (0x80 | (c & 0x3F));
			continue;
		}
		// Extra windows-1252-characters
		// Cut buf down to what has been written, so Append continues at the right place
		buf.SetLength(j);
		// UTF-8 encodings of the windows-1252 characters 0x80 to 0x9F, indexed by c - 0x80.
		// The nullptr entries belong to the five bytes handled by the branch above and
		// are therefore never looked up.
		static const char * extra_chars [] =
		{
			// 0x80 - 0x8F: euro sign, (none), low single quote, f with hook, low double quote,
			// ellipsis, dagger, double dagger, circumflex accent, per mille sign, S caron,
			// single left angle quote, OE ligature, (none), Z caron, (none)
			"\xe2\x82\xac", nullptr, "\xe2\x80\x9a", "\xc6\x92", "\xe2\x80\x9e", "\xe2\x80\xa6", "\xe2\x80\xa0", "\xe2\x80\xa1", "\xcb\x86", "\xe2\x80\xb0", "\xc5\xa0", "\xe2\x80\xb9", "\xc5\x92", nullptr, "\xc5\xbd", nullptr,
			// 0x90 - 0x9F: (none), left/right single quote, left/right double quote, bullet,
			// en dash, em dash, small tilde, trade mark sign, s caron, single right angle quote,
			// oe ligature, (none), z caron, Y diaeresis
			nullptr, "\xe2\x80\x98", "\xe2\x80\x99", "\xe2\x80\x9c", "\xe2\x80\x9d", "\xe2\x80\xa2", "\xe2\x80\x93", "\xe2\x80\x94", "\xcb\x9c", "\xe2\x84\xa2", "\xc5\xa1", "\xe2\x80\xba", "\xc5\x93", nullptr, "\xc5\xbe", "\xc5\xb8"
		};
		buf.Append(extra_chars[c - 0x80]);
		j += strlen(extra_chars[c - 0x80]);
	}
	// Drop the unused remainder of the last growth step
	buf.SetLength(j);
	Take(std::move(buf));
	return true;
}
// Removes whitespace (as classified by isspace) from both ends of the string.
// A string consisting of whitespace only is cleared.
// Returns true if the string was changed, false if there was nothing to trim.
bool StdStrBuf::TrimSpaces()
{
	const int32_t iLength = getLength();
	// Empty string: nothing to trim
	if (!iLength) return false;
	const char *szStr = getData();
	// Count whitespace at the front
	int32_t iLeading = 0;
	for (; iLeading < iLength; ++iLeading)
		if (!isspace(static_cast<unsigned char>(szStr[iLeading])))
			break;
	// only spaces? Clear!
	if (iLeading == iLength)
	{
		Clear();
		return true;
	}
	// Count whitespace at the back; this stops at the latest at the
	// non-whitespace character found above
	int32_t iTrailing = 0;
	for (; isspace(static_cast<unsigned char>(szStr[iLength - 1 - iTrailing])); ++iTrailing) {}
	// anything to trim?
	if (iLeading == 0 && iTrailing == 0) return false;
	const int32_t iKept = iLength - iLeading - iTrailing;
	// Leading whitespace requires moving the kept text to the front;
	// trailing whitespace alone is handled by shortening
	if (iLeading)
		memmove(getMPtr(0), szStr+iLeading, iKept);
	SetLength(iKept);
	return true;
}
#ifdef _WIN32
// Converts the null-terminated wide string ws to a UTF-8 encoded std::string.
// Returns an empty string if the conversion fails.
std::string WStrToString(wchar_t *ws)
{
	// Query pass: required size in bytes; as the source length is given as -1,
	// this includes the terminator
	const int len = WideCharToMultiByte(CP_UTF8, 0, ws, -1, nullptr, 0, nullptr, nullptr);
	assert(len >= 0);
	if (len <= 0) return std::string{};
	std::string s(static_cast<size_t>(len), '\0');
	// Conversion pass. The result has to be checked as well: a failure returns 0,
	// and resizing to 0 - 1 would request the maximum size and throw.
	const int written = WideCharToMultiByte(CP_UTF8, 0, ws, -1, &s[0], len, nullptr, nullptr);
	if (written <= 0) return std::string{};
	// Cut off the terminator written by the conversion; std::string keeps its own
	s.resize(static_cast<size_t>(written) - 1);
	return s;
}
#endif