openclonk/src/lib/StdBuf.cpp

514 lines
14 KiB
C++
Raw Permalink Normal View History

/*
 * OpenClonk, http://www.openclonk.org
 *
 * Copyright (c) 2001-2009, RedWolf Design GmbH, http://www.clonk.de/
 * Copyright (c) 2009-2013, The OpenClonk Team and contributors
 *
 * Distributed under the terms of the ISC license; see accompanying file
 * "COPYING" for details.
 *
 * "Clonk" is a registered trademark of Matthes Bender, used with permission.
 * See accompanying file "TRADEMARK" for details.
 *
 * To redistribute this file separately, substitute the full license texts
 * for the above references.
 */
#include "C4Include.h"
2009-05-08 13:28:41 +00:00
#include <StdBuf.h>
#include <StdCompiler.h>
#include <StdAdaptors.h>
#include <StdFile.h>
#include <stdarg.h>
#include <stdio.h>
#ifdef _WIN32
#include <io.h>
#include <C4windowswrapper.h>
2009-05-08 13:28:41 +00:00
#define vsnprintf _vsnprintf
#else
#define O_BINARY 0
#define O_SEQUENTIAL 0
#include <unistd.h>
#include <stdlib.h>
#endif
#include <ctype.h>
#include <fcntl.h>
#include <sys/stat.h>
#if !defined(HAVE_VASPRINTF) && defined(HAVE___MINGW_VASPRINTF)
// MinGW declares a vasprintf-compatible function as __mingw_vasprintf.
// Rename it for our use.
#define vasprintf __mingw_vasprintf
#define HAVE_VASPRINTF
#endif
2009-05-08 13:28:41 +00:00
// *** StdBuf
bool StdBuf::LoadFromFile(const char *szFile)
{
// Open file
#ifdef _WIN32
int fh = _wopen(::GetWideChar(szFile), O_BINARY | O_RDONLY | O_SEQUENTIAL, S_IREAD | S_IWRITE);
#else
int fh = open(szFile, O_BINARY | O_CLOEXEC | O_RDONLY | O_SEQUENTIAL, S_IREAD | S_IWRITE);
#endif
2010-03-28 18:58:01 +00:00
if (fh < 0) return false;
// Create buf
New(FileSize(fh));
// Read
2010-03-28 18:58:01 +00:00
if (read(fh, getMData(), getSize()) != (signed int) getSize())
{
close(fh);
return false;
}
close(fh);
// Ok
return true;
2009-05-08 13:28:41 +00:00
}
bool StdBuf::SaveToFile(const char *szFile) const
{
// Open file
#ifdef _WIN32
int fh = _wopen(::GetWideChar(szFile), O_BINARY | O_CREAT | O_WRONLY | O_SEQUENTIAL | O_TRUNC, S_IREAD | S_IWRITE);
#else
int fh = open(szFile, O_BINARY | O_CLOEXEC | O_CREAT | O_WRONLY | O_SEQUENTIAL | O_TRUNC, S_IREAD | S_IWRITE);
#endif
2010-03-28 18:58:01 +00:00
if (fh < 0) return false;
// Write data
2010-03-28 18:58:01 +00:00
if (write(fh, getData(), getSize()) != (signed int) getSize())
{
close(fh);
return false;
}
close(fh);
// Ok
return true;
2009-05-08 13:28:41 +00:00
}
bool StdStrBuf::LoadFromFile(const char *szFile)
2009-05-08 13:28:41 +00:00
{
// Open file
#ifdef _WIN32
int fh = _wopen(::GetWideChar(szFile), O_BINARY | O_RDONLY | O_SEQUENTIAL, S_IREAD | S_IWRITE);
#else
int fh = open(szFile, O_BINARY | O_CLOEXEC | O_RDONLY | O_SEQUENTIAL, S_IREAD | S_IWRITE);
#endif
2010-03-28 18:58:01 +00:00
if (fh < 0) return false;
// Create buf
SetLength(FileSize(fh));
// Read
2010-03-28 18:58:01 +00:00
if (read(fh, getMData(), getLength()) != (ssize_t) getLength())
{
close(fh);
return false;
}
close(fh);
// Ok
return true;
2009-05-08 13:28:41 +00:00
}
bool StdStrBuf::SaveToFile(const char *szFile) const
{
// Open file
#ifdef _WIN32
int fh = _wopen(::GetWideChar(szFile), O_BINARY | O_CREAT | O_WRONLY | O_SEQUENTIAL | O_TRUNC, S_IREAD | S_IWRITE);
#else
int fh = open(szFile, O_BINARY | O_CLOEXEC | O_CREAT | O_WRONLY | O_SEQUENTIAL | O_TRUNC, S_IREAD | S_IWRITE);
#endif
2010-03-28 18:58:01 +00:00
if (fh < 0) return false;
// Write data
2010-03-28 18:58:01 +00:00
if (write(fh, getData(), getLength()) != (ssize_t) getLength())
{
close(fh);
return false;
}
close(fh);
// Ok
return true;
}
2009-05-08 13:28:41 +00:00
// Passes the buffer through pComp: first the size, then a separator, then
// the raw bytes. iType is forwarded as StdCompiler::RawCompileType.
void StdBuf::CompileFunc(StdCompiler *pComp, int iType)
{
	// Size (guess it is a small value most of the time - if it's big, an extra byte won't hurt anyway).
	// It travels through a 32 bit temporary and is stored back afterwards.
	uint32_t iPackedSize = iSize;
	pComp->Value(mkIntPackAdapt(iPackedSize));
	iSize = iPackedSize;
	pComp->Separator(StdCompiler::SEP_PART2);
	const StdCompiler::RawCompileType eRawType = StdCompiler::RawCompileType(iType);
	if (!pComp->isCompiler())
	{
		// Not compiling: pass the stored bytes on. The const_cast only
		// satisfies the signature of Raw().
		pComp->Raw(const_cast<void *>(getData()), iSize, eRawType);
		return;
	}
	// Compiling: allocate iSize bytes, then let the compiler fill them
	New(iSize);
	pComp->Raw(getMData(), iSize, eRawType);
}
// *** StdStringBuf
#ifdef _WIN32
// Constructs the string by converting utf16 to UTF-8 (Windows only).
StdStrBuf::StdStrBuf(const wchar_t * utf16)
{
	// First pass without an output buffer: only asks for the required size
	const int iBytesNeeded = WideCharToMultiByte(CP_UTF8, 0, utf16, -1, NULL, 0, 0, 0);
	SetSize(iBytesNeeded);
	// Second pass converts into our own storage
	WideCharToMultiByte(CP_UTF8, 0, utf16, -1, getMData(), getSize(), 0, 0);
}
2011-10-09 18:11:39 +00:00
// Returns the string converted from UTF-8 to wide characters, wrapped in a
// wchar_t_holder. A buffer of size 0 yields a holder wrapping NULL.
StdStrBuf::wchar_t_holder StdStrBuf::GetWideChar() const
{
	if (getSize() == 0)
		return StdStrBuf::wchar_t_holder(NULL);
	// Ask for the number of wide characters, then convert into a new array
	const int iWideCount = MultiByteToWideChar(CP_UTF8, 0, getData(), getSize(), NULL, 0);
	wchar_t * const pWide = new wchar_t[iWideCount];
	MultiByteToWideChar(CP_UTF8, 0, getData(), getSize(), pWide, iWideCount);
	return StdStrBuf::wchar_t_holder(pWide);
}
// Returns the string converted from UTF-8 to wide characters as a StdBuf.
StdBuf StdStrBuf::GetWideCharBuf()
{
	// Ask for the number of wide characters first
	const int iWideCount = MultiByteToWideChar(CP_UTF8, 0, getData(), getSize(), NULL, 0);
	// Result buffer is sized in bytes
	StdBuf Result;
	Result.SetSize(iWideCount * sizeof(wchar_t));
	// Convert straight into the result buffer
	MultiByteToWideChar(CP_UTF8, 0, getData(), getSize(), getMBufPtr<wchar_t>(Result), iWideCount);
	return Result;
}
// Converts the null-terminated UTF-8 string utf8 to wide characters and
// returns the newly allocated array wrapped in a wchar_t_holder.
StdStrBuf::wchar_t_holder GetWideChar(const char * utf8)
{
	// Ask for the number of wide characters, then convert into a new array
	const int iWideCount = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
	wchar_t * const pWide = new wchar_t[iWideCount];
	MultiByteToWideChar(CP_UTF8, 0, utf8, -1, pWide, iWideCount);
	return StdStrBuf::wchar_t_holder(pWide);
}
// Converts the null-terminated UTF-8 string utf8 to wide characters and
// returns them as a StdBuf.
StdBuf GetWideCharBuf(const char * utf8)
{
	// Ask for the number of wide characters first
	const int iWideCount = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
	// Result buffer is sized in bytes
	StdBuf Result;
	Result.SetSize(iWideCount * sizeof(wchar_t));
	// Convert straight into the result buffer
	MultiByteToWideChar(CP_UTF8, 0, utf8, -1, getMBufPtr<wchar_t>(Result), iWideCount);
	return Result;
}
#endif
2009-05-08 13:28:41 +00:00
void StdStrBuf::Format(const char *szFmt, ...)
{
// Create argument list
va_list args; va_start(args, szFmt);
2009-05-08 13:28:41 +00:00
// Format
FormatV(szFmt, args);
}
// va_list variant of Format: replaces the contents with the formatted text.
void StdStrBuf::FormatV(const char *szFmt, va_list args)
{
	// Drop whatever is stored, so that appending the formatted text
	// afterwards amounts to a replacement
	Clear();
	AppendFormatV(szFmt, args);
}
void StdStrBuf::AppendFormat(const char *szFmt, ...)
{
// Create argument list
va_list args; va_start(args, szFmt);
2009-05-08 13:28:41 +00:00
// Format
AppendFormatV(szFmt, args);
}
// va_list variant of AppendFormat: appends the formatted text to the current
// contents. One of three implementations is selected at compile time,
// depending on which formatting primitives are available. args stays owned
// by the caller; this function does not call va_end on it.
void StdStrBuf::AppendFormatV(const char *szFmt, va_list args)
{
#ifdef HAVE_VASPRINTF
	// Format into a buffer allocated by vasprintf
	char *pStr;
	int iBytes = vasprintf(&pStr, szFmt, args);
	if (iBytes < 0 || !pStr) return;
	// Append
	if (isNull())
	{
		// Nothing stored yet: hand the allocated string to Take() instead of copying it
		Take(pStr, iBytes);
	}
	else
	{
		Append(pStr, iBytes);
		free(pStr);
	}
#elif defined(HAVE_VSCPRINTF)
	// Save append start
	int iStart = getLength();
	// Calculate size on a copy of the argument list: a va_list that has been
	// consumed by one v*printf call must not be passed to a second one
	va_list args_copy;
#ifdef va_copy
	va_copy(args_copy, args);
#else
	args_copy = args;
#endif
	int iLength = vscprintf(szFmt, args_copy);
#ifdef va_copy
	va_end(args_copy);
#endif
	// Formatting error: leave the contents untouched (same as the vasprintf path)
	if (iLength < 0) return;
	// Allocate
	Grow(iLength);
	// Format behind the old contents
	vsprintf(getMPtr(iStart), szFmt, args);
#else
	// Save append start
	int iStart = getLength(), iBytes;
	do
	{
		// Grow by another chunk on every attempt
		Grow(512);
		// Try output; each attempt works on its own copy of the argument list
		va_list args_copy;
#ifdef va_copy
		va_copy(args_copy, args);
#else
		args_copy = args;
#endif
		iBytes = vsnprintf(getMPtr(iStart), getLength() - iStart, szFmt, args_copy);
#ifdef va_copy
		va_end(args_copy);
#endif
	}
	// Retry while vsnprintf reports an error or an output that did not fit
	while (iBytes < 0 || (unsigned int)(iBytes) >= getLength() - iStart);
	// Calculate real length, if vsnprintf didn't return anything of value
	iBytes = strlen(getMPtr(iStart));
	// Shrink to fit
	SetSize(iStart + iBytes + 1);
#endif
}
// Appends DirectorySeparator unless the string already ends with it.
void StdStrBuf::AppendBackslash()
{
	const size_t iLen = getLength();
	const bool fHasSeparator = iLen && *getPtr(iLen - 1) == DirectorySeparator;
	if (!fHasSeparator)
		AppendChar(DirectorySeparator);
}
2009-05-08 13:28:41 +00:00
// Passes the string through pComp. iRawType is forwarded as
// StdCompiler::RawCompileType.
void StdStrBuf::CompileFunc(StdCompiler *pComp, int iRawType)
{
	const StdCompiler::RawCompileType eRawType = StdCompiler::RawCompileType(iRawType);
	if (!pComp->isCompiler())
	{
		// Not compiling: a null buffer is presented as an empty string.
		// The const_cast only satisfies the signature of String().
		const char *szStored = getData();
		char *pData = const_cast<char *>(szStored ? szStored : "");
		pComp->String(&pData, eRawType);
		return;
	}
	// Compiling: let the compiler produce the string, then pass it to Take()
	char *pnData;
	pComp->String(&pnData, eRawType);
	Take(pnData);
}
// Returns a new string holding the printf-style formatted text.
// szFmt is the format string; the variadic arguments are its values.
StdStrBuf FormatString(const char *szFmt, ...)
{
	va_list args;
	va_start(args, szFmt);
	StdStrBuf Result = FormatStringV(szFmt, args);
	// Every va_start needs a matching va_end before the function returns
	va_end(args);
	return Result;
}
// va_list variant of FormatString: returns a new string holding the
// formatted text.
StdStrBuf FormatStringV(const char *szFmt, va_list args)
{
	StdStrBuf Formatted;
	Formatted.FormatV(szFmt, args);
	return Formatted;
}
// replace all occurences of one string with another. Return number of replacements.
int StdStrBuf::Replace(const char *szOld, const char *szNew, size_t iStartSearch)
2010-03-28 18:58:01 +00:00
{
2009-05-08 13:28:41 +00:00
if (!getPtr(0) || !szOld) return 0;
if (!szNew) szNew = "";
int cnt=0;
size_t iOldLen = strlen(szOld), iNewLen = strlen(szNew);
if (iOldLen != iNewLen)
2010-03-28 18:58:01 +00:00
{
2009-05-08 13:28:41 +00:00
// count number of occurences to calculate new string length
size_t iResultLen = getLength();
const char *szPos = getPtr(iStartSearch);
2010-01-25 04:00:59 +00:00
while ((szPos = SSearch(szPos, szOld)))
2010-03-28 18:58:01 +00:00
{
2009-05-08 13:28:41 +00:00
iResultLen += iNewLen - iOldLen;
++cnt;
2010-03-28 18:58:01 +00:00
}
2009-05-08 13:28:41 +00:00
if (!cnt) return 0;
// now construct new string by replacement
StdStrBuf sResult;
sResult.New(iResultLen+1);
const char *szRPos = getPtr(0), *szRNextPos;
char *szWrite = sResult.getMPtr(0);
if (iStartSearch)
2010-03-28 18:58:01 +00:00
{
2009-05-08 13:28:41 +00:00
memcpy(szWrite, szRPos, iStartSearch * sizeof(char));
szRPos += iStartSearch;
szWrite += iStartSearch;
2010-03-28 18:58:01 +00:00
}
2010-01-25 04:00:59 +00:00
while ((szRNextPos = SSearch(szRPos, szOld)))
2010-03-28 18:58:01 +00:00
{
2009-05-08 13:28:41 +00:00
memcpy(szWrite, szRPos, (szRNextPos - szRPos - iOldLen) * sizeof(char));
szWrite += (szRNextPos - szRPos - iOldLen);
memcpy(szWrite, szNew, iNewLen * sizeof(char));
szWrite += iNewLen;
szRPos = szRNextPos;
2010-03-28 18:58:01 +00:00
}
2009-05-08 13:28:41 +00:00
strcpy(szWrite, szRPos);
2009-11-25 18:38:54 +00:00
Take(std::move(sResult));
2010-03-28 18:58:01 +00:00
}
2009-05-08 13:28:41 +00:00
else
2010-03-28 18:58:01 +00:00
{
2009-05-08 13:28:41 +00:00
// replace directly in this string
char *szRPos = getMPtr(iStartSearch);
2010-01-25 04:00:59 +00:00
while ((szRPos = const_cast<char *>(SSearch(szRPos, szOld))))
2010-03-28 18:58:01 +00:00
{
2009-05-08 13:28:41 +00:00
memcpy(szRPos - iOldLen, szNew, iOldLen * sizeof(char));
++cnt;
}
}
2010-03-28 18:58:01 +00:00
return cnt;
}
2009-05-08 13:28:41 +00:00
int StdStrBuf::ReplaceChar(char cOld, char cNew)
2010-03-28 18:58:01 +00:00
{
2009-05-08 13:28:41 +00:00
if (isNull()) return 0;
char *szPos = getMPtr(0);
if (!cOld) return 0;
if (!cNew) cNew = '_';
int cnt=0;
2010-01-25 04:00:59 +00:00
while ((szPos = strchr(szPos, cOld)))
2010-03-28 18:58:01 +00:00
{
2009-05-08 13:28:41 +00:00
*szPos++ = cNew;
++cnt;
}
2010-03-28 18:58:01 +00:00
return cnt;
}
2009-05-08 13:28:41 +00:00
void StdStrBuf::ReplaceEnd(size_t iPos, const char *szNewEnd)
2010-03-28 18:58:01 +00:00
{
2009-05-08 13:28:41 +00:00
size_t iLen = getLength();
assert(iPos <= iLen); if (iPos > iLen) return;
size_t iEndLen = strlen(szNewEnd);
if (iLen - iPos != iEndLen) SetLength(iPos + iEndLen);
memcpy(getMPtr(iPos), szNewEnd, iEndLen * sizeof(char));
2010-03-28 18:58:01 +00:00
}
2009-05-08 13:28:41 +00:00
bool StdStrBuf::ValidateChars(const char *szInitialChars, const char *szMidChars)
2010-03-28 18:58:01 +00:00
{
2009-05-08 13:28:41 +00:00
// only given chars may be in string
for (size_t i=0; i<getLength(); ++i)
if (!strchr(i ? szMidChars : szInitialChars, getData()[i]))
return false;
return true;
2010-03-28 18:58:01 +00:00
}
2009-05-08 13:28:41 +00:00
bool StdStrBuf::GetSection(size_t idx, StdStrBuf *psOutSection, char cSeparator) const
2010-03-28 18:58:01 +00:00
{
2009-05-08 13:28:41 +00:00
assert(psOutSection);
psOutSection->Clear();
const char *szStr = getData(), *szSepPos;
if (!szStr) return false; // invaid argument
while ((szSepPos = strchr(szStr, cSeparator)) && idx) { szStr = szSepPos+1; --idx; }
if (idx) return false; // indexed section not found
// fill output buffer with section, if not empty
if (!szSepPos) szSepPos = getData() + getLength();
if (szSepPos != szStr) psOutSection->Copy(szStr, szSepPos - szStr);
// return true even if section is empty, because the section obviously exists
// (to enable loops like while (buf.GetSection(i++, &sect)) if (sect) ...)
return true;
2010-03-28 18:58:01 +00:00
}
2009-05-08 13:28:41 +00:00
void StdStrBuf::ToLowerCase()
2010-03-28 18:58:01 +00:00
{
2009-05-08 13:28:41 +00:00
if (!isNull())
for (char *szPos = getMPtr(0); *szPos; ++szPos)
*szPos = tolower(*szPos);
2010-03-28 18:58:01 +00:00
}
2009-05-08 13:28:41 +00:00
// Appends the code point unicodechar to the string, encoded as UTF-8.
// Values of 0x110000 and above are not code points and are ignored.
void StdStrBuf::AppendCharacter(uint32_t unicodechar)
{
	// One-byte range: goes through the plain character append
	if (unicodechar < 0x80)
	{
		AppendChar(unicodechar);
		return;
	}
	// not an unicode code point, ignore
	if (unicodechar >= 0x110000)
		return;
	// Pick the sequence length and the marker bits of the lead byte
	size_t iSeqLen;
	uint32_t iLeadMark;
	if (unicodechar < 0x800)        { iSeqLen = 2; iLeadMark = 0xC0; }
	else if (unicodechar < 0x10000) { iSeqLen = 3; iLeadMark = 0xE0; }
	else                            { iSeqLen = 4; iLeadMark = 0xF0; }
	// Make room and write into the newly added tail
	Grow(iSeqLen);
	char *pOut = getMPtr(getLength() - iSeqLen);
	// Lead byte: marker plus the topmost bits of the code point
	pOut[0] = (iLeadMark | (unicodechar >> (6 * (iSeqLen - 1))));
	// Continuation bytes: 0x80 plus six bits each, most significant first
	for (size_t i = 1; i < iSeqLen; ++i)
		pOut[i] = (0x80 | ((unicodechar >> (6 * (iSeqLen - 1 - i))) & 0x3F));
}
2009-05-08 13:28:41 +00:00
void StdStrBuf::EnsureUnicode()
2010-03-28 18:58:01 +00:00
{
2009-05-08 13:28:41 +00:00
// assume that it's windows-1252 and convert to utf-8
if (!IsValidUtf8(getData(), getLength()))
2010-03-28 18:58:01 +00:00
{
2009-05-08 13:28:41 +00:00
size_t j = 0;
StdStrBuf buf;
buf.Grow(getLength());
// totally unfounded statistic: most texts have less than 20 umlauts.
enum { GROWSIZE = 20 };
for (size_t i = 0; i < getSize(); ++i)
2010-03-28 18:58:01 +00:00
{
2009-05-08 13:28:41 +00:00
unsigned char c = *getPtr(i);
// ASCII
if (c < 0x80)
2010-03-28 18:58:01 +00:00
{
2009-05-08 13:28:41 +00:00
if (j >= buf.getLength())
buf.Grow(GROWSIZE);
*buf.getMPtr(j++) = c;
continue;
2010-03-28 18:58:01 +00:00
}
2009-05-08 13:28:41 +00:00
// Is c one of the control characters only in ISO/IEC_8859-1 or part of the common subset with windows-1252?
if (c == 0x81 || c == 0x8D || c == 0x8F || c == 0x90 || c == 0x9D || c >= 0xA0)
2010-03-28 18:58:01 +00:00
{
2009-05-08 13:28:41 +00:00
if (j + 1 >= buf.getLength())
buf.Grow(GROWSIZE);
*buf.getMPtr(j++) = (0xC0 | (c >> 6));
*buf.getMPtr(j++) = (0x80 | (c & 0x3F));
2009-05-08 13:28:41 +00:00
continue;
2010-03-28 18:58:01 +00:00
}
2009-05-08 13:28:41 +00:00
// Extra windows-1252-characters
buf.SetLength(j);
2010-03-28 18:58:01 +00:00
static const char * extra_chars [] =
{
//"€", 0, "", "ƒ", "„", "…", "†", "‡", "ˆ", "‰", "Š", "", "Œ", 0, "Ž", 0,
// 0, "", "", "“", "”", "•", "", "—", "˜", "™", "š", "", "œ", 0, "ž", "Ÿ" };
"\xe2\x82\xac", 0, "\xe2\x80\x9a", "\xc6\x92", "\xe2\x80\x9e", "\xe2\x80\xa6", "\xe2\x80\xa0", "\xe2\x80\xa1", "\xcb\x86", "\xe2\x80\xb0", "\xc5\xa0", "\xe2\x80\xb9", "\xc5\x92", 0, "\xc5\xbd", 0,
0, "\xe2\x80\x98", "\xe2\x80\x99", "\xe2\x80\x9c", "\xe2\x80\x9d", "\xe2\x80\xa2", "\xe2\x80\x93", "\xe2\x80\x94", "\xcb\x9c", "\xe2\x84\xa2", "\xc5\xa1", "\xe2\x80\xba", "\xc5\x93", 0, "\xc5\xbe", "\xc5\xb8"
};
2009-05-08 13:28:41 +00:00
buf.Append(extra_chars[c - 0x80]);
j += strlen(extra_chars[c - 0x80]);
2010-03-28 18:58:01 +00:00
}
2009-05-08 13:28:41 +00:00
buf.SetLength(j);
2009-11-25 18:38:54 +00:00
Take(std::move(buf));
2009-05-08 13:28:41 +00:00
}
2010-03-28 18:58:01 +00:00
}
2009-05-08 13:28:41 +00:00
bool StdStrBuf::TrimSpaces()
2010-03-28 18:58:01 +00:00
{
2009-05-08 13:28:41 +00:00
// get left trim
int32_t iSpaceLeftCount = 0, iLength = getLength();
if (!iLength) return false;
const char *szStr = getData();
while (iSpaceLeftCount < iLength)
if (isspace((unsigned char)(unsigned char) szStr[iSpaceLeftCount]))
++iSpaceLeftCount;
else
break;
// only spaces? Clear!
if (iSpaceLeftCount == iLength)
2010-03-28 18:58:01 +00:00
{
2009-05-08 13:28:41 +00:00
Clear();
return true;
2010-03-28 18:58:01 +00:00
}
2009-05-08 13:28:41 +00:00
// get right trim
int32_t iSpaceRightCount = 0;
while (isspace((unsigned char)szStr[iLength - 1 - iSpaceRightCount])) ++iSpaceRightCount;
// anything to trim?
if (!iSpaceLeftCount && !iSpaceRightCount) return false;
// only right trim? Can do this by shortening
if (!iSpaceLeftCount)
2010-03-28 18:58:01 +00:00
{
2009-05-08 13:28:41 +00:00
SetLength(iLength - iSpaceRightCount);
return true;
2010-03-28 18:58:01 +00:00
}
2009-05-08 13:28:41 +00:00
// left trim involved - move text and shorten
memmove(getMPtr(0), szStr+iSpaceLeftCount, iLength - iSpaceLeftCount - iSpaceRightCount);
SetLength(iLength - iSpaceLeftCount - iSpaceRightCount);
return true;
2010-03-28 18:58:01 +00:00
}