openclonk/src/lib/StdBuf.cpp

476 lines
12 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

/*
* OpenClonk, http://www.openclonk.org
*
* Copyright (c) 2005, 2007 Sven Eberhardt
* Copyright (c) 2005 Peter Wortmann
* Copyright (c) 2005, 2007-2008 Günther Brammer
* Copyright (c) 2009 Nicolas Hake
* Copyright (c) 2001-2009, RedWolf Design GmbH, http://www.clonk.de
*
* Portions might be copyrighted by other authors who have contributed
* to OpenClonk.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
* See isc_license.txt for full license and disclaimer.
*
* "Clonk" is a registered trademark of Matthes Bender.
* See clonk_trademark_license.txt for full license.
*/
#include "C4Include.h"
#include <StdBuf.h>
#include <StdCompiler.h>
#include <StdAdaptors.h>
#include <StdFile.h>
#include <stdarg.h>
#include <stdio.h>
#ifdef _WIN32
#include <io.h>
#include <C4windowswrapper.h>
#define vsnprintf _vsnprintf
#else
#define O_BINARY 0
#define O_SEQUENTIAL 0
#include <unistd.h>
#include <stdlib.h>
#endif
#include <ctype.h>
#include <fcntl.h>
#include <sys/stat.h>
// *** StdBuf
bool StdBuf::LoadFromFile(const char *szFile)
{
// Open file
int fh = open(szFile, O_BINARY | O_RDONLY | O_SEQUENTIAL, S_IREAD | S_IWRITE);
if (fh < 0) return false;
// Create buf
New(FileSize(fh));
// Read
if (read(fh, getMData(), getSize()) != (signed int) getSize())
{
close(fh);
return false;
}
close(fh);
// Ok
return true;
}
bool StdBuf::SaveToFile(const char *szFile) const
{
// Open file
int fh = open(szFile, O_BINARY | O_CREAT | O_WRONLY | O_SEQUENTIAL | O_TRUNC, S_IREAD | S_IWRITE);
if (fh < 0) return false;
// Write data
if (write(fh, getData(), getSize()) != (signed int) getSize())
{
close(fh);
return false;
}
close(fh);
// Ok
return true;
}
bool StdStrBuf::LoadFromFile(const char *szFile)
{
// Open file
int fh = open(szFile, O_BINARY | O_RDONLY | O_SEQUENTIAL, S_IREAD | S_IWRITE);
if (fh < 0) return false;
// Create buf
SetLength(FileSize(fh));
// Read
if (read(fh, getMData(), getLength()) != (ssize_t) getLength())
{
close(fh);
return false;
}
close(fh);
// Ok
return true;
}
bool StdStrBuf::SaveToFile(const char *szFile) const
{
// Open file
int fh = open(szFile, O_BINARY | O_CREAT | O_WRONLY | O_SEQUENTIAL | O_TRUNC, S_IREAD | S_IWRITE);
if (fh < 0) return false;
// Write data
if (write(fh, getData(), getLength()) != (ssize_t) getLength())
{
close(fh);
return false;
}
close(fh);
// Ok
return true;
}
void StdBuf::CompileFunc(StdCompiler *pComp, int iType)
{
// Size (guess it is a small value most of the time - if it's big, an extra byte won't hurt anyway)
uint32_t tmp = iSize; pComp->Value(mkIntPackAdapt(tmp)); iSize = tmp;
pComp->Separator(StdCompiler::SEP_PART2);
// Read/write data
if (pComp->isCompiler())
{
New(iSize);
pComp->Raw(getMData(), iSize, StdCompiler::RawCompileType(iType));
}
else
{
pComp->Raw(const_cast<void *>(getData()), iSize, StdCompiler::RawCompileType(iType));
}
}
// *** StdStringBuf
#ifdef _WIN32
StdStrBuf::StdStrBuf(const wchar_t * utf16)
{
int len = WideCharToMultiByte(CP_UTF8, 0, utf16, -1, NULL, 0, 0, 0);
SetSize(len);
WideCharToMultiByte(CP_UTF8, 0, utf16, -1, getMData(), getSize(), 0, 0);
}
#endif
void StdStrBuf::Format(const char *szFmt, ...)
{
// Create argument list
va_list args; va_start(args, szFmt);
// Format
FormatV(szFmt, args);
}
void StdStrBuf::FormatV(const char *szFmt, va_list args)
{
// Clear previous contents
Clear();
// Format
AppendFormatV(szFmt, args);
}
void StdStrBuf::AppendFormat(const char *szFmt, ...)
{
// Create argument list
va_list args; va_start(args, szFmt);
// Format
AppendFormatV(szFmt, args);
}
void StdStrBuf::AppendFormatV(const char *szFmt, va_list args)
{
#ifdef HAVE_VASPRINTF
// Format
char *pStr; int iBytes = vasprintf(&pStr, szFmt, args);
if (iBytes < 0 || !pStr) return;
// Append
if (isNull())
Take(pStr, iBytes);
else
{
Append(pStr, iBytes);
free(pStr);
}
#elif defined(HAVE_VSCPRINTF)
// Save append start
int iStart = getLength();
// Calculate size, allocate
int iLength = vscprintf(szFmt, args);
Grow(iLength);
// Format
char *pPos = getMElem<char>(*this, iSize - iLength - 1);
vsprintf(getMPtr(iStart), szFmt, args);
#else
// Save append start
int iStart = getLength(), iBytes;
do
{
// Grow
Grow(512);
// Try output
iBytes = vsnprintf(getMPtr(iStart), getLength() - iStart, szFmt, args);
}
while (iBytes < 0 || (unsigned int)(iBytes) >= getLength() - iStart);
// Calculate real length, if vsnprintf didn't return anything of value
iBytes = strlen(getMPtr(iStart));
// Shrink to fit
SetSize(iStart + iBytes + 1);
#endif
}
void StdStrBuf::AppendBackslash()
{
if(getLength() && *getPtr(getLength() - 1) == DirectorySeparator) return;
AppendChar(DirectorySeparator);
}
void StdStrBuf::CompileFunc(StdCompiler *pComp, int iRawType)
{
if (pComp->isCompiler())
{
char *pnData;
pComp->String(&pnData, StdCompiler::RawCompileType(iRawType));
Take(pnData);
}
else
{
char *pData = const_cast<char *>(getData());
if (!pData) pData = const_cast<char *>("");
pComp->String(&pData, StdCompiler::RawCompileType(iRawType));
}
}
StdStrBuf FormatString(const char *szFmt, ...)
{
va_list args; va_start(args, szFmt);
return FormatStringV(szFmt, args);
}
StdStrBuf FormatStringV(const char *szFmt, va_list args)
{
StdStrBuf Buf;
Buf.FormatV(szFmt, args);
return Buf;
}
// replace all occurences of one string with another. Return number of replacements.
int StdStrBuf::Replace(const char *szOld, const char *szNew, size_t iStartSearch)
{
if (!getPtr(0) || !szOld) return 0;
if (!szNew) szNew = "";
int cnt=0;
size_t iOldLen = strlen(szOld), iNewLen = strlen(szNew);
if (iOldLen != iNewLen)
{
// count number of occurences to calculate new string length
size_t iResultLen = getLength();
const char *szPos = getPtr(iStartSearch);
while ((szPos = SSearch(szPos, szOld)))
{
iResultLen += iNewLen - iOldLen;
++cnt;
}
if (!cnt) return 0;
// now construct new string by replacement
StdStrBuf sResult;
sResult.New(iResultLen+1);
const char *szRPos = getPtr(0), *szRNextPos;
char *szWrite = sResult.getMPtr(0);
if (iStartSearch)
{
memcpy(szWrite, szRPos, iStartSearch * sizeof(char));
szRPos += iStartSearch;
szWrite += iStartSearch;
}
while ((szRNextPos = SSearch(szRPos, szOld)))
{
memcpy(szWrite, szRPos, (szRNextPos - szRPos - iOldLen) * sizeof(char));
szWrite += (szRNextPos - szRPos - iOldLen);
memcpy(szWrite, szNew, iNewLen * sizeof(char));
szWrite += iNewLen;
szRPos = szRNextPos;
}
strcpy(szWrite, szRPos);
Take(std::move(sResult));
}
else
{
// replace directly in this string
char *szRPos = getMPtr(iStartSearch);
while ((szRPos = const_cast<char *>(SSearch(szRPos, szOld))))
{
memcpy(szRPos - iOldLen, szNew, iOldLen * sizeof(char));
++cnt;
}
}
return cnt;
}
int StdStrBuf::ReplaceChar(char cOld, char cNew, size_t iStartSearch)
{
if (isNull()) return 0;
char *szPos = getMPtr(0);
if (!cOld) return 0;
if (!cNew) cNew = '_';
int cnt=0;
while ((szPos = strchr(szPos, cOld)))
{
*szPos++ = cNew;
++cnt;
}
return cnt;
}
void StdStrBuf::ReplaceEnd(size_t iPos, const char *szNewEnd)
{
size_t iLen = getLength();
assert(iPos <= iLen); if (iPos > iLen) return;
size_t iEndLen = strlen(szNewEnd);
if (iLen - iPos != iEndLen) SetLength(iPos + iEndLen);
memcpy(getMPtr(iPos), szNewEnd, iEndLen * sizeof(char));
}
bool StdStrBuf::ValidateChars(const char *szInitialChars, const char *szMidChars)
{
// only given chars may be in string
for (size_t i=0; i<getLength(); ++i)
if (!strchr(i ? szMidChars : szInitialChars, getData()[i]))
return false;
return true;
}
bool StdStrBuf::GetSection(size_t idx, StdStrBuf *psOutSection, char cSeparator) const
{
assert(psOutSection);
psOutSection->Clear();
const char *szStr = getData(), *szSepPos;
if (!szStr) return false; // invaid argument
while ((szSepPos = strchr(szStr, cSeparator)) && idx) { szStr = szSepPos+1; --idx; }
if (idx) return false; // indexed section not found
// fill output buffer with section, if not empty
if (!szSepPos) szSepPos = getData() + getLength();
if (szSepPos != szStr) psOutSection->Copy(szStr, szSepPos - szStr);
// return true even if section is empty, because the section obviously exists
// (to enable loops like while (buf.GetSection(i++, &sect)) if (sect) ...)
return true;
}
void StdStrBuf::ToLowerCase()
{
if (!isNull())
for (char *szPos = getMPtr(0); *szPos; ++szPos)
*szPos = tolower(*szPos);
}
void StdStrBuf::EnsureUnicode()
{
bool valid = true;
int need_continuation_bytes = 0;
// Check wether valid UTF-8
for (size_t i = 0; i < getSize(); ++i)
{
unsigned char c = *getPtr(i);
// remaining of a code point started before
if (need_continuation_bytes)
{
--need_continuation_bytes;
// (10000000-10111111)
if (0x80 <= c && c <= 0xBF)
continue;
else
{
valid = false;
break;
}
}
// ASCII
if (c < 0x80)
continue;
// Two byte sequence (11000010-11011111)
// Note: 1100000x is an invalid overlong sequence
if (0xC2 <= c && c <= 0xDF)
{
need_continuation_bytes = 1;
continue;
}
// Three byte sequence (11100000-11101111)
if (0xE0 <= c && c <= 0xEF)
{
need_continuation_bytes = 2;
continue;
// FIXME: could check for UTF-16 surrogates from a broken utf-16->utf-8 converter here
}
// Four byte sequence (11110000-11110100)
if (0xF0 <= c && c <= 0xF4)
{
need_continuation_bytes = 3;
continue;
}
valid = false;
break;
}
if (need_continuation_bytes)
valid = false;
// assume that it's windows-1252 and convert to utf-8
if (!valid)
{
size_t j = 0;
StdStrBuf buf;
buf.Grow(getLength());
// totally unfounded statistic: most texts have less than 20 umlauts.
enum { GROWSIZE = 20 };
for (size_t i = 0; i < getSize(); ++i)
{
unsigned char c = *getPtr(i);
// ASCII
if (c < 0x80)
{
if (j >= buf.getLength())
buf.Grow(GROWSIZE);
*buf.getMPtr(j++) = c;
continue;
}
// Is c one of the control characters only in ISO/IEC_8859-1 or part of the common subset with windows-1252?
if (c == 0x81 || c == 0x8D || c == 0x8F || c == 0x90 || c == 0x9D || c >= 0xA0)
{
if (j + 1 >= buf.getLength())
buf.Grow(GROWSIZE);
*buf.getMPtr(j++) = (0xC0 | (c >> 6));
*buf.getMPtr(j++) = (0x80 | (c & 0x3F));
continue;
}
// Extra windows-1252-characters
buf.SetLength(j);
static const char * extra_chars [] =
{
//"€", 0, "", "ƒ", "„", "…", "†", "‡", "ˆ", "‰", "Š", "", "Œ", 0, "Ž", 0,
// 0, "", "", "“", "”", "•", "", "—", "˜", "™", "š", "", "œ", 0, "ž", "Ÿ" };
"\xe2\x82\xac", 0, "\xe2\x80\x9a", "\xc6\x92", "\xe2\x80\x9e", "\xe2\x80\xa6", "\xe2\x80\xa0", "\xe2\x80\xa1", "\xcb\x86", "\xe2\x80\xb0", "\xc5\xa0", "\xe2\x80\xb9", "\xc5\x92", 0, "\xc5\xbd", 0,
0, "\xe2\x80\x98", "\xe2\x80\x99", "\xe2\x80\x9c", "\xe2\x80\x9d", "\xe2\x80\xa2", "\xe2\x80\x93", "\xe2\x80\x94", "\xcb\x9c", "\xe2\x84\xa2", "\xc5\xa1", "\xe2\x80\xba", "\xc5\x93", 0, "\xc5\xbe", "\xc5\xb8"
};
buf.Append(extra_chars[c - 0x80]);
j += strlen(extra_chars[c - 0x80]);
}
buf.SetLength(j);
Take(std::move(buf));
}
}
bool StdStrBuf::TrimSpaces()
{
// get left trim
int32_t iSpaceLeftCount = 0, iLength = getLength();
if (!iLength) return false;
const char *szStr = getData();
while (iSpaceLeftCount < iLength)
if (isspace((unsigned char)(unsigned char) szStr[iSpaceLeftCount]))
++iSpaceLeftCount;
else
break;
// only spaces? Clear!
if (iSpaceLeftCount == iLength)
{
Clear();
return true;
}
// get right trim
int32_t iSpaceRightCount = 0;
while (isspace((unsigned char)szStr[iLength - 1 - iSpaceRightCount])) ++iSpaceRightCount;
// anything to trim?
if (!iSpaceLeftCount && !iSpaceRightCount) return false;
// only right trim? Can do this by shortening
if (!iSpaceLeftCount)
{
SetLength(iLength - iSpaceRightCount);
return true;
}
// left trim involved - move text and shorten
memmove(getMPtr(0), szStr+iSpaceLeftCount, iLength - iSpaceLeftCount - iSpaceRightCount);
SetLength(iLength - iSpaceLeftCount - iSpaceRightCount);
return true;
}