openclonk/src/lib/C4RTF.cpp

377 lines
12 KiB
C++

/*
* OpenClonk, http://www.openclonk.org
*
* Copyright (c) 2005, 2007 Sven Eberhardt
* Copyright (c) 2008 Matthes Bender
* Copyright (c) 2008 Günther Brammer
* Copyright (c) 2005-2009, RedWolf Design GmbH, http://www.clonk.de
*
* Portions might be copyrighted by other authors who have contributed
* to OpenClonk.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
* See isc_license.txt for full license and disclaimer.
*
* "Clonk" is a registered trademark of Matthes Bender.
* See clonk_trademark_license.txt for full license.
*/
// RTF file parsing functionality
#include "Standard.h"
#include "C4RTF.H"
#include <cctype>
// Keyword lookup table used by C4RTFFile::TranslateKeyword; it is defined at
// the end of this file and terminated by an entry whose szKeyword is NULL.
extern C4RTFFile::KeywordTableEntry RTFKeywordTable[];
// Creates a parser with no state stack and no pending "skip unknown
// destination" request.
C4RTFFile::C4RTFFile() : pState(NULL)
{
	// ClearState() is the only other place that resets this flag; initialise
	// it here so it never holds an indeterminate value before the first parse.
	fSkipDestIfUnknownKeyword = false;
}
// Destroys the parser, freeing any property states that are still stacked.
C4RTFFile::~C4RTFFile()
{
	this->ClearState();
}
void C4RTFFile::ClearState()
{
PropertyState *psNext = pState, *ps;
while (ps=psNext)
{
psNext = ps->pNext;
delete ps;
}
pState = NULL;
fSkipDestIfUnknownKeyword = false;
}
// Verifies that iPos still addresses a byte inside the RTF buffer.
// Throws a heap-allocated ParserError (by pointer) if it does not.
void C4RTFFile::AssertNoEOF(size_t iPos)
{
	// position is inside the buffer: nothing to complain about
	if (iPos < sRTF.getSize()) return;
	throw new ParserError("Unexpected end of file");
}
// Switches the destination of the current state to iDest (a DestState value).
// A state that is already being skipped keeps skipping. sResult is not used.
void C4RTFFile::ChangeDest(StdStrBuf &sResult, int iDest)
{
	// only states that are not skipped yet may get a new destination
	if (pState->dest != dsSkip)
		pState->dest = static_cast<DestState>(iDest);
}
// Executes a keyword of type kwdSpec.
//   iKeyw  - one of the spec* identifiers taken from the keyword table
//   iParam - numeric parameter that was parsed along with the keyword
// sResult is not used.
void C4RTFFile::SpecialKeyword(StdStrBuf &sResult, int iKeyw, int iParam)
{
	if (iKeyw == specBin)
	{
		// the following iParam bytes are read in binary mode; ignored for counts <= 0
		if (iParam > 0)
		{
			pState->eState = psBinary;
			pState->iHexBinCnt = iParam;
		}
	}
	else if (iKeyw == specHex)
	{
		// the following two characters are hex digits
		pState->eState = psHex;
		pState->iHexBinCnt = 2;
	}
	else if (iKeyw == specSkipDest)
	{
		// request skipping of the destination if the next keyword is unknown
		fSkipDestIfUnknownKeyword = true;
	}
	else if (iKeyw == specAnsicpg)
	{
		// store the parameter as charset of the current state
		pState->charset = iParam;
	}
}
// Looks szKeyword up in RTFKeywordTable and performs the associated action.
//   iParam       - numeric parameter parsed along with the keyword
//   fHasIntParam - whether a numeric parameter was present at all
// Unknown keywords are ignored; if a skip request is pending, they switch the
// current destination to dsSkip.
void C4RTFFile::TranslateKeyword(StdStrBuf &sResult, const char *szKeyword, int iParam, bool fHasIntParam)
{
	// linear search; the table ends with an entry that has no keyword
	KeywordTableEntry *pEntry;
	for (pEntry = RTFKeywordTable; pEntry->szKeyword; ++pEntry)
		if (strcmp(szKeyword, pEntry->szKeyword) == 0)
			break;
	// a pending skip request is consumed by this keyword, known or not
	const bool fSkipWasRequested = fSkipDestIfUnknownKeyword;
	fSkipDestIfUnknownKeyword = false;
	if (!pEntry->szKeyword)
	{
		// keyword is not in the table: skip the destination if that was requested
		if (fSkipWasRequested) pState->dest = dsSkip;
		return;
	}
	// known keyword: dispatch on its type
	switch (pEntry->eType)
	{
	case KeywordTableEntry::kwdProp:
	{
		// property change: fall back to the default parameter if forced or none was given
		const bool fUseDefault = pEntry->fForceDefaultParam || !fHasIntParam;
		ApplyPropChange(pEntry->idx, fUseDefault ? pEntry->iDefaultParam : iParam);
		break;
	}
	case KeywordTableEntry::kwdChars:
		// keyword stands for literal text
		ParseChars(sResult, pEntry->szChars);
		break;
	case KeywordTableEntry::kwdDest:
		// keyword opens a destination
		ChangeDest(sResult, pEntry->idx);
		break;
	case KeywordTableEntry::kwdSpec:
		// keyword needs special treatment
		SpecialKeyword(sResult, pEntry->idx, iParam);
		break;
	}
}
// Parses the control word or control symbol following a backslash and executes it.
//   sResult - output buffer handed on to the keyword action
//   iPos    - in: index of the first character after the backslash
//             out: index of the next character the caller has to parse
// A control word is a run of letters, optionally followed by a (possibly
// negative) decimal parameter. A single space after it is consumed as the
// delimiter; any other terminating character is handed back to the caller.
// Throws a heap-allocated ParserError (via AssertNoEOF) if the backslash is
// the last character of the buffer.
void C4RTFFile::ParseKeyword(StdStrBuf &sResult, size_t &iPos)
{
	bool fHasIntParam = false;
	int iSign = +1;
	int iParamInt = 0; // parameter as integer
	char szKeyword[30+1]; *szKeyword = 0;
	char szParameter[20+1]; *szParameter = 0;
	AssertNoEOF(iPos);
	char c = ((const char *) sRTF.getData())[iPos++];
	if (!isalpha((unsigned char)c))
	{
		// parse direct control symbol
		szKeyword[0] = c;
		szKeyword[1] = 0;
	}
	else
	{
		// true while c holds a character that was read from the buffer but is
		// not part of the keyword or its parameter. It becomes false when the
		// buffer ends inside the keyword/parameter: c is then the last
		// character already stored, and must not be pushed back for re-parsing.
		bool fHasLookahead = true;
		// get keyword string
		char *szWrite = szKeyword;
		do
		{
			*szWrite = c;
			// do not overflow buffer - longer keywords will be read, not recognized and silently discarded
			if (szWrite - szKeyword < 30) ++szWrite;
			// do not go past rtf file
			if (iPos >= sRTF.getSize()) { fHasLookahead = false; break; }
			// next char
			c = ((const char *) sRTF.getData())[iPos++];
		}
		while (isalpha((unsigned char)c));
		*szWrite = 0;
		// parameter is a negative number?
		if (fHasLookahead && c == '-')
		{
			iSign = -1;
			if (iPos < sRTF.getSize()) c = ((const char *) sRTF.getData())[iPos++];
		}
		if (fHasLookahead && isdigit((unsigned char)c))
		{
			// get parameter as number
			char *szWriteParam = szParameter;
			do
			{
				*szWriteParam = c;
				// do not overflow buffer - longer parameters will be read, not recognized and silently discarded
				if (szWriteParam - szParameter < 20) ++szWriteParam;
				// do not go past rtf file
				if (iPos >= sRTF.getSize()) { fHasLookahead = false; break; }
				// next char
				c = ((const char *) sRTF.getData())[iPos++];
			}
			while (isdigit((unsigned char)c));
			*szWriteParam = 0;
			iParamInt = atoi(szParameter) * iSign;
			fHasIntParam = true;
		}
		// if next char is not a spacing for the command, it does not belong to the keyword and must be re-parsed
		// (only if such a char was actually read - at end of file there is nothing to hand back)
		if (fHasLookahead && c != ' ') --iPos;
	}
	// execute keyword action
	TranslateKeyword(sResult, szKeyword, iParamInt, fHasIntParam);
}
// Handles a single text character by forwarding it to ParseChars as a
// zero-terminated string of length one.
void C4RTFFile::ParseChar(StdStrBuf &sResult, char c)
{
	const char szSingle[2] = { c, '\0' };
	ParseChars(sResult, szSingle);
}
// Sends text to the destination of the current state: with dsNormal it is
// appended to sResult, with any other destination (e.g. dsSkip) it is dropped.
void C4RTFFile::ParseChars(StdStrBuf &sResult, const char *szChars)
{
	if (pState->dest == dsNormal)
		sResult.Append(szChars);
}
// Consumes one hex digit of a hex-escaped character. The digit is shifted into
// pState->bHex; once the expected number of digits has been read, the
// assembled character is emitted and the state returns to psNormal.
// Throws a heap-allocated ParserError (by pointer) if c is not a hex digit.
void C4RTFFile::ParseHexChar(StdStrBuf &sResult, char c)
{
	// make room for the incoming nibble
	pState->bHex = pState->bHex << 4;
	// translate the digit into its numeric value
	int iNibble;
	if (isdigit((unsigned char)c))
		iNibble = c - '0';
	else if (Inside<char>(c, 'a', 'f'))
		iNibble = c - 'a' + 10;
	else if (Inside<char>(c, 'A', 'F'))
		iNibble = c - 'A' + 10;
	else
		throw new ParserError("Invalid hex character");
	pState->bHex += iNibble;
	// one digit less to go; was this the last one?
	--pState->iHexBinCnt;
	if (pState->iHexBinCnt == 0)
	{
		pState->eState = psNormal;
		ParseChar(sResult, pState->bHex);
	}
}
void C4RTFFile::PushState()
{
// store current state to new
PropertyState *pNew = new PropertyState(*pState);
pNew->pNext = pState;
// update current state to new; beginning in default parser mode
pState = pNew;
pState->eState = psNormal;
}
void C4RTFFile::PopState()
{
if (!pState->pNext) throw new ParserError("Too many brackets closed");
// if the destination ends, finish it
if (pState->dest != pState->pNext->dest) EndGroupAction();
// return to last state
PropertyState *pKill = pState;
pState = pState->pNext;
delete pKill;
pState->eState = psNormal;
}
// Converts the RTF data held in sRTF to plain text and returns it.
// If the data cannot be parsed, the returned text is "Invalid RTF file: "
// followed by the parser's error message.
StdStrBuf C4RTFFile::GetPlainText()
{
	// discard leftovers of a previous run and begin with a single fresh state
	ClearState();
	pState = new PropertyState();
	pState->eState = psNormal;
	StdStrBuf sResult;
	// an empty RTF yields empty text
	if (sRTF.getSize()<=0) return sResult;
	try
	{
		// walk through the buffer character by character
		size_t iPos = 0;
		while (iPos < sRTF.getSize())
		{
			const char cCur = ((const char *) sRTF.getData())[iPos++];
			// inside a binary run every byte is taken literally
			if (pState->eState == psBinary)
			{
				if (--pState->iHexBinCnt == 0) pState->eState = psNormal;
				ParseChar(sResult, cCur);
				continue;
			}
			// otherwise, group delimiters and keywords come first
			if (cCur == '{')
				PushState();
			else if (cCur == '}')
				PopState();
			else if (cCur == '\\')
				ParseKeyword(sResult, iPos);
			else if (cCur == 0x0d || cCur == 0x0a)
			{
				// line breaks in the RTF source carry no meaning
			}
			else if (pState->eState == psNormal)
				// plain text character
				ParseChar(sResult, cCur);
			else if (pState->eState == psHex)
				// digit of a hex-escaped character
				ParseHexChar(sResult, cCur);
			else
				throw new ParserError("Invalid State");
		}
		// every opened group must have been closed again
		if (pState->pNext) throw new ParserError("Block not closed");
	}
	catch (ParserError *pError)
	{
		// broken RTF: the result is replaced by the error message
		sResult = "Invalid RTF file: ";
		sResult.Append(pError->ErrorText);
		delete pError;
	}
	// release all parser states
	ClearState();
	// FIXME: This is wrong, RTF contains charset information which should be used
	sResult.EnsureUnicode();
	return sResult;
}
#define kwdChars C4RTFFile::KeywordTableEntry::kwdChars
#define kwdSpec C4RTFFile::KeywordTableEntry::kwdSpec
#define kwdDest C4RTFFile::KeywordTableEntry::kwdDest
// Keyword descriptions.
// The table is searched linearly by C4RTFFile::TranslateKeyword and must end
// with an entry whose keyword is NULL.
// The line feed / carriage return entries are written as "\x0a" / "\x0d".
// The former spelling "\0x0a" is an octal NUL escape followed by the text
// "x0a", which strcmp sees as an empty string, so a backslash followed by a
// line break was never recognized.
C4RTFFile::KeywordTableEntry RTFKeywordTable [] = {
	// keyword      iDefaultPar  fForceDef  eType     szChars  idx
	{ "par",        0, false, kwdChars, "\n",   0 },
	{ "\x0a",       0, false, kwdChars, "\n",   0 },
	{ "\x0d",       0, false, kwdChars, "\n",   0 },
	{ "tab",        0, false, kwdChars, "\t",   0 },
	{ "ldblquote",  0, false, kwdChars, "\x93", 0 },
	{ "rdblquote",  0, false, kwdChars, "\x94", 0 },
	{ "lquote",     0, false, kwdChars, "\x91", 0 },
	{ "rquote",     0, false, kwdChars, "\x92", 0 },
	{ "bullet",     0, false, kwdChars, "\x95", 0 },
	{ "endash",     0, false, kwdChars, "\x96", 0 },
	{ "emdash",     0, false, kwdChars, "\x97", 0 },
	{ "bin",        0, false, kwdSpec,  NULL,   C4RTFFile::specBin },
	{ "*",          0, false, kwdSpec,  NULL,   C4RTFFile::specSkipDest },
	{ "'",          0, false, kwdSpec,  NULL,   C4RTFFile::specHex },
	{ "ansicpg",    0, false, kwdSpec,  NULL,   C4RTFFile::specAnsicpg },
	{ "author",     0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "buptim",     0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "colortbl",   0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "comment",    0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "creatim",    0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "doccomm",    0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "fonttbl",    0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "footer",     0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "footerf",    0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "footerl",    0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "footerr",    0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "footnote",   0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "ftncn",      0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "ftnsep",     0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "ftnsepc",    0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "header",     0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "headerf",    0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "headerl",    0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "headerr",    0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "info",       0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "keywords",   0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "operator",   0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "pict",       0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "printim",    0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "private1",   0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "revtim",     0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "rxe",        0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "stylesheet", 0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "subject",    0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "tc",         0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "title",      0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "txe",        0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "xe",         0, false, kwdDest,  NULL,   C4RTFFile::dsSkip },
	{ "{",          0, false, kwdChars, "{",    0 },
	{ "}",          0, false, kwdChars, "}",    0 },
	{ "\\",         0, false, kwdChars, "\\",   0 },
	{ NULL,         0, false, kwdChars, NULL,   0 }
};