Aul: Parse scripts into an AST, then generate bytecode from that

This commit contains a fairly substantial rewrite of the C4Script code
generator. Instead of generating bytecode while parsing the script,
we're now parsing the script into a syntax tree, and have any further
processing happen on that instead of the raw source.

At this time, the code generator emits the same bytecode as the old
parser; however, several optimization opportunities arise now that code
can be emitted in a different order than the one in which the author
specified it.

Compared to the old compiler, this one is still rather deficient when
dealing with incorrect code; it's also not emitting several warnings
that used to be diagnosed.
directional-lights
Nicolas Hake 2016-05-12 19:43:48 +02:00
parent 78e5f8528d
commit 092a23c2f7
10 changed files with 2438 additions and 1323 deletions

View File

@ -1066,6 +1066,7 @@ src/script/C4AulExec.h
src/script/C4AulFunc.cpp
src/script/C4AulFunc.h
src/script/C4Aul.h
src/script/C4AulAST.h
src/script/C4AulLink.cpp
src/script/C4AulParse.cpp
src/script/C4AulParse.h

View File

@ -49,7 +49,6 @@ public:
C4AulParseError(C4ScriptHost *pScript, const char *pMsg); // constructor
C4AulParseError(class C4AulParse * state, const char *pMsg); // constructor
C4AulParseError(C4AulScriptFunc * Fn, const char *SPos, const char *pMsg);
static C4AulParseError FromSPos(const C4ScriptHost *host, const char *SPos, C4AulScriptFunc *Fn, const char *msg, const char *Idtf = nullptr, bool Warn = false);
};
// execution error

View File

@ -0,0 +1,544 @@
/*
* OpenClonk, http://www.openclonk.org
*
* Copyright (c) 2016, The OpenClonk Team and contributors
*
* Distributed under the terms of the ISC license; see accompanying file
* "COPYING" for details.
*
* "Clonk" is a registered trademark of Matthes Bender, used with permission.
* See accompanying file "TRADEMARK" for details.
*
* To redistribute this file separately, substitute the full license texts
* for the above references.
*/
// C4Aul abstract syntax tree nodes
#ifndef INC_C4AulAST
#define INC_C4AulAST
#include <memory>
#include <map>
#include <vector>
#include "script/C4Value.h"
// Forward declarations of every concrete AST node type, so that the visitor
// interface below can name them before the node classes are defined.
namespace aul { namespace ast {
	class Noop;
	class StringLit;
	class IntLit;
	class BoolLit;
	class ArrayLit;
	class ProplistLit;
	class NilLit;
	class ThisLit;
	class VarExpr;
	class UnOpExpr;
	class BinOpExpr;
	class SubscriptExpr;
	class SliceExpr;
	class CallExpr;
	class ParExpr;
	class FunctionExpr;
	class Block;
	class Return;
	class ForLoop;
	class RangeLoop;
	class DoLoop;
	class WhileLoop;
	class Break;
	class Continue;
	class If;
	class VarDecl;
	class FunctionDecl;
	class IncludePragma;
	class AppendtoPragma;
	class Script;
}}

namespace aul {
// Visitor interface over the AST node types declared above.
//
// Every overload has an empty default body, so a subclass only needs to
// override the node types it is interested in. As usual in C++, a subclass
// that declares any visit() overload hides the inherited ones unless it also
// writes `using AstVisitor::visit;`.
class AstVisitor
{
public:
	virtual ~AstVisitor() = default;

	virtual void visit(const ::aul::ast::Noop *) {}
	virtual void visit(const ::aul::ast::StringLit *) {}
	virtual void visit(const ::aul::ast::IntLit *) {}
	virtual void visit(const ::aul::ast::BoolLit *) {}
	virtual void visit(const ::aul::ast::ArrayLit *) {}
	virtual void visit(const ::aul::ast::ProplistLit *) {}
	virtual void visit(const ::aul::ast::NilLit *) {}
	virtual void visit(const ::aul::ast::ThisLit *) {}
	// The parameter is intentionally unnamed, like in every other overload:
	// the default body does nothing with it.
	virtual void visit(const ::aul::ast::VarExpr *) {}
	virtual void visit(const ::aul::ast::UnOpExpr *) {}
	virtual void visit(const ::aul::ast::BinOpExpr *) {}
	virtual void visit(const ::aul::ast::SubscriptExpr *) {}
	virtual void visit(const ::aul::ast::SliceExpr *) {}
	virtual void visit(const ::aul::ast::CallExpr *) {}
	virtual void visit(const ::aul::ast::ParExpr *) {}
	virtual void visit(const ::aul::ast::Block *) {}
	virtual void visit(const ::aul::ast::Return *) {}
	virtual void visit(const ::aul::ast::ForLoop *) {}
	virtual void visit(const ::aul::ast::RangeLoop *) {}
	virtual void visit(const ::aul::ast::DoLoop *) {}
	virtual void visit(const ::aul::ast::WhileLoop *) {}
	virtual void visit(const ::aul::ast::Break *) {}
	virtual void visit(const ::aul::ast::Continue *) {}
	virtual void visit(const ::aul::ast::If *) {}
	virtual void visit(const ::aul::ast::VarDecl *) {}
	virtual void visit(const ::aul::ast::FunctionDecl *) {}
	virtual void visit(const ::aul::ast::FunctionExpr *) {}
	virtual void visit(const ::aul::ast::IncludePragma *) {}
	virtual void visit(const ::aul::ast::AppendtoPragma *) {}
	virtual void visit(const ::aul::ast::Script *) {}

	// This template will catch any type missing from the list above
	// to ensure that the nodes don't accidentally get visited via a
	// base class instead
	template<class T>
	void visit(const T *) = delete;
};
}
namespace aul { namespace ast {
// Boilerplate placed at the top of every concrete node class `cls`:
//  - accept() passes `this` (statically typed as `const cls *`) to the
//    visitor, which selects the matching AstVisitor::visit overload;
//  - New() forwards its remaining arguments to a constructor of cls via
//    std::make_unique, stores `loc` in the new node and returns the node.
// The expansion ends in `private:`, so a class using the macro has to reopen
// `public:` before declaring its public members.
#define AST_NODE(cls) \
public: \
virtual void accept(::aul::AstVisitor *v) const override { v->visit(this); } \
template<class... T> static std::unique_ptr<cls> New(const char *loc, T &&...t) { auto n = std::make_unique<cls>(std::forward<T>(t)...); n->loc = loc; return n; } \
private:
// Common base class of all AST nodes.
class Node
{
public:
	virtual ~Node() = default;

	// File/line/column triple. No declaration visible in this header uses it.
	struct Location
	{
		std::string file;
		size_t line = 0;
		size_t column = 0;
	};

	// Position associated with this node (presumably a pointer into the
	// script source text - confirm against the parser). The New() factory
	// generated by AST_NODE assigns it; the default keeps nodes that are
	// constructed any other way from carrying an indeterminate pointer.
	const char *loc = nullptr;

	// Double-dispatch entry point; concrete nodes implement it via AST_NODE.
	virtual void accept(::aul::AstVisitor *) const = 0;
};
// Base class of every node that can be used as a statement.
class Stmt : public Node
{
public:
	// Tells whether executing this statement produces a value.
	// Plain statements do not; subclasses may override.
	virtual bool has_value() const
	{
		return false;
	}
};
using StmtPtr = std::unique_ptr<Stmt>;
// Statement node that has no members of its own.
class Noop : public Stmt
{
AST_NODE(Noop);
};
class Expr : public Stmt
{
public:
virtual bool has_value() const override { return true; }
};
typedef std::unique_ptr<Expr> ExprPtr;
// Common base class of the literal expression nodes (the *Lit classes).
class Literal : public Expr
{};
// Literal node that stores a copy of a string value.
class StringLit : public Literal
{
	AST_NODE(StringLit);
public:
	explicit StringLit(const std::string &value)
		: value{value}
	{
	}

	std::string value;
};
// Literal node that stores a signed 32-bit integer value.
class IntLit : public Literal
{
	AST_NODE(IntLit);
public:
	explicit IntLit(int32_t value) : value(value) {}
	// Same signed type as the constructor argument. Storing the value as
	// uint32_t would turn negative literals into large positive numbers as
	// soon as the member is widened (e.g. to intptr_t).
	int32_t value;
};
// Literal node that stores a boolean value.
class BoolLit : public Literal
{
	AST_NODE(BoolLit);
public:
	explicit BoolLit(bool value)
		: value{value}
	{
	}

	bool value;
};
// Array literal node; `values` owns the element expressions.
class ArrayLit : public Literal
{
AST_NODE(ArrayLit);
public:
std::vector<ExprPtr> values;
};
// Proplist literal node; `values` owns (key, expression) pairs.
// The pairs live in a vector, so the container itself neither sorts the
// entries nor rejects duplicate keys.
class ProplistLit : public Literal
{
AST_NODE(ProplistLit);
public:
std::vector<std::pair<std::string, ExprPtr>> values;
};
// Literal node without any payload (named after the `nil` literal).
class NilLit : public Literal
{
AST_NODE(NilLit);
};
// Literal node without any payload (named after the `this` keyword).
class ThisLit : public Literal
{
AST_NODE(ThisLit);
};
// Expression node that refers to something by identifier; it stores a copy
// of the identifier string.
class VarExpr : public Expr
{
	AST_NODE(VarExpr);
public:
	explicit VarExpr(const std::string &identifier)
		: identifier{identifier}
	{
	}

	std::string identifier;
};
// Unary operator expression: an operator code applied to one owned operand.
class UnOpExpr : public Expr
{
	AST_NODE(UnOpExpr);
public:
	// The initializers are written in member declaration order (operand,
	// then op), which is the order in which they are executed anyway.
	UnOpExpr(int op, ExprPtr &&operand) : operand(std::move(operand)), op(op) {}
	ExprPtr operand;
	int op; // TODO: Make this a proper operator type
};
// Binary operator expression: an operator code applied to two owned operands.
class BinOpExpr : public Expr
{
	AST_NODE(BinOpExpr);
public:
	// The initializers are written in member declaration order (lhs, rhs,
	// then op), which is the order in which they are executed anyway.
	BinOpExpr(int op, ExprPtr &&lhs, ExprPtr &&rhs) : lhs(std::move(lhs)), rhs(std::move(rhs)), op(op) {}
	ExprPtr lhs, rhs;
	int op; // TODO: Make this a proper operator type
	// Marker for '='
	enum { AssignmentOp = -1 };
};
class SubscriptExpr : public Expr
{
AST_NODE(SubscriptExpr);
public:
SubscriptExpr(ExprPtr &&object, ExprPtr &&index) : object(std::move(object)), index(std::move(index)) {}
ExprPtr object, index;
};
class SliceExpr : public Expr
{
AST_NODE(SliceExpr);
public:
SliceExpr(ExprPtr &&object, ExprPtr &&start, ExprPtr &&end) : object(std::move(object)), start(std::move(start)), end(std::move(end)) {}
ExprPtr object, start, end;
};
// Call expression node. It has no custom constructor; the members are meant
// to be filled in after creation.
class CallExpr : public Expr
{
AST_NODE(CallExpr);
public:
bool safe_call = false; // Will this call fail gracefully when the function doesn't exist?
bool append_unnamed_pars = false; // Will this call append all unnamed parameters of the current function?
// Optional; DefaultRecursiveVisitor only visits it when it is set.
ExprPtr context;
// Owned argument expressions.
std::vector<ExprPtr> args;
// Presumably the name of the called function - confirm against the parser.
std::string callee;
};
class ParExpr : public Expr
{
AST_NODE(ParExpr);
public:
explicit ParExpr(ExprPtr &&arg) : arg(std::move(arg)) {}
ExprPtr arg;
};
// Statement node that owns a sequence of child statements.
class Block : public Stmt
{
AST_NODE(Block);
public:
std::vector<StmtPtr> children;
};
// Common base class of the control-flow statement nodes
// (Return, Loop, LoopControl and If derive from it).
class ControlFlow : public Stmt
{};
class Return : public ControlFlow
{
AST_NODE(Return);
public:
explicit Return(ExprPtr &&value) : value(std::move(value)) {}
ExprPtr value;
};
// Common base class of the loop nodes: every loop owns a condition
// expression and a body statement.
class Loop : public ControlFlow
{
public:
	ExprPtr cond;
	StmtPtr body;
};
using LoopPtr = std::unique_ptr<Loop>;
// Loop node with two parts in addition to Loop's cond and body.
// DefaultRecursiveVisitor treats init, cond and incr as optional (it checks
// them for null) and visits them in that order before the body.
class ForLoop : public Loop
{
AST_NODE(ForLoop);
public:
StmtPtr init;
ExprPtr incr;
};
// Loop node that additionally names a variable; this is the node type
// returned by C4AulParse::Parse_ForEach.
class RangeLoop : public Loop
{
AST_NODE(RangeLoop);
public:
// Presumably the name of the iteration variable - confirm against the parser.
std::string var;
// NOTE(review): looks like this marks a variable declared by the loop itself - confirm.
bool scoped_var = false;
};
// Loop node without additional members. DefaultRecursiveVisitor visits its
// body before its condition.
class DoLoop : public Loop
{
AST_NODE(DoLoop);
};
// Loop node without additional members. DefaultRecursiveVisitor visits its
// condition before its body.
class WhileLoop : public Loop
{
AST_NODE(WhileLoop);
};
// Common base class of the Break and Continue nodes.
class LoopControl : public ControlFlow
{};
// Loop-control node without any payload (named after `break`).
class Break : public LoopControl
{
AST_NODE(Break);
};
// Loop-control node without any payload (named after `continue`).
class Continue : public LoopControl
{
AST_NODE(Continue);
};
// Conditional statement node: a condition plus two owned branches.
// DefaultRecursiveVisitor always visits cond and iftrue and treats iffalse
// as optional (it is only visited when set).
class If : public ControlFlow
{
AST_NODE(If);
public:
ExprPtr cond;
StmtPtr iftrue, iffalse;
};
// Common base class of the declaration nodes (VarDecl, FunctionDecl and
// Pragma derive from it). Script stores its contents as DeclPtr.
class Decl : public Stmt
{
};
using DeclPtr = std::unique_ptr<Decl>;
// Variable declaration node. A single node can declare several variables,
// each with an optional initializer expression.
class VarDecl : public Decl
{
	AST_NODE(VarDecl);
public:
	enum class Scope
	{
		Func,
		Object,
		Global
	};

	// Both members get a defined default so that a freshly created node never
	// carries indeterminate values; creators are still expected to set them.
	Scope scope = Scope::Func;
	bool constant = false;

	struct Var
	{
		std::string name;
		// May be empty; DefaultRecursiveVisitor only visits it when set.
		ExprPtr init;
	};
	std::vector<Var> decls;
};
// Data shared by FunctionDecl and FunctionExpr: the parameter list and the
// body. This class does not derive from Node; the concrete classes combine
// it with a node base class.
class Function
{
public:
	// One declared parameter: its name and its value type (C4V_Any unless
	// specified otherwise).
	struct Parameter
	{
		std::string name;
		C4V_Type type;

		explicit Parameter(const std::string &name, C4V_Type type = C4V_Any)
			: name{name}
			, type{type}
		{
		}
	};

	std::vector<Parameter> params;
	bool has_unnamed_params = false;
	std::unique_ptr<Block> body;

	virtual ~Function() = default;
	virtual void accept(::aul::AstVisitor *v) const = 0;
};
// Named function declaration: a Decl node that also carries the Function
// data (parameters and body).
class FunctionDecl : public Decl, public Function
{
	AST_NODE(FunctionDecl);
public:
	explicit FunctionDecl(const std::string &name)
		: name{name}
	{
	}

	std::string name;
	bool is_global = false;
};
// Function used as an expression: an Expr node that also carries the
// Function data (parameters and body). It adds no members of its own.
class FunctionExpr : public Expr, public Function
{
// This node is used for constant proplists
AST_NODE(FunctionExpr);
public:
};
// Common base class of the IncludePragma and AppendtoPragma nodes.
class Pragma : public Decl
{};
// Include pragma node; stores a copy of the pragma's argument in `what`.
class IncludePragma : public Pragma
{
	AST_NODE(IncludePragma);
public:
	explicit IncludePragma(const std::string &what)
		: what{what}
	{
	}

	std::string what;
};
// Appendto pragma node. In contrast to IncludePragma it can also be created
// without an argument, in which case `what` stays empty.
class AppendtoPragma : public Pragma
{
	AST_NODE(AppendtoPragma);
public:
	AppendtoPragma() = default;

	explicit AppendtoPragma(const std::string &what)
		: what{what}
	{
	}

	std::string what;
};
class Script : public Node
{
AST_NODE(Script);
public:
virtual ~Script() {}
std::vector<DeclPtr> declarations;
};
#undef AST_NODE
}}
namespace aul {
// A recursive visitor that visits the children of all nodes. Override the
// visit() functions you're interested in in child classes.
//
// Children are visited in the same order as before. Every child pointer is
// checked before it is followed, so a partially filled tree (for example one
// produced from erroneous input) is traversed as far as it goes instead of
// dereferencing a null pointer.
class DefaultRecursiveVisitor : public AstVisitor
{
	// Visits `child` unless the owning pointer is empty.
	template<class Ptr>
	void descend(const Ptr &child)
	{
		if (child)
			child->accept(this);
	}

public:
	virtual ~DefaultRecursiveVisitor() {}

	// Keep the overloads this class does not override visible.
	using AstVisitor::visit;

	virtual void visit(const ::aul::ast::ArrayLit *n) override
	{
		for (const auto &c : n->values)
			descend(c);
	}
	virtual void visit(const ::aul::ast::ProplistLit *n) override
	{
		// Only the value expressions are nodes; the keys are plain strings.
		for (const auto &c : n->values)
			descend(c.second);
	}
	virtual void visit(const ::aul::ast::UnOpExpr *n) override
	{
		descend(n->operand);
	}
	virtual void visit(const ::aul::ast::BinOpExpr *n) override
	{
		descend(n->lhs);
		descend(n->rhs);
	}
	virtual void visit(const ::aul::ast::SubscriptExpr *n) override
	{
		descend(n->object);
		descend(n->index);
	}
	virtual void visit(const ::aul::ast::SliceExpr *n) override
	{
		descend(n->object);
		descend(n->start);
		descend(n->end);
	}
	virtual void visit(const ::aul::ast::CallExpr *n) override
	{
		descend(n->context);
		for (const auto &a : n->args)
			descend(a);
	}
	virtual void visit(const ::aul::ast::ParExpr *n) override
	{
		descend(n->arg);
	}
	virtual void visit(const ::aul::ast::Block *n) override
	{
		for (const auto &s : n->children)
			descend(s);
	}
	virtual void visit(const ::aul::ast::Return *n) override
	{
		descend(n->value);
	}
	virtual void visit(const ::aul::ast::ForLoop *n) override
	{
		descend(n->init);
		descend(n->cond);
		descend(n->incr);
		descend(n->body);
	}
	virtual void visit(const ::aul::ast::RangeLoop *n) override
	{
		descend(n->cond);
		descend(n->body);
	}
	virtual void visit(const ::aul::ast::DoLoop *n) override
	{
		// Body first, then the condition.
		descend(n->body);
		descend(n->cond);
	}
	virtual void visit(const ::aul::ast::WhileLoop *n) override
	{
		descend(n->cond);
		descend(n->body);
	}
	virtual void visit(const ::aul::ast::If *n) override
	{
		descend(n->cond);
		descend(n->iftrue);
		descend(n->iffalse);
	}
	virtual void visit(const ::aul::ast::VarDecl *n) override
	{
		for (const auto &d : n->decls)
			descend(d.init);
	}
	virtual void visit(const ::aul::ast::FunctionDecl *n) override
	{
		descend(n->body);
	}
	virtual void visit(const ::aul::ast::FunctionExpr *n) override
	{
		descend(n->body);
	}
	virtual void visit(const ::aul::ast::Script *n) override
	{
		for (const auto &d : n->declarations)
			descend(d);
	}
};
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -17,52 +17,21 @@
#ifndef INC_C4AulCompiler
#define INC_C4AulCompiler
#include "script/C4Value.h"
enum C4AulBCCType : int;
#include "script/C4AulAST.h"
// Compiler front door for Aul scripts: declares static entry points that take
// AST nodes (Preparse/Compile) next to instance-level helpers that emit
// individual instructions, jumps and loop bookkeeping.
// NOTE(review): this listing comes from a diff view and appears to interleave
// removed and added declarations - confirm against the actual header.
class C4AulCompiler
{
public:
C4AulScriptFunc *Fn;
bool at_jump_target = false;
int stack_height = 0;
// Entry point taking a single function's AST and the script function to fill.
static void Compile(C4AulScriptFunc *out, const ::aul::ast::Function *f);
int AddBCC(const char * SPos, C4AulBCCType eType, intptr_t X = 0);
void ErrorOut(const char * SPos, class C4AulError & e);
void RemoveLastBCC();
C4V_Type GetLastRetType(C4AulScriptEngine * Engine, C4V_Type to); // for warning purposes
// Entry points taking a whole script's AST plus an output and a source host.
static void Preparse(C4ScriptHost *out, C4ScriptHost *source, const ::aul::ast::Script *s);
static void Compile(C4ScriptHost *out, C4ScriptHost *source, const ::aul::ast::Script *s);
int AddVarAccess(const char * TokenSPos, C4AulBCCType eType, intptr_t varnum);
C4AulBCC MakeSetter(const char * TokenSPos, bool fLeaveValue = false); // Prepares to generate a setter for the last value that was generated
int JumpHere(); // Get position for a later jump to next instruction added
void SetJumpHere(int iJumpOp); // Use the next inserted instruction as jump target for the given jump operation
void SetJump(int iJumpOp, int iWhere);
void AddJump(const char * SPos, C4AulBCCType eType, int iWhere);
// Keep track of loops and break/continue usages
struct Loop
{
// One recorded break or continue; entries are chained through Next.
struct Control
{
bool Break;
int Pos;
Control *Next;
};
Control *Controls;
int StackSize;
// Next entry in the chain that starts at active_loops (presumably the enclosing loop - confirm).
Loop *Next;
};
Loop *active_loops = NULL;
void PushLoop();
void PopLoop(int ContinueJump);
void AddLoopControl(const char * SPos, bool fBreak);
// Calls PopLoop(0) until active_loops is null.
~C4AulCompiler()
{
while (active_loops) PopLoop(0);
}
private:
// Helper classes that are only declared here; their definitions are not part of this header.
class ConstexprEvaluator;
class ConstantResolver;
class PreparseAstVisitor;
class CodegenAstVisitor;
};
#endif

File diff suppressed because it is too large Load Diff

View File

@ -17,7 +17,10 @@
#ifndef INC_C4AulParse
#define INC_C4AulParse
#include <stack>
#include "script/C4Aul.h"
#include "script/C4AulAST.h"
#include "script/C4AulCompiler.h"
#include "script/C4AulScriptFunc.h"
@ -42,12 +45,11 @@ extern const C4ScriptOpDef C4ScriptOpMap[];
class C4AulParse
{
public:
enum Type { PARSER, PREPARSER };
C4AulParse(C4ScriptHost * a, enum Type Type);
C4AulParse(class C4ScriptHost *host);
C4AulParse(C4AulScriptFunc * Fn, C4AulScriptContext* context, C4AulScriptEngine *Engine);
~C4AulParse();
void Parse_DirectExec();
void Parse_Script(C4ScriptHost *);
std::unique_ptr<::aul::ast::FunctionDecl> Parse_DirectExec(const char *code);
std::unique_ptr<::aul::ast::Script> Parse_Script(C4ScriptHost *);
private:
C4AulScriptFunc *Fn; C4ScriptHost * Host; C4ScriptHost * pOrgScript;
@ -58,28 +60,21 @@ private:
C4AulTokenType TokenType; // current token type
int32_t cInt; // current int constant
C4String * cStr; // current string constant
enum Type Type; // emitting bytecode?
C4AulScriptContext* ContextToExecIn;
void Parse_Function();
void Parse_FuncBody();
void Parse_Statement();
void Parse_Block();
int Parse_Params(int iMaxCnt, const char * sWarn, C4AulFunc * pFunc = 0);
void Parse_Array();
void Parse_PropList();
void Parse_DoWhile();
void Parse_While();
void Parse_If();
void Parse_For();
void Parse_ForEach();
void Parse_Expression(int iParentPrio = -1);
void Parse_Var();
void Parse_Local();
void Parse_Static();
void Parse_Const();
C4Value Parse_ConstExpression(C4PropListStatic * parent, C4String * Name);
C4Value Parse_ConstPropList(C4PropListStatic * parent, C4String * Name);
void Store_Const(C4PropListStatic * parent, C4String * Name, const C4Value & v);
std::unique_ptr<::aul::ast::FunctionDecl> Parse_ToplevelFunctionDecl();
void Parse_Function(::aul::ast::Function *func);
std::unique_ptr<::aul::ast::Stmt> Parse_Statement();
std::unique_ptr<::aul::ast::Block> Parse_Block();
std::unique_ptr<::aul::ast::ArrayLit> Parse_Array();
std::unique_ptr<::aul::ast::ProplistLit> Parse_PropList();
std::unique_ptr<::aul::ast::DoLoop> Parse_DoWhile();
std::unique_ptr<::aul::ast::WhileLoop> Parse_While();
std::unique_ptr<::aul::ast::If> Parse_If();
std::unique_ptr<::aul::ast::ForLoop> Parse_For();
std::unique_ptr<::aul::ast::RangeLoop> Parse_ForEach();
void Parse_CallParams(::aul::ast::CallExpr *call);
std::unique_ptr<::aul::ast::Expr> Parse_Expression(int iParentPrio = -1);
std::unique_ptr<::aul::ast::VarDecl> Parse_Var();
bool AdvanceSpaces(); // skip whitespaces; return whether script ended
int GetOperator(const char* pScript);
@ -95,26 +90,11 @@ private:
void Error(const char *pMsg, ...) GNUC_FORMAT_ATTRIBUTE_O;
void AppendPosition(StdStrBuf & Buf);
void DebugChunk();
C4AulCompiler codegen;
int AddVarAccess(C4AulBCCType eType, intptr_t varnum)
{ if (Type == PARSER) return codegen.AddVarAccess(TokenSPos, eType, varnum); else return -1; }
int AddBCC(C4AulBCCType eType, intptr_t X = 0)
{ if (Type == PARSER) return codegen.AddBCC(TokenSPos, eType, X); else return -1; }
C4V_Type GetLastRetType(C4V_Type to)
{ return codegen.GetLastRetType(Engine, to); }
C4AulBCC MakeSetter(bool fLeaveValue = false)
{ return Type == PARSER ? codegen.MakeSetter(TokenSPos, fLeaveValue) : C4AulBCC(AB_ERR, 0); }
void SetJumpHere(int iJumpOp)
{ if (Type == PARSER) codegen.SetJumpHere(iJumpOp); }
void AddJump(C4AulBCCType eType, int iWhere)
{ if (Type == PARSER) codegen.AddJump(TokenSPos, eType, iWhere); }
void PushLoop()
{ if (Type == PARSER) codegen.PushLoop(); }
void PopLoop(int Jump)
{ if (Type == PARSER) codegen.PopLoop(Jump); }
friend class C4AulParseError;
std::stack<const char *> parse_pos_stack;
void PushParsePos();
void PopParsePos();
void DiscardParsePos();
};
#endif

View File

@ -20,6 +20,10 @@
#include "C4Include.h"
#include "script/C4ScriptHost.h"
#include "script/C4AulAST.h"
#include "script/C4AulCompiler.h"
#include "script/C4AulParse.h"
#include "script/C4AulScriptFunc.h"
#include "object/C4Def.h"
#include "script/C4Effect.h"
@ -49,6 +53,7 @@ C4ScriptHost::~C4ScriptHost()
void C4ScriptHost::Clear()
{
C4ComponentHost::Clear();
ast.reset();
Script.Clear();
LocalValues.Clear();
SourceScripts.clear();

View File

@ -21,7 +21,9 @@
#define INC_C4ScriptHost
#include "c4group/C4ComponentHost.h"
#include "script/C4Aul.h"
#include "script/C4AulAST.h"
// aul script state
enum C4AulScriptState
@ -85,6 +87,10 @@ protected:
friend class C4AulProfiler;
friend class C4AulScriptEngine;
friend class C4AulDebug;
friend class C4AulCompiler;
private:
std::unique_ptr<::aul::ast::Script> ast;
};
// script host for System.ocg scripts and scenario section Objects.c

View File

@ -148,6 +148,17 @@ TEST_F(AulTest, Locals)
EXPECT_EQ(C4VInt(42), RunCode("local p1 = { i = 42 }, p2 = new p1 {}; func Main() { return p2.i; }", false));
}
// A function stored as property "a" of a proplist returns the sibling
// property "b" (which is 1). Main invokes it via a->Call(a.a), and the test
// expects the script to evaluate to 1.
TEST_F(AulTest, ProplistFunctions)
{
EXPECT_EQ(C4VInt(1), RunCode(R"(
local a = new Global {
a = func() { return b; },
b = 1
};
func Main() { return a->Call(a.a); }
)", false));
}
TEST_F(AulTest, Eval)
{
EXPECT_EQ(C4VInt(42), RunExpr("eval(\"42\")"));