Skip to content

C++实现编译器(10): 内置函数

参照《Writing An Interpreter/Compiler In Go》,改用C++实现。

项目源码: https://github.com/LeslieZhu/monkey-cpp

引言

本篇对应的源码位于目录: src/09/

在实现树遍历解释器的时候,已经有内置函数了,但这里要将它编译为字节码并且能够正常调用,还要处理作用域的问题。

调整部分代码

可以预知虚拟机版本比树遍历解释器更快,将虚拟机和解释器共用的一部分代码独立出来放到objects名字空间:

  • objects::newError()
  • objects::isError()
  • objects::isTruthy()
  • objects::nativeBoolToBooleanObject()
  • objects::evalArrayIndexExpression()
  • objects::evalHashIndexExpression()
  • objects::Function
  • objects::CompiledFunction

重构部分代码

在实现树遍历解释器的时候,已经有内置函数了,类似:

std::map<std::string, std::shared_ptr<objects::Builtin>> builtins
{
    {
        "len", std::make_shared<objects::Builtin>(&BuiltinFunc_Len)
    },
    {
        "first", std::make_shared<objects::Builtin>(&BuiltinFunc_First)
    },
    {
        "last", std::make_shared<objects::Builtin>(&BuiltinFunc_Last)
    },
    {
        "rest", std::make_shared<objects::Builtin>(&BuiltinFunc_Rest)
    },
    {
        "push", std::make_shared<objects::Builtin>(&BuiltinFunc_Push)
    },
    {
        "puts", std::make_shared<objects::Builtin>(&BuiltinFunc_Puts)
    }
};

在虚拟机这边字节码的操作数是一个整数作为索引来查找函数,为了保持统一,因此需要对这部分做一些调整,改成可以切片方式平稳迭代访问:

namespace objects
{
    struct BuiltinWithName
    {
        std::string Name;
        std::shared_ptr<objects::Builtin> Builtin;

        BuiltinWithName(const std::string name, BuiltinFunction fn)
            : Name(name)
        {
            Builtin = std::make_shared<objects::Builtin>(fn);
        }
    };

    std::vector<std::shared_ptr<objects::BuiltinWithName>> Builtins{
        std::make_shared<objects::BuiltinWithName>("len", &BuiltinFunc_Len),
        std::make_shared<objects::BuiltinWithName>("first", &BuiltinFunc_First),
        std::make_shared<objects::BuiltinWithName>("last", &BuiltinFunc_Last),
        std::make_shared<objects::BuiltinWithName>("rest", &BuiltinFunc_Rest),
        std::make_shared<objects::BuiltinWithName>("push", &BuiltinFunc_Push),
        std::make_shared<objects::BuiltinWithName>("puts", &BuiltinFunc_Puts),
    };

    std::shared_ptr<objects::Builtin> GetBuiltinByName(const std::string& name)
    {
        for(auto &def: Builtins)
        {
            if(def->Name == name)
            {
                return def->Builtin;
            }
        }

        return nullptr;
    }
}

之前解释器那边的内置函数定义简化为:

namespace evaluator
{
    std::map<std::string, std::shared_ptr<objects::Builtin>> builtins{
        {"len",   objects::GetBuiltinByName("len")},
        {"first", objects::GetBuiltinByName("first")},
        {"last",  objects::GetBuiltinByName("last")},
        {"rest",  objects::GetBuiltinByName("rest")},
        {"push",  objects::GetBuiltinByName("push")},
        {"puts",  objects::GetBuiltinByName("puts")}
    };
}

这样解释器和编译器都共用同一个内置函数定义了,而且编译器可以根据整数作为切片索引来方便虚拟机调用内置函数了。唯一的改变是内置函数返回NULL_OBJ的改为返回nullptr,方便虚拟机不会多处理一个NULL对象。

编译内置函数作用域

内置函数和其它定义函数的区别,就是内置函数不需要定义已经存在了,它所在那个作用域叫内置函数作用域:

namespace SymbolScopeType
{
    const SymbolScope GlobalScope = "GLOBAL";
    const SymbolScope LocalScope = "LOCAL";
    const SymbolScope BuiltinScope = "BUILTIN";
}

struct SymbolTable{

    //....
    std::shared_ptr<Symbol> DefineBuiltin(const int& index, std::string name)
    {
        auto symbol = std::make_shared<Symbol>(name, SymbolScopeType::BuiltinScope ,index);
        store[name] = symbol;
        return symbol;
    }
};

只需要创建编译器的时候,提前将内置函数加载到符号表并标记好作用域:

std::shared_ptr<Compiler> New()
{
    auto symbolTable = NewSymbolTable();

    int i = -1;
    for(auto &fn: objects::Builtins)
    {
        i += 1;
        symbolTable->DefineBuiltin(i, fn->Name);
    }

    auto compiler = std::make_shared<Compiler>();
    compiler->symbolTable = symbolTable;

    return compiler;
}

void loadSymbol(std::shared_ptr<compiler::Symbol> symbol)
{
    if (symbol->Scope == compiler::SymbolScopeType::GlobalScope)
    {
        emit(bytecode::OpcodeType::OpGetGlobal, {symbol->Index});
    }
    else if(symbol->Scope == compiler::SymbolScopeType::LocalScope)
    {
        emit(bytecode::OpcodeType::OpGetLocal, {symbol->Index});
    }
    else if(symbol->Scope == compiler::SymbolScopeType::BuiltinScope)
    {
        emit(bytecode::OpcodeType::OpGetBuiltin, {symbol->Index});
    }
}

执行内置函数

遇到OpGetBuiltin指令将内置函数对象入栈:

case bytecode::OpcodeType::OpGetBuiltin:
    {
        uint8_t builtinIndex;
        bytecode::ReadUint8(instructions, ip+1, builtinIndex);
        frame->ip += 1;

        // 将字典换成向量就是想通过索引整数获得稳定迭代
        auto definition = objects::Builtins[builtinIndex];

        auto result = Push(definition->Builtin);
        if(objects::isError(result))
        {
           return result;
        }
    }
    break;

遇到OpCall指令时判断调用函数的类型:

case bytecode::OpcodeType::OpCall:
    {
        uint8_t numArgs;
        bytecode::ReadUint8(instructions, ip+1, numArgs);
        frame->ip += 1;

        auto result = executeCall((int)numArgs);
        if(objects::isError(result))
        {
           return result;
        }

        frame = currentFrame();
        instructions = frame->Instruction();
        ins_size = instructions.size();
    }
    break;

分而治之:

std::shared_ptr<objects::Object>  executeCall(int numArgs)
{
    auto fnObj = stack[sp - 1 - numArgs];

    if(fnObj->Type() == objects::ObjectType::COMPILED_FUNCTION)
    {
        auto compiledFnObj = std::dynamic_pointer_cast<objects::CompiledFunction>(fnObj);
        return callFunction(compiledFnObj, numArgs);
    }
    else if(fnObj->Type() == objects::ObjectType::BUILTIN)
    {
        auto builtinFnObj = std::dynamic_pointer_cast<objects::Builtin>(fnObj);
        return callBuiltin(builtinFnObj, numArgs);
    }
    else
    {
        return objects::newError("calling non-function and non-built-in");
    }
}

std::shared_ptr<objects::Object> callFunction(std::shared_ptr<objects::ObjectCompiledFunction> compiledFnObj,int numArgs)
{
    if(compiledFnObj->NumParameters != numArgs)
    {
        std::string str1 = std::to_string(compiledFnObj->NumParameters);
        std::string str2 = std::to_string(numArgs);
        return objects::newError("wrong number of arguments: want=" + str1 + ", got=" + str2);
    }

    auto funcFrame = NewFrame(compiledFnObj, sp - numArgs);

    pushFrame(funcFrame);

    sp = funcFrame->basePointer + compiledFnObj->NumLocals;

    return nullptr;
}

std::shared_ptr<objects::Object> callBuiltin(std::shared_ptr<objects::ObjectBuiltin> builtinFnObj,int numArgs)
{
    std::vector<std::shared_ptr<objects::Object>> args;

    args.assign(stack.begin() + sp - numArgs, stack.begin() + sp);

    auto result = builtinFnObj->Fn(args);

    sp = sp - numArgs - 1;

    if(result != nullptr)
    {
        Push(result);
    } else {
        Push(objects::NULL_OBJ);
    }

    return nullptr;
}

执行效果:

>> let array = [1, 2, 3]
[1, 2, 3]

>> len(array)
3

>> push(array, 4)
[1, 2, 3, 4]

>> rest(array)
[2, 3]

>> first(array)
1

>> last(array)
3

>> first(rest(push(array, 4)))
2

>> puts(array)
[1, 2, 3]
null

总结

内置函数编译有点类似编译函数参数,都是最先加载到对应的作用域最前面,但区别是内置函数它本身是不会编译为字节码的,而是一个函数指针。