Hi Jacques,
For the expression-only scenario, we have designed a series of APIs for building Substrait expressions, either from scratch or from a parsed expression representation in another frontend framework. They mainly include:
/// Helper for assembling Substrait expressions together with the schema and
/// function declarations that accompany them.
///
/// The static `make*` factories produce Substrait protobuf objects. The
/// overloads that take a `SubstraitExprBuilder*` are presumably the ones that
/// record state (field names/types, function declarations) in that builder so
/// it can later be read back through `schema()` and `funcsInfo()` — confirm
/// against the implementation.
///
/// NOTE(review): ownership of the returned raw pointers is not expressed by
/// these signatures; document or encode it (e.g. arena / smart pointer) in the
/// implementation.
class SubstraitExprBuilder {
 public:
  /// Creates a Substrait type with the given nullability.
  static ::substrait::Type* makeType(bool isNullable);

  /// Creates a NamedStruct from parallel lists of column names and types.
  /// @param builder builder passed as context for the call.
  /// @param names   column names.
  /// @param types   column types; presumably positionally matched to `names`.
  static ::substrait::NamedStruct* makeNamedStruct(SubstraitExprBuilder* builder,
                                                   std::vector<std::string> names,
                                                   std::vector<::substrait::Type*> types);

  /// Creates a field reference from a field index.
  static ::substrait::Expression* makeFieldReference(size_t field);

  /// Creates a field reference from a column name and type.
  /// @param builder builder passed as context for the call.
  /// @param name    column name.
  /// @param type    column type.
  static ::substrait::Expression* makeFieldReference(SubstraitExprBuilder* builder,
                                                     std::string name,
                                                     ::substrait::Type* type);

  /// Creates the extension-function declaration for `func_name`.
  /// @param builder     builder passed as context for the call.
  /// @param func_name   function name, e.g. "add".
  /// @param args        argument expressions.
  /// @param output_type result type of the function.
  static ::substrait::extensions::SimpleExtensionDeclaration_ExtensionFunction* makeFunc(
      SubstraitExprBuilder* builder,
      std::string func_name,
      std::vector<::substrait::Expression*> args,
      ::substrait::Type* output_type);

  /// Creates an expression that applies `func_name` to `args`.
  /// @param builder     builder passed as context for the call.
  /// @param func_name   function name, e.g. "multiply".
  /// @param args        argument expressions.
  /// @param output_type result type of the expression.
  static ::substrait::Expression* makeExpr(SubstraitExprBuilder* builder,
                                           std::string func_name,
                                           std::vector<::substrait::Expression*> args,
                                           ::substrait::Type* output_type);

  /// Returns the NamedStruct schema associated with this builder.
  ::substrait::NamedStruct* schema();

  /// Returns a copy of the list of function declarations held by this builder.
  std::vector<::substrait::extensions::SimpleExtensionDeclaration_ExtensionFunction*>
  funcsInfo() const {
    return funcs_info_;
  }

 private:
  // Initialized explicitly so a default-initialized builder starts from a
  // well-defined state (matches what `new SubstraitExprBuilder()` yields).
  size_t func_anchor_{0};
  std::vector<std::string> names_;
  std::vector<::substrait::Type*> types_;
  ::substrait::NamedStruct* schema_{nullptr};
  std::vector<::substrait::extensions::SimpleExtensionDeclaration_ExtensionFunction*>
      funcs_info_;
};
In short, this builder provides interfaces to make substrait::Type, substrait::Expression, etc., and maintains a context from which the final NamedStruct and function info can be built. Typical usage looks like this:
// example 1
SubstraitExprBuilder* builder = new SubstraitExprBuilder();
::substrait::NamedStruct* schema = SubstraitExprBuilder::makeNamedStruct(
builder,
{"a", "b"},
{CREATE_SUBSTRAIT_TYPE_FULL(I64, false), CREATE_SUBSTRAIT_TYPE_FULL(I64, false)});
::substrait::Expression* field0 = SubstraitExprBuilder::makeFieldReference(0);
::substrait::Expression* field1 = SubstraitExprBuilder::makeFieldReference(1);
::substrait::Expression* multiply_expr = SubstraitExprBuilder::makeExpr(
builder, "multiply", {field0, field1}, CREATE_SUBSTRAIT_TYPE_FULL(I64, false));
::substrait::Expression* add_expr = SubstraitExprBuilder::makeExpr(
builder, "add", {multiply_expr, field1}, CREATE_SUBSTRAIT_TYPE_FULL(I64, false));
// after necessary substrait expression info built, we provide it to our evaluator and do the evaluation
CiderExprEvaluator evaluator({add_expr}, schema, {func_info}, ExprType::ProjectExpr);
auto result = evaluator.eval(in_data);
// example 2
SubstraitExprBuilder* inc_builder = new SubstraitExprBuilder();
::substrait::Expression* field2 = SubstraitExprBuilder::makeFieldReference(
inc_builder, "a", CREATE_SUBSTRAIT_TYPE_FULL(I64, false));
::substrait::Expression* field3 = SubstraitExprBuilder::makeFieldReference(
inc_builder, "b", CREATE_SUBSTRAIT_TYPE_FULL(I64, false));
::substrait::NamedStruct* inc_schema = inc_builder->schema();
::substrait::Expression* multiply_expr1 = SubstraitExprBuilder::makeExpr(
inc_builder, "multiply", {field2, field3}, CREATE_SUBSTRAIT_TYPE_FULL(I64, false));
::substrait::Expression* add_expr1 = SubstraitExprBuilder::makeExpr(
inc_builder, "add", {multiply_expr1, field3}, CREATE_SUBSTRAIT_TYPE_FULL(I64, false));
// after necessary substrait expression info built, we provide it to our evaluator and do the evaluation
CiderExprEvaluator evaluator({add_expr}, schema, {func_info}, ExprType::ProjectExpr);
auto result = evaluator.eval(in_data);
Could you help review this? We would appreciate your comments.
Thanks,
Yan