diff --git a/velox/expression/ReverseSignatureBinder.cpp b/velox/expression/ReverseSignatureBinder.cpp index b21715fef36d..495920e6bf1f 100644 --- a/velox/expression/ReverseSignatureBinder.cpp +++ b/velox/expression/ReverseSignatureBinder.cpp @@ -39,7 +39,9 @@ bool ReverseSignatureBinder::tryBind() { if (hasConstrainedIntegerVariable(signature_.returnType())) { return false; } - return SignatureBinderBase::tryBind(signature_.returnType(), returnType_); + tryBindSucceeded_ = + SignatureBinderBase::tryBind(signature_.returnType(), returnType_); + return tryBindSucceeded_; } } // namespace facebook::velox::exec diff --git a/velox/expression/ReverseSignatureBinder.h b/velox/expression/ReverseSignatureBinder.h index 02b92d217cf4..087adc4dbe77 100644 --- a/velox/expression/ReverseSignatureBinder.h +++ b/velox/expression/ReverseSignatureBinder.h @@ -40,15 +40,28 @@ class ReverseSignatureBinder : private SignatureBinderBase { /// tryBind() and only if tryBind() returns true. If a type variable is not /// determined by tryBind(), it maps to a nullptr. const std::unordered_map& bindings() const { + VELOX_CHECK( + tryBindSucceeded_, "tryBind() must be called first and succeed"); return typeVariablesBindings_; } + /// Return the integer bindings produced by 'tryBind'. This function should be + /// called after 'tryBind' and only if 'tryBind' returns true. + const std::unordered_map& integerBindings() const { + VELOX_CHECK( + tryBindSucceeded_, "tryBind() must be called first and succeed"); + return integerVariablesBindings_; + } + private: - /// Return whether there is a constraint on an integer variable in type - /// signature. + // Return whether there is a constraint on an integer variable in type + // signature. bool hasConstrainedIntegerVariable(const TypeSignature& type) const; const TypePtr returnType_; + + // True if 'tryBind' has been called and succeeded. False otherwise. + bool tryBindSucceeded_{false}; }; } // namespace facebook::velox::exec diff --git a/velox/expression/tests/ArgumentTypeFuzzerTest.cpp b/velox/expression/tests/ArgumentTypeFuzzerTest.cpp index e139ae0c4f7a..595a09ccaf8d 100644 --- a/velox/expression/tests/ArgumentTypeFuzzerTest.cpp +++ b/velox/expression/tests/ArgumentTypeFuzzerTest.cpp @@ -403,4 +403,250 @@ TEST_F(ArgumentTypeFuzzerTest, orderableConstraint) { } } +TEST_F(ArgumentTypeFuzzerTest, fuzzDecimalArgumentTypes) { + auto fuzzArgumentTypes = [](const exec::FunctionSignature& signature, + const TypePtr& returnType) { + std::mt19937 seed{0}; + ArgumentTypeFuzzer fuzzer{signature, returnType, seed}; + bool ok = fuzzer.fuzzArgumentTypes(kMaxVariadicArgs); + VELOX_CHECK( + ok, + "Signature: {}, Return type: {}", + signature.toString(), + returnType->toString()); + return fuzzer.argumentTypes(); + }; + + // Argument type must match return type. + auto signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .integerVariable("s") + .returnType("decimal(p,s)") + .argumentType("decimal(p,s)") + .build(); + + auto argTypes = fuzzArgumentTypes(*signature, DECIMAL(10, 7)); + ASSERT_EQ(1, argTypes.size()); + EXPECT_EQ(DECIMAL(10, 7)->toString(), argTypes[0]->toString()); + + signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .integerVariable("s") + .returnType("decimal(p,s)") + .argumentType("decimal(p,s)") + .argumentType("decimal(p,s)") + .variableArity() + .build(); + argTypes = fuzzArgumentTypes(*signature, DECIMAL(10, 7)); + ASSERT_LE(1, argTypes.size()); + for (const auto& argType : argTypes) { + EXPECT_EQ(DECIMAL(10, 7)->toString(), argType->toString()); + } + + // Argument type can be any decimal. + signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .integerVariable("s") + .returnType("boolean") + .argumentType("decimal(p,s)") + .build(); + + argTypes = fuzzArgumentTypes(*signature, BOOLEAN()); + ASSERT_EQ(1, argTypes.size()); + EXPECT_TRUE(argTypes[0]->isDecimal()); + + // Argument type can be any decimal with scale 30. + signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .returnType("boolean") + .argumentType("decimal(p,30)") + .build(); + + argTypes = fuzzArgumentTypes(*signature, BOOLEAN()); + ASSERT_EQ(1, argTypes.size()); + EXPECT_TRUE(argTypes[0]->isDecimal()); + EXPECT_EQ(30, getDecimalPrecisionScale(*argTypes[0]).second); + + // Another way to specify fixed scale. + signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .integerVariable("s", "3") + .returnType("boolean") + .argumentType("decimal(p,s)") + .build(); + + argTypes = fuzzArgumentTypes(*signature, BOOLEAN()); + ASSERT_EQ(1, argTypes.size()); + EXPECT_TRUE(argTypes[0]->isDecimal()); + EXPECT_EQ(3, getDecimalPrecisionScale(*argTypes[0]).second); + + // Argument type can be any decimal with precision 3. + signature = exec::FunctionSignatureBuilder() + .integerVariable("s") + .returnType("boolean") + .argumentType("decimal(3,s)") + .build(); + + argTypes = fuzzArgumentTypes(*signature, BOOLEAN()); + ASSERT_EQ(1, argTypes.size()); + EXPECT_TRUE(argTypes[0]->isDecimal()); + EXPECT_EQ(3, getDecimalPrecisionScale(*argTypes[0]).first); + + // Another way to specify fixed precision. + signature = exec::FunctionSignatureBuilder() + .integerVariable("p", "30") + .integerVariable("s") + .returnType("boolean") + .argumentType("decimal(p,s)") + .build(); + + argTypes = fuzzArgumentTypes(*signature, BOOLEAN()); + ASSERT_EQ(1, argTypes.size()); + EXPECT_TRUE(argTypes[0]->isDecimal()); + EXPECT_EQ(30, getDecimalPrecisionScale(*argTypes[0]).first); + + // Multiple arguments. All must be the same as return type. + signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .integerVariable("s") + .returnType("decimal(p,s)") + .argumentType("decimal(p,s)") + .argumentType("decimal(p,s)") + .argumentType("decimal(p,s)") + .build(); + + argTypes = fuzzArgumentTypes(*signature, DECIMAL(10, 7)); + ASSERT_EQ(3, argTypes.size()); + EXPECT_EQ(DECIMAL(10, 7)->toString(), argTypes[0]->toString()); + EXPECT_EQ(DECIMAL(10, 7)->toString(), argTypes[1]->toString()); + EXPECT_EQ(DECIMAL(10, 7)->toString(), argTypes[2]->toString()); + + // Multiple arguments. Some have fixed precision, scale or both. + signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .integerVariable("s") + .returnType("decimal(p,s)") + .argumentType("decimal(p,s)") + .argumentType("decimal(p,10)") + .argumentType("decimal(12,s)") + .argumentType("decimal(2,1)") + .build(); + + argTypes = fuzzArgumentTypes(*signature, DECIMAL(10, 7)); + ASSERT_EQ(4, argTypes.size()); + EXPECT_EQ(DECIMAL(10, 7)->toString(), argTypes[0]->toString()); + EXPECT_EQ(DECIMAL(10, 10)->toString(), argTypes[1]->toString()); + EXPECT_EQ(DECIMAL(12, 7)->toString(), argTypes[2]->toString()); + EXPECT_EQ(DECIMAL(2, 1)->toString(), argTypes[3]->toString()); + + // Multiple pairs of precision and scale variables. + signature = exec::FunctionSignatureBuilder() + .integerVariable("p1") + .integerVariable("s1") + .integerVariable("p2") + .integerVariable("s2") + .returnType("bigint") + .argumentType("decimal(p1,s1)") + .argumentType("decimal(p2,s2)") + .argumentType("decimal(p1,s1)") + .argumentType("decimal(p2,s2)") + .build(); + argTypes = fuzzArgumentTypes(*signature, BIGINT()); + ASSERT_EQ(4, argTypes.size()); + EXPECT_TRUE(argTypes[0]->isDecimal()); + EXPECT_TRUE(argTypes[1]->isDecimal()); + EXPECT_EQ(argTypes[0]->toString(), argTypes[2]->toString()); + EXPECT_EQ(argTypes[1]->toString(), argTypes[3]->toString()); +} + +TEST_F(ArgumentTypeFuzzerTest, fuzzDecimalReturnType) { + auto fuzzReturnType = [](const exec::FunctionSignature& signature) { + std::mt19937 seed{0}; + ArgumentTypeFuzzer fuzzer{signature, seed}; + return fuzzer.fuzzReturnType(); + }; + + // Return type can be any decimal. + auto signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .integerVariable("s") + .returnType("decimal(p,s)") + .argumentType("decimal(p,s)") + .build(); + + auto returnType = fuzzReturnType(*signature); + EXPECT_TRUE(returnType->isDecimal()); + + signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .integerVariable("s") + .returnType("decimal(p,s)") + .argumentType("decimal(p,s)") + .argumentType("decimal(p,s)") + .variableArity() + .build(); + + returnType = fuzzReturnType(*signature); + EXPECT_TRUE(returnType->isDecimal()); + + // Return type can be any decimal with scale 3. + signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .integerVariable("s") + .returnType("decimal(p,3)") + .argumentType("decimal(p,s)") + .build(); + + returnType = fuzzReturnType(*signature); + EXPECT_TRUE(returnType->isDecimal()); + EXPECT_EQ(3, getDecimalPrecisionScale(*returnType).second); + + // Another way to specify that scale must be 3. + signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .integerVariable("s", "3") + .returnType("decimal(p,s)") + .argumentType("decimal(p,s)") + .build(); + + returnType = fuzzReturnType(*signature); + EXPECT_TRUE(returnType->isDecimal()); + EXPECT_EQ(3, getDecimalPrecisionScale(*returnType).second); + + // Return type can be any decimal with precision 22. + signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .integerVariable("s") + .returnType("decimal(22,s)") + .argumentType("decimal(p,s)") + .build(); + + returnType = fuzzReturnType(*signature); + EXPECT_TRUE(returnType->isDecimal()); + EXPECT_EQ(22, getDecimalPrecisionScale(*returnType).first); + + // Another way to specify that precision must be 22. + signature = exec::FunctionSignatureBuilder() + .integerVariable("p", "22") + .integerVariable("s") + .returnType("decimal(p,s)") + .argumentType("decimal(p,s)") + .build(); + + returnType = fuzzReturnType(*signature); + EXPECT_TRUE(returnType->isDecimal()); + EXPECT_EQ(22, getDecimalPrecisionScale(*returnType).first); + + // Return type can only be DECIMAL(10, 7). + signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .integerVariable("s") + .returnType("decimal(10,7)") + .argumentType("decimal(p,s)") + .build(); + + returnType = fuzzReturnType(*signature); + EXPECT_EQ(DECIMAL(10, 7)->toString(), returnType->toString()); +} + } // namespace facebook::velox::test diff --git a/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp b/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp index 44c8a0a69bf3..1b32b5c94674 100644 --- a/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp +++ b/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp @@ -27,6 +27,9 @@ namespace facebook::velox::test { std::string typeToBaseName(const TypePtr& type) { + if (type->isDecimal()) { + return "decimal"; + } return boost::algorithm::to_lower_copy(std::string{type->kindName()}); } @@ -35,6 +38,92 @@ std::optional baseNameToTypeKind(const std::string& typeName) { return tryMapNameToTypeKind(kindName); } +namespace { + +bool isDecimalBaseName(const std::string& typeName) { + auto normalized = boost::algorithm::to_lower_copy(typeName); + + return normalized == "decimal"; +} + +/// Returns true only if 'str' contains digits. +bool isPositiveInteger(const std::string& str) { + return !str.empty() && + std::find_if(str.begin(), str.end(), [](unsigned char c) { + return !std::isdigit(c); + }) == str.end(); +} + +} // namespace + +void ArgumentTypeFuzzer::determineUnboundedIntegerVariables( + const exec::TypeSignature& type) { + if (!isDecimalBaseName(type.baseName())) { + return; + } + + VELOX_CHECK_EQ(2, type.parameters().size()) + + const auto& precision = type.parameters()[0].baseName(); + const auto& scale = type.parameters()[1].baseName(); + + // Bind 'name' variable, if not already bound, using 'constant' constraint + // ('name'='123'). Return bound value if 'name' is already bound or was + // successfully bound to a constant value. Return std::nullopt otherwise. + auto tryFixedBinding = [&](const auto& name) -> std::optional { + auto it = variables().find(name); + if (it == variables().end()) { + VELOX_CHECK( + isPositiveInteger(name), + "Precision and scale of a decimal type must refer to a variable " + "or specify a position integer constant: {}", + name) + return std::stoi(name); + } + + if (integerBindings_.count(name) > 0) { + return integerBindings_[name]; + } + + if (isPositiveInteger(it->second.constraint())) { + const auto value = std::stoi(it->second.constraint()); + integerBindings_[name] = value; + return value; + } + + return std::nullopt; + }; + + std::optional p = tryFixedBinding(precision); + std::optional s = tryFixedBinding(scale); + + if (p.has_value() && s.has_value()) { + return; + } + + if (s.has_value()) { + p = std::max(ShortDecimalType::kMinPrecision, s.value()); + if (p < LongDecimalType::kMaxPrecision) { + p = p.value() + rand32(0, LongDecimalType::kMaxPrecision - p.value()); + } + + integerBindings_[precision] = p.value(); + return; + } + + if (p.has_value()) { + s = rand32(0, p.value()); + integerBindings_[scale] = s.value(); + return; + } + + p = rand32(1, LongDecimalType::kMaxPrecision); + s = rand32(0, p.value()); + + integerBindings_[precision] = p.value(); + integerBindings_[scale] = s.value(); +} + void ArgumentTypeFuzzer::determineUnboundedTypeVariables() { for (auto& [variableName, variableInfo] : variables()) { if (!variableInfo.isTypeParameter()) { @@ -74,6 +163,7 @@ bool ArgumentTypeFuzzer::fuzzArgumentTypes(uint32_t maxVariadicArgs) { return false; } bindings_ = binder.bindings(); + integerBindings_ = binder.integerBindings(); } else { for (const auto& [name, _] : signature_.variables()) { bindings_.insert({name, nullptr}); @@ -81,13 +171,16 @@ bool ArgumentTypeFuzzer::fuzzArgumentTypes(uint32_t maxVariadicArgs) { } determineUnboundedTypeVariables(); + for (const auto& argType : signature_.argumentTypes()) { + determineUnboundedIntegerVariables(argType); + } for (auto i = 0; i < formalArgsCnt; i++) { TypePtr actualArg; if (formalArgs[i].baseName() == "any") { actualArg = randType(); } else { actualArg = exec::SignatureBinder::tryResolveType( - formalArgs[i], variables(), bindings_); + formalArgs[i], variables(), bindings_, integerBindings_); VELOX_CHECK(actualArg != nullptr); } argumentTypes_.push_back(actualArg); @@ -114,15 +207,23 @@ TypePtr ArgumentTypeFuzzer::fuzzReturnType() { "Only fuzzing uninitialized return type is allowed."); determineUnboundedTypeVariables(); - if (signature_.returnType().baseName() == "any") { + determineUnboundedIntegerVariables(signature_.returnType()); + + const auto& returnType = signature_.returnType(); + + if (returnType.baseName() == "any") { returnType_ = randType(); - return returnType_; } else { returnType_ = exec::SignatureBinder::tryResolveType( - signature_.returnType(), variables(), bindings_); - VELOX_CHECK_NE(returnType_, nullptr); - return returnType_; + returnType, variables(), bindings_, integerBindings_); } + + VELOX_CHECK_NOT_NULL(returnType_); + return returnType_; +} + +int32_t ArgumentTypeFuzzer::rand32(int32_t min, int32_t max) { + return boost::random::uniform_int_distribution(min, max)(rng_); } } // namespace facebook::velox::test diff --git a/velox/expression/tests/utils/ArgumentTypeFuzzer.h b/velox/expression/tests/utils/ArgumentTypeFuzzer.h index 540a353221bf..f21ced70c521 100644 --- a/velox/expression/tests/utils/ArgumentTypeFuzzer.h +++ b/velox/expression/tests/utils/ArgumentTypeFuzzer.h @@ -65,10 +65,18 @@ class ArgumentTypeFuzzer { return signature_.variables(); } - /// Bind each type variable that is not determined by the return type to a - /// randomly generated type. + // Returns random integer between min and max inclusive. + int32_t rand32(int32_t min, int32_t max); + + // Bind each type variable that is not determined by the return type to a + // randomly generated type. void determineUnboundedTypeVariables(); + // Bind integer variables used in specified decimal(p,s) type signature to + // randomly generated values if not already bound. + // Noop if 'type' is not a decimal type signature. + void determineUnboundedIntegerVariables(const exec::TypeSignature& type); + TypePtr randType(); /// Generates an orderable random type, including structs, and arrays. @@ -83,6 +91,8 @@ class ArgumentTypeFuzzer { /// Bindings between type variables and their actual types. std::unordered_map bindings_; + std::unordered_map integerBindings_; + /// RNG to generate random types for unbounded type variables when necessary. std::mt19937& rng_; };