From 045b82f8f4c5c806827b7ff5ccc42be5ce32b67d Mon Sep 17 00:00:00 2001 From: Alex Patel Date: Wed, 20 Nov 2024 16:44:32 -0500 Subject: [PATCH] Treat anyOf: [] as matching no routing shards --- .../index_expression_builder.rb | 2 +- .../graphql/datastore_query/routing_picker.rb | 4 +- .../filtering/filter_value_set_extractor.rb | 10 ++++- .../graphql/query_adapter/filters.rb | 2 +- .../datastore_query/routing_picker.rbs | 1 + .../filtering/filter_value_set_extractor.rbs | 3 +- .../datastore_query/shard_routing_spec.rb | 37 +++++++++++++------ 7 files changed, 42 insertions(+), 17 deletions(-) diff --git a/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query/index_expression_builder.rb b/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query/index_expression_builder.rb index 1c58026d..c43d4aa6 100644 --- a/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query/index_expression_builder.rb +++ b/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query/index_expression_builder.rb @@ -15,7 +15,7 @@ class DatastoreQuery # Responsible for building a search index expression for a specific query based on the filters. class IndexExpressionBuilder def initialize(schema_names:) - @filter_value_set_extractor = Filtering::FilterValueSetExtractor.new(schema_names, Support::TimeSet::ALL) do |operator, filter_value| + @filter_value_set_extractor = Filtering::FilterValueSetExtractor.new(schema_names, Support::TimeSet::ALL, Support::TimeSet::EMPTY) do |operator, filter_value| case operator when :gt, :gte, :lt, :lte if date_string?(filter_value) diff --git a/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query/routing_picker.rb b/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query/routing_picker.rb index 67eb09c8..bcbb9662 100644 --- a/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query/routing_picker.rb +++ b/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query/routing_picker.rb @@ -16,8 +16,9 @@ class RoutingPicker def initialize(schema_names:) # @type var all_values_set: _RoutingValueSet all_values_set = RoutingValueSet::ALL + none_values_set = RoutingValueSet::NONE - @filter_value_set_extractor = Filtering::FilterValueSetExtractor.new(schema_names, all_values_set) do |operator, filter_value| + @filter_value_set_extractor = Filtering::FilterValueSetExtractor.new(schema_names, all_values_set, none_values_set) do |operator, filter_value| if operator == :equal_to_any_of # This calls `.compact` to remove `nil` filter_value values RoutingValueSet.of(filter_value.compact) @@ -70,6 +71,7 @@ def self.of_all_except(routing_values) end ALL = of_all_except([]) + NONE = of([]) def intersection(other_set) # Here we return `self` to preserve the commutative property of `intersection`. Returning `self` diff --git a/elasticgraph-graphql/lib/elastic_graph/graphql/filtering/filter_value_set_extractor.rb b/elasticgraph-graphql/lib/elastic_graph/graphql/filtering/filter_value_set_extractor.rb index c3667d66..b12ff3d6 100644 --- a/elasticgraph-graphql/lib/elastic_graph/graphql/filtering/filter_value_set_extractor.rb +++ b/elasticgraph-graphql/lib/elastic_graph/graphql/filtering/filter_value_set_extractor.rb @@ -12,9 +12,10 @@ module Filtering # Responsible for extracting a set of values from query filters, based on a using a custom # set type that is able to efficiently model the "all values" case. class FilterValueSetExtractor - def initialize(schema_names, all_values_set, &build_set_for_filter) + def initialize(schema_names, all_values_set, none_values_set, &build_set_for_filter) @schema_names = schema_names @all_values_set = all_values_set + @none_values_set = none_values_set @build_set_for_filter = build_set_for_filter end @@ -55,7 +56,7 @@ def filter_value_set_for_target_field_path(target_field_path, filter_hashes) # outside the `map_reduce_sets` block below so we only do it once instead of N times. target_field_path_parts = target_field_path.split(".") - # Here we intersect the filter value setbecause when we have multiple `filter_hashes`, + # Here we intersect the filter value set, because when we have multiple `filter_hashes`, # the filters are ANDed together. Only documents that match ALL the filters will be # returned. Therefore, we want the intersection of filter value sets. map_reduce_sets(filter_hashes, :intersection, negate: false) do |filter_hash| @@ -105,6 +106,11 @@ def filter_value_set_for_filter_hash_entry(field_or_op, filter_value, target_fie # Determines the set of filter values for an `any_of` clause, which is used for ORing multiple filters together. def filter_value_set_for_any_of(filter_hashes, target_field_path_parts, traversed_field_path_parts, negate:) + # Here we treat `any_of: []` as matching no routes. + if filter_hashes.empty? + return negate ? @all_values_set : @none_values_set + end + # Here we union the filter value sets because `any_of` represents an OR. If we can determine specific # filter values for all `any_of` clauses, we will OR them together. Alternately, if we cannot # determine specific filter values for any clauses, we will union `@all_values_set`, diff --git a/elasticgraph-graphql/lib/elastic_graph/graphql/query_adapter/filters.rb b/elasticgraph-graphql/lib/elastic_graph/graphql/query_adapter/filters.rb index 8e9e3452..a16ef13e 100644 --- a/elasticgraph-graphql/lib/elastic_graph/graphql/query_adapter/filters.rb +++ b/elasticgraph-graphql/lib/elastic_graph/graphql/query_adapter/filters.rb @@ -101,7 +101,7 @@ def can_match_nil_values_at?(path, filter) def filter_value_set_extractor @filter_value_set_extractor ||= - Filtering::FilterValueSetExtractor.new(schema_element_names, IncludesNilSet) do |operator, filter_value| + Filtering::FilterValueSetExtractor.new(schema_element_names, IncludesNilSet, ExcludesNilSet) do |operator, filter_value| if operator == :equal_to_any_of && filter_value.include?(nil) IncludesNilSet else diff --git a/elasticgraph-graphql/sig/elastic_graph/graphql/datastore_query/routing_picker.rbs b/elasticgraph-graphql/sig/elastic_graph/graphql/datastore_query/routing_picker.rbs index c2bc34b8..22bee8a0 100644 --- a/elasticgraph-graphql/sig/elastic_graph/graphql/datastore_query/routing_picker.rbs +++ b/elasticgraph-graphql/sig/elastic_graph/graphql/datastore_query/routing_picker.rbs @@ -45,6 +45,7 @@ module ElasticGraph def self.of_all_except: (::Enumerable[routingValue]) -> RoutingValueSet ALL: RoutingValueSet + NONE: RoutingValueSet INVERTED_TYPES: ::Hash[routingValueSetType, routingValueSetType] def inclusive?: () -> bool diff --git a/elasticgraph-graphql/sig/elastic_graph/graphql/filtering/filter_value_set_extractor.rbs b/elasticgraph-graphql/sig/elastic_graph/graphql/filtering/filter_value_set_extractor.rbs index 7ca44788..f5f2f84c 100644 --- a/elasticgraph-graphql/sig/elastic_graph/graphql/filtering/filter_value_set_extractor.rbs +++ b/elasticgraph-graphql/sig/elastic_graph/graphql/filtering/filter_value_set_extractor.rbs @@ -2,13 +2,14 @@ module ElasticGraph class GraphQL module Filtering class FilterValueSetExtractor[S < Support::_NegatableSet[S]] - def initialize: (SchemaArtifacts::RuntimeMetadata::SchemaElementNames, S) { (::Symbol, untyped) -> S? } -> void + def initialize: (SchemaArtifacts::RuntimeMetadata::SchemaElementNames, S, S) { (::Symbol, untyped) -> S? } -> void def extract_filter_value_set: (::Array[::Hash[::String, untyped]], ::Array[::String]) -> S private @schema_names: SchemaArtifacts::RuntimeMetadata::SchemaElementNames @all_values_set: S + @none_values_set: S @build_set_for_filter: ^(::Symbol, untyped) -> S? def filter_value_set_for_target_field_path: (::String, ::Array[::Hash[::String, untyped]]) -> S diff --git a/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_query/shard_routing_spec.rb b/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_query/shard_routing_spec.rb index 19358886..bf43c932 100644 --- a/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_query/shard_routing_spec.rb +++ b/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_query/shard_routing_spec.rb @@ -58,7 +58,7 @@ class GraphQL })).to search_shards_identified_by "abc", "def" end - it "ignores `nil` among other values in `equal_to_any_of` filter for a single `route_with_field_paths` field" do + it "treats `nil` among other values in `equal_to_any_of` filter as matching nothing for a single `route_with_field_paths` field" do expect(shard_routing_for(["name"], { "name" => {"equal_to_any_of" => ["abc", nil, "def"]} })).to search_shards_identified_by "abc", "def" @@ -88,14 +88,14 @@ class GraphQL }})).to search_shards_identified_by "def" end - it "ignores filters on other fields so long as they are not in an `any_of` clause (for multiple filters in one hash)" do + it "treats filters on other fields as matching everything so long as they are not in an `any_of` clause (for multiple filters in one hash)" do expect(shard_routing_for(["name"], { "name" => {"equal_to_any_of" => ["abc", "def"]}, "cost" => {"gt" => 10} })).to search_shards_identified_by "abc", "def" end - it "ignores filters on other fields so long as they are not in an `any_of` clause (for multiple filters in an array of hashes)" do + it "treats filters on other fields as matching everything so long as they are not in an `any_of` clause (for multiple filters in an array of hashes)" do expect(shard_routing_for(["name"], [ {"name" => {"equal_to_any_of" => ["abc", "def"]}}, {"cost" => {"gt" => 10}} @@ -162,7 +162,7 @@ def shard_routing_for(route_with_field_paths, filter_or_filters) end end - it "searches all shards when the query filters with `equal_to_any_of: nil` on a single `route_with_field_paths` field because our current filter logic ignores that filter and we must search all shards" do + it "searches all shards when the query filters with `equal_to_any_of: nil` on a single `route_with_field_paths` field because our current filter logic treats that filter as matching everything and we must search all shards" do expect(shard_routing_for(["name"], { "name" => {"equal_to_any_of" => nil} })).to search_all_shards @@ -253,19 +253,34 @@ def shard_routing_for(route_with_field_paths, filter_or_filters) ]})).to search_all_shards end - # TODO: Change behaviour so no shards are matched when given `anyOf => []`. - # Updated references of ignore and prune to use language such as "treated ... as `true`" it "searches no shards when we have an `any_of: []` filter because that will match no results" do expect(shard_routing_for(["name"], { "any_of" => [] - })).to search_all_shards + })).to search_no_shards end - # TODO: Change behaviour so no shards are matched when given `anyOf => {anyOf => []}` - # Updated references of ignore and prune to use language such as "treated ... as `true`" it "searches no shards when we have an `any_of: [{anyof: []}]` filter because that will match no results" do expect(shard_routing_for(["name"], { "any_of" => [{"any_of" => []}] + })).to search_no_shards + end + + it "searches no shards when we have an `any_of: []` AND `equal_to_any_of: ['abc']` filter because that will match no results" do + expect(shard_routing_for(["name"], { + "any_of" => [], + "equal_to_any_of" => ["abc"] + })).to search_no_shards + end + + it "searches all shards when we have an `any_of: [{equal_to_any_of: ['abc']}]` filter because that will match some results" do + expect(shard_routing_for(["name"], { + "any_of" => [{"equal_to_any_of" => ["abc"]}] + })).to search_all_shards + end + + it "searches all shards when we have an `any_of: [{anyof: []}, {equal_to_any_of: ['abc']}]` filter because that will match some results" do + expect(shard_routing_for(["name"], { + "any_of" => [{"any_of" => []}, {"equal_to_any_of" => ["abc"]}] })).to search_all_shards end @@ -366,14 +381,14 @@ def shard_routing_for(route_with_field_paths, filter_or_filters) }})).to search_shards_identified_by "def" end - it "ignores filters on other fields so long as they are not in an `any_of` clause (for multiple filters in one hash)" do + it "treats filters on other fields as matching everything so long as they are not in an `any_of` clause (for multiple filters in one hash)" do expect(shard_routing_for(["name"], { "name" => {"not" => {"equal_to_any_of" => ["abc", "def"]}}, "cost" => {"gt" => 10} })).to search_all_shards end - it "ignores filters on other fields so long as they are not in an `any_of` clause (for multiple filters in an array of hashes)" do + it "treats filters on other fields as matching everything so long as they are not in an `any_of` clause (for multiple filters in an array of hashes)" do expect(shard_routing_for(["name"], [ {"name" => {"not" => {"equal_to_any_of" => ["abc", "def"]}}}, {"cost" => {"gt" => 10}}