diff --git a/app/services/discovery_engine/query/filter_expression_helpers.rb b/app/services/discovery_engine/query/filter_expression_helpers.rb index e043b8e..c15ad73 100644 --- a/app/services/discovery_engine/query/filter_expression_helpers.rb +++ b/app/services/discovery_engine/query/filter_expression_helpers.rb @@ -6,7 +6,7 @@ module FilterExpressionHelpers # values in string_value_or_values def filter_any_string(string_or_array_field, string_value_or_values) Array(string_value_or_values) - .map { escape_and_quote(_1) } + .map { quote(_1) } .join(",") .then { "#{string_or_array_field}: ANY(#{_1})" } end @@ -69,9 +69,8 @@ def parenthesize(expression) "(#{expression})" end - def escape_and_quote(string_value) - escaped_string = string_value.gsub(/(["\\])/, '\\\\\1') - "\"#{escaped_string}\"" + def quote(string_value) + "\"#{string_value}\"" end end end diff --git a/app/services/discovery_engine/query/filters.rb b/app/services/discovery_engine/query/filters.rb index 972eb28..c98167a 100644 --- a/app/services/discovery_engine/query/filters.rb +++ b/app/services/discovery_engine/query/filters.rb @@ -1,6 +1,7 @@ module DiscoveryEngine::Query class Filters FILTER_PARAM_KEY_REGEX = /\A(filter_all|filter|reject)_(.+)\z/ + ACCEPTABLE_VALUE_REGEX = /\A[a-zA-Z0-9_:,\-\/]+\z/ FILTERABLE_STRING_FIELDS = %w[ content_purpose_supergroup @@ -70,9 +71,11 @@ def valid_filter_value?(value) # for the time being. This ensures that occasionally observed garbage parameters such as: # # filter_world_locations[\\\\]=all + # filter_something=blah%00blah # - # are ignored by checking that the value is either a string or an array of strings. - Array(value).all? { _1.is_a?(String) } + # are ignored by checking that the value is either a string or an array of strings, and + # matches an allowlist of permissible characters. + Array(value).all? { _1.is_a?(String) && _1.match?(ACCEPTABLE_VALUE_REGEX) } end end end diff --git a/spec/services/discovery_engine/query/filters_spec.rb b/spec/services/discovery_engine/query/filters_spec.rb index 9431fff..70be08c 100644 --- a/spec/services/discovery_engine/query/filters_spec.rb +++ b/spec/services/discovery_engine/query/filters_spec.rb @@ -32,6 +32,12 @@ it { is_expected.to be_nil } end + + context "with invalid characters in the filter value" do + let(:query_params) { { q: "garden centres", reject_link: "foo\u0000bar" } } + + it { is_expected.to be_nil } + end end context "with an 'any' string filter" do @@ -65,6 +71,12 @@ it { is_expected.to be_nil } end + context "with invalid characters in the filter value" do + let(:query_params) { { q: "garden centres", filter_link: "foo\u0000bar" } } + + it { is_expected.to be_nil } + end + context "with an unknown field" do let(:query_params) { { q: "garden centres", filter_foo: "bar" } } @@ -101,6 +113,12 @@ it { is_expected.to be_nil } end + context "with invalid characters in the filter value" do + let(:query_params) { { q: "garden centres", filter_all_link: "foo\u0000bar" } } + + it { is_expected.to be_nil } + end + context "with an unknown field" do let(:query_params) { { q: "garden centres", filter_all_foo: "bar" } } @@ -177,11 +195,5 @@ it { is_expected.to eq('(NOT link: ANY("/foo")) AND (content_purpose_supergroup: ANY("services")) AND ((part_of_taxonomy_tree: ANY("cafe-1234")) AND (part_of_taxonomy_tree: ANY("face-5678"))) AND (public_timestamp: IN(629510400,629596799))') } end - - context "with filters containing escapable characters" do - let(:query_params) { { q: "garden centres", filter_content_purpose_supergroup: "foo\"\\bar" } } - - it { is_expected.to eq('content_purpose_supergroup: ANY("foo\\"\\\\bar")') } - end end end