From 744d160e968f2c8662c9137f844547e7fbf37055 Mon Sep 17 00:00:00 2001
From: joocer
Date: Mon, 23 Dec 2024 12:26:55 +0000
Subject: [PATCH 1/2] HOUSEKEEPING

---
 opteryx/config.py                             |  3 -
 opteryx/connectors/aws_s3_connector.py        | 10 ---
 opteryx/connectors/base/base_connector.py     |  2 +-
 opteryx/connectors/capabilities/cacheable.py  |  8 +-
 opteryx/cursor.py                             | 24 +++---
 opteryx/managers/cache/memcached.py           |  2 +-
 opteryx/managers/cache/redis.py               |  6 +-
 opteryx/managers/cache/valkey.py              |  6 +-
 opteryx/managers/catalog/tarchia_provider.py  |  2 +-
 opteryx/models/logical_column.py              | 13 ----
 opteryx/operators/base_plan_node.py           |  7 --
 .../planner/cost_based_optimizer/__init__.py  |  2 +-
 .../strategies/constant_folding.py            |  2 +-
 .../logical_planner/logical_planner.py        | 32 +-------
 opteryx/planner/physical_planner.py           | 10 ---
 opteryx/planner/views/__init__.py             |  2 -
 opteryx/shared/__init__.py                    |  9 +--
 opteryx/shared/rolling_log.py                 | 73 ------------------
 opteryx/utils/__init__.py                     | 16 +---
 opteryx/utils/arrow.py                        |  2 +-
 .../test_sql_fuzzer_single_table_select.py    |  5 +-
 tests/misc/test_connection.py                 |  1 +
 tests/misc/test_cursor.py                     |  9 +++
 .../test_optimizations_invoked.py             |  4 +-
 .../test_shapes_and_errors_battery.py         |  3 +
 tests/sql_battery/tests/system.run_tests      |  1 +
 tests/sql_battery/tests/v2_planner.run_tests  | 77 ++++++++++++++++++-
 tests/storage/test_cache_memcached.py         | 10 +++
 tests/storage/test_cache_redis.py             | 18 +++++
 tests/storage/test_cache_valkey.py            | 17 ++++
 30 files changed, 175 insertions(+), 201 deletions(-)
 delete mode 100644 opteryx/shared/rolling_log.py

diff --git a/opteryx/config.py b/opteryx/config.py
index aeb1a7878..c2ad0d5bf 100644
--- a/opteryx/config.py
+++ b/opteryx/config.py
@@ -183,9 +183,6 @@ def get(key: str, default: Optional[typing.Any] = None) -> Optional[typing.Any]:
 ENABLE_RESOURCE_LOGGING: bool = bool(get("ENABLE_RESOURCE_LOGGING", False))
 # size of morsels to push between steps
 MORSEL_SIZE: int = int(get("MORSEL_SIZE", 64 * 1024 * 1024))
-# query log
-QUERY_LOG_LOCATION:str = get("QUERY_LOG_LOCATION", False)
-QUERY_LOG_SIZE:int = int(get("QUERY_LOG_SIZE", 100))
 # not GA
 PROFILE_LOCATION:str = get("PROFILE_LOCATION")
 # fmt:on
diff --git a/opteryx/connectors/aws_s3_connector.py b/opteryx/connectors/aws_s3_connector.py
index c185c534d..e8405dfc3 100644
--- a/opteryx/connectors/aws_s3_connector.py
+++ b/opteryx/connectors/aws_s3_connector.py
@@ -68,10 +68,6 @@ def __init__(self, credentials=None, **kwargs):
         self.minio = Minio(end_point, access_key, secret_key, secure=secure)
         self.dataset = self.dataset.replace(".", OS_SEP)
 
-        # we're going to cache the first blob as the schema and dataset reader
-        # sometimes both start here
-        self.cached_first_blob = None
-
     @single_item_cache
     def get_list_of_blob_names(self, *, prefix: str) -> List[str]:
         bucket, object_path, _, _ = paths.get_parts(prefix)
@@ -94,12 +90,6 @@ def read_dataset(
             prefix=self.dataset,
         )
 
-        # Check if the first blob was cached earlier
-        if self.cached_first_blob is not None:
-            yield self.cached_first_blob  # Use cached blob
-            blob_names = blob_names[1:]  # Skip first blob
-            self.cached_first_blob = None
-
         for blob_name in blob_names:
             try:
                 decoder = get_decoder(blob_name)
diff --git a/opteryx/connectors/base/base_connector.py b/opteryx/connectors/base/base_connector.py
index 851505abc..af3e31751 100644
--- a/opteryx/connectors/base/base_connector.py
+++ b/opteryx/connectors/base/base_connector.py
@@ -148,7 +148,7 @@ def __next__(self) -> pyarrow.Table:  # pragma: no cover
         """
         raise NotImplementedError("Subclasses must implement __next__ method.")
 
-    def close(self) -> None:
+    def close(self) -> None:  # pragma: no cover
         """
         Close the reader and release any resources.
         """
diff --git a/opteryx/connectors/capabilities/cacheable.py b/opteryx/connectors/capabilities/cacheable.py
index 30e170232..9535d64ec 100644
--- a/opteryx/connectors/capabilities/cacheable.py
+++ b/opteryx/connectors/capabilities/cacheable.py
@@ -88,7 +88,7 @@ async def wrapper(blob_name: str, statistics, pool: MemoryPool, **kwargs):
             remote_cache.touch(key)  # help the remote cache track LRU
             statistics.bufferpool_hits += 1
             read_buffer_ref = await pool.commit(payload)  # type: ignore
-            while read_buffer_ref is None:
+            while read_buffer_ref is None:  # pragma: no cover
                 await asyncio.sleep(0.1)
                 statistics.stalls_writing_to_read_buffer += 1
                 read_buffer_ref = await pool.commit(payload)  # type: ignore
@@ -103,7 +103,7 @@ async def wrapper(blob_name: str, statistics, pool: MemoryPool, **kwargs):
             statistics.remote_cache_hits += 1
             system_statistics.remote_cache_reads += 1
             read_buffer_ref = await pool.commit(payload)  # type: ignore
-            while read_buffer_ref is None:
+            while read_buffer_ref is None:  # pragma: no cover
                 await asyncio.sleep(0.1)
                 statistics.stalls_writing_to_read_buffer += 1
                 read_buffer_ref = await pool.commit(payload)  # type: ignore
@@ -119,7 +119,7 @@ async def wrapper(blob_name: str, statistics, pool: MemoryPool, **kwargs):
             statistics.cache_misses += 1
             system_statistics.origin_reads += 1
             return read_buffer_ref
-        except Exception as e:
+        except Exception as e:  # pragma: no cover
             print(f"Error in {func.__name__}: {e}")
             raise  # Optionally re-raise the error after logging it
@@ -136,7 +136,7 @@ async def wrapper(blob_name: str, statistics, pool: MemoryPool, **kwargs):
         ):
             # if we didn't get it from the buffer pool (origin or remote cache) we add it
             evicted = buffer_pool.set(key, payload)
-            if evicted:
+            if evicted:  # pragma: no cover
                 # if we're evicting items we just put in the cache, stop
                 if evicted in my_keys:
                     evictions_remaining = 0
diff --git a/opteryx/cursor.py b/opteryx/cursor.py
index 188e0ab49..1216015dc 100644
--- a/opteryx/cursor.py
+++ b/opteryx/cursor.py
@@ -40,17 +40,9 @@
 from opteryx.exceptions import SqlError
 from opteryx.exceptions import UnsupportedSyntaxError
 from opteryx.models import QueryStatistics
-from opteryx.shared.rolling_log import RollingLog
 from opteryx.utils import sql
 
 PROFILE_LOCATION = config.PROFILE_LOCATION
-QUERY_LOG_LOCATION = config.QUERY_LOG_LOCATION
-QUERY_LOG_SIZE = config.QUERY_LOG_SIZE
-
-
-ROLLING_LOG = None
-if QUERY_LOG_LOCATION:
-    ROLLING_LOG = RollingLog(QUERY_LOG_LOCATION, max_entries=QUERY_LOG_SIZE)
 
 
 class CursorState(Enum):
@@ -191,9 +183,6 @@ def _inner_execute(
         except RuntimeError as err:  # pragma: no cover
             raise SqlError(f"Error Executing SQL Statement ({err})") from err
 
-        if ROLLING_LOG:
-            ROLLING_LOG.append(operation)
-
         results = execute(plan, statistics=self._statistics)
 
         start = time.time_ns()
@@ -337,6 +326,19 @@ def execute_to_arrow(
         results = self._execute_statements(operation, params, visibility_filters)
         if results is not None:
             result_data, self._result_type = results
+
+            if self._result_type == ResultType.NON_TABULAR:
+                import orso
+
+                meta_dataframe = orso.DataFrame(
+                    rows=[(result_data.record_count,)],  # type: ignore
+                    schema=RelationSchema(
+                        name="table",
+                        columns=[FlatColumn(name="rows_affected", type=OrsoTypes.INTEGER)],
+                    ),
+                )  # type: ignore
+                return meta_dataframe.arrow()
+
             if limit is not None:
                 result_data = utils.arrow.limit_records(result_data, limit)  # type: ignore
         if isinstance(result_data, pyarrow.Table):
diff --git a/opteryx/managers/cache/memcached.py b/opteryx/managers/cache/memcached.py
index 01d4db1e9..e5761678b 100644
--- a/opteryx/managers/cache/memcached.py
+++ b/opteryx/managers/cache/memcached.py
@@ -49,7 +49,7 @@ def _memcached_server(**kwargs):
     try:
         from pymemcache.client import base
-    except ImportError as err:
+    except ImportError as err:  # pragma: no cover
         raise MissingDependencyError(err.name) from err
 
     try:
diff --git a/opteryx/managers/cache/redis.py b/opteryx/managers/cache/redis.py
index b1eeec358..e80b707ea 100644
--- a/opteryx/managers/cache/redis.py
+++ b/opteryx/managers/cache/redis.py
@@ -38,7 +38,7 @@ def _redis_server(**kwargs):
     try:
         import redis
-    except ImportError as err:
+    except ImportError as err:  # pragma: no cover
         raise MissingDependencyError(err.name) from err
 
     return redis.from_url(redis_config)
@@ -80,7 +80,7 @@ def get(self, key: bytes) -> Union[bytes, None]:
             if response:
                 self.hits += 1
                 return bytes(response)
-        except Exception as err:
+        except Exception as err:  # pragma: no cover
             self._consecutive_failures += 1
             if self._consecutive_failures >= MAXIMUM_CONSECUTIVE_FAILURES:
                 import datetime
@@ -99,7 +99,7 @@ def set(self, key: bytes, value: bytes) -> None:
         try:
             self._server.set(key, value)
             self.sets += 1
-        except Exception as err:
+        except Exception as err:  # pragma: no cover
             # if we fail to set, stop trying
             self._consecutive_failures = MAXIMUM_CONSECUTIVE_FAILURES
             self.errors += 1
diff --git a/opteryx/managers/cache/valkey.py b/opteryx/managers/cache/valkey.py
index b82e6c4b2..36f1330fc 100644
--- a/opteryx/managers/cache/valkey.py
+++ b/opteryx/managers/cache/valkey.py
@@ -28,7 +28,7 @@ def _valkey_server(**kwargs):
     try:
         import valkey  # Assuming `valkey` is the client library's name
-    except ImportError as err:
+    except ImportError as err:  # pragma: no cover
         raise MissingDependencyError(err.name) from err
 
     return valkey.from_url(valkey_config)  # Example instantiation of the client
@@ -70,7 +70,7 @@ def get(self, key: bytes) -> Union[bytes, None]:
             if response:
                 self.hits += 1
                 return bytes(response)
-        except Exception as err:
+        except Exception as err:  # pragma: no cover
             self._consecutive_failures += 1
             if self._consecutive_failures >= MAXIMUM_CONSECUTIVE_FAILURES:
                 import datetime
@@ -89,7 +89,7 @@ def set(self, key: bytes, value: bytes) -> None:
         try:
             self._server.set(key, value)  # Adjust based on Valkey's API
             self.sets += 1
-        except Exception as err:
+        except Exception as err:  # pragma: no cover
             # if we fail to set, stop trying
             self._consecutive_failures = MAXIMUM_CONSECUTIVE_FAILURES
             self.errors += 1
diff --git a/opteryx/managers/catalog/tarchia_provider.py b/opteryx/managers/catalog/tarchia_provider.py
index 03f6e05f8..f2e25ff2e 100644
--- a/opteryx/managers/catalog/tarchia_provider.py
+++ b/opteryx/managers/catalog/tarchia_provider.py
@@ -44,7 +44,7 @@ def is_valid_url(url: str) -> bool:
     try:
         result = urlparse(url)
         return all([result.scheme, result.netloc])
-    except ValueError:
+    except ValueError:  # pragma: no cover
         return False
diff --git a/opteryx/models/logical_column.py b/opteryx/models/logical_column.py
index fd2ba1aa4..36fde92d7 100644
--- a/opteryx/models/logical_column.py
+++ b/opteryx/models/logical_column.py
@@ -85,16 +85,3 @@ def copy(self):
 
     def __repr__(self) -> str:
         return f""
-
-    def to_dict(self) -> dict:
-        from opteryx.utils import dataclass_to_dict
-
-        return {
-            "class": "LogicalColumn",
-            "node_type": self.node_type.name,
-            "source_column": self.source_column,
-            "source_connector": self.source_connector,
-            "source": self.source,
-            "alias": self.alias,
-            "schema_column": dataclass_to_dict(self.schema_column),
-        }
diff --git a/opteryx/operators/base_plan_node.py b/opteryx/operators/base_plan_node.py
index 4a69aea17..ea786bfd4 100644
--- a/opteryx/operators/base_plan_node.py
+++ b/opteryx/operators/base_plan_node.py
@@ -61,13 +61,6 @@ def __init__(self, *, properties, **parameters):
         self.records_out = 0
         self.bytes_out = 0
 
-    def to_json(self) -> bytes:  # pragma: no cover
-        import orjson
-
-        from opteryx.utils import dataclass_to_dict
-
-        return orjson.dumps(dataclass_to_dict(self.do))
-
     @classmethod
     def from_json(cls, json_obj: str) -> "BasePlanNode":  # pragma: no cover
         raise NotImplementedError()
diff --git a/opteryx/planner/cost_based_optimizer/__init__.py b/opteryx/planner/cost_based_optimizer/__init__.py
index 2663c1c7d..35f0980b5 100644
--- a/opteryx/planner/cost_based_optimizer/__init__.py
+++ b/opteryx/planner/cost_based_optimizer/__init__.py
@@ -152,7 +152,7 @@ def do_cost_based_optimizer(plan: LogicalPlan, statistics: QueryStatistics) -> L
     Returns:
         LogicalPlan: The optimized logical plan.
     """
-    if DISABLE_OPTIMIZER:
+    if DISABLE_OPTIMIZER:  # pragma: no cover
         message = "[OPTERYX] The optimizer has been disabled, 'DISABLE_OPTIMIZER' variable is TRUE."
         print(message)
         statistics.add_message(message)
diff --git a/opteryx/planner/cost_based_optimizer/strategies/constant_folding.py b/opteryx/planner/cost_based_optimizer/strategies/constant_folding.py
index 242048d44..7ce5e04a9 100644
--- a/opteryx/planner/cost_based_optimizer/strategies/constant_folding.py
+++ b/opteryx/planner/cost_based_optimizer/strategies/constant_folding.py
@@ -128,7 +128,7 @@ def fold_constants(root: Node, statistics: QueryStatistics) -> Node:
 
     if root.node_type == NodeType.COMPARISON_OPERATOR:
         if (
-            root.value in ("Like", "Ilike")
+            root.value in ("Like", "ILike")
             and root.left.node_type == NodeType.IDENTIFIER
             and root.right.node_type == NodeType.LITERAL
             and root.right.value == "%"
diff --git a/opteryx/planner/logical_planner/logical_planner.py b/opteryx/planner/logical_planner/logical_planner.py
index a379f4cca..507c45615 100644
--- a/opteryx/planner/logical_planner/logical_planner.py
+++ b/opteryx/planner/logical_planner/logical_planner.py
@@ -73,7 +73,7 @@ class LogicalPlanNode(Node):
     def copy(self) -> "Node":
         return LogicalPlanNode(**super().copy().properties)
 
-    def __str__(self):
+    def __str__(self):  # pragma: no cover
         try:
             # fmt:off
             node_type = self.node_type
@@ -502,12 +502,6 @@ def inner_query_planner(ast_branch):
         if previous_step_id is not None:
             inner_plan.add_edge(previous_step_id, step_id)
 
-        if distinct_step.on and _projection[0].source_column == "FROM":
-            cols = ", ".join([format_expression(c) for c in distinct_step.on])
-            raise UnsupportedSyntaxError(
-                f"Did you mean 'SELECT DISTINCT ON ({cols}) {cols} FROM {_projection[0].alias};'?"
-            )
-
     # order
     if _order_by:
         order_step = LogicalPlanNode(node_type=LogicalPlanStepType.Order)
@@ -794,28 +788,6 @@ def create_node_relation(relation):
     return root_node, sub_plan
 
-def analyze_query(statement) -> LogicalPlan:
-    root_node = "Analyze"
-    plan = LogicalPlan()
-
-    from_step = LogicalPlanNode(node_type=LogicalPlanStepType.Scan)
-    table = statement[root_node]["table_name"]
-    from_step.relation = ".".join(part["value"] for part in table)
-    from_step.alias = from_step.relation
-    from_step.start_date = table[0].get("start_date")
-    from_step.end_date = table[0].get("end_date")
-    step_id = random_string()
-    plan.add_node(step_id, from_step)
-
-    metadata_step = LogicalPlanNode(node_type=LogicalPlanStepType.MetadataWriter)
-    previous_step_id, step_id = step_id, random_string()
-    plan.add_node(step_id, metadata_step)
-    plan.add_edge(previous_step_id, step_id)
-
-    return plan
-    # write manifest
-
-
 def plan_execute_query(statement) -> LogicalPlan:
     import orjson
@@ -1112,7 +1084,7 @@ def plan_show_variables(statement):
 
 
 QUERY_BUILDERS = {
-    "Analyze": analyze_query,
+    # "Analyze": analyze_query,
     "Execute": plan_execute_query,
     "Explain": plan_explain,
     "Query": plan_query,
diff --git a/opteryx/planner/physical_planner.py b/opteryx/planner/physical_planner.py
index 3233a6e7c..414cced93 100644
--- a/opteryx/planner/physical_planner.py
+++ b/opteryx/planner/physical_planner.py
@@ -95,16 +95,6 @@ def create_physical_plan(logical_plan, query_properties) -> PhysicalPlan:
             raise Exception(f"something unexpected happened - {node_type.name}")
     # fmt: on
 
-    # DEBUG: from opteryx.exceptions import InvalidInternalStateError
-    # DEBUG:
-    # DEBUG: try:
-    # DEBUG:     config = node.to_json()
-    ## DEBUG:     print(config)
-    # DEBUG: except Exception as err:
-    # DEBUG:     message = f"Internal Error - node '{node}' unable to be serialized"
-    # DEBUG:     print(message)
-    ## DEBUG:     raise InvalidInternalStateError(message)
-
     plan.add_node(nid, node)
 
     for source, destination, relation in logical_plan.edges():
diff --git a/opteryx/planner/views/__init__.py b/opteryx/planner/views/__init__.py
index ffe66abab..f500a70a4 100644
--- a/opteryx/planner/views/__init__.py
+++ b/opteryx/planner/views/__init__.py
@@ -20,8 +20,6 @@ def _load_views():
         with open("views.json", "rb") as defs:
             return orjson.loads(defs.read())
     except Exception as err:  # nosec
-        if not err:
-            pass
         # DEBUG:: log (f"[OPTERYX] Unable to open views definition file. {err}")
         return {}
diff --git a/opteryx/shared/__init__.py b/opteryx/shared/__init__.py
index 14e0ba35b..a5877f3ce 100644
--- a/opteryx/shared/__init__.py
+++ b/opteryx/shared/__init__.py
@@ -14,12 +14,5 @@
 from opteryx.shared.async_memory_pool import AsyncMemoryPool
 from opteryx.shared.buffer_pool import BufferPool
 from opteryx.shared.materialized_datasets import MaterializedDatasets
-from opteryx.shared.rolling_log import RollingLog
 
-__all__ = (
-    "AsyncMemoryPool",
-    "BufferPool",
-    "MaterializedDatasets",
-    "MemoryPool",
-    "RollingLog",
-)
+__all__ = ("AsyncMemoryPool", "BufferPool", "MaterializedDatasets", "MemoryPool")
diff --git a/opteryx/shared/rolling_log.py b/opteryx/shared/rolling_log.py
deleted file mode 100644
index 5230c3d46..000000000
--- a/opteryx/shared/rolling_log.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Write to a log with a fixed number of entries, as the log fills the older entries are removed
-from the log file.
-"""
-
-import os
-
-EIGHT_MEGABYTES: int = 8 * 1024 * 1024
-
-
-class RollingLog:
-    _instance = None
-    log_file: str = None
-    max_entries: int = 100
-    block_size: int = EIGHT_MEGABYTES
-
-    def __new__(cls, log_file: str, max_entries: int = 100, block_size: int = EIGHT_MEGABYTES):
-        if cls._instance is None:
-            cls._instance = super().__new__(cls)
-            cls._instance.log_file = log_file
-            cls._instance.max_entries = max_entries
-            cls._instance.block_size = block_size
-            if not os.path.exists(log_file):
-                with open(log_file, "wb"):
-                    pass
-        return cls._instance
-
-    def append(self, entry):
-        # Append the new entry
-        with open(self.log_file, "a", encoding="UTF8") as log_file:
-            log_file.write(entry + "\n")
-
-        # Check if max entries exceeded and remove the first entry if needed
-        lines = None
-        with open(self.log_file, "r", encoding="UTF8") as f:
-            lines = f.readlines()
-
-        if len(lines) > self.max_entries:
-            with open(self.log_file, "w", encoding="UTF8") as f:
-                f.writelines(lines[1:])  # Write all lines except the first
-
-    def scan(self):  # pragma: no cover
-        # open the log file in binary mode
-        with open(self.log_file, "r", encoding="UTF8") as log_file:
-            # read the current position in the circular buffer
-            while True:
-                chunk = log_file.read(self.block_size)
-                if not chunk:
-                    break
-                lines = chunk.split("\n")
-                for line in lines:
-                    if line:
-                        yield line
-
-    def tail(self, count: int = 5):
-        """return the last 'count' records"""
-        return list(self.scan())[-count:]
-
-    def head(self, count: int = 5):
-        """return the first 'count' records"""
-        return list(self.scan())[:count]
diff --git a/opteryx/utils/__init__.py b/opteryx/utils/__init__.py
index 21473f4e5..17904d0f8 100644
--- a/opteryx/utils/__init__.py
+++ b/opteryx/utils/__init__.py
@@ -11,7 +11,6 @@
 # limitations under the License.
 
 
-from enum import Enum
 from itertools import permutations
 from typing import Iterable
 from typing import Optional
@@ -47,7 +46,7 @@ def suggest_alternative(value: str, candidates: Iterable[str]) -> Optional[str]:
     best_match_column = None
     best_match_score = 100  # Large number indicates no match found yet.
 
-    # Function to find the best match
+    # Function to find the best match based on Levenshtein distance
     def find_best_match(name: str):
         nonlocal best_match_column, best_match_score
         for raw, candidate in ((ca, "".join(ch for ch in ca if ch.isalnum())) for ca in candidates):
@@ -74,16 +73,3 @@ def find_best_match(name: str):
             return result
 
     return best_match_column  # Return the best match found, or None if no suitable match is found.
-
-
-def dataclass_to_dict(instance):
-    if isinstance(instance, Enum):
-        return instance.name
-    elif hasattr(instance, "to_dict"):
-        return instance.to_dict()
-    elif hasattr(instance, "__dataclass_fields__"):
-        return {k: dataclass_to_dict(getattr(instance, k)) for k in instance.__dataclass_fields__}
-    elif isinstance(instance, (list, tuple)):
-        return [dataclass_to_dict(k) for k in instance]
-    else:
-        return instance
diff --git a/opteryx/utils/arrow.py b/opteryx/utils/arrow.py
index f8765e387..8effb11ba 100644
--- a/opteryx/utils/arrow.py
+++ b/opteryx/utils/arrow.py
@@ -26,7 +26,7 @@
 def limit_records(
     morsels: Iterator[pyarrow.Table], limit: Optional[int] = None, offset: int = 0
-) -> Optional[Iterator[pyarrow.Table]]:
+) -> Optional[Iterator[pyarrow.Table]]:  # pragma: no cover
     """
     Cycle over an iterable of morsels, limiting the response to a given
     number of records with an optional offset.
diff --git a/tests/fuzzing/test_sql_fuzzer_single_table_select.py b/tests/fuzzing/test_sql_fuzzer_single_table_select.py
index ec586a0b3..e0603d7a0 100644
--- a/tests/fuzzing/test_sql_fuzzer_single_table_select.py
+++ b/tests/fuzzing/test_sql_fuzzer_single_table_select.py
@@ -85,7 +85,10 @@ def generate_random_sql_select(columns, table):
     else:
         select_clause = "SELECT *"
     # Add table name
-    select_clause = select_clause + " FROM " + table
+    if random.random() < 0.1:
+        return f"SELECT * FROM ({generate_random_sql_select(columns, table)}) as table_{random_string(4)}"
+    else:
+        select_clause = select_clause + " FROM " + table
     # Generate a WHERE clause with 70% chance
     if random.random() < 0.7:
         where_clause = generate_condition(columns)
diff --git a/tests/misc/test_connection.py b/tests/misc/test_connection.py
index 296f2341f..69422d23d 100644
--- a/tests/misc/test_connection.py
+++ b/tests/misc/test_connection.py
@@ -31,6 +31,7 @@ def test_connection():
 
     cur.close()
 
+
 def test_execute():
     import pandas
diff --git a/tests/misc/test_cursor.py b/tests/misc/test_cursor.py
index ddb86db5c..2ac6ff7a3 100644
--- a/tests/misc/test_cursor.py
+++ b/tests/misc/test_cursor.py
@@ -185,6 +185,15 @@ def test_execute_unsupported_syntax_error():
     with pytest.raises(UnsupportedSyntaxError):
         cursor.execute("SELECT * FROM table; SELECT * FROM table2", params=[1])
 
+def test_non_tabular_result():
+    cursor = setup_function()
+    cursor.execute("SET @name = 'tim'")
+    cursor.fetchall()
+
+def test_limit():
+    cursor = setup_function()
+    dataset = cursor.execute_to_arrow("SELECT * FROM $planets", limit=3)
+    assert dataset.num_rows == 3
 
 if __name__ == "__main__":  # pragma: no cover
     from tests.tools import run_tests
diff --git a/tests/plan_optimization/test_optimizations_invoked.py b/tests/plan_optimization/test_optimizations_invoked.py
index cda3d4c8d..a8e686582 100644
--- a/tests/plan_optimization/test_optimizations_invoked.py
+++ b/tests/plan_optimization/test_optimizations_invoked.py
@@ -19,7 +19,9 @@
     ("SELECT * FROM $planets WHERE id = 4 + 4", "optimization_constant_fold_expression"),
     ("SELECT * FROM $planets WHERE id * 0 = 1", "optimization_constant_fold_reduce"),
     ("SELECT id ^ 1 = 1 FROM $planets LIMIT 10", "optimization_limit_pushdown"),
-    ("SELECT name FROM $astronauts WHERE name = 'Neil A. Armstrong'", "optimization_predicate_pushdown")
+    ("SELECT name FROM $astronauts WHERE name = 'Neil A. Armstrong'", "optimization_predicate_pushdown"),
+    ("SELECT name FROM $planets WHERE name LIKE '%'", "optimization_constant_fold_reduce"),  # rewritten to `name is not null`
+    ("SELECT name FROM $planets WHERE name ILIKE '%'", "optimization_constant_fold_reduce"),  # rewritten to `name is not null`
 ]
 # fmt:on
diff --git a/tests/sql_battery/test_shapes_and_errors_battery.py b/tests/sql_battery/test_shapes_and_errors_battery.py
index d7c04d3ca..08f6ed5b9 100644
--- a/tests/sql_battery/test_shapes_and_errors_battery.py
+++ b/tests/sql_battery/test_shapes_and_errors_battery.py
@@ -257,6 +257,7 @@
     ("SELECT * FROM $satellites WHERE name != 'Calypso'", 176, 8, None),
     ("SELECT * FROM $satellites WHERE name = '********'", 0, 8, None),
     ("SELECT * FROM $satellites WHERE name LIKE '_a_y_s_'", 1, 8, None),
+    ("SELECT * FROM $satellites WHERE name LIKE 'Cal%%'", 4, 8, None),
     ("SELECT * FROM $satellites WHERE name LIKE 'Cal%'", 4, 8, None),
     ("SELECT * FROM $satellites WHERE name like 'Cal%'", 4, 8, None),
     ("SELECT * FROM $satellites WHERE name ILIKE '_a_y_s_'", 1, 8, None),
@@ -1125,6 +1126,7 @@
     ("SELECT name, SEARCH(birth_place, 'Italy') FROM $astronauts", 357, 2, None),
     ("SELECT name, birth_place FROM $astronauts WHERE SEARCH(birth_place, 'Italy')", 1, 2, None),
     ("SELECT name, birth_place FROM $astronauts WHERE SEARCH(birth_place, 'Rome')", 1, 2, None),
+    ("SELECT SEARCH($satellites.name, 'a') FROM $planets LEFT JOIN $satellites ON $planets.id = $satellites.planetId", 179, 1, None),
 
     ("SELECT birth_date FROM $astronauts WHERE EXTRACT(year FROM birth_date) < 1930;", 14, 1, None),
     ("SELECT EXTRACT(month FROM birth_date) FROM $astronauts", 357, 1, None),
@@ -1474,6 +1476,7 @@
     ("SELECT p1.name AS planet1_name, p2.name AS planet2_name, p3.name AS planet3_name, p4.name AS planet4_name, p5.name AS planet5_name, p6.name AS planet6_name, p7.name AS planet7_name, p8.name AS planet8_name, p9.name AS planet9_name, p10.name AS planet10_name, p1.diameter AS planet1_diameter, p2.gravity AS planet2_gravity, p3.orbitalPeriod AS planet3_orbitalPeriod, p4.numberOfMoons AS planet4_numberOfMoons, p5.meanTemperature AS planet5_meanTemperature FROM $planets p1 JOIN $planets p2 ON p1.id = p2.id JOIN $planets p3 ON p1.id = p3.id JOIN $planets p4 ON p1.id = p4.id JOIN $planets p5 ON p1.id = p5.id JOIN $planets p6 ON p1.id = p6.id JOIN $planets p7 ON p1.id = p7.id JOIN $planets p8 ON p1.id = p8.id JOIN $planets p9 ON p1.id = p9.id JOIN $planets p10 ON p1.id = p10.id WHERE p1.diameter > 10000 ORDER BY p1.name, p2.name, p3.name, p4.name, p5.name;", 6, 15, None),
     ("SELECT mission, LIST(name) FROM $missions INNER JOIN (SELECT * FROM $astronauts CROSS JOIN UNNEST(missions) AS mission) AS astronauts ON Mission = mission GROUP BY mission", 16, 2, None),
+    ("SELECT alma_matered FROM (SELECT alma_mater FROM $astronauts CROSS JOIN $satellites) AS bulked CROSS JOIN UNNEST(alma_mater) AS alma_matered", 120537, 1, None),
 
     # virtual dataset doesn't exist
     ("SELECT * FROM $RomanGods", None, None, DatasetNotFoundError),
diff --git a/tests/sql_battery/tests/system.run_tests b/tests/sql_battery/tests/system.run_tests
index d255bf720..19c4c6f9b 100644
--- a/tests/sql_battery/tests/system.run_tests
+++ b/tests/sql_battery/tests/system.run_tests
@@ -1,6 +1,7 @@
 SELECT version();
 SELECT connection_id() AS pid;
 SHOW VARIABLES;
+SET @name = 'bob';
 
 # SELECT * FROM information_schema.tables;
 # SELECT * FROM information_schema.views;
\ No newline at end of file
diff --git a/tests/sql_battery/tests/v2_planner.run_tests b/tests/sql_battery/tests/v2_planner.run_tests
index f28658566..1faec341e 100644
--- a/tests/sql_battery/tests/v2_planner.run_tests
+++ b/tests/sql_battery/tests/v2_planner.run_tests
@@ -30,4 +30,79 @@ EXPLAIN ANALYZE FORMAT JSON SELECT * FROM $planets AS a INNER JOIN (SELECT id FR
 SELECT DISTINCT ON (planetId) planetId, name FROM $satellites;
 
 # CONDITIONS IN AGGREGATES
-SELECT SUM(DISTINCT id ORDER BY id) FROM $planets
\ No newline at end of file
+SELECT SUM(DISTINCT id ORDER BY id) FROM $planets
+
+# INNER JOIN with FULL OUTER JOIN
+SELECT * FROM $planets AS p1 INNER JOIN $planets AS p2 ON p1.id = p2.id FULL OUTER JOIN $planets AS p3 ON p1.id = p3.id;
+
+# INNER JOIN with LEFT OUTER JOIN
+SELECT * FROM $planets AS p1 INNER JOIN $planets AS p2 ON p1.id = p2.id LEFT OUTER JOIN $planets AS p3 ON p1.id = p3.id;
+
+# INNER JOIN with RIGHT OUTER JOIN
+SELECT * FROM $planets AS p1 INNER JOIN $planets AS p2 ON p1.id = p2.id RIGHT OUTER JOIN $planets AS p3 ON p1.id = p3.id;
+
+# INNER JOIN with NATURAL JOIN
+SELECT * FROM $planets AS p1 INNER JOIN $planets AS p2 ON p1.id = p2.id NATURAL JOIN $planets AS p3;
+
+# INNER JOIN with LEFT ANTI JOIN
+SELECT * FROM $planets AS p1 INNER JOIN $planets AS p2 ON p1.id = p2.id LEFT ANTI JOIN $planets AS p3 ON p1.id = p3.id;
+
+# INNER JOIN with LEFT SEMI JOIN
+SELECT * FROM $planets AS p1 INNER JOIN $planets AS p2 ON p1.id = p2.id LEFT SEMI JOIN $planets AS p3 ON p1.id = p3.id;
+
+# FULL OUTER JOIN with LEFT OUTER JOIN
+SELECT * FROM $planets AS p1 FULL OUTER JOIN $planets AS p2 ON p1.id = p2.id LEFT OUTER JOIN $planets AS p3 ON p1.id = p3.id;
+
+# FULL OUTER JOIN with RIGHT OUTER JOIN
+SELECT * FROM $planets AS p1 FULL OUTER JOIN $planets AS p2 ON p1.id = p2.id RIGHT OUTER JOIN $planets AS p3 ON p1.id = p3.id;
+
+# FULL OUTER JOIN with NATURAL JOIN
+SELECT * FROM $planets AS p1 FULL OUTER JOIN $planets AS p2 ON p1.id = p2.id NATURAL JOIN $planets AS p3;
+
+# FULL OUTER JOIN with LEFT ANTI JOIN
+SELECT * FROM $planets AS p1 FULL OUTER JOIN $planets AS p2 ON p1.id = p2.id LEFT ANTI JOIN $planets AS p3 ON p1.id = p3.id;
+
+# FULL OUTER JOIN with LEFT SEMI JOIN
+SELECT * FROM $planets AS p1 FULL OUTER JOIN $planets AS p2 ON p1.id = p2.id LEFT SEMI JOIN $planets AS p3 ON p1.id = p3.id;
+
+# LEFT OUTER JOIN with RIGHT OUTER JOIN
+SELECT * FROM $planets AS p1 LEFT OUTER JOIN $planets AS p2 ON p1.id = p2.id RIGHT OUTER JOIN $planets AS p3 ON p1.id = p3.id;
+
+# LEFT OUTER JOIN with NATURAL JOIN
+SELECT * FROM $planets AS p1 LEFT OUTER JOIN $planets AS p2 ON p1.id = p2.id NATURAL JOIN $planets AS p3;
+
+# LEFT OUTER JOIN with LEFT ANTI JOIN
+SELECT * FROM $planets AS p1 LEFT OUTER JOIN $planets AS p2 ON p1.id = p2.id LEFT ANTI JOIN $planets AS p3 ON p1.id = p3.id;
+
+# LEFT OUTER JOIN with LEFT SEMI JOIN
+SELECT * FROM $planets AS p1 LEFT OUTER JOIN $planets AS p2 ON p1.id = p2.id LEFT SEMI JOIN $planets AS p3 ON p1.id = p3.id;
+
+# RIGHT OUTER JOIN with NATURAL JOIN
+SELECT * FROM $planets AS p1 RIGHT OUTER JOIN $planets AS p2 ON p1.id = p2.id NATURAL JOIN $planets AS p3;
+
+# RIGHT OUTER JOIN with LEFT ANTI JOIN
+SELECT * FROM $planets AS p1 RIGHT OUTER JOIN $planets AS p2 ON p1.id = p2.id LEFT ANTI JOIN $planets AS p3 ON p1.id = p3.id;
+
+# RIGHT OUTER JOIN with LEFT SEMI JOIN
+SELECT * FROM $planets AS p1 RIGHT OUTER JOIN $planets AS p2 ON p1.id = p2.id LEFT SEMI JOIN $planets AS p3 ON p1.id = p3.id;
+
+# LEFT ANTI JOIN with LEFT SEMI JOIN
+SELECT * FROM $planets AS p1 LEFT ANTI JOIN $planets AS p2 ON p1.id = p2.id LEFT SEMI JOIN $planets AS p3 ON p1.id = p3.id;
+
+# INNER JOIN with INNER JOIN
+SELECT * FROM $planets AS p1 INNER JOIN $planets AS p2 ON p1.id = p2.id INNER JOIN $planets AS p3 ON p1.id = p3.id;
+
+# FULL OUTER JOIN with FULL OUTER JOIN
+SELECT * FROM $planets AS p1 FULL OUTER JOIN $planets AS p2 ON p1.id = p2.id FULL OUTER JOIN $planets AS p3 ON p1.id = p3.id;
+
+# LEFT OUTER JOIN with LEFT OUTER JOIN
+SELECT * FROM $planets AS p1 LEFT OUTER JOIN $planets AS p2 ON p1.id = p2.id LEFT OUTER JOIN $planets AS p3 ON p1.id = p3.id;
+
+# RIGHT OUTER JOIN with RIGHT OUTER JOIN
+SELECT * FROM $planets AS p1 RIGHT OUTER JOIN $planets AS p2 ON p1.id = p2.id RIGHT OUTER JOIN $planets AS p3 ON p1.id = p3.id;
+
+# LEFT ANTI JOIN with LEFT ANTI JOIN
+SELECT * FROM $planets AS p1 LEFT ANTI JOIN $planets AS p2 ON p1.id = p2.id LEFT ANTI JOIN $planets AS p3 ON p1.id = p3.id;
+
+# LEFT SEMI JOIN with LEFT SEMI JOIN
+SELECT * FROM $planets AS p1 LEFT SEMI JOIN $planets AS p2 ON p1.id = p2.id LEFT SEMI JOIN $planets AS p3 ON p1.id = p3.id;
\ No newline at end of file
diff --git a/tests/storage/test_cache_memcached.py b/tests/storage/test_cache_memcached.py
index f73d6a227..8f0e52404 100644
--- a/tests/storage/test_cache_memcached.py
+++ b/tests/storage/test_cache_memcached.py
@@ -6,6 +6,7 @@
 
 import os
 import sys
+import pytest
 
 os.environ["OPTERYX_DEBUG"] = "1"
@@ -128,6 +129,15 @@ def test_memcache_threaded():
         assert result == load, f"Post-thread check failed: {result} != {load}"
 
 
+def test_skip_on_error():
+    from opteryx.managers.cache import MemcachedCache
+    cache = MemcachedCache()
+    cache.set(b"key", b"value")
+    assert cache.get(b"key") == b"value"
+    cache._consecutive_failures = 10
+    assert cache.get(b"key") is None
+
+
 if __name__ == "__main__":  # pragma: no cover
     from tests.tools import run_tests
diff --git a/tests/storage/test_cache_redis.py b/tests/storage/test_cache_redis.py
index 5fdd6de2d..dc3f1f1e4 100644
--- a/tests/storage/test_cache_redis.py
+++ b/tests/storage/test_cache_redis.py
@@ -6,6 +6,7 @@
 
 import os
 import sys
+import pytest
 
 sys.path.insert(1, os.path.join(sys.path[0], "../.."))
@@ -46,6 +47,23 @@ def test_redis_cache():
     assert stats.get("cache_misses", 0) == 0, stats
 
 
+def test_invalid_config():
+    from opteryx.managers.cache import RedisCache
+
+    with pytest.raises(Exception):
+        RedisCache(server="")
+
+    v = RedisCache(server=None)
+    assert v._consecutive_failures == 10
+
+def test_skip_on_error():
+    from opteryx.managers.cache import RedisCache
+    cache = RedisCache()
+    cache.set(b"key", b"value")
+    assert cache.get(b"key") == b"value"
+    cache._consecutive_failures = 10
+    assert cache.get(b"key") is None
+
 if __name__ == "__main__":  # pragma: no cover
     from tests.tools import run_tests
diff --git a/tests/storage/test_cache_valkey.py b/tests/storage/test_cache_valkey.py
index fe7c6f0da..52ba187ab 100644
--- a/tests/storage/test_cache_valkey.py
+++ b/tests/storage/test_cache_valkey.py
@@ -6,6 +6,7 @@
 
 import os
 import sys
+import pytest
 
 sys.path.insert(1, os.path.join(sys.path[0], "../.."))
@@ -45,6 +46,22 @@ def test_valkey_cache():
     assert stats.get("remote_cache_hits", 0) >= stats["blobs_read"], stats
     assert stats.get("cache_misses", 0) == 0, stats
 
+def test_invalid_config():
+    from opteryx.managers.cache import ValkeyCache
+
+    with pytest.raises(Exception):
+        ValkeyCache(server="")
+
+    v = ValkeyCache(server=None)
+    assert v._consecutive_failures == 10
+
+def test_skip_on_error():
+    from opteryx.managers.cache import ValkeyCache
+    cache = ValkeyCache()
+    cache.set(b"key", b"value")
+    assert cache.get(b"key") == b"value"
+    cache._consecutive_failures = 10
+    assert cache.get(b"key") is None
 
 if __name__ == "__main__":  # pragma: no cover
     from tests.tools import run_tests

From 436ee2258135e14498ab1d3097d6a769fd71b551 Mon Sep 17 00:00:00 2001
From: XB500
Date: Mon, 23 Dec 2024 12:27:19 +0000
Subject: [PATCH 2/2] Opteryx Version 0.19.0-alpha.912

---
 opteryx/__version__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/opteryx/__version__.py b/opteryx/__version__.py
index a17ea2163..fed3ecee2 100644
--- a/opteryx/__version__.py
+++ b/opteryx/__version__.py
@@ -1,4 +1,4 @@
-__build__ = 910
+__build__ = 912
 
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.