From 744d160e968f2c8662c9137f844547e7fbf37055 Mon Sep 17 00:00:00 2001
From: joocer
Date: Mon, 23 Dec 2024 12:26:55 +0000
Subject: [PATCH 1/2] HOUSEKEEPING

---
 opteryx/config.py                             |  3 -
 opteryx/connectors/aws_s3_connector.py        | 10 ---
 opteryx/connectors/base/base_connector.py     |  2 +-
 opteryx/connectors/capabilities/cacheable.py  |  8 +-
 opteryx/cursor.py                             | 24 +++---
 opteryx/managers/cache/memcached.py           |  2 +-
 opteryx/managers/cache/redis.py               |  6 +-
 opteryx/managers/cache/valkey.py              |  6 +-
 opteryx/managers/catalog/tarchia_provider.py  |  2 +-
 opteryx/models/logical_column.py              | 13 ----
 opteryx/operators/base_plan_node.py           |  7 --
 .../planner/cost_based_optimizer/__init__.py  |  2 +-
 .../strategies/constant_folding.py            |  2 +-
 .../logical_planner/logical_planner.py        | 32 +-------
 opteryx/planner/physical_planner.py           | 10 ---
 opteryx/planner/views/__init__.py             |  2 -
 opteryx/shared/__init__.py                    |  9 +--
 opteryx/shared/rolling_log.py                 | 73 ------------------
 opteryx/utils/__init__.py                     | 16 +---
 opteryx/utils/arrow.py                        |  2 +-
 .../test_sql_fuzzer_single_table_select.py    |  5 +-
 tests/misc/test_connection.py                 |  1 +
 tests/misc/test_cursor.py                     |  9 +++
 .../test_optimizations_invoked.py             |  4 +-
 .../test_shapes_and_errors_battery.py         |  3 +
 tests/sql_battery/tests/system.run_tests      |  1 +
 tests/sql_battery/tests/v2_planner.run_tests  | 77 ++++++++++++++++++-
 tests/storage/test_cache_memcached.py         | 10 +++
 tests/storage/test_cache_redis.py             | 18 +++++
 tests/storage/test_cache_valkey.py            | 17 ++++
 30 files changed, 175 insertions(+), 201 deletions(-)
 delete mode 100644 opteryx/shared/rolling_log.py

diff --git a/opteryx/config.py b/opteryx/config.py
index aeb1a7878..c2ad0d5bf 100644
--- a/opteryx/config.py
+++ b/opteryx/config.py
@@ -183,9 +183,6 @@ def get(key: str, default: Optional[typing.Any] = None) -> Optional[typing.Any]:
 ENABLE_RESOURCE_LOGGING: bool = bool(get("ENABLE_RESOURCE_LOGGING", False))
 # size of morsels to push between steps
 MORSEL_SIZE: int = int(get("MORSEL_SIZE", 64 * 1024 * 1024))
-# query log
-QUERY_LOG_LOCATION:str = get("QUERY_LOG_LOCATION", False)
-QUERY_LOG_SIZE:int = int(get("QUERY_LOG_SIZE", 100))
 # not GA
 PROFILE_LOCATION:str = get("PROFILE_LOCATION")
 # fmt:on
diff --git a/opteryx/connectors/aws_s3_connector.py b/opteryx/connectors/aws_s3_connector.py
index c185c534d..e8405dfc3 100644
--- a/opteryx/connectors/aws_s3_connector.py
+++ b/opteryx/connectors/aws_s3_connector.py
@@ -68,10 +68,6 @@ def __init__(self, credentials=None, **kwargs):
         self.minio = Minio(end_point, access_key, secret_key, secure=secure)
         self.dataset = self.dataset.replace(".", OS_SEP)
 
-        # we're going to cache the first blob as the schema and dataset reader
-        # sometimes both start here
-        self.cached_first_blob = None
-
     @single_item_cache
     def get_list_of_blob_names(self, *, prefix: str) -> List[str]:
         bucket, object_path, _, _ = paths.get_parts(prefix)
@@ -94,12 +90,6 @@ def read_dataset(
             prefix=self.dataset,
         )
 
-        # Check if the first blob was cached earlier
-        if self.cached_first_blob is not None:
-            yield self.cached_first_blob  # Use cached blob
-            blob_names = blob_names[1:]  # Skip first blob
-            self.cached_first_blob = None
-
         for blob_name in blob_names:
             try:
                 decoder = get_decoder(blob_name)
diff --git a/opteryx/connectors/base/base_connector.py b/opteryx/connectors/base/base_connector.py
index 851505abc..af3e31751 100644
--- a/opteryx/connectors/base/base_connector.py
+++ b/opteryx/connectors/base/base_connector.py
@@ -148,7 +148,7 @@ def __next__(self) -> pyarrow.Table:  # pragma: no cover
         """
         raise NotImplementedError("Subclasses must implement __next__ method.")
 
-    def close(self) -> None:
+    def close(self) -> None:  # pragma: no cover
         """
         Close the reader and release any resources.
         """
diff --git a/opteryx/connectors/capabilities/cacheable.py b/opteryx/connectors/capabilities/cacheable.py
index 30e170232..9535d64ec 100644
--- a/opteryx/connectors/capabilities/cacheable.py
+++ b/opteryx/connectors/capabilities/cacheable.py
@@ -88,7 +88,7 @@ async def wrapper(blob_name: str, statistics, pool: MemoryPool, **kwargs):
             remote_cache.touch(key)  # help the remote cache track LRU
             statistics.bufferpool_hits += 1
             read_buffer_ref = await pool.commit(payload)  # type: ignore
-            while read_buffer_ref is None:
+            while read_buffer_ref is None:  # pragma: no cover
                 await asyncio.sleep(0.1)
                 statistics.stalls_writing_to_read_buffer += 1
                 read_buffer_ref = await pool.commit(payload)  # type: ignore
@@ -103,7 +103,7 @@ async def wrapper(blob_name: str, statistics, pool: MemoryPool, **kwargs):
             statistics.remote_cache_hits += 1
             system_statistics.remote_cache_reads += 1
             read_buffer_ref = await pool.commit(payload)  # type: ignore
-            while read_buffer_ref is None:
+            while read_buffer_ref is None:  # pragma: no cover
                 await asyncio.sleep(0.1)
                 statistics.stalls_writing_to_read_buffer += 1
                 read_buffer_ref = await pool.commit(payload)  # type: ignore
@@ -119,7 +119,7 @@ async def wrapper(blob_name: str, statistics, pool: MemoryPool, **kwargs):
             statistics.cache_misses += 1
             system_statistics.origin_reads += 1
             return read_buffer_ref
-        except Exception as e:
+        except Exception as e:  # pragma: no cover
             print(f"Error in {func.__name__}: {e}")
             raise  # Optionally re-raise the error after logging it
@@ -136,7 +136,7 @@ async def wrapper(blob_name: str, statistics, pool: MemoryPool, **kwargs):
         ):
             # if we didn't get it from the buffer pool (origin or remote cache) we add it
             evicted = buffer_pool.set(key, payload)
-            if evicted:
+            if evicted:  # pragma: no cover
                 # if we're evicting items we just put in the cache, stop
                 if evicted in my_keys:
                     evictions_remaining = 0
diff --git a/opteryx/cursor.py b/opteryx/cursor.py
index 188e0ab49..1216015dc 100644
--- a/opteryx/cursor.py
+++ b/opteryx/cursor.py
@@ -40,17 +40,9 @@
 from opteryx.exceptions import SqlError
 from opteryx.exceptions import UnsupportedSyntaxError
 from opteryx.models import QueryStatistics
-from opteryx.shared.rolling_log import RollingLog
 from opteryx.utils import sql
 
 PROFILE_LOCATION = config.PROFILE_LOCATION
-QUERY_LOG_LOCATION = config.QUERY_LOG_LOCATION
-QUERY_LOG_SIZE = config.QUERY_LOG_SIZE
-
-
-ROLLING_LOG = None
-if QUERY_LOG_LOCATION:
-    ROLLING_LOG = RollingLog(QUERY_LOG_LOCATION, max_entries=QUERY_LOG_SIZE)
 
 
 class CursorState(Enum):
@@ -191,9 +183,6 @@ def _inner_execute(
         except RuntimeError as err:  # pragma: no cover
             raise SqlError(f"Error Executing SQL Statement ({err})") from err
 
-        if ROLLING_LOG:
-            ROLLING_LOG.append(operation)
-
         results = execute(plan, statistics=self._statistics)
 
         start = time.time_ns()
@@ -337,6 +326,19 @@ def execute_to_arrow(
         results = self._execute_statements(operation, params, visibility_filters)
         if results is not None:
             result_data, self._result_type = results
+
+            if self._result_type == ResultType.NON_TABULAR:
+                import orso
+
+                meta_dataframe = orso.DataFrame(
+                    rows=[(result_data.record_count,)],  # type: ignore
+                    schema=RelationSchema(
+                        name="table",
+                        columns=[FlatColumn(name="rows_affected", type=OrsoTypes.INTEGER)],
+                    ),
+                )  # type: ignore
+                return meta_dataframe.arrow()
+
             if limit is not None:
                 result_data = utils.arrow.limit_records(result_data, limit)  # type: ignore
         if isinstance(result_data, pyarrow.Table):
diff --git a/opteryx/managers/cache/memcached.py b/opteryx/managers/cache/memcached.py
index 01d4db1e9..e5761678b 100644
--- a/opteryx/managers/cache/memcached.py
+++ b/opteryx/managers/cache/memcached.py
@@ -49,7 +49,7 @@ def _memcached_server(**kwargs):
     try:
         from pymemcache.client import base
-    except ImportError as err:
+    except ImportError as err:  # pragma: no cover
         raise MissingDependencyError(err.name) from err
 
     try:
diff --git a/opteryx/managers/cache/redis.py b/opteryx/managers/cache/redis.py
index b1eeec358..e80b707ea 100644
--- a/opteryx/managers/cache/redis.py
+++ b/opteryx/managers/cache/redis.py
@@ -38,7 +38,7 @@ def _redis_server(**kwargs):
     try:
         import redis
-    except ImportError as err:
+    except ImportError as err:  # pragma: no cover
         raise MissingDependencyError(err.name) from err
 
     return redis.from_url(redis_config)
@@ -80,7 +80,7 @@ def get(self, key: bytes) -> Union[bytes, None]:
             if response:
                 self.hits += 1
                 return bytes(response)
-        except Exception as err:
+        except Exception as err:  # pragma: no cover
             self._consecutive_failures += 1
             if self._consecutive_failures >= MAXIMUM_CONSECUTIVE_FAILURES:
                 import datetime
@@ -99,7 +99,7 @@ def set(self, key: bytes, value: bytes) -> None:
         try:
             self._server.set(key, value)
             self.sets += 1
-        except Exception as err:
+        except Exception as err:  # pragma: no cover
             # if we fail to set, stop trying
             self._consecutive_failures = MAXIMUM_CONSECUTIVE_FAILURES
             self.errors += 1
diff --git a/opteryx/managers/cache/valkey.py b/opteryx/managers/cache/valkey.py
index b82e6c4b2..36f1330fc 100644
--- a/opteryx/managers/cache/valkey.py
+++ b/opteryx/managers/cache/valkey.py
@@ -28,7 +28,7 @@ def _valkey_server(**kwargs):
     try:
         import valkey  # Assuming `valkey` is the client library's name
-    except ImportError as err:
+    except ImportError as err:  # pragma: no cover
         raise MissingDependencyError(err.name) from err
 
     return valkey.from_url(valkey_config)  # Example instantiation of the client
@@ -70,7 +70,7 @@ def get(self, key: bytes) -> Union[bytes, None]:
             if response:
                 self.hits += 1
                 return bytes(response)
-        except Exception as err:
+        except Exception as err:  # pragma: no cover
             self._consecutive_failures += 1
             if self._consecutive_failures >= MAXIMUM_CONSECUTIVE_FAILURES:
                 import datetime
@@ -89,7 +89,7 @@ def set(self, key: bytes, value: bytes) -> None:
         try:
             self._server.set(key, value)  # Adjust based on Valkey's API
             self.sets += 1
-        except Exception as err:
+        except Exception as err:  # pragma: no cover
             # if we fail to set, stop trying
             self._consecutive_failures = MAXIMUM_CONSECUTIVE_FAILURES
             self.errors += 1
diff --git a/opteryx/managers/catalog/tarchia_provider.py b/opteryx/managers/catalog/tarchia_provider.py
index 03f6e05f8..f2e25ff2e 100644
--- a/opteryx/managers/catalog/tarchia_provider.py
+++ b/opteryx/managers/catalog/tarchia_provider.py
@@ -44,7 +44,7 @@ def is_valid_url(url: str) -> bool:
     try:
         result = urlparse(url)
         return all([result.scheme, result.netloc])
-    except ValueError:
+    except ValueError:  # pragma: no cover
         return False
diff --git a/opteryx/models/logical_column.py b/opteryx/models/logical_column.py
index fd2ba1aa4..36fde92d7 100644
--- a/opteryx/models/logical_column.py
+++ b/opteryx/models/logical_column.py
@@ -85,16 +85,3 @@ def copy(self):
 
     def __repr__(self) -> str:
         return f""
-
-    def to_dict(self) -> dict:
-        from opteryx.utils import dataclass_to_dict
-
-        return {
-            "class": "LogicalColumn",
-            "node_type": self.node_type.name,
-            "source_column": self.source_column,
-            "source_connector": self.source_connector,
-            "source": self.source,
-            "alias": self.alias,
-            "schema_column": dataclass_to_dict(self.schema_column),
-        }
diff --git a/opteryx/operators/base_plan_node.py b/opteryx/operators/base_plan_node.py
index 4a69aea17..ea786bfd4 100644
--- a/opteryx/operators/base_plan_node.py
+++ b/opteryx/operators/base_plan_node.py
@@ -61,13 +61,6 @@ def __init__(self, *, properties, **parameters):
         self.records_out = 0
         self.bytes_out = 0
 
-    def to_json(self) -> bytes:  # pragma: no cover
-        import orjson
-
-        from opteryx.utils import dataclass_to_dict
-
-        return orjson.dumps(dataclass_to_dict(self.do))
-
     @classmethod
     def from_json(cls, json_obj: str) -> "BasePlanNode":  # pragma: no cover
         raise NotImplementedError()
diff --git a/opteryx/planner/cost_based_optimizer/__init__.py b/opteryx/planner/cost_based_optimizer/__init__.py
index 2663c1c7d..35f0980b5 100644
--- a/opteryx/planner/cost_based_optimizer/__init__.py
+++ b/opteryx/planner/cost_based_optimizer/__init__.py
@@ -152,7 +152,7 @@ def do_cost_based_optimizer(plan: LogicalPlan, statistics: QueryStatistics) -> L
     Returns:
         LogicalPlan: The optimized logical plan.
     """
-    if DISABLE_OPTIMIZER:
+    if DISABLE_OPTIMIZER:  # pragma: no cover
         message = "[OPTERYX] The optimizer has been disabled, 'DISABLE_OPTIMIZER' variable is TRUE."
         print(message)
         statistics.add_message(message)
diff --git a/opteryx/planner/cost_based_optimizer/strategies/constant_folding.py b/opteryx/planner/cost_based_optimizer/strategies/constant_folding.py
index 242048d44..7ce5e04a9 100644
--- a/opteryx/planner/cost_based_optimizer/strategies/constant_folding.py
+++ b/opteryx/planner/cost_based_optimizer/strategies/constant_folding.py
@@ -128,7 +128,7 @@ def fold_constants(root: Node, statistics: QueryStatistics) -> Node:
 
     if root.node_type == NodeType.COMPARISON_OPERATOR:
         if (
-            root.value in ("Like", "Ilike")
+            root.value in ("Like", "ILike")
             and root.left.node_type == NodeType.IDENTIFIER
             and root.right.node_type == NodeType.LITERAL
             and root.right.value == "%"
diff --git a/opteryx/planner/logical_planner/logical_planner.py b/opteryx/planner/logical_planner/logical_planner.py
index a379f4cca..507c45615 100644
--- a/opteryx/planner/logical_planner/logical_planner.py
+++ b/opteryx/planner/logical_planner/logical_planner.py
@@ -73,7 +73,7 @@ class LogicalPlanNode(Node):
     def copy(self) -> "Node":
         return LogicalPlanNode(**super().copy().properties)
 
-    def __str__(self):
+    def __str__(self):  # pragma: no cover
         try:
             # fmt:off
             node_type = self.node_type
@@ -502,12 +502,6 @@ def inner_query_planner(ast_branch):
         if previous_step_id is not None:
             inner_plan.add_edge(previous_step_id, step_id)
 
-        if distinct_step.on and _projection[0].source_column == "FROM":
-            cols = ", ".join([format_expression(c) for c in distinct_step.on])
-            raise UnsupportedSyntaxError(
-                f"Did you mean 'SELECT DISTINCT ON ({cols}) {cols} FROM {_projection[0].alias};'?"
-            )
-
     # order
     if _order_by:
         order_step = LogicalPlanNode(node_type=LogicalPlanStepType.Order)
@@ -794,28 +788,6 @@ def create_node_relation(relation):
     return root_node, sub_plan
 
-def analyze_query(statement) -> LogicalPlan:
-    root_node = "Analyze"
-    plan = LogicalPlan()
-
-    from_step = LogicalPlanNode(node_type=LogicalPlanStepType.Scan)
-    table = statement[root_node]["table_name"]
-    from_step.relation = ".".join(part["value"] for part in table)
-    from_step.alias = from_step.relation
-    from_step.start_date = table[0].get("start_date")
-    from_step.end_date = table[0].get("end_date")
-    step_id = random_string()
-    plan.add_node(step_id, from_step)
-
-    metadata_step = LogicalPlanNode(node_type=LogicalPlanStepType.MetadataWriter)
-    previous_step_id, step_id = step_id, random_string()
-    plan.add_node(step_id, metadata_step)
-    plan.add_edge(previous_step_id, step_id)
-
-    return plan
-    # write manifest
-
-
 def plan_execute_query(statement) -> LogicalPlan:
     import orjson
@@ -1112,7 +1084,7 @@ def plan_show_variables(statement):
 
 
 QUERY_BUILDERS = {
-    "Analyze": analyze_query,
+    # "Analyze": analyze_query,
     "Execute": plan_execute_query,
     "Explain": plan_explain,
     "Query": plan_query,
diff --git a/opteryx/planner/physical_planner.py b/opteryx/planner/physical_planner.py
index 3233a6e7c..414cced93 100644
--- a/opteryx/planner/physical_planner.py
+++ b/opteryx/planner/physical_planner.py
@@ -95,16 +95,6 @@ def create_physical_plan(logical_plan, query_properties) -> PhysicalPlan:
             raise Exception(f"something unexpected happened - {node_type.name}")
     # fmt: on
 
-    # DEBUG: from opteryx.exceptions import InvalidInternalStateError
-    # DEBUG:
-    # DEBUG: try:
-    # DEBUG:     config = node.to_json()
-    ## DEBUG:     print(config)
-    # DEBUG: except Exception as err:
-    # DEBUG:     message = f"Internal Error - node '{node}' unable to be serialized"
-    # DEBUG:     print(message)
-    ## DEBUG:     raise InvalidInternalStateError(message)
-
     plan.add_node(nid, node)
 
     for source, destination, relation in logical_plan.edges():
diff --git a/opteryx/planner/views/__init__.py b/opteryx/planner/views/__init__.py
index ffe66abab..f500a70a4 100644
--- a/opteryx/planner/views/__init__.py
+++ b/opteryx/planner/views/__init__.py
@@ -20,8 +20,6 @@ def _load_views():
         with open("views.json", "rb") as defs:
             return orjson.loads(defs.read())
     except Exception as err:  # nosec
-        if not err:
-            pass
         # DEBUG:: log (f"[OPTERYX] Unable to open views definition file. {err}")
         return {}
diff --git a/opteryx/shared/__init__.py b/opteryx/shared/__init__.py
index 14e0ba35b..a5877f3ce 100644
--- a/opteryx/shared/__init__.py
+++ b/opteryx/shared/__init__.py
@@ -14,12 +14,5 @@
 from opteryx.shared.async_memory_pool import AsyncMemoryPool
 from opteryx.shared.buffer_pool import BufferPool
 from opteryx.shared.materialized_datasets import MaterializedDatasets
-from opteryx.shared.rolling_log import RollingLog
 
-__all__ = (
-    "AsyncMemoryPool",
-    "BufferPool",
-    "MaterializedDatasets",
-    "MemoryPool",
-    "RollingLog",
-)
+__all__ = ("AsyncMemoryPool", "BufferPool", "MaterializedDatasets", "MemoryPool")
diff --git a/opteryx/shared/rolling_log.py b/opteryx/shared/rolling_log.py
deleted file mode 100644
index 5230c3d46..000000000
--- a/opteryx/shared/rolling_log.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Write to a log with a fixed number of entries, as the log fills the older entries are removed
-from the log file.
-"""
-
-import os
-
-EIGHT_MEGABYTES: int = 8 * 1024 * 1024
-
-
-class RollingLog:
-    _instance = None
-    log_file: str = None
-    max_entries: int = 100
-    block_size: int = EIGHT_MEGABYTES
-
-    def __new__(cls, log_file: str, max_entries: int = 100, block_size: int = EIGHT_MEGABYTES):
-        if cls._instance is None:
-            cls._instance = super().__new__(cls)
-            cls._instance.log_file = log_file
-            cls._instance.max_entries = max_entries
-            cls._instance.block_size = block_size
-            if not os.path.exists(log_file):
-                with open(log_file, "wb"):
-                    pass
-        return cls._instance
-
-    def append(self, entry):
-        # Append the new entry
-        with open(self.log_file, "a", encoding="UTF8") as log_file:
-            log_file.write(entry + "\n")
-
-        # Check if max entries exceeded and remove the first entry if needed
-        lines = None
-        with open(self.log_file, "r", encoding="UTF8") as f:
-            lines = f.readlines()
-
-        if len(lines) > self.max_entries:
-            with open(self.log_file, "w", encoding="UTF8") as f:
-                f.writelines(lines[1:])  # Write all lines except the first
-
-    def scan(self):  # pragma: no cover
-        # open the log file in binary mode
-        with open(self.log_file, "r", encoding="UTF8") as log_file:
-            # read the current position in the circular buffer
-            while True:
-                chunk = log_file.read(self.block_size)
-                if not chunk:
-                    break
-                lines = chunk.split("\n")
-                for line in lines:
-                    if line:
-                        yield line
-
-    def tail(self, count: int = 5):
-        """return the last 'count' records"""
-        return list(self.scan())[-count:]
-
-    def head(self, count: int = 5):
-        """return the first 'count' records"""
-        return list(self.scan())[:count]
diff --git a/opteryx/utils/__init__.py b/opteryx/utils/__init__.py
index 21473f4e5..17904d0f8 100644
--- a/opteryx/utils/__init__.py
+++ b/opteryx/utils/__init__.py
@@ -11,7 +11,6 @@
 # limitations under the License.
 
 
-from enum import Enum
 from itertools import permutations
 from typing import Iterable
 from typing import Optional
@@ -47,7 +46,7 @@ def suggest_alternative(value: str, candidates: Iterable[str]) -> Optional[str]:
     best_match_column = None
     best_match_score = 100  # Large number indicates no match found yet.
 
-    # Function to find the best match
+    # Function to find the best match based on Levenshtein distance
     def find_best_match(name: str):
         nonlocal best_match_column, best_match_score
         for raw, candidate in ((ca, "".join(ch for ch in ca if ch.isalnum())) for ca in candidates):
@@ -74,16 +73,3 @@ def find_best_match(name: str):
             return result
 
     return best_match_column  # Return the best match found, or None if no suitable match is found.
-
-
-def dataclass_to_dict(instance):
-    if isinstance(instance, Enum):
-        return instance.name
-    elif hasattr(instance, "to_dict"):
-        return instance.to_dict()
-    elif hasattr(instance, "__dataclass_fields__"):
-        return {k: dataclass_to_dict(getattr(instance, k)) for k in instance.__dataclass_fields__}
-    elif isinstance(instance, (list, tuple)):
-        return [dataclass_to_dict(k) for k in instance]
-    else:
-        return instance
diff --git a/opteryx/utils/arrow.py b/opteryx/utils/arrow.py
index f8765e387..8effb11ba 100644
--- a/opteryx/utils/arrow.py
+++ b/opteryx/utils/arrow.py
@@ -26,7 +26,7 @@
 def limit_records(
     morsels: Iterator[pyarrow.Table], limit: Optional[int] = None, offset: int = 0
-) -> Optional[Iterator[pyarrow.Table]]:
+) -> Optional[Iterator[pyarrow.Table]]:  # pragma: no cover
     """
     Cycle over an iterable of morsels, limiting the response to a given
     number of records with an optional offset.
diff --git a/tests/fuzzing/test_sql_fuzzer_single_table_select.py b/tests/fuzzing/test_sql_fuzzer_single_table_select.py
index ec586a0b3..e0603d7a0 100644
--- a/tests/fuzzing/test_sql_fuzzer_single_table_select.py
+++ b/tests/fuzzing/test_sql_fuzzer_single_table_select.py
@@ -85,7 +85,10 @@ def generate_random_sql_select(columns, table):
     else:
         select_clause = "SELECT *"
     # Add table name
-    select_clause = select_clause + " FROM " + table
+    if random.random() < 0.1:
+        return f"SELECT * FROM ({generate_random_sql_select(columns, table)}) as table_{random_string(4)}"
+    else:
+        select_clause = select_clause + " FROM " + table
     # Generate a WHERE clause with 70% chance
     if random.random() < 0.7:
         where_clause = generate_condition(columns)
diff --git a/tests/misc/test_connection.py b/tests/misc/test_connection.py
index 296f2341f..69422d23d 100644
--- a/tests/misc/test_connection.py
+++ b/tests/misc/test_connection.py
@@ -31,6 +31,7 @@ def test_connection():
 
     cur.close()
 
+
 def test_execute():
     import pandas
diff --git a/tests/misc/test_cursor.py b/tests/misc/test_cursor.py
index ddb86db5c..2ac6ff7a3 100644
--- a/tests/misc/test_cursor.py
+++ b/tests/misc/test_cursor.py
@@ -185,6 +185,15 @@ def test_execute_unsupported_syntax_error():
     with pytest.raises(UnsupportedSyntaxError):
         cursor.execute("SELECT * FROM table; SELECT * FROM table2", params=[1])
 
+def test_non_tabular_result():
+    cursor = setup_function()
+    cursor.execute("SET @name = 'tim'")
+    cursor.fetchall()
+
+def test_limit():
+    cursor = setup_function()
+    dataset = cursor.execute_to_arrow("SELECT * FROM $planets", limit=3)
+    assert dataset.num_rows == 3
 
 if __name__ == "__main__":  # pragma: no cover
     from tests.tools import run_tests
diff --git a/tests/plan_optimization/test_optimizations_invoked.py b/tests/plan_optimization/test_optimizations_invoked.py
index cda3d4c8d..a8e686582 100644
--- a/tests/plan_optimization/test_optimizations_invoked.py
+++ b/tests/plan_optimization/test_optimizations_invoked.py
@@ -19,7 +19,9 @@
     ("SELECT * FROM $planets WHERE id = 4 + 4", "optimization_constant_fold_expression"),
     ("SELECT * FROM $planets WHERE id * 0 = 1", "optimization_constant_fold_reduce"),
     ("SELECT id ^ 1 = 1 FROM $planets LIMIT 10", "optimization_limit_pushdown"),
-    ("SELECT name FROM $astronauts WHERE name = 'Neil A. Armstrong'", "optimization_predicate_pushdown")
+    ("SELECT name FROM $astronauts WHERE name = 'Neil A. Armstrong'", "optimization_predicate_pushdown"),
+    ("SELECT name FROM $planets WHERE name LIKE '%'", "optimization_constant_fold_reduce"),  # rewritten to `name is not null`
+    ("SELECT name FROM $planets WHERE name ILIKE '%'", "optimization_constant_fold_reduce"),  # rewritten to `name is not null`
 ]
 # fmt:on
diff --git a/tests/sql_battery/test_shapes_and_errors_battery.py b/tests/sql_battery/test_shapes_and_errors_battery.py
index d7c04d3ca..08f6ed5b9 100644
--- a/tests/sql_battery/test_shapes_and_errors_battery.py
+++ b/tests/sql_battery/test_shapes_and_errors_battery.py
@@ -257,6 +257,7 @@
     ("SELECT * FROM $satellites WHERE name != 'Calypso'", 176, 8, None),
     ("SELECT * FROM $satellites WHERE name = '********'", 0, 8, None),
     ("SELECT * FROM $satellites WHERE name LIKE '_a_y_s_'", 1, 8, None),
+    ("SELECT * FROM $satellites WHERE name LIKE 'Cal%%'", 4, 8, None),
     ("SELECT * FROM $satellites WHERE name LIKE 'Cal%'", 4, 8, None),
     ("SELECT * FROM $satellites WHERE name like 'Cal%'", 4, 8, None),
     ("SELECT * FROM $satellites WHERE name ILIKE '_a_y_s_'", 1, 8, None),
@@ -1125,6 +1126,7 @@
     ("SELECT name, SEARCH(birth_place, 'Italy') FROM $astronauts", 357, 2, None),
     ("SELECT name, birth_place FROM $astronauts WHERE SEARCH(birth_place, 'Italy')", 1, 2, None),
     ("SELECT name, birth_place FROM $astronauts WHERE SEARCH(birth_place, 'Rome')", 1, 2, None),
+    ("SELECT SEARCH($satellites.name, 'a') FROM $planets LEFT JOIN $satellites ON $planets.id = $satellites.planetId", 179, 1, None),
 
     ("SELECT birth_date FROM $astronauts WHERE EXTRACT(year FROM birth_date) < 1930;", 14, 1, None),
     ("SELECT EXTRACT(month FROM birth_date) FROM $astronauts", 357, 1, None),
@@ -1474,6 +1476,7 @@
     ("SELECT p1.name AS planet1_name, p2.name AS planet2_name, p3.name AS planet3_name, p4.name AS planet4_name, p5.name AS planet5_name, p6.name AS planet6_name, p7.name AS planet7_name, p8.name AS planet8_name, p9.name AS planet9_name, p10.name AS planet10_name, p1.diameter AS planet1_diameter, p2.gravity AS planet2_gravity, p3.orbitalPeriod AS planet3_orbitalPeriod, p4.numberOfMoons AS planet4_numberOfMoons, p5.meanTemperature AS planet5_meanTemperature FROM $planets p1 JOIN $planets p2 ON p1.id = p2.id JOIN $planets p3 ON p1.id = p3.id JOIN $planets p4 ON p1.id = p4.id JOIN $planets p5 ON p1.id = p5.id JOIN $planets p6 ON p1.id = p6.id JOIN $planets p7 ON p1.id = p7.id JOIN $planets p8 ON p1.id = p8.id JOIN $planets p9 ON p1.id = p9.id JOIN $planets p10 ON p1.id = p10.id WHERE p1.diameter > 10000 ORDER BY p1.name, p2.name, p3.name, p4.name, p5.name;", 6, 15, None),
     ("SELECT mission, LIST(name) FROM $missions INNER JOIN (SELECT * FROM $astronauts CROSS JOIN UNNEST(missions) AS mission) AS astronauts ON Mission = mission GROUP BY mission", 16, 2, None),
+    ("SELECT alma_matered FROM (SELECT alma_mater FROM $astronauts CROSS JOIN $satellites) AS bulked CROSS JOIN UNNEST(alma_mater) AS alma_matered", 120537, 1, None),
 
     # virtual dataset doesn't exist
     ("SELECT * FROM $RomanGods", None, None, DatasetNotFoundError),
diff --git a/tests/sql_battery/tests/system.run_tests b/tests/sql_battery/tests/system.run_tests
index d255bf720..19c4c6f9b 100644
--- a/tests/sql_battery/tests/system.run_tests
+++ b/tests/sql_battery/tests/system.run_tests
@@ -1,6 +1,7 @@
 SELECT version();
 SELECT connection_id() AS pid;
 SHOW VARIABLES;
+SET @name = 'bob';
 
 # SELECT * FROM information_schema.tables;
 # SELECT * FROM information_schema.views;
\ No newline at end of file
diff --git a/tests/sql_battery/tests/v2_planner.run_tests b/tests/sql_battery/tests/v2_planner.run_tests
index f28658566..1faec341e 100644
--- a/tests/sql_battery/tests/v2_planner.run_tests
+++ b/tests/sql_battery/tests/v2_planner.run_tests
@@ -30,4 +30,79 @@ EXPLAIN ANALYZE FORMAT JSON SELECT * FROM $planets AS a INNER JOIN (SELECT id FR
 SELECT DISTINCT ON (planetId) planetId, name FROM $satellites;
 
 # CONDITIONS IN AGGREGATES
-SELECT SUM(DISTINCT id ORDER BY id) FROM $planets
\ No newline at end of file
+SELECT SUM(DISTINCT id ORDER BY id) FROM $planets
+
+# INNER JOIN with FULL OUTER JOIN
+SELECT * FROM $planets AS p1 INNER JOIN $planets AS p2 ON p1.id = p2.id FULL OUTER JOIN $planets AS p3 ON p1.id = p3.id;
+
+# INNER JOIN with LEFT OUTER JOIN
+SELECT * FROM $planets AS p1 INNER JOIN $planets AS p2 ON p1.id = p2.id LEFT OUTER JOIN $planets AS p3 ON p1.id = p3.id;
+
+# INNER JOIN with RIGHT OUTER JOIN
+SELECT * FROM $planets AS p1 INNER JOIN $planets AS p2 ON p1.id = p2.id RIGHT OUTER JOIN $planets AS p3 ON p1.id = p3.id;
+
+# INNER JOIN with NATURAL JOIN
+SELECT * FROM $planets AS p1 INNER JOIN $planets AS p2 ON p1.id = p2.id NATURAL JOIN $planets AS p3;
+
+# INNER JOIN with LEFT ANTI JOIN
+SELECT * FROM $planets AS p1 INNER JOIN $planets AS p2 ON p1.id = p2.id LEFT ANTI JOIN $planets AS p3 ON p1.id = p3.id;
+
+# INNER JOIN with LEFT SEMI JOIN
+SELECT * FROM $planets AS p1 INNER JOIN $planets AS p2 ON p1.id = p2.id LEFT SEMI JOIN $planets AS p3 ON p1.id = p3.id;
+
+# FULL OUTER JOIN with LEFT OUTER JOIN
+SELECT * FROM $planets AS p1 FULL OUTER JOIN $planets AS p2 ON p1.id = p2.id LEFT OUTER JOIN $planets AS p3 ON p1.id = p3.id;
+
+# FULL OUTER JOIN with RIGHT OUTER JOIN
+SELECT * FROM $planets AS p1 FULL OUTER JOIN $planets AS p2 ON p1.id = p2.id RIGHT OUTER JOIN $planets AS p3 ON p1.id = p3.id;
+
+# FULL OUTER JOIN with NATURAL JOIN
+SELECT * FROM $planets AS p1 FULL OUTER JOIN $planets AS p2 ON p1.id = p2.id NATURAL JOIN $planets AS p3;
+
+# FULL OUTER JOIN with LEFT ANTI JOIN
+SELECT * FROM $planets AS p1 FULL OUTER JOIN $planets AS p2 ON p1.id = p2.id LEFT ANTI JOIN $planets AS p3 ON p1.id = p3.id;
+
+# FULL OUTER JOIN with LEFT SEMI JOIN
+SELECT * FROM $planets AS p1 FULL OUTER JOIN $planets AS p2 ON p1.id = p2.id LEFT SEMI JOIN $planets AS p3 ON p1.id = p3.id;
+
+# LEFT OUTER JOIN with RIGHT OUTER JOIN
+SELECT * FROM $planets AS p1 LEFT OUTER JOIN $planets AS p2 ON p1.id = p2.id RIGHT OUTER JOIN $planets AS p3 ON p1.id = p3.id;
+
+# LEFT OUTER JOIN with NATURAL JOIN
+SELECT * FROM $planets AS p1 LEFT OUTER JOIN $planets AS p2 ON p1.id = p2.id NATURAL JOIN $planets AS p3;
+
+# LEFT OUTER JOIN with LEFT ANTI JOIN
+SELECT * FROM $planets AS p1 LEFT OUTER JOIN $planets AS p2 ON p1.id = p2.id LEFT ANTI JOIN $planets AS p3 ON p1.id = p3.id;
+
+# LEFT OUTER JOIN with LEFT SEMI JOIN
+SELECT * FROM $planets AS p1 LEFT OUTER JOIN $planets AS p2 ON p1.id = p2.id LEFT SEMI JOIN $planets AS p3 ON p1.id = p3.id;
+
+# RIGHT OUTER JOIN with NATURAL JOIN
+SELECT * FROM $planets AS p1 RIGHT OUTER JOIN $planets AS p2 ON p1.id = p2.id NATURAL JOIN $planets AS p3;
+
+# RIGHT OUTER JOIN with LEFT ANTI JOIN
+SELECT * FROM $planets AS p1 RIGHT OUTER JOIN $planets AS p2 ON p1.id = p2.id LEFT ANTI JOIN $planets AS p3 ON p1.id = p3.id;
+
+# RIGHT OUTER JOIN with LEFT SEMI JOIN
+SELECT * FROM $planets AS p1 RIGHT OUTER JOIN $planets AS p2 ON p1.id = p2.id LEFT SEMI JOIN $planets AS p3 ON p1.id = p3.id;
+
+# LEFT ANTI JOIN with LEFT SEMI JOIN
+SELECT * FROM $planets AS p1 LEFT ANTI JOIN $planets AS p2 ON p1.id = p2.id LEFT SEMI JOIN $planets AS p3 ON p1.id = p3.id;
+
+# INNER JOIN with INNER JOIN
+SELECT * FROM $planets AS p1 INNER JOIN $planets AS p2 ON p1.id = p2.id INNER JOIN $planets AS p3 ON p1.id = p3.id;
+
+# FULL OUTER JOIN with FULL OUTER JOIN
+SELECT * FROM $planets AS p1 FULL OUTER JOIN $planets AS p2 ON p1.id = p2.id FULL OUTER JOIN $planets AS p3 ON p1.id = p3.id;
+
+# LEFT OUTER JOIN with LEFT OUTER JOIN
+SELECT * FROM $planets AS p1 LEFT OUTER JOIN $planets AS p2 ON p1.id = p2.id LEFT OUTER JOIN $planets AS p3 ON p1.id = p3.id;
+
+# RIGHT OUTER JOIN with RIGHT OUTER JOIN
+SELECT * FROM $planets AS p1 RIGHT OUTER JOIN $planets AS p2 ON p1.id = p2.id RIGHT OUTER JOIN $planets AS p3 ON p1.id = p3.id;
+
+# LEFT ANTI JOIN with LEFT ANTI JOIN
+SELECT * FROM $planets AS p1 LEFT ANTI JOIN $planets AS p2 ON p1.id = p2.id LEFT ANTI JOIN $planets AS p3 ON p1.id = p3.id;
+
+# LEFT SEMI JOIN with LEFT SEMI JOIN
+SELECT * FROM $planets AS p1 LEFT SEMI JOIN $planets AS p2 ON p1.id = p2.id LEFT SEMI JOIN $planets AS p3 ON p1.id = p3.id;
\ No newline at end of file
diff --git a/tests/storage/test_cache_memcached.py b/tests/storage/test_cache_memcached.py
index f73d6a227..8f0e52404 100644
--- a/tests/storage/test_cache_memcached.py
+++ b/tests/storage/test_cache_memcached.py
@@ -6,6 +6,7 @@
 
 import os
 import sys
+import pytest
 
 os.environ["OPTERYX_DEBUG"] = "1"
@@ -128,6 +129,15 @@ def test_memcache_threaded():
         assert result == load, f"Post-thread check failed: {result} != {load}"
 
 
+def test_skip_on_error():
+    from opteryx.managers.cache import MemcachedCache
+    cache = MemcachedCache()
+    cache.set(b"key", b"value")
+    assert cache.get(b"key") == b"value"
+    cache._consecutive_failures = 10
+    assert cache.get(b"key") is None
+
+
 if __name__ == "__main__":  # pragma: no cover
     from tests.tools import run_tests
diff --git a/tests/storage/test_cache_redis.py b/tests/storage/test_cache_redis.py
index 5fdd6de2d..dc3f1f1e4 100644
--- a/tests/storage/test_cache_redis.py
+++ b/tests/storage/test_cache_redis.py
@@ -6,6 +6,7 @@
 
 import os
 import sys
+import pytest
 
 sys.path.insert(1, os.path.join(sys.path[0], "../.."))
@@ -46,6 +47,23 @@ def test_redis_cache():
     assert stats.get("cache_misses", 0) == 0, stats
 
 
+def test_invalid_config():
+    from opteryx.managers.cache import RedisCache
+
+    with pytest.raises(Exception):
+        RedisCache(server="")
+
+    v = RedisCache(server=None)
+    assert v._consecutive_failures == 10
+
+def test_skip_on_error():
+    from opteryx.managers.cache import RedisCache
+    cache = RedisCache()
+    cache.set(b"key", b"value")
+    assert cache.get(b"key") == b"value"
+    cache._consecutive_failures = 10
+    assert cache.get(b"key") is None
+
 if __name__ == "__main__":  # pragma: no cover
     from tests.tools import run_tests
diff --git a/tests/storage/test_cache_valkey.py b/tests/storage/test_cache_valkey.py
index fe7c6f0da..52ba187ab 100644
--- a/tests/storage/test_cache_valkey.py
+++ b/tests/storage/test_cache_valkey.py
@@ -6,6 +6,7 @@
 
 import os
 import sys
+import pytest
 
 sys.path.insert(1, os.path.join(sys.path[0], "../.."))
@@ -45,6 +46,22 @@ def test_valkey_cache():
     assert stats.get("remote_cache_hits", 0) >= stats["blobs_read"], stats
     assert stats.get("cache_misses", 0) == 0, stats
 
+def test_invalid_config():
+    from opteryx.managers.cache import ValkeyCache
+
+    with pytest.raises(Exception):
+        ValkeyCache(server="")
+
+    v = ValkeyCache(server=None)
+    assert v._consecutive_failures == 10
+
+def test_skip_on_error():
+    from opteryx.managers.cache import ValkeyCache
+    cache = ValkeyCache()
+    cache.set(b"key", b"value")
+    assert cache.get(b"key") == b"value"
+    cache._consecutive_failures = 10
+    assert cache.get(b"key") is None
 
 if __name__ == "__main__":  # pragma: no cover
     from tests.tools import run_tests

From 436ee2258135e14498ab1d3097d6a769fd71b551 Mon Sep 17 00:00:00 2001
From: XB500
Date: Mon, 23 Dec 2024 12:27:19 +0000
Subject: [PATCH 2/2] Opteryx Version 0.19.0-alpha.912

---
 opteryx/__version__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/opteryx/__version__.py b/opteryx/__version__.py
index a17ea2163..fed3ecee2 100644
--- a/opteryx/__version__.py
+++ b/opteryx/__version__.py
@@ -1,4 +1,4 @@
-__build__ = 910
+__build__ = 912
 
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.