Skip to content

Commit

Permalink
Browse files (browse the repository at this point in the history)
  • Loading branch information
joocer committed Dec 24, 2024
1 parent 08b39af commit 53dd797
Show file tree
Hide file tree
Showing 8 changed files with 14 additions and 8 deletions.
2 changes: 2 additions & 0 deletions opteryx/operators/cross_join_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,8 @@ class CrossJoinNode(JoinNode):
Implements a SQL CROSS JOIN
"""

join_type = "cross"

def __init__(self, properties: QueryProperties, **parameters):
JoinNode.__init__(self, properties=properties, **parameters)

Expand Down
4 changes: 3 additions & 1 deletion opteryx/operators/filter_join_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,9 @@ def execute(self, morsel: pyarrow.Table, join_leg: str) -> pyarrow.Table:
if morsel == EOS:
right_relation = pyarrow.concat_tables(self.right_buffer, promote_options="none")
self.right_buffer.clear()
non_null_right_values = right_relation.select(self.right_columns).drop_null().itercolumns()
non_null_right_values = (
right_relation.select(self.right_columns).drop_null().itercolumns()
)
self.right_hash_set = set(map(hash, zip(*non_null_right_values)))
else:
self.right_buffer.append(morsel)
Expand Down
1 change: 0 additions & 1 deletion opteryx/operators/inner_join_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ def inner_join_with_preprocessed_left_side(left_relation, right_relation, join_c


class InnerJoinNode(JoinNode):

join_type = "inner"

def __init__(self, properties: QueryProperties, **parameters):
Expand Down
1 change: 0 additions & 1 deletion opteryx/operators/inner_join_node_single.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,6 @@ def inner_join_with_preprocessed_left_side(left_relation, right_relation, join_c


class InnerJoinSingleNode(JoinNode):

join_type = "inner"

def __init__(self, properties: QueryProperties, **parameters):
Expand Down
1 change: 0 additions & 1 deletion opteryx/operators/outer_join_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,6 @@ def config(self) -> str: # pragma: no cover
return f"{self.join_type.upper()}"

def execute(self, morsel: pyarrow.Table, join_leg: str) -> pyarrow.Table:
print("OuterJoinNode.execute", join_leg, type(morsel))
if join_leg == "left":
if morsel == EOS:
self.left_relation = pyarrow.concat_tables(self.left_buffer, promote_options="none")
Expand Down
1 change: 1 addition & 0 deletions opteryx/utils/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def _replacer(match):
"OUTER",
"RIGHT",
"SELECT",
"SEMI",
"SET",
"SHOW",
"SINCE",
Expand Down
2 changes: 1 addition & 1 deletion opteryx/utils/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def split_sql_statements(sql: str) -> List[str]:
def regex_match_any(
arr: numpy.ndarray,
patterns: List[str],
flags: int = re.NOFLAG,
flags: int = 0,
invert: bool = False,
) -> numpy.ndarray:
"""
Expand Down
10 changes: 7 additions & 3 deletions tests/fuzzing/test_sql_fuzzer_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def generate_condition(table, columns):
return f"{table}.{where_column.name} {where_operator} {where_value}"

def generate_random_sql_join(columns1, table1, columns2, table2) -> str:
join_type = random.choice(["JOIN", "INNER JOIN", "LEFT JOIN", "RIGHT JOIN", "FULL OUTER JOIN"])
join_type = random.choice(["JOIN", "INNER JOIN", "LEFT JOIN", "LEFT OUTER JOIN", "RIGHT JOIN", "FULL OUTER JOIN", "LEFT ANTI JOIN", "LEFT SEMI JOIN"])

last_value = -1
this_value = random.random()
Expand All @@ -70,7 +70,11 @@ def generate_random_sql_join(columns1, table1, columns2, table2) -> str:
conditions.append(condition)

join_condition = " AND ".join(conditions)
selected_columns = [f"{table1}.{col.name}" for col in columns1 if random.random() < 0.2] + [f"{table2}.{col.name}" for col in columns2 if random.random() < 0.2]

if join_type in ("LEFT ANTI JOIN", "LEFT SEMI JOIN"):
selected_columns = [f"{table1}.{col.name}" for col in columns1 if random.random() < 0.2]
else:
selected_columns = [f"{table1}.{col.name}" for col in columns1 if random.random() < 0.2] + [f"{table2}.{col.name}" for col in columns2 if random.random() < 0.2]
if len(selected_columns) == 0:
selected_columns = ["*"]
select_clause = "SELECT " + ", ".join(selected_columns)
Expand All @@ -86,7 +90,7 @@ def generate_random_sql_join(columns1, table1, columns2, table2) -> str:
linking_condition = random.choice(["AND", "OR", "AND NOT"])
where_clause += f" {linking_condition} {generate_condition(table1, columns1)}"

if random.random() < 0.3:
if join_type not in ("LEFT ANTI JOIN", "LEFT SEMI JOIN") and random.random() < 0.3:
if where_clause == "--":
where_clause = " WHERE "
else:
Expand Down

0 comments on commit 53dd797

Please sign in to comment.