Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

@W-17357226: Fix for issue where zero threshold defaulted to select #3853

Merged
merged 1 commit into from
Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cumulusci/tasks/bulkdata/select_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ def annoy_post_process(
# Retrieve the corresponding record from the database
record = query_record_data[neighbor_index]
closest_record_id = record_to_id_map[tuple(record)]
if threshold and (neighbor_distances[idx] >= threshold):
if threshold is not None and (neighbor_distances[idx] >= threshold):
selected_records.append(None)
insertion_candidates.append(load_shaped_records[i])
else:
Expand Down Expand Up @@ -445,7 +445,7 @@ def levenshtein_post_process(
select_record, target_records, similarity_weights
)

if distance_threshold and match_distance > distance_threshold:
if distance_threshold is not None and match_distance > distance_threshold:
# Append load record for insertion if distance exceeds threshold
insertion_candidates.append(load_record)
selected_records.append(None)
Expand Down
189 changes: 187 additions & 2 deletions cumulusci/tasks/bulkdata/tests/test_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -1232,7 +1232,9 @@ def test_process_insert_records_failure(self, download_mock):
)

@mock.patch("cumulusci.tasks.bulkdata.step.download_file")
def test_select_records_similarity_strategy__insert_records(self, download_mock):
def test_select_records_similarity_strategy__insert_records__non_zero_threshold(
self, download_mock
):
# Set up mock context and BulkApiDmlOperation
context = mock.Mock()
# Add step with threshold
Expand Down Expand Up @@ -1325,6 +1327,102 @@ def test_select_records_similarity_strategy__insert_records(self, download_mock)
== 1
)

@mock.patch("cumulusci.tasks.bulkdata.step.download_file")
def test_select_records_similarity_strategy__insert_records__zero_threshold(
    self, download_mock
):
    """Similarity select through the Bulk API with an explicit threshold of 0.

    Three load records are fed to ``select_records`` while the mocked select
    query returns a single existing Contact. The assertions expect exactly
    three results: the existing id reported with ``created=False`` and the two
    ids from the mocked insert job reported with ``created=True``.
    """
    # Step under test; threshold=0 is the value this test is about.
    bulk_step = BulkApiDmlOperation(
        sobject="Contact",
        operation=DataOperationType.QUERY,
        api_options={"batch_size": 10, "update_key": "LastName"},
        context=mock.Mock(),
        fields=["Name", "Email"],
        selection_strategy=SelectStrategy.SIMILARITY,
        threshold=0,
    )

    # Canned Bulk API handles for the select query.
    bulk_step.bulk.endpoint = "https://test"
    bulk_step.bulk.create_query_job.return_value = "JOB"
    bulk_step.bulk.query.return_value = "BATCH"
    bulk_step.bulk.get_query_batch_result_ids.return_value = ["RESULT"]

    # First download: the select result, a JSON array holding one record.
    # Second download: the insert job result rows in CSV form.
    existing_payload = io.StringIO(
        """[{"Id":"003000000000001", "Name":"Jawad", "Email":"[email protected]"}]"""
    )
    inserted_payload = io.StringIO(
        "Id,Success,Created\n003000000000002,true,true\n003000000000003,true,true\n"
    )
    download_mock.side_effect = [existing_payload, inserted_payload]

    # Job polling is stubbed out to report success immediately.
    bulk_step._wait_for_job = mock.Mock(
        return_value=DataOperationJobResult(DataOperationStatus.SUCCESS, [], 0, 0)
    )

    load_rows = iter(
        [
            ["Jawad", "[email protected]"],
            ["Aditya", "[email protected]"],
            ["Tom", "[email protected]"],
        ]
    )

    # Stand-in for the nested insert operation created by the step module.
    nested_insert = mock.Mock(spec=BulkApiDmlOperation)
    nested_insert.start = mock.Mock()
    nested_insert.load_records = mock.Mock()
    nested_insert.end = mock.Mock()
    nested_insert.batch_ids = ["BATCH1"]
    nested_insert.bulk = mock.Mock()
    nested_insert.bulk.endpoint = "https://test"
    nested_insert.job_id = "JOB"

    # NOTE(review): the source indentation was lost; the result checks are
    # kept inside the patch context - confirm against the repository file.
    with mock.patch(
        "cumulusci.tasks.bulkdata.step.BulkApiDmlOperation",
        return_value=nested_insert,
    ):
        bulk_step.start()
        bulk_step.select_records(load_rows)
        bulk_step.end()

        results = list(bulk_step.get_results())

        # One result per input record.
        assert len(results) == 3

        # (record id, created flag) pairs, each expected exactly once.
        expected_outcomes = [
            ("003000000000001", False),
            ("003000000000002", True),
            ("003000000000003", True),
        ]
        for record_id, was_created in expected_outcomes:
            expected = DataOperationResult(
                id=record_id, success=True, error="", created=was_created
            )
            assert results.count(expected) == 1

@mock.patch("cumulusci.tasks.bulkdata.step.download_file")
def test_select_records_similarity_strategy__insert_records__no_select_records(
self, download_mock
Expand Down Expand Up @@ -2807,7 +2905,9 @@ def test_process_insert_records_failure(self):
mock_rest_api_dml_operation.end.assert_not_called()

@responses.activate
def test_select_records_similarity_strategy__insert_records(self):
def test_select_records_similarity_strategy__insert_records__non_zero_threshold(
self,
):
mock_describe_calls()
task = _make_task(
LoadData,
Expand Down Expand Up @@ -2891,6 +2991,91 @@ def test_select_records_similarity_strategy__insert_records(self):
== 1
)

@responses.activate
def test_select_records_similarity_strategy__insert_records__zero_threshold(self):
    """Similarity select through the REST API with an explicit threshold of 0.

    Three load records are fed to ``select_records`` while the mocked select
    call returns a single existing Contact. The assertions expect exactly
    three results: the existing id reported with ``created=False`` and the two
    ids from the mocked insert response reported with ``created=True``.
    """
    mock_describe_calls()
    load_task = _make_task(
        LoadData,
        {
            "options": {
                "database_url": "sqlite:///test.db",
                "mapping": "mapping.yml",
            }
        },
    )
    load_task.project_config.project__package__api_version = CURRENT_SF_API_VERSION
    load_task._init_task()

    # Step under test; threshold=0 is the value this test is about.
    rest_step = RestApiDmlOperation(
        sobject="Contact",
        operation=DataOperationType.UPSERT,
        api_options={"batch_size": 10},
        context=load_task,
        fields=["Name", "Email"],
        selection_strategy=SelectStrategy.SIMILARITY,
        threshold=0,
    )

    # Response handed back for the select call: one existing Contact.
    select_response = {
        "records": [
            {
                "Id": "003000000000001",
                "Name": "Jawad",
                "Email": "[email protected]",
            },
        ],
        "done": True,
    }

    # Response handed back for the follow-up insert call: two new records.
    insert_response = [
        {"id": "003000000000002", "success": True, "created": True},
        {"id": "003000000000003", "success": True, "created": True},
    ]

    # The two REST calls are answered in order: select first, then insert.
    rest_step.sf.restful = mock.Mock(side_effect=[select_response, insert_response])

    load_rows = iter(
        [
            ["Jawad", "[email protected]"],
            ["Aditya", "[email protected]"],
            ["Tom Cruise", "[email protected]"],
        ]
    )
    rest_step.start()
    rest_step.select_records(load_rows)
    rest_step.end()

    results = list(rest_step.get_results())

    # One result per input record.
    assert len(results) == 3

    # (record id, created flag) pairs, each expected exactly once.
    expected_outcomes = [
        ("003000000000001", False),
        ("003000000000002", True),
        ("003000000000003", True),
    ]
    for record_id, was_created in expected_outcomes:
        expected = DataOperationResult(
            id=record_id, success=True, error="", created=was_created
        )
        assert results.count(expected) == 1

@responses.activate
def test_insert_dml_operation__boolean_conversion(self):
mock_describe_calls()
Expand Down
Loading