Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions import-automation/workflow/ingestion-helper/spanner_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,11 +565,11 @@ def seed_database(self):

def _seed(transaction: Transaction):
candidates = {
"StatisticalVariable": ["StatisticalVariable", ["Class"], spanner.COMMIT_TIMESTAMP],
"StatVarGroup": ["StatVarGroup", ["Class"], spanner.COMMIT_TIMESTAMP],
"StatVarObservation": ["StatVarObservation", ["Class"], spanner.COMMIT_TIMESTAMP],
"Topic": ["Topic", ["Class"], spanner.COMMIT_TIMESTAMP],
"c/g/Root": ["c/g/Root", ["StatVarGroup"], spanner.COMMIT_TIMESTAMP],
"StatisticalVariable": ["StatisticalVariable", "StatisticalVariable", "StatisticalVariable", ["Class"], spanner.COMMIT_TIMESTAMP],
"StatVarGroup": ["StatVarGroup", "StatVarGroup", "StatVarGroup", ["Class"], spanner.COMMIT_TIMESTAMP],
"StatVarObservation": ["StatVarObservation", "StatVarObservation", "StatVarObservation", ["Class"], spanner.COMMIT_TIMESTAMP],
"Topic": ["Topic", "Topic", "Topic", ["Class"], spanner.COMMIT_TIMESTAMP],
"c/g/Root": ["c/g/Root", "c/g/Root", "c/g/Root", ["StatVarGroup"], spanner.COMMIT_TIMESTAMP],
}
Comment on lines 567 to 573
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The candidates dictionary contains a lot of repeated strings. To improve maintainability and reduce redundancy, you could generate this dictionary programmatically. This would make it easier to add or modify fields in the future.

By defining the unique parts (like the types) separately and then building the full dictionary with a comprehension, the code becomes cleaner and less error-prone for future modifications.

Suggested change
candidates = {
"StatisticalVariable": ["StatisticalVariable", ["Class"], spanner.COMMIT_TIMESTAMP],
"StatVarGroup": ["StatVarGroup", ["Class"], spanner.COMMIT_TIMESTAMP],
"StatVarObservation": ["StatVarObservation", ["Class"], spanner.COMMIT_TIMESTAMP],
"Topic": ["Topic", ["Class"], spanner.COMMIT_TIMESTAMP],
"c/g/Root": ["c/g/Root", ["StatVarGroup"], spanner.COMMIT_TIMESTAMP],
"StatisticalVariable": ["StatisticalVariable", "StatisticalVariable", "StatisticalVariable", ["Class"], spanner.COMMIT_TIMESTAMP],
"StatVarGroup": ["StatVarGroup", "StatVarGroup", "StatVarGroup", ["Class"], spanner.COMMIT_TIMESTAMP],
"StatVarObservation": ["StatVarObservation", "StatVarObservation", "StatVarObservation", ["Class"], spanner.COMMIT_TIMESTAMP],
"Topic": ["Topic", "Topic", "Topic", ["Class"], spanner.COMMIT_TIMESTAMP],
"c/g/Root": ["c/g/Root", "c/g/Root", "c/g/Root", ["StatVarGroup"], spanner.COMMIT_TIMESTAMP],
}
base_nodes = {
"StatisticalVariable": ["Class"],
"StatVarGroup": ["Class"],
"StatVarObservation": ["Class"],
"Topic": ["Class"],
"c/g/Root": ["StatVarGroup"],
}
candidates = {
subj: [subj, subj, subj, types, spanner.COMMIT_TIMESTAMP]
for subj, types in base_nodes.items()
}

subjects = list(candidates.keys())
sql = "SELECT subject_id FROM Node WHERE subject_id IN UNNEST(@subjects)"
Expand All @@ -582,7 +582,7 @@ def _seed(transaction: Transaction):
values = [candidates[subj] for subj in subjects if subj not in existing]

if values:
columns = ["subject_id", "types", "last_update_timestamp"]
columns = ["subject_id", "name", "value", "types", "last_update_timestamp"]
transaction.insert(table="Node", columns=columns, values=values)

try:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -246,10 +246,15 @@ def run_in_transaction_side_effect(callback, *args, **kwargs):
mock_transaction.insert.assert_called_once()
args, kwargs = mock_transaction.insert.call_args
self.assertEqual(kwargs['table'], 'Node')
self.assertEqual(kwargs['columns'], ["subject_id", "name", "value", "types", "last_update_timestamp"])
self.assertEqual(len(kwargs['values']), 5)
expected_subjects = ["StatisticalVariable", "StatVarGroup", "StatVarObservation", "Topic", "c/g/Root"]
actual_subjects = [val[0] for val in kwargs['values']]
actual_names = [val[1] for val in kwargs['values']]
actual_values = [val[2] for val in kwargs['values']]
self.assertEqual(actual_subjects, expected_subjects)
self.assertEqual(actual_names, expected_subjects)
self.assertEqual(actual_values, expected_subjects)
Comment on lines 252 to +257
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

These assertions can be made more concise and robust by iterating through the values and checking the properties for each row within a loop. This makes the test easier to read and extend if more fields need to be checked against the subject_id in the future. It also avoids iterating over the list of values multiple times.

Suggested change
actual_subjects = [val[0] for val in kwargs['values']]
actual_names = [val[1] for val in kwargs['values']]
actual_values = [val[2] for val in kwargs['values']]
self.assertEqual(actual_subjects, expected_subjects)
self.assertEqual(actual_names, expected_subjects)
self.assertEqual(actual_values, expected_subjects)
actual_subjects = []
for val in kwargs['values']:
subject_id, name, value, _, _ = val
self.assertEqual(name, subject_id)
self.assertEqual(value, subject_id)
actual_subjects.append(subject_id)
self.assertEqual(actual_subjects, expected_subjects)


@patch('google.cloud.spanner.Client')
def test_seed_database_already_exists(self, mock_spanner_client):
Expand Down
Loading