
Commit 1a60b45
fix: Use tuples for pkey check (#446)
https://github.com/MeltanoLabs/target-postgres/blob/d07b41583e8ff77ee770a0d40779ea9485772461/target_postgres/sinks.py#L161-L169

The code linked above uses string concatenation to check for duplicate primary key values. This is problematic because the two records below concatenate to the same string ("ABC"), so they are treated as the same record and the first one is silently dropped:

Record 1:
- Primary Key 1: AB
- Primary Key 2: C

Record 2:
- Primary Key 1: A
- Primary Key 2: BC

Keying on a tuple of the primary key values instead preserves the field boundaries and mitigates this.

Co-authored-by: Edgar Ramírez Mondragón <16805946+edgarrmondragon@users.noreply.github.com>
1 parent bbc2a63 commit 1a60b45
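
A minimal, self-contained sketch of the collision described in the commit message. The field names pk1/pk2 and the record values are hypothetical, but the two key-building expressions mirror the before and after lines in the diff below:

# Hypothetical records illustrating the duplicate-key collision.
record_1 = {"pk1": "AB", "pk2": "C"}
record_2 = {"pk1": "A", "pk2": "BC"}
primary_keys = ["pk1", "pk2"]

# Old approach: string concatenation collapses distinct key combinations.
old_key_1 = "".join(str(record_1[key]) for key in primary_keys)
old_key_2 = "".join(str(record_2[key]) for key in primary_keys)
assert old_key_1 == old_key_2 == "ABC"  # collision: record_1 would be dropped

# New approach: a tuple keeps each key property as a separate element.
new_key_1 = tuple(record_1[key] for key in primary_keys)
new_key_2 = tuple(record_2[key] for key in primary_keys)
assert new_key_1 != new_key_2  # ("AB", "C") != ("A", "BC")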

1 file changed: +3 -3 lines changed

target_postgres/sinks.py

Lines changed: 3 additions & 3 deletions
@@ -157,15 +157,15 @@ def bulk_insert_records(  # type: ignore[override]
         data_to_insert: list[dict[str, t.Any]] = []

         if self.append_only is False:
-            insert_records: dict[str, dict] = {}  # pk : record
+            insert_records: dict[tuple, dict] = {}  # pk tuple: record
             for record in records:
                 insert_record = {
                     column.name: record.get(column.name) for column in columns
                 }
                 # No need to check for a KeyError here because the SDK already
                 # guarantees that all key properties exist in the record.
-                primary_key_value = "".join([str(record[key]) for key in primary_keys])
-                insert_records[primary_key_value] = insert_record
+                primary_key_tuple = tuple(record[key] for key in primary_keys)
+                insert_records[primary_key_tuple] = insert_record
             data_to_insert = list(insert_records.values())
         else:
             for record in records:
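
For illustration, a runnable sketch of the deduplication loop after the change, using hypothetical records and primary_keys (not code from the repository). The dict keyed by the primary-key tuple keeps last-write-wins semantics per composite key while no longer conflating distinct keys:

records = [
    {"pk1": "AB", "pk2": "C", "value": 1},
    {"pk1": "A", "pk2": "BC", "value": 2},
    {"pk1": "A", "pk2": "BC", "value": 3},  # true duplicate: overwrites value 2
]
primary_keys = ["pk1", "pk2"]

insert_records: dict[tuple, dict] = {}  # pk tuple: record
for record in records:
    primary_key_tuple = tuple(record[key] for key in primary_keys)
    insert_records[primary_key_tuple] = record

data_to_insert = list(insert_records.values())
assert len(data_to_insert) == 2  # both distinct composite keys survive
assert insert_records[("A", "BC")]["value"] == 3  # last occurrence wins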

0 commit comments