|
1 | 1 | """Postgres target sink class, which handles writing streams."""
|
2 | 2 |
|
| 3 | +import csv |
3 | 4 | import uuid
|
4 |
| -from typing import Any, Dict, Iterable, List, Optional, Sequence, Union, cast |
| 5 | +from io import StringIO |
| 6 | +from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union, cast |
5 | 7 |
|
6 | 8 | import sqlalchemy as sa
|
7 | 9 | from pendulum import now
|
@@ -142,35 +144,35 @@ def bulk_insert_records( # type: ignore[override]
|
142 | 144 | True if table exists, False if not, None if unsure or undetectable.
|
143 | 145 | """
|
144 | 146 | columns = self.column_representation(schema)
|
145 |
| - insert: str = cast( |
146 |
| - str, |
147 |
| - self.generate_insert_statement( |
148 |
| - table.name, |
149 |
| - columns, |
150 |
| - ), |
151 |
| - ) |
152 |
| - self.logger.info("Inserting with SQL: %s", insert) |
| 147 | + copy_statement: str = self.generate_copy_statement(table.name, columns) |
| 148 | + self.logger.info("Inserting with SQL: %s", copy_statement) |
153 | 149 | # Only one record per PK, we want to take the last one
|
154 |
| - data_to_insert: List[Dict[str, Any]] = [] |
| 150 | + data_to_insert: Tuple[Tuple[Any]] = None |
155 | 151 |
|
156 | 152 | if self.append_only is False:
|
157 |
| - insert_records: Dict[str, Dict] = {} # pk : record |
| 153 | + copy_values: Dict[str, Tuple] = {} # pk : values |
158 | 154 | for record in records:
|
159 |
| - insert_record = {} |
160 |
| - for column in columns: |
161 |
| - insert_record[column.name] = record.get(column.name) |
| 155 | + values = tuple((record.get(column.name) for column in columns)) |
162 | 156 | # No need to check for a KeyError here because the SDK already
|
163 | 157 | # guarantees that all key properties exist in the record.
|
164 | 158 | primary_key_value = "".join([str(record[key]) for key in primary_keys])
|
165 |
| - insert_records[primary_key_value] = insert_record |
166 |
| - data_to_insert = list(insert_records.values()) |
| 159 | + copy_values[primary_key_value] = values |
| 160 | + data_to_insert = tuple(copy_values.values()) |
167 | 161 | else:
|
168 |
| - for record in records: |
169 |
| - insert_record = {} |
170 |
| - for column in columns: |
171 |
| - insert_record[column.name] = record.get(column.name) |
172 |
| - data_to_insert.append(insert_record) |
173 |
| - connection.execute(insert, data_to_insert) |
| 162 | + data_to_insert = [ |
| 163 | + tuple((record.get(column.name) for column in columns)) |
| 164 | + for record in records |
| 165 | + ] |
| 166 | + |
| 167 | + # Prepare a buffer with the values as csv. |
| 168 | + buffer = StringIO() |
| 169 | + writer = csv.writer(buffer) |
| 170 | + writer.writerows(data_to_insert) |
| 171 | + buffer.seek(0) |
| 172 | + |
| 173 | + with connection.connection.cursor() as cur: |
| 174 | + cur.copy_expert(sql=copy_statement, file=buffer) |
| 175 | + |
174 | 176 | return True
|
175 | 177 |
|
176 | 178 | def upsert(
|
|
0 commit comments