|
26 | 26 | Fetch,
|
27 | 27 | FetchShuffle,
|
28 | 28 | execute,
|
| 29 | + MapReduceOperand, |
| 30 | + OperandStage, |
29 | 31 | )
|
30 | 32 | from ....metrics import Metrics
|
31 | 33 | from ....optimization.physical import optimize
|
@@ -424,6 +426,28 @@ async def set_chunks_meta():
|
424 | 426 | # set result data size
|
425 | 427 | self.result.data_size = result_data_size
|
426 | 428 |
|
async def push_mapper_data(self, chunk_graph):
    """Ask reducer bands to fetch this subtask's mapper results in advance.

    For every result chunk of ``chunk_graph`` whose key is a tuple, a
    ``fetch`` call is prepared on the storage API of the band looked up
    for that chunk, with this subtask's band (``self._band``) passed as
    the remote source. Prepared calls are grouped per destination band,
    submitted as one batch per band, and all batches are awaited together.

    NOTE: the reducer-to-band mapping is not populated yet (see the TODO
    below), so the method currently returns before reading ``chunk_graph``.

    Parameters
    ----------
    chunk_graph
        Graph whose ``result_chunks`` are inspected.
    """
    # TODO: use task api to get reducer bands
    reducer_idx_to_band = {}
    if not reducer_idx_to_band:
        return

    # Loop-invariant: the band the data is fetched from.
    remote_address, remote_band_name = self._band[0], self._band[1]

    # One StorageAPI per destination band, so fetches aimed at the same band
    # really end up in the same batch (a fresh API object per chunk would
    # only group if StorageAPI hashes by value).
    # Assumes address and band name are hashable (presumably strings).
    band_to_storage_api = {}
    band_to_fetch_tasks = defaultdict(list)
    for result_chunk in chunk_graph.result_chunks:
        key = result_chunk.key
        if not isinstance(key, tuple):
            # only mapper keys are tuples; skip anything else before indexing
            continue
        # second element of a mapper key is used as the reducer index
        reducer_idx = key[1]
        address, band_name = reducer_idx_to_band[reducer_idx]
        band = (address, band_name)
        storage_api = band_to_storage_api.get(band)
        if storage_api is None:
            storage_api = StorageAPI(address, self._session_id, band_name)
            band_to_storage_api[band] = storage_api
        band_to_fetch_tasks[band].append(
            storage_api.fetch.delay(
                key, band_name=remote_band_name, remote_address=remote_address
            )
        )

    batch_tasks = [
        asyncio.create_task(band_to_storage_api[band].fetch.batch(*tasks))
        for band, tasks in band_to_fetch_tasks.items()
    ]
    await asyncio.gather(*batch_tasks)
| 450 | + |
427 | 451 | async def done(self):
|
428 | 452 | if self.result.status == SubtaskStatus.running:
|
429 | 453 | self.result.status = SubtaskStatus.succeeded
|
@@ -495,6 +519,8 @@ async def run(self):
|
495 | 519 | await self._unpin_data(input_keys)
|
496 | 520 |
|
497 | 521 | await self.done()
|
| 522 | + # after done, we push mapper data to reducers in advance. |
| 523 | + await self.push_mapper_data(chunk_graph) |
498 | 524 | if self.result.status == SubtaskStatus.succeeded:
|
499 | 525 | cost_time_secs = (
|
500 | 526 | self.result.execution_end_time - self.result.execution_start_time
|
|
0 commit comments