|
| 1 | +# |
| 2 | +# Licensed to the Apache Software Foundation (ASF) under one or more |
| 3 | +# contributor license agreements. See the NOTICE file distributed with |
| 4 | +# this work for additional information regarding copyright ownership. |
| 5 | +# The ASF licenses this file to You under the Apache License, Version 2.0 |
| 6 | +# (the "License"); you may not use this file except in compliance with |
| 7 | +# the License. You may obtain a copy of the License at |
| 8 | +# |
| 9 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +# |
| 11 | +# Unless required by applicable law or agreed to in writing, software |
| 12 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | +# See the License for the specific language governing permissions and |
| 15 | +# limitations under the License. |
| 16 | +# |
| 17 | +from contextlib import contextmanager |
| 18 | +from typing import Generator, NoReturn, List, Callable |
| 19 | + |
| 20 | +from pyspark.errors import PySparkException |
| 21 | +from pyspark.sql.connect.catalog import Catalog |
| 22 | +from pyspark.sql.connect.conf import RuntimeConf |
| 23 | +from pyspark.sql.connect.dataframe import DataFrame |
| 24 | +from pyspark.sql.connect.udf import UDFRegistration |
| 25 | + |
# Top-level error class; each blocked method appends its own sub-class to it
# when building the error it raises.
ERROR_CLASS = "SESSION_MUTATION_IN_DECLARATIVE_PIPELINE"

# pyspark methods that should be blocked from executing in python pipeline definition files.
# Each row below is (owning class, method name, error sub-class) and is expanded into a dict
# with the keys "class", "method" and "error_sub_class".
BLOCKED_METHODS: List = [
    {"class": owner, "method": method, "error_sub_class": sub_class}
    for owner, method, sub_class in (
        (RuntimeConf, "set", "SET_RUNTIME_CONF"),
        (Catalog, "setCurrentCatalog", "SET_CURRENT_CATALOG"),
        (Catalog, "setCurrentDatabase", "SET_CURRENT_DATABASE"),
        (Catalog, "dropTempView", "DROP_TEMP_VIEW"),
        (Catalog, "dropGlobalTempView", "DROP_GLOBAL_TEMP_VIEW"),
        (DataFrame, "createTempView", "CREATE_TEMP_VIEW"),
        (DataFrame, "createOrReplaceTempView", "CREATE_OR_REPLACE_TEMP_VIEW"),
        (DataFrame, "createGlobalTempView", "CREATE_GLOBAL_TEMP_VIEW"),
        (DataFrame, "createOrReplaceGlobalTempView", "CREATE_OR_REPLACE_GLOBAL_TEMP_VIEW"),
        (UDFRegistration, "register", "REGISTER_UDF"),
        (UDFRegistration, "registerJavaFunction", "REGISTER_JAVA_UDF"),
        (UDFRegistration, "registerJavaUDAF", "REGISTER_JAVA_UDAF"),
    )
]
| 90 | + |
| 91 | + |
| 92 | +def _create_blocked_method(error_method_name: str, error_sub_class: str) -> Callable: |
| 93 | + def blocked_method(*args: object, **kwargs: object) -> NoReturn: |
| 94 | + raise PySparkException( |
| 95 | + errorClass=f"{ERROR_CLASS}.{error_sub_class}", |
| 96 | + messageParameters={ |
| 97 | + "method": error_method_name, |
| 98 | + }, |
| 99 | + ) |
| 100 | + |
| 101 | + return blocked_method |
| 102 | + |
| 103 | + |
@contextmanager
def block_session_mutations() -> Generator[None, None, None]:
    """
    Context manager that blocks imperative constructs found in a pipeline python definition file.
    See BLOCKED_METHODS for the list of affected methods.

    On entry, every listed method is replaced on its class by a stub that raises a
    PySparkException when called. On exit, including when the body raises, each class is
    returned to the state it had on entry.

    Raises AttributeError on entry, before anything is patched, if a listed method does not
    exist on its class.
    """
    # Marks a method that the class does not define itself (it is only inherited).
    not_own_attribute = object()

    # Snapshot the originals before patching anything.
    original_methods = {}
    for method_info in BLOCKED_METHODS:
        cls = method_info["class"]
        method_name = method_info["method"]
        # Fail fast if the method is missing altogether.
        getattr(cls, method_name)
        # Keep the raw class attribute rather than the getattr() result: getattr() runs the
        # descriptor protocol, so a staticmethod/classmethod would come back unwrapped and
        # could not be restored faithfully.
        original_methods[(cls, method_name)] = vars(cls).get(method_name, not_own_attribute)

    try:
        # Replace methods with blocked versions
        for method_info in BLOCKED_METHODS:
            cls = method_info["class"]
            method_name = method_info["method"]
            error_method_name = f"'{cls.__name__}.{method_name}'"
            blocked_method = _create_blocked_method(
                error_method_name, method_info["error_sub_class"]
            )
            setattr(cls, method_name, blocked_method)

        yield
    finally:
        # Restore from the snapshot (not from BLOCKED_METHODS) so exactly what was saved is
        # put back, even if the list was modified while the context was active.
        for (cls, method_name), original in original_methods.items():
            if original is not not_own_attribute:
                setattr(cls, method_name, original)
            elif method_name in vars(cls):
                # The method was inherited: drop the stub so lookup reaches the base class again.
                delattr(cls, method_name)
0 commit comments