diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index a7becfb4..f42348cd 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -69,7 +69,7 @@ jobs: shell: bash run: | COVERAGE_PROCESS_START=$GITHUB_WORKSPACE/.coveragerc \ - PYTHONPATH='.:tests' python -m pytest -r s + PYTHONPATH='.:tests' python -m pytest -r s -vs coverage combine --append coverage xml -i - name: Publish coverage results diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 4f4d857a..ee842bce 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -829,6 +829,11 @@ def _code_reduce(obj): # See the inline comment in _class_setstate for details. co_name = "".join(obj.co_name) + # co_filename is not used in the constructor of code objects, so we can + # safely set it to indicate that this is dynamic code. This also makes + # the payload deterministic, independent of where the function is defined. + co_filename = "".join("") + # Create shallow copies of these tuple to make cloudpickle payload deterministic. # When creating a code object during load, copies of these four tuples are # created, while in the main process, these tuples can be shared. @@ -851,7 +856,7 @@ def _code_reduce(obj): obj.co_consts, co_names, co_varnames, - obj.co_filename, + co_filename, co_name, obj.co_qualname, obj.co_firstlineno, @@ -874,7 +879,7 @@ def _code_reduce(obj): obj.co_consts, co_names, co_varnames, - obj.co_filename, + co_filename, co_name, obj.co_firstlineno, obj.co_linetable, @@ -895,7 +900,7 @@ def _code_reduce(obj): obj.co_code, obj.co_consts, co_varnames, - obj.co_filename, + co_filename, co_name, obj.co_firstlineno, obj.co_lnotab, @@ -919,7 +924,7 @@ def _code_reduce(obj): obj.co_consts, co_names, co_varnames, - obj.co_filename, + co_filename, co_name, obj.co_firstlineno, obj.co_lnotab, diff --git a/dev-requirements.txt b/dev-requirements.txt index 35c529fe..7735fd92 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -7,6 +7,8 @@ pytest-cov psutil # To be able to test tornado coroutines tornado +# To be able to test behavior in jupyter-notebooks +ipykernel # To be able to test numpy specific things # but do not build numpy from source on Python nightly numpy >=1.18.5; python_version <= '3.12' diff --git a/tests/cloudpickle_ipykernel_test.py b/tests/cloudpickle_ipykernel_test.py new file mode 100644 index 00000000..63340766 --- /dev/null +++ b/tests/cloudpickle_ipykernel_test.py @@ -0,0 +1,68 @@ +import sys +import pytest +import textwrap + +from .testutils import check_deterministic_pickle + +if sys.platform == "win32": + if sys.version_info < (3, 11): + pytest.skip( + "ipykernel requires Python 3.11 or later", + allow_module_level=True + ) +ipykernel = pytest.importorskip("ipykernel") + + +def run_in_notebook(code): + km = ipykernel.connect.jupyter_client.KernelManager() + km.start_kernel() + kc = km.client() + kc.start_channels() + status, output, err = "run", None, None + try: + assert km.is_alive() and kc.is_alive() + kc.wait_for_ready() + idx = kc.execute(code) + running = True + while running: + res = kc.iopub_channel.get_msg(timeout=None) + if res['parent_header'].get('msg_id') != idx: + continue + content = res['content'] + if content.get("name", "state") == "stdout": + output = content['text'] + if "traceback" in content: + err = "\n".join(content['traceback']) + status = "error" + running = res['content'].get('execution_state', None) != "idle" + finally: + kc.shutdown() + kc.stop_channels() + km.shutdown_kernel(now=True, restart=False) + assert not km.is_alive() + if status != "error": + status = "ok" if not running else "exec_error" + return status, output, err + + +def test_deterministic_payload_for_dynamic_func_in_notebook(): + code = textwrap.dedent(""" + import cloudpickle + + MY_PI = 3.1415 + + def get_pi(): + return MY_PI + + print(cloudpickle.dumps(get_pi)) + """) + + status, output, err = run_in_notebook(code) + assert status == "ok" + payload = eval(output.strip(), {}) + + status, output, err = run_in_notebook(code) + assert status == "ok" + payload2 = eval(output.strip(), {}) + + check_deterministic_pickle(payload, payload2)