From 262b4f1f6f026c3c7a8ea1e3c0caaa3b5285334e Mon Sep 17 00:00:00 2001 From: Ryan Georgi Date: Wed, 19 Nov 2025 08:48:38 -0800 Subject: [PATCH 01/15] =?UTF-8?q?Add=20=E2=80=94drop-output-type=20arg?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- nbstripout/_nbstripout.py | 2 ++ nbstripout/_utils.py | 17 ++++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/nbstripout/_nbstripout.py b/nbstripout/_nbstripout.py index 3578828..5f84d13 100644 --- a/nbstripout/_nbstripout.py +++ b/nbstripout/_nbstripout.py @@ -372,6 +372,7 @@ def process_jupyter_notebook( drop_empty_cells=args.drop_empty_cells, drop_tagged_cells=args.drop_tagged_cells.split(), strip_init_cells=args.strip_init_cells, + drop_output_types=set(args.drop_output_type), max_size=_parse_size(args.max_size), ) @@ -451,6 +452,7 @@ def main(): ) parser.add_argument('--keep-count', action='store_true', help='Do not strip the execution count/prompt number') parser.add_argument('--keep-output', action='store_true', help='Do not strip output', default=None) + parser.add_argument('--drop-output-type', help='Types of output cells to drop, e.g. "error" or "stream"', nargs='+') parser.add_argument( '--keep-id', action='store_true', diff --git a/nbstripout/_utils.py b/nbstripout/_utils.py index 3ebde52..b11560c 100644 --- a/nbstripout/_utils.py +++ b/nbstripout/_utils.py @@ -1,6 +1,8 @@ from collections import defaultdict import sys -from typing import Any, Callable, Iterator, List, Optional +from typing import Any, Callable, Iterator, List, Optional, Set +import logging +logger = logging.getLogger(__name__) from nbformat import NotebookNode @@ -104,6 +106,7 @@ def strip_output( drop_empty_cells: bool = False, drop_tagged_cells: List[str] = [], strip_init_cells: bool = False, + drop_output_types: Set[str] = None, max_size: int = 0, ) -> NotebookNode: """ @@ -113,6 +116,11 @@ def strip_output( `extra_keys` could be 'metadata.foo cell.metadata.bar metadata.baz' """ + + # Replace mutable defaults + drop_output_types = drop_output_types or {'error'} + print(drop_output_types) + if keep_output is None and 'keep_output' in nb.metadata: keep_output = bool(nb.metadata['keep_output']) @@ -149,6 +157,13 @@ def strip_output( if 'execution_count' in output: output['execution_count'] = None + # Remove specific output types + if drop_output_types: + cell['outputs'] = [ + output for output in cell['outputs'] + if output.get('output_type') not in drop_output_types + ] + # If keep_output_this_cell and keep_count, do nothing. # Remove the prompt_number/execution_count, unless directed otherwise From 8ae87a06503d9e59519ca52e45ebad295003aafe Mon Sep 17 00:00:00 2001 From: Ryan Georgi Date: Wed, 19 Nov 2025 08:53:40 -0800 Subject: [PATCH 02/15] Update readme to document new behavior --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index c6b882c..3e6890e 100644 --- a/README.md +++ b/README.md @@ -324,6 +324,10 @@ Do not strip the output, only metadata: nbstripout --keep-output +When keeping the output, drop a specific [`output_type`](https://ipython.readthedocs.io/en/3.x/notebook/nbformat.html#code-cell-outputs), like `error` or `stream` + + nbstripout --drop-output-type error stream + Do not reassign the cell ids to be sequential (which is the default behavior): nbstripout --keep-id From db6dd39a20dc85eb1138d1d6952e7e19f1fc95e3 Mon Sep 17 00:00:00 2001 From: Ryan Georgi Date: Wed, 19 Nov 2025 09:43:22 -0800 Subject: [PATCH 03/15] =?UTF-8?q?Updating=20readme=20and=20unit=20tests,?= =?UTF-8?q?=20and=20adding=20=E2=80=94keep-output-type?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 5 ++ nbstripout/_nbstripout.py | 4 +- nbstripout/_utils.py | 9 ++- tests/test_drop_outputs.ipynb | 119 ++++++++++++++++++++++++++++++++++ tests/test_drop_outputs.py | 68 +++++++++++++++++++ 5 files changed, 201 insertions(+), 4 deletions(-) create mode 100644 tests/test_drop_outputs.ipynb create mode 100644 tests/test_drop_outputs.py diff --git a/README.md b/README.md index 3e6890e..c56aa6d 100644 --- a/README.md +++ b/README.md @@ -218,6 +218,7 @@ Note that you need to uninstall with the same flags: `nbstripout` can be used to rewrite an existing Git repository using [`git filter-repo`](https://github.com/newren/git-filter-repo) to strip output from existing notebooks. This invocation operates on all `.ipynb` files in the repo: + ```bash #!/bin/bash git-filter-repo \ @@ -328,6 +329,10 @@ When keeping the output, drop a specific [`output_type`](https://ipython.readthe nbstripout --drop-output-type error stream +Drop all output except specific output types: + + nbstripout --keep-output-type execute_result + Do not reassign the cell ids to be sequential (which is the default behavior): nbstripout --keep-id diff --git a/nbstripout/_nbstripout.py b/nbstripout/_nbstripout.py index 5f84d13..f60a43d 100644 --- a/nbstripout/_nbstripout.py +++ b/nbstripout/_nbstripout.py @@ -373,6 +373,7 @@ def process_jupyter_notebook( drop_tagged_cells=args.drop_tagged_cells.split(), strip_init_cells=args.strip_init_cells, drop_output_types=set(args.drop_output_type), + keep_output_types = set(args.keep_output_type), max_size=_parse_size(args.max_size), ) @@ -452,7 +453,8 @@ def main(): ) parser.add_argument('--keep-count', action='store_true', help='Do not strip the execution count/prompt number') parser.add_argument('--keep-output', action='store_true', help='Do not strip output', default=None) - parser.add_argument('--drop-output-type', help='Types of output cells to drop, e.g. "error" or "stream"', nargs='+') + parser.add_argument('--drop-output-type', help='Types of output cells to drop, e.g. "error" or "stream". Only has effect with --keep-output', nargs='+') + parser.add_argument('--keep-output-type', help='Types of output cells to keep, e.g. "error" or "stream". Will take effect without --keep-output', nargs='+') parser.add_argument( '--keep-id', action='store_true', diff --git a/nbstripout/_utils.py b/nbstripout/_utils.py index b11560c..1aab45f 100644 --- a/nbstripout/_utils.py +++ b/nbstripout/_utils.py @@ -107,6 +107,7 @@ def strip_output( drop_tagged_cells: List[str] = [], strip_init_cells: bool = False, drop_output_types: Set[str] = None, + keep_output_types: Set[str] = None, max_size: int = 0, ) -> NotebookNode: """ @@ -118,8 +119,8 @@ def strip_output( """ # Replace mutable defaults - drop_output_types = drop_output_types or {'error'} - print(drop_output_types) + drop_output_types = drop_output_types or set() + keep_output_types = keep_output_types or set() if keep_output is None and 'keep_output' in nb.metadata: keep_output = bool(nb.metadata['keep_output']) @@ -149,7 +150,9 @@ def strip_output( if 'outputs' in cell: # Default behavior (max_size == 0) strips all outputs. if not keep_output_this_cell: - cell['outputs'] = [output for output in cell['outputs'] if get_size(output) <= max_size] + cell['outputs'] = [output for output in cell['outputs'] + if get_size(output) <= max_size + or output.get('output_type') in keep_output_types] # Strip the counts from the outputs that were kept if not keep_count. if not keep_count: diff --git a/tests/test_drop_outputs.ipynb b/tests/test_drop_outputs.ipynb new file mode 100644 index 0000000..dd133e9 --- /dev/null +++ b/tests/test_drop_outputs.ipynb @@ -0,0 +1,119 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1c2d582b-7d99-413d-9263-e1b4bdce4066", + "metadata": {}, + "source": [ + "# Test Notebook for nbstripout error types" + ] + }, + { + "cell_type": "code", + "id": "0fec6660-9485-42fd-90d6-5c5046096759", + "metadata": { + "ExecuteTime": { + "end_time": "2025-11-19T17:19:29.492338Z", + "start_time": "2025-11-19T17:19:29.478763Z" + } + }, + "source": [ + "import sys\n", + "print('This is not an error')\n", + "sys.stderr.write('This is stderr\\n')\n", + "import notalibrary" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "This is not an error\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "This is stderr\n" + ] + }, + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'notalibrary'", + "output_type": "error", + "traceback": [ + "\u001B[31m---------------------------------------------------------------------------\u001B[39m", + "\u001B[31mModuleNotFoundError\u001B[39m Traceback (most recent call last)", + "\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[3]\u001B[39m\u001B[32m, line 4\u001B[39m\n\u001B[32m 2\u001B[39m \u001B[38;5;28mprint\u001B[39m(\u001B[33m'\u001B[39m\u001B[33mThis is not an error\u001B[39m\u001B[33m'\u001B[39m)\n\u001B[32m 3\u001B[39m sys.stderr.write(\u001B[33m'\u001B[39m\u001B[33mThis is stderr\u001B[39m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[33m'\u001B[39m)\n\u001B[32m----> \u001B[39m\u001B[32m4\u001B[39m \u001B[38;5;28;01mimport\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34;01mnotalibrary\u001B[39;00m\n", + "\u001B[31mModuleNotFoundError\u001B[39m: No module named 'notalibrary'" + ] + } + ], + "execution_count": 3 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-11-19T17:15:32.884433Z", + "start_time": "2025-11-19T17:15:32.881115Z" + } + }, + "cell_type": "code", + "source": "'a'", + "id": "abd5c048ed6667b7", + "outputs": [ + { + "data": { + "text/plain": [ + "'a'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 2 + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a060078e-cb50-4ed8-8dff-0a7938b8a897", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "test\n" + ] + } + ], + "source": [ + "print('test')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/test_drop_outputs.py b/tests/test_drop_outputs.py new file mode 100644 index 0000000..17344d4 --- /dev/null +++ b/tests/test_drop_outputs.py @@ -0,0 +1,68 @@ +import os +from copy import deepcopy + +import nbformat +import pytest + +from nbstripout import strip_output, MetadataError +directory = os.path.dirname(__file__) + +@pytest.fixture +def orig_nb(): + fname = 'test_drop_outputs.ipynb' + return nbformat.read(os.path.join(directory, fname), nbformat.NO_CONVERT) + +def test_drop_errors(orig_nb): + nb_stripped = strip_output(deepcopy(orig_nb), + keep_output=True, + keep_count=False, + keep_id=False, + drop_output_types={'error'}) + + # No outputs in the markdown + assert not hasattr(nb_stripped.cells[0], 'outputs') + + # Original cell should have 3 outputs, with the last being error + assert len(orig_nb.cells[1].outputs) == 3 + assert orig_nb.cells[1].outputs[2]['output_type'] == 'error' + + # Second cell should have a stdout stream, stderr stream and an error + stripped_output_1 = nb_stripped.cells[1].outputs[0] + stripped_output_2 = nb_stripped.cells[1].outputs[1] + assert len(nb_stripped.cells[1].outputs) == 2 + assert stripped_output_1['output_type'] == 'stream' + assert stripped_output_2['output_type'] == 'stream' + assert stripped_output_1['name'] == 'stdout' + assert stripped_output_2['name'] == 'stderr' + + # Third cell should have an execution output + assert len(nb_stripped.cells[2].outputs) == 1 + assert nb_stripped.cells[2].outputs[0]['output_type'] == 'execute_result' + + # Should be an error in the original cell, but not in the output + # assert orig_nb.cells[1].outputs[0]['output_type'] == 'error' + # print(nb_stripped.cells[1].outputs) + + +def test_keep_output(orig_nb): + """ + Te4st keep output types + """ + nb_stripped = strip_output(deepcopy(orig_nb), + keep_output=False, + keep_count=False, + keep_id=False, + keep_output_types={'execute_result'}) + + # No outputs in the markdown + assert not hasattr(nb_stripped.cells[0], 'outputs') + + # Original cell should have 3 outputs, with the last being error + assert len(orig_nb.cells[1].outputs) == 3 + assert orig_nb.cells[1].outputs[2]['output_type'] == 'error' + + # All outputs should be stripped in the second cell + assert len(nb_stripped.cells[1].outputs) == 0 + + # Third cell should have an execution output + assert len(nb_stripped.cells[2].outputs) == 1 \ No newline at end of file From 912839fc4931f0581c686ee7c1305505657a6ec4 Mon Sep 17 00:00:00 2001 From: Ryan Georgi Date: Wed, 19 Nov 2025 09:51:02 -0800 Subject: [PATCH 04/15] Fixing unused metadataerror per ruff --- tests/test_drop_outputs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_drop_outputs.py b/tests/test_drop_outputs.py index 17344d4..80f7383 100644 --- a/tests/test_drop_outputs.py +++ b/tests/test_drop_outputs.py @@ -4,7 +4,7 @@ import nbformat import pytest -from nbstripout import strip_output, MetadataError +from nbstripout import strip_output directory = os.path.dirname(__file__) @pytest.fixture From 407c27bde4d834d13e86e7b194b2da695769b76d Mon Sep 17 00:00:00 2001 From: Ryan Georgi Date: Wed, 19 Nov 2025 10:05:09 -0800 Subject: [PATCH 05/15] Fix behavior if keep_output is true AND keep_output_types --- nbstripout/_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nbstripout/_utils.py b/nbstripout/_utils.py index 1aab45f..a03ca49 100644 --- a/nbstripout/_utils.py +++ b/nbstripout/_utils.py @@ -149,7 +149,7 @@ def strip_output( # Remove the outputs, unless directed otherwise if 'outputs' in cell: # Default behavior (max_size == 0) strips all outputs. - if not keep_output_this_cell: + if not keep_output_this_cell or keep_output_types: cell['outputs'] = [output for output in cell['outputs'] if get_size(output) <= max_size or output.get('output_type') in keep_output_types] @@ -160,6 +160,8 @@ def strip_output( if 'execution_count' in output: output['execution_count'] = None + + # Remove specific output types if drop_output_types: cell['outputs'] = [ From 76ef7dd80cc382d1a566359151da81cc89572e87 Mon Sep 17 00:00:00 2001 From: Ryan Georgi Date: Wed, 19 Nov 2025 11:03:15 -0800 Subject: [PATCH 06/15] Add empty list as default to avoid error --- nbstripout/_nbstripout.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nbstripout/_nbstripout.py b/nbstripout/_nbstripout.py index f60a43d..501898b 100644 --- a/nbstripout/_nbstripout.py +++ b/nbstripout/_nbstripout.py @@ -453,8 +453,8 @@ def main(): ) parser.add_argument('--keep-count', action='store_true', help='Do not strip the execution count/prompt number') parser.add_argument('--keep-output', action='store_true', help='Do not strip output', default=None) - parser.add_argument('--drop-output-type', help='Types of output cells to drop, e.g. "error" or "stream". Only has effect with --keep-output', nargs='+') - parser.add_argument('--keep-output-type', help='Types of output cells to keep, e.g. "error" or "stream". Will take effect without --keep-output', nargs='+') + parser.add_argument('--drop-output-type', help='Types of output cells to drop, e.g. "error" or "stream". Only has effect with --keep-output', nargs='+', default=[]) + parser.add_argument('--keep-output-type', help='Types of output cells to keep, e.g. "error" or "stream". Will take effect without --keep-output', nargs='+', default=[]) parser.add_argument( '--keep-id', action='store_true', From b5a7d8a27872a0a4b424ff16e669f1c001c934a6 Mon Sep 17 00:00:00 2001 From: Ryan Georgi Date: Wed, 19 Nov 2025 11:59:21 -0800 Subject: [PATCH 07/15] Add output_type:name to support subclasses of output_types --- README.md | 10 +++++ nbstripout/_utils.py | 38 ++++++++++++++++--- ..._outputs.ipynb => test_output_types.ipynb} | 0 ...t_drop_outputs.py => test_output_types.py} | 26 ++++++++++++- 4 files changed, 67 insertions(+), 7 deletions(-) rename tests/{test_drop_outputs.ipynb => test_output_types.ipynb} (100%) rename tests/{test_drop_outputs.py => test_output_types.py} (75%) diff --git a/README.md b/README.md index c56aa6d..2e2a869 100644 --- a/README.md +++ b/README.md @@ -325,6 +325,8 @@ Do not strip the output, only metadata: nbstripout --keep-output +##### Output Types + When keeping the output, drop a specific [`output_type`](https://ipython.readthedocs.io/en/3.x/notebook/nbformat.html#code-cell-outputs), like `error` or `stream` nbstripout --drop-output-type error stream @@ -333,10 +335,18 @@ Drop all output except specific output types: nbstripout --keep-output-type execute_result +For stripping certain outputs that have names (like `stream` which can be `stderr` or `stdout`) you can use a colon to specify the name. The following would strip all `stderr` output. + + nbstripout --drop-output-type stream:stderr + +##### Cell IDs + Do not reassign the cell ids to be sequential (which is the default behavior): nbstripout --keep-id +##### Keeping Output on Specific Cells + To mark special cells so that the output is not stripped, you can either: 1. Set the `keep_output` tag on the cell. To do this, enable the tags toolbar diff --git a/nbstripout/_utils.py b/nbstripout/_utils.py index a03ca49..9df9d36 100644 --- a/nbstripout/_utils.py +++ b/nbstripout/_utils.py @@ -1,6 +1,6 @@ from collections import defaultdict import sys -from typing import Any, Callable, Iterator, List, Optional, Set +from typing import Any, Callable, Iterator, List, Optional, Set, Dict import logging logger = logging.getLogger(__name__) @@ -96,6 +96,34 @@ def strip_zeppelin_output(nb: dict) -> dict: cell['results'] = {} return nb +class OutputType: + output_type: str + name: str + + def __init__(self, output_type: str, + name: str = None): + self.output_type = output_type + self.name = name + + @classmethod + def from_string(cls, s: str) -> 'OutputType': + if ':' in s: + return cls(*s.split(':')) + else: + return cls(s) + + def matches_output(self, output: Dict): + return (output.get('output_type') == self.output_type and + (self.name is None or output.get('name') == self.name)) + + def __hash__(self): + return hash(self.output_type + str(self.name)) + + def __repr__(self): + return f'{self.output_type}:{self.name}' + + + def strip_output( nb: NotebookNode, @@ -119,8 +147,8 @@ def strip_output( """ # Replace mutable defaults - drop_output_types = drop_output_types or set() - keep_output_types = keep_output_types or set() + drop_output_types = {OutputType.from_string(s) for s in drop_output_types} if drop_output_types else set() + keep_output_types = {OutputType.from_string(s) for s in keep_output_types} if keep_output_types else set() if keep_output is None and 'keep_output' in nb.metadata: keep_output = bool(nb.metadata['keep_output']) @@ -152,7 +180,7 @@ def strip_output( if not keep_output_this_cell or keep_output_types: cell['outputs'] = [output for output in cell['outputs'] if get_size(output) <= max_size - or output.get('output_type') in keep_output_types] + or any([ot.matches_output(output) for ot in keep_output_types])] # Strip the counts from the outputs that were kept if not keep_count. if not keep_count: @@ -166,7 +194,7 @@ def strip_output( if drop_output_types: cell['outputs'] = [ output for output in cell['outputs'] - if output.get('output_type') not in drop_output_types + if not any([ot.matches_output(output) for ot in drop_output_types]) ] # If keep_output_this_cell and keep_count, do nothing. diff --git a/tests/test_drop_outputs.ipynb b/tests/test_output_types.ipynb similarity index 100% rename from tests/test_drop_outputs.ipynb rename to tests/test_output_types.ipynb diff --git a/tests/test_drop_outputs.py b/tests/test_output_types.py similarity index 75% rename from tests/test_drop_outputs.py rename to tests/test_output_types.py index 80f7383..08d430f 100644 --- a/tests/test_drop_outputs.py +++ b/tests/test_output_types.py @@ -9,7 +9,7 @@ @pytest.fixture def orig_nb(): - fname = 'test_drop_outputs.ipynb' + fname = 'test_output_types.ipynb' return nbformat.read(os.path.join(directory, fname), nbformat.NO_CONVERT) def test_drop_errors(orig_nb): @@ -49,7 +49,7 @@ def test_keep_output(orig_nb): Te4st keep output types """ nb_stripped = strip_output(deepcopy(orig_nb), - keep_output=False, + keep_output=True, keep_count=False, keep_id=False, keep_output_types={'execute_result'}) @@ -61,8 +61,30 @@ def test_keep_output(orig_nb): assert len(orig_nb.cells[1].outputs) == 3 assert orig_nb.cells[1].outputs[2]['output_type'] == 'error' + print(nb_stripped.cells[2].outputs) + # All outputs should be stripped in the second cell assert len(nb_stripped.cells[1].outputs) == 0 # Third cell should have an execution output + assert len(nb_stripped.cells[2].outputs) == 1 + +def test_output_format_tags(orig_nb): + """ + Te4st keep output types + """ + nb_stripped = strip_output(deepcopy(orig_nb), + keep_output=False, + keep_count=False, + keep_id=False, + keep_output_types={'stream:stdout', 'execute_result'}) + + # No outputs in the markdown + assert not hasattr(nb_stripped.cells[0], 'outputs') + + # Stripping all but stdout should leave only the print statement + assert len(orig_nb.cells[1].outputs) == 3 + assert len(nb_stripped.cells[1].outputs) == 1 + + # Third cell should have only the execute_result assert len(nb_stripped.cells[2].outputs) == 1 \ No newline at end of file From 3071c323633818a2ed52044ff1a66a8de17ac3ab Mon Sep 17 00:00:00 2001 From: Ryan Georgi Date: Tue, 25 Nov 2025 10:23:30 -0800 Subject: [PATCH 08/15] Adding suggested changes 1. Reflowing Readme to 80 cols, and adding notes on usage 2. Reworking output type filter as method 3. Cleaning up some leftover debug --- README.md | 14 ++++++++--- nbstripout/_utils.py | 48 +++++++++++++++----------------------- tests/test_output_types.py | 19 +++++++-------- 3 files changed, 39 insertions(+), 42 deletions(-) diff --git a/README.md b/README.md index 2e2a869..3eb41fe 100644 --- a/README.md +++ b/README.md @@ -325,9 +325,11 @@ Do not strip the output, only metadata: nbstripout --keep-output -##### Output Types +#### Output Types -When keeping the output, drop a specific [`output_type`](https://ipython.readthedocs.io/en/3.x/notebook/nbformat.html#code-cell-outputs), like `error` or `stream` +When keeping the output, drop a specific [ +`output_type`](https://ipython.readthedocs.io/en/3.x/notebook/nbformat.html#code-cell-outputs), +like `error` or `stream` nbstripout --drop-output-type error stream @@ -335,10 +337,16 @@ Drop all output except specific output types: nbstripout --keep-output-type execute_result -For stripping certain outputs that have names (like `stream` which can be `stderr` or `stdout`) you can use a colon to specify the name. The following would strip all `stderr` output. +_**Note: `--keep-output-type` will override `--max-size` for outputs that match.**_ + +For stripping certain outputs that have names (like `stream` which can be +`stderr` or `stdout`) you can use a colon to specify the name. The following +would strip all `stderr` output. nbstripout --drop-output-type stream:stderr +`stream:stdout` would strip all `stdout` output, including print statements. + ##### Cell IDs Do not reassign the cell ids to be sequential (which is the default behavior): diff --git a/nbstripout/_utils.py b/nbstripout/_utils.py index 9df9d36..fedbbd6 100644 --- a/nbstripout/_utils.py +++ b/nbstripout/_utils.py @@ -1,8 +1,7 @@ +import re from collections import defaultdict import sys from typing import Any, Callable, Iterator, List, Optional, Set, Dict -import logging -logger = logging.getLogger(__name__) from nbformat import NotebookNode @@ -96,34 +95,25 @@ def strip_zeppelin_output(nb: dict) -> dict: cell['results'] = {} return nb -class OutputType: - output_type: str - name: str - - def __init__(self, output_type: str, - name: str = None): - self.output_type = output_type - self.name = name - - @classmethod - def from_string(cls, s: str) -> 'OutputType': - if ':' in s: - return cls(*s.split(':')) - else: - return cls(s) - - def matches_output(self, output: Dict): - return (output.get('output_type') == self.output_type and - (self.name is None or output.get('name') == self.name)) +def match_output_type(output: Dict, + output_type: str) -> bool: + """ + Take the `output_type` string, and return whether the output matches. - def __hash__(self): - return hash(self.output_type + str(self.name)) + Currently, supported formats are `output_type:name` or `output_type`. - def __repr__(self): - return f'{self.output_type}:{self.name}' + :param output: The output dictionary from a notebook cell. + :param output_type: User-provided string to match against. + """ + # Check if the ':' format is used, and if so, split into output_type and name + name = None + if ':' in output_type: + output_type, name = re.search(r'^(.*?):(.*)$', output_type).groups() + return (output.get('output_type') == output_type + and (name is None or output.get('name') == name)) def strip_output( nb: NotebookNode, @@ -147,8 +137,8 @@ def strip_output( """ # Replace mutable defaults - drop_output_types = {OutputType.from_string(s) for s in drop_output_types} if drop_output_types else set() - keep_output_types = {OutputType.from_string(s) for s in keep_output_types} if keep_output_types else set() + drop_output_types = drop_output_types or set() + keep_output_types = keep_output_types or set() if keep_output is None and 'keep_output' in nb.metadata: keep_output = bool(nb.metadata['keep_output']) @@ -180,7 +170,7 @@ def strip_output( if not keep_output_this_cell or keep_output_types: cell['outputs'] = [output for output in cell['outputs'] if get_size(output) <= max_size - or any([ot.matches_output(output) for ot in keep_output_types])] + or any(match_output_type(output, ot) for ot in keep_output_types)] # Strip the counts from the outputs that were kept if not keep_count. if not keep_count: @@ -194,7 +184,7 @@ def strip_output( if drop_output_types: cell['outputs'] = [ output for output in cell['outputs'] - if not any([ot.matches_output(output) for ot in drop_output_types]) + if not any(match_output_type(output, ot) for ot in drop_output_types) ] # If keep_output_this_cell and keep_count, do nothing. diff --git a/tests/test_output_types.py b/tests/test_output_types.py index 08d430f..58a963f 100644 --- a/tests/test_output_types.py +++ b/tests/test_output_types.py @@ -9,10 +9,15 @@ @pytest.fixture def orig_nb(): + # Grab the original notebook fname = 'test_output_types.ipynb' return nbformat.read(os.path.join(directory, fname), nbformat.NO_CONVERT) def test_drop_errors(orig_nb): + """ + Confirm that --drop-output-types works as expected, + when asking just to drop `error` outputs. + """ nb_stripped = strip_output(deepcopy(orig_nb), keep_output=True, keep_count=False, @@ -39,14 +44,10 @@ def test_drop_errors(orig_nb): assert len(nb_stripped.cells[2].outputs) == 1 assert nb_stripped.cells[2].outputs[0]['output_type'] == 'execute_result' - # Should be an error in the original cell, but not in the output - # assert orig_nb.cells[1].outputs[0]['output_type'] == 'error' - # print(nb_stripped.cells[1].outputs) - - def test_keep_output(orig_nb): """ - Te4st keep output types + Confirm that --keep-output-types works as expected, + dropping all but `execute_result` outputs from the notebook. """ nb_stripped = strip_output(deepcopy(orig_nb), keep_output=True, @@ -61,8 +62,6 @@ def test_keep_output(orig_nb): assert len(orig_nb.cells[1].outputs) == 3 assert orig_nb.cells[1].outputs[2]['output_type'] == 'error' - print(nb_stripped.cells[2].outputs) - # All outputs should be stripped in the second cell assert len(nb_stripped.cells[1].outputs) == 0 @@ -71,8 +70,8 @@ def test_keep_output(orig_nb): def test_output_format_tags(orig_nb): """ - Te4st keep output types - """ + Confirm that both : and formats work. + """ nb_stripped = strip_output(deepcopy(orig_nb), keep_output=False, keep_count=False, From 7dd1d4a29174a8677046e27764c8efa0b5f719e2 Mon Sep 17 00:00:00 2001 From: Ryan Date: Mon, 1 Dec 2025 12:36:27 -0800 Subject: [PATCH 09/15] Update nbstripout/_utils.py Co-authored-by: Florian Rathgeber --- nbstripout/_utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/nbstripout/_utils.py b/nbstripout/_utils.py index fedbbd6..bc125ca 100644 --- a/nbstripout/_utils.py +++ b/nbstripout/_utils.py @@ -178,8 +178,6 @@ def strip_output( if 'execution_count' in output: output['execution_count'] = None - - # Remove specific output types if drop_output_types: cell['outputs'] = [ From 7ed6dbad995fc1743bc9636dd8b8bc55627f1102 Mon Sep 17 00:00:00 2001 From: Ryan Date: Mon, 1 Dec 2025 12:38:10 -0800 Subject: [PATCH 10/15] Update nbstripout/_utils.py Co-authored-by: Florian Rathgeber --- nbstripout/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nbstripout/_utils.py b/nbstripout/_utils.py index bc125ca..707c585 100644 --- a/nbstripout/_utils.py +++ b/nbstripout/_utils.py @@ -110,7 +110,7 @@ def match_output_type(output: Dict, name = None if ':' in output_type: - output_type, name = re.search(r'^(.*?):(.*)$', output_type).groups() + output_type, name = output_type.split(':') return (output.get('output_type') == output_type and (name is None or output.get('name') == name)) From 3400342b2ba7aebbd6cef1da4d3266818a65fd0a Mon Sep 17 00:00:00 2001 From: Ryan Date: Mon, 1 Dec 2025 12:38:17 -0800 Subject: [PATCH 11/15] Update README.md Co-authored-by: Florian Rathgeber --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3eb41fe..c726d2f 100644 --- a/README.md +++ b/README.md @@ -353,7 +353,7 @@ Do not reassign the cell ids to be sequential (which is the default behavior): nbstripout --keep-id -##### Keeping Output on Specific Cells +#### Keeping Output on Specific Cells To mark special cells so that the output is not stripped, you can either: From 8e44e3c7e33507654a257a584067747351b4d08d Mon Sep 17 00:00:00 2001 From: Ryan Date: Mon, 1 Dec 2025 12:38:28 -0800 Subject: [PATCH 12/15] Update README.md Co-authored-by: Florian Rathgeber --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c726d2f..ecc1b1e 100644 --- a/README.md +++ b/README.md @@ -347,7 +347,7 @@ would strip all `stderr` output. `stream:stdout` would strip all `stdout` output, including print statements. -##### Cell IDs +#### Cell IDs Do not reassign the cell ids to be sequential (which is the default behavior): From 0b5c608646ea78c19294d90785f5e803914fb27b Mon Sep 17 00:00:00 2001 From: Ryan Georgi Date: Mon, 1 Dec 2025 12:40:07 -0800 Subject: [PATCH 13/15] Add trailing newline --- tests/test_output_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_output_types.py b/tests/test_output_types.py index 58a963f..748950b 100644 --- a/tests/test_output_types.py +++ b/tests/test_output_types.py @@ -86,4 +86,4 @@ def test_output_format_tags(orig_nb): assert len(nb_stripped.cells[1].outputs) == 1 # Third cell should have only the execute_result - assert len(nb_stripped.cells[2].outputs) == 1 \ No newline at end of file + assert len(nb_stripped.cells[2].outputs) == 1 From f22da39cb45e40bc0d3ff0822d414c3a0562cbe7 Mon Sep 17 00:00:00 2001 From: Ryan Date: Mon, 1 Dec 2025 12:40:39 -0800 Subject: [PATCH 14/15] Update README.md Co-authored-by: Florian Rathgeber --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ecc1b1e..6e37440 100644 --- a/README.md +++ b/README.md @@ -327,8 +327,8 @@ Do not strip the output, only metadata: #### Output Types -When keeping the output, drop a specific [ -`output_type`](https://ipython.readthedocs.io/en/3.x/notebook/nbformat.html#code-cell-outputs), +When keeping the output, drop a specific +[`output_type`](https://ipython.readthedocs.io/en/3.x/notebook/nbformat.html#code-cell-outputs), like `error` or `stream` nbstripout --drop-output-type error stream From 78f4013976c1d18c97b53d6880ed4c61c333edac Mon Sep 17 00:00:00 2001 From: Ryan Georgi Date: Sat, 13 Dec 2025 10:12:35 -0800 Subject: [PATCH 15/15] Remove unneeded import --- nbstripout/_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nbstripout/_utils.py b/nbstripout/_utils.py index 707c585..0f9d652 100644 --- a/nbstripout/_utils.py +++ b/nbstripout/_utils.py @@ -1,4 +1,3 @@ -import re from collections import defaultdict import sys from typing import Any, Callable, Iterator, List, Optional, Set, Dict