From d0a187352b93ffa644491dc01f5eaa2da5edeb0e Mon Sep 17 00:00:00 2001
From: Gavin Medley <gavin.medley@lasp.colorado.edu>
Date: Fri, 7 Mar 2025 17:40:23 -0700
Subject: [PATCH] Add spp.create_packet_list function

---
 docs/source/changelog.md               |  1 +
 space_packet_parser/__init__.py        | 40 +++++++++++++++-
 space_packet_parser/xarr.py            | 66 +++++++++++++-------------
 tests/integration/test_xarr.py         |  5 +-
 tests/unit/test_space_packet_parser.py | 24 ++++++++--
 5 files changed, 96 insertions(+), 40 deletions(-)

diff --git a/docs/source/changelog.md b/docs/source/changelog.md
index 92fb988..b886d19 100644
--- a/docs/source/changelog.md
+++ b/docs/source/changelog.md
@@ -11,6 +11,7 @@ Release notes for the `space_packet_parser` library
 - *BREAKING*: Reorganization of the project into different submodules for more explicit handling
   of imports. There is now an `space_packet_parser.xtce` module with xtce representations separated
   into modules underneath that.
+- Add `spp.create_packet_list` function to directly create a list from packet files and a definition.
 - Add support for creating a packet definition from Python objects and serializing it as XML.
 - BUGFIX: Fix kbps calculation in packet generator for showing progress.
 - Add support for string and float encoded enumerated lookup parameters.
diff --git a/space_packet_parser/__init__.py b/space_packet_parser/__init__.py
index e27e9dd..9623b52 100644
--- a/space_packet_parser/__init__.py
+++ b/space_packet_parser/__init__.py
@@ -1,4 +1,5 @@
 """Space Packet Parser"""
+from collections.abc import Iterable
 from pathlib import Path
 from typing import Union
 
@@ -15,6 +16,43 @@ def load_xtce(filename: Union[str, Path]) -> XtcePacketDefinition:
 
     Returns
     -------
-    : definitions.XtcePacketDefinition
+    : XtcePacketDefinition
     """
     return XtcePacketDefinition.from_xtce(filename)
+
+
+def create_packet_list(
+        packet_files: Union[str, Path, Iterable[Union[str, Path]]],
+        xtce_packet_definition: Union[str, Path, XtcePacketDefinition],
+        **packet_generator_kwargs: any
+):
+    """Directly create a list of Packet objects directly from one or more binary files and an XTCE definition
+
+    Parameters
+    ----------
+    packet_files : Union[str, Path, Iterable[Union[str, Path]]]
+        Packet files
+    xtce_packet_definition : Union[str, Path, xtce.definitions.XtcePacketDefinition]
+        Packet definition for parsing the packet data
+    packet_generator_kwargs : Optional[dict]
+        Keyword arguments passed to `XtcePacketDefinition.packet_generator()`
+
+    Returns
+    -------
+    : list[packets.Packet]
+        List of parsed Packet objects. Can be used like a list of dictionaries.
+    """
+    packet_generator_kwargs = packet_generator_kwargs or {}
+
+    if not isinstance(xtce_packet_definition, XtcePacketDefinition):
+        xtce_packet_definition = XtcePacketDefinition.from_xtce(xtce_packet_definition)
+
+    if isinstance(packet_files, (str, Path)):
+        packet_files = [packet_files]
+
+    packet_list = []
+    for packet_file in packet_files:
+        with open(packet_file, "rb") as f:
+            packet_list += list(xtce_packet_definition.packet_generator(f, **packet_generator_kwargs))
+
+    return packet_list
diff --git a/space_packet_parser/xarr.py b/space_packet_parser/xarr.py
index f9471de..17652a1 100644
--- a/space_packet_parser/xarr.py
+++ b/space_packet_parser/xarr.py
@@ -155,9 +155,9 @@ def create_dataset(
         packet_files = [packet_files]
 
     # Set up containers to store our data
-    # We are getting a packet file that may contain multiple apids
-    # Each apid is expected to contain consistent data fields, so we want to create a
-    # dataset per apid.
+    # We are getting a packet file that may contain multiple APIDs
+    # Each APID is expected to contain consistent data fields, so we want to create a
+    # dataset per APID.
     # {apid1: dataset1, apid2: dataset2, ...}
     data_dict: dict[int, dict] = {}
     # Also keep track of the datatype mapping for each field
@@ -167,38 +167,38 @@ def create_dataset(
 
     for packet_file in packet_files:
         with open(packet_file, "rb") as f:
-            packet_generator = list(xtce_packet_definition.packet_generator(f, **packet_generator_kwargs))
-
-        for packet in packet_generator:
-            apid = packet.raw_data.apid
-            if apid not in data_dict:
-                # This is the first packet for this APID
-                data_dict[apid] = collections.defaultdict(list)
-                datatype_mapping[apid] = {}
-                variable_mapping[apid] = packet.keys()
-
-            if variable_mapping[apid] != packet.keys():
-                raise ValueError(
-                    f"Packet fields do not match for APID {apid}. This could be "
-                    f"due to a conditional (polymorphic) packet definition in the XTCE, while this "
-                    f"function currently only supports flat packet definitions."
-                    f"\nExpected: {variable_mapping[apid]},\ngot: {list(packet.keys())}"
-                )
-
-            for key, value in packet.items():
-                if use_raw_values:
-                    # Use the derived value if it exists, otherwise use the raw value
-                    val = value.raw_value
-                else:
-                    val = value
-
-                data_dict[apid][key].append(val)
-                if key not in datatype_mapping[apid]:
-                    # Add this datatype to the mapping
-                    datatype_mapping[apid][key] = _get_minimum_numpy_datatype(
-                        key, xtce_packet_definition, use_raw_value=use_raw_values
+            packet_generator = xtce_packet_definition.packet_generator(f, **packet_generator_kwargs)
+
+            for packet in packet_generator:
+                apid = packet.raw_data.apid
+                if apid not in data_dict:
+                    # This is the first packet for this APID
+                    data_dict[apid] = collections.defaultdict(list)
+                    datatype_mapping[apid] = {}
+                    variable_mapping[apid] = set(packet.keys())
+
+                if variable_mapping[apid] != packet.keys():
+                    raise ValueError(
+                        f"Packet fields do not match for APID {apid}. This could be "
+                        f"due to a conditional (polymorphic) packet definition in the XTCE, while this "
+                        f"function currently only supports flat packet definitions."
+                        f"\nExpected: {variable_mapping[apid]},\ngot: {list(packet.keys())}"
                     )
 
+                for key, value in packet.items():
+                    if use_raw_values:
+                        # Use the derived value if it exists, otherwise use the raw value
+                        val = value.raw_value
+                    else:
+                        val = value
+
+                    data_dict[apid][key].append(val)
+                    if key not in datatype_mapping[apid]:
+                        # Add this datatype to the mapping
+                        datatype_mapping[apid][key] = _get_minimum_numpy_datatype(
+                            key, xtce_packet_definition, use_raw_value=use_raw_values
+                        )
+
     # Turn the dict into an xarray dataset
     dataset_by_apid = {}
 
diff --git a/tests/integration/test_xarr.py b/tests/integration/test_xarr.py
index 501acf6..7baaee2 100644
--- a/tests/integration/test_xarr.py
+++ b/tests/integration/test_xarr.py
@@ -44,8 +44,7 @@ def test_create_xarray_dataset_ctim(ctim_test_data_dir, caplog):
     """CTIM data contains many APIDs"""
     packet_file = ctim_test_data_dir / "ccsds_2021_155_14_39_51"
     definition_file = ctim_test_data_dir / "ctim_xtce_v1.xml"
-    ds = create_dataset(packet_file, definition_file, root_container_name="CCSDSTelemetryPacket", parse_bad_pkts=False)
-    print(ds)
+    _ = create_dataset(packet_file, definition_file, root_container_name="CCSDSTelemetryPacket", parse_bad_pkts=False)
 
 
 def test_create_xarray_dataset_suda(suda_test_data_dir):
@@ -54,4 +53,4 @@ def test_create_xarray_dataset_suda(suda_test_data_dir):
     definition_file = suda_test_data_dir / "suda_combined_science_definition.xml"
     # SUDA has a polymorphic packet structure
     with pytest.raises(ValueError, match="Packet fields do not match for APID 1425"):
-        create_dataset(packet_file, definition_file, skip_header_bytes=4)
+        _ = create_dataset(packet_file, definition_file, skip_header_bytes=4)
diff --git a/tests/unit/test_space_packet_parser.py b/tests/unit/test_space_packet_parser.py
index 2be8987..9a0f15b 100644
--- a/tests/unit/test_space_packet_parser.py
+++ b/tests/unit/test_space_packet_parser.py
@@ -1,15 +1,33 @@
 """Tests for main space_packet_parser.__init__ module"""
-import space_packet_parser
+import space_packet_parser as spp
 from space_packet_parser.xtce import definitions
 
 
 def test_load_xtce(jpss_test_data_dir, tmp_path):
     """Test high level function for loading an XTCE definition file"""
-    xtcedef = space_packet_parser.load_xtce(jpss_test_data_dir / "jpss1_geolocation_xtce_v1.xml")
+    xtcedef = spp.load_xtce(jpss_test_data_dir / "jpss1_geolocation_xtce_v1.xml")
     assert isinstance(xtcedef, definitions.XtcePacketDefinition)
 
     outpath = tmp_path / "test_output.xml"
     xtcedef.write_xml(outpath)
     assert outpath.exists()
 
-    assert space_packet_parser.load_xtce(outpath) == xtcedef
+    assert spp.load_xtce(outpath) == xtcedef
+
+
+def test_create_packet_list(jpss_test_data_dir):
+    """Test directly creating a list of Packets from a data file and a definition"""
+    jpss_packets = jpss_test_data_dir / "J01_G011_LZ_2021-04-09T00-00-00Z_V01.DAT1"
+    jpss_xtce = jpss_test_data_dir / "jpss1_geolocation_xtce_v1.xml"
+
+    # Single file
+    packet_list = spp.create_packet_list(jpss_packets, jpss_xtce)
+    assert len(packet_list) == 7200
+    assert packet_list[0]["PKT_APID"] == 11
+    assert packet_list[-1]["PKT_APID"] == 11
+
+    # Multiple files
+    packet_list = spp.create_packet_list([jpss_packets, jpss_packets], jpss_xtce)
+    assert len(packet_list) == 14400
+    assert packet_list[0]["PKT_APID"] == 11
+    assert packet_list[-1]["PKT_APID"] == 11