From 7ed59b412f00dfd105b9c4b7444dc5714764ec93 Mon Sep 17 00:00:00 2001 From: brimoor Date: Thu, 16 Jan 2025 20:37:50 -0500 Subject: [PATCH 1/2] use estimated document count when possible --- fiftyone/core/collections.py | 14 ++++++++++++++ fiftyone/core/dataset.py | 9 +++++++++ 2 files changed, 23 insertions(+) diff --git a/fiftyone/core/collections.py b/fiftyone/core/collections.py index c8314b64e92..65629960c07 100644 --- a/fiftyone/core/collections.py +++ b/fiftyone/core/collections.py @@ -7726,6 +7726,20 @@ def count(self, field_or_expr=None, expr=None, safe=False): Returns: the count """ + + # Optimization: use estimated document count when possible + if ( + isinstance(self, fod.Dataset) + and expr is None + and ( + field_or_expr is None + or (field_or_expr == "frames" and self._has_frame_fields()) + ) + ): + frames = field_or_expr == "frames" + # pylint: disable=no-member + return self._estimated_count(frames=frames) + make = lambda field_or_expr: foa.Count( field_or_expr, expr=expr, safe=safe ) diff --git a/fiftyone/core/dataset.py b/fiftyone/core/dataset.py index c385a7e66bb..3213402da1c 100644 --- a/fiftyone/core/dataset.py +++ b/fiftyone/core/dataset.py @@ -360,6 +360,15 @@ def __deepcopy__(self, memo): def __len__(self): return self.count() + def _estimated_count(self, frames=False): + if frames: + if self._frame_collection is None: + return None + + return self._frame_collection.estimated_document_count() + + return self._sample_collection.estimated_document_count() + def __getitem__(self, id_filepath_slice): if isinstance(id_filepath_slice, numbers.Integral): raise ValueError( From d11868cb1e37fa2f6a06264a3895cc728879f131 Mon Sep 17 00:00:00 2001 From: brimoor Date: Fri, 17 Jan 2025 23:27:32 -0500 Subject: [PATCH 2/2] support grouped collections --- fiftyone/core/collections.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/fiftyone/core/collections.py b/fiftyone/core/collections.py index 65629960c07..3170dbf74bb 100644 --- a/fiftyone/core/collections.py +++ b/fiftyone/core/collections.py @@ -7728,17 +7728,19 @@ def count(self, field_or_expr=None, expr=None, safe=False): """ # Optimization: use estimated document count when possible - if ( - isinstance(self, fod.Dataset) - and expr is None + if self._is_full_collection() and ( + expr is None and ( field_or_expr is None - or (field_or_expr == "frames" and self._has_frame_fields()) + or ( + etau.is_str(field_or_expr) + and field_or_expr == "frames" + and self._has_frame_fields() + ) ) ): frames = field_or_expr == "frames" - # pylint: disable=no-member - return self._estimated_count(frames=frames) + return self._dataset._estimated_count(frames=frames) make = lambda field_or_expr: foa.Count( field_or_expr, expr=expr, safe=safe @@ -10616,6 +10618,22 @@ def _has_frame_fields(self): def _handle_id_fields(self, field_name): return _handle_id_fields(self, field_name) + def _is_full_collection(self): + if isinstance(self, fod.Dataset) and self.media_type != fom.GROUP: + return True + + # pylint:disable=no-member + if ( + isinstance(self, fov.DatasetView) + and self._dataset.media_type == fom.GROUP + and len(self._stages) == 1 + and isinstance(self._stages[0], fos.SelectGroupSlices) + and self._pipeline() == [] + ): + return True + + return False + def _is_label_field(self, field_name, label_type_or_types): try: label_type = self._get_label_field_type(field_name)