diff --git a/fiftyone/core/collections.py b/fiftyone/core/collections.py
index 38eea76422f..1495b2808ec 100644
--- a/fiftyone/core/collections.py
+++ b/fiftyone/core/collections.py
@@ -7680,6 +7680,22 @@ def count(self, field_or_expr=None, expr=None, safe=False):
         Returns:
             the count
         """
+
+        # Optimization: use estimated document count when possible
+        if self._is_full_collection() and (
+            expr is None
+            and (
+                field_or_expr is None
+                or (
+                    etau.is_str(field_or_expr)
+                    and field_or_expr == "frames"
+                    and self._has_frame_fields()
+                )
+            )
+        ):
+            frames = field_or_expr == "frames"
+            return self._dataset._estimated_count(frames=frames)
+
         make = lambda field_or_expr: foa.Count(
             field_or_expr, expr=expr, safe=safe
         )
@@ -10556,6 +10572,35 @@ def _has_frame_fields(self):
     def _handle_id_fields(self, field_name):
         return _handle_id_fields(self, field_name)
 
+    def _is_full_collection(self):
+        """Checks whether this collection is a dataset whose media type is
+        not ``fom.GROUP`` or a bare ``SelectGroupSlices`` view of a grouped one.
+
+        Returns:
+            True if this is a dataset whose media type is not ``fom.GROUP``,
+            or a view into a ``fom.GROUP`` dataset whose only stage is a
+            ``SelectGroupSlices`` and whose pipeline is empty; False otherwise
+        """
+        if isinstance(self, fod.Dataset) and self.media_type != fom.GROUP:
+            return True
+
+        # Otherwise only a view into a grouped dataset can qualify
+        # pylint:disable=no-member
+        if not isinstance(self, fov.DatasetView):
+            return False
+
+        if self._dataset.media_type != fom.GROUP:
+            return False
+
+        stages = self._stages
+        if len(stages) != 1:
+            return False
+
+        if not isinstance(stages[0], fos.SelectGroupSlices):
+            return False
+
+        return self._pipeline() == []
+
     def _is_label_field(self, field_name, label_type_or_types):
         try:
             label_type = self._get_label_field_type(field_name)
diff --git a/fiftyone/core/dataset.py b/fiftyone/core/dataset.py
index 304346778f2..559cd9af69e 100644
--- a/fiftyone/core/dataset.py
+++ b/fiftyone/core/dataset.py
@@ -360,6 +360,28 @@ def __deepcopy__(self, memo):
     def __len__(self):
         return self.count()
 
+    def _estimated_count(self, frames=False):
+        """Returns the estimated document count of one of this dataset's
+        backing collections.
+
+        Args:
+            frames (False): whether to use the frame collection rather than
+                the sample collection
+
+        Returns:
+            the value of ``estimated_document_count()`` for the chosen
+            collection, or None if ``frames`` is True and there is no frame
+            collection
+        """
+        if not frames:
+            return self._sample_collection.estimated_document_count()
+
+        frame_collection = self._frame_collection
+        if frame_collection is None:
+            return None
+
+        return frame_collection.estimated_document_count()
+
     def __getitem__(self, id_filepath_slice):
         if isinstance(id_filepath_slice, numbers.Integral):
             raise ValueError(