static performance output

dougbrn · dougbrn · commit 5e77bb4b4dc6 · 2025-04-30T11:26:11.000-07:00
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -21,6 +21,7 @@ repos:
         stages: [pre-commit]
         language: system
         entry: jupyter nbconvert --clear-output
+        exclude: docs/about/performance.ipynb
     # Prevents committing directly to branches named 'main' and 'master'.
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.4.0
diff --git a/docs/about/performance.ipynb b/docs/about/performance.ipynb
@@ -11,7 +11,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -30,17 +30,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "494 ms ± 3.34 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%timeit\n",
     "\n",
@@ -50,16 +42,16 @@
     "\n",
     "# Filter on object\n",
     "filtered_object = object_df.query(\"ra > 10.0\")\n",
-    "#sync object to source --removes any index values of source not found in object\n",
+    "# sync object to source --removes any index values of source not found in object\n",
     "filtered_source = filtered_object[[]].join(source_df, how=\"left\")\n",
     "\n",
     "# Count number of observations per photometric band and add it to the object table\n",
-    "band_counts = source_df.groupby(level=0).apply(lambda x: \n",
-    "                                               x[[\"band\"]].value_counts().reset_index()).pivot_table(values=\"count\", \n",
-    "                                                                                                     index=\"index\", \n",
-    "                                                                                                     columns=\"band\", \n",
-    "                                                                                                     aggfunc=\"sum\")\n",
-    "filtered_object = filtered_object.join(band_counts[[\"g\",\"r\"]])\n",
+    "band_counts = (\n",
+    "    source_df.groupby(level=0)\n",
+    "    .apply(lambda x: x[[\"band\"]].value_counts().reset_index())\n",
+    "    .pivot_table(values=\"count\", index=\"index\", columns=\"band\", aggfunc=\"sum\")\n",
+    ")\n",
+    "filtered_object = filtered_object.join(band_counts[[\"g\", \"r\"]])\n",
     "\n",
     "# Filter on our nobs\n",
     "filtered_object = filtered_object.query(\"g > 520\")\n",
@@ -81,28 +73,20 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "230 ms ± 2.81 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%timeit\n",
     "\n",
-    "#Read in parquet data\n",
-    "#nesting sources into objects\n",
-    "nf = npd.read_parquet(data=\"objects.parquet\",\n",
-    "                  to_pack={\"ztf_sources\": \"ztf_sources.parquet\"})\n",
+    "# Read in parquet data\n",
+    "# nesting sources into objects\n",
+    "nf = npd.read_parquet(data=\"objects.parquet\", to_pack={\"ztf_sources\": \"ztf_sources.parquet\"})\n",
     "\n",
     "# Filter on object\n",
     "nf = nf.query(\"ra > 10.0\")\n",
     "\n",
     "# Count number of observations per photometric band and add it as a column\n",
-    "from nested_pandas.utils import count_nested # utility function of nested_pandas\n",
+    "from nested_pandas.utils import count_nested  # utility function of nested_pandas\n",
+    "\n",
     "nf = count_nested(nf, \"ztf_sources\", by=\"band\", join=True)\n",
     "\n",
     "# Filter on our nobs\n",