|
11 | 11 | }, |
12 | 12 | { |
13 | 13 | "cell_type": "code", |
14 | | - "execution_count": 1, |
| 14 | + "execution_count": null, |
15 | 15 | "metadata": {}, |
16 | 16 | "outputs": [], |
17 | 17 | "source": [ |
|
30 | 30 | }, |
31 | 31 | { |
32 | 32 | "cell_type": "code", |
33 | | - "execution_count": 2, |
| 33 | + "execution_count": null, |
34 | 34 | "metadata": {}, |
35 | | - "outputs": [ |
36 | | - { |
37 | | - "name": "stdout", |
38 | | - "output_type": "stream", |
39 | | - "text": [ |
40 | | - "494 ms ± 3.34 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" |
41 | | - ] |
42 | | - } |
43 | | - ], |
| 35 | + "outputs": [], |
44 | 36 | "source": [ |
45 | 37 | "%%timeit\n", |
46 | 38 | "\n", |
|
50 | 42 | "\n", |
51 | 43 | "# Filter on object\n", |
52 | 44 | "filtered_object = object_df.query(\"ra > 10.0\")\n", |
53 | | - "#sync object to source --removes any index values of source not found in object\n", |
| 45 | + "# sync object to source --removes any index values of source not found in object\n", |
54 | 46 | "filtered_source = filtered_object[[]].join(source_df, how=\"left\")\n", |
55 | 47 | "\n", |
56 | 48 | "# Count number of observations per photometric band and add it to the object table\n", |
57 | | - "band_counts = source_df.groupby(level=0).apply(lambda x: \n", |
58 | | - " x[[\"band\"]].value_counts().reset_index()).pivot_table(values=\"count\", \n", |
59 | | - " index=\"index\", \n", |
60 | | - " columns=\"band\", \n", |
61 | | - " aggfunc=\"sum\")\n", |
62 | | - "filtered_object = filtered_object.join(band_counts[[\"g\",\"r\"]])\n", |
| 49 | + "band_counts = (\n", |
| 50 | + " source_df.groupby(level=0)\n", |
| 51 | + " .apply(lambda x: x[[\"band\"]].value_counts().reset_index())\n", |
| 52 | + " .pivot_table(values=\"count\", index=\"index\", columns=\"band\", aggfunc=\"sum\")\n", |
| 53 | + ")\n", |
| 54 | + "filtered_object = filtered_object.join(band_counts[[\"g\", \"r\"]])\n", |
63 | 55 | "\n", |
64 | 56 | "# Filter on our nobs\n", |
65 | 57 | "filtered_object = filtered_object.query(\"g > 520\")\n", |
|
81 | 73 | "cell_type": "code", |
82 | 74 | "execution_count": null, |
83 | 75 | "metadata": {}, |
84 | | - "outputs": [ |
85 | | - { |
86 | | - "name": "stdout", |
87 | | - "output_type": "stream", |
88 | | - "text": [ |
89 | | - "230 ms ± 2.81 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" |
90 | | - ] |
91 | | - } |
92 | | - ], |
| 76 | + "outputs": [], |
93 | 77 | "source": [ |
94 | 78 | "%%timeit\n", |
95 | 79 | "\n", |
96 | | - "#Read in parquet data\n", |
97 | | - "#nesting sources into objects\n", |
98 | | - "nf = npd.read_parquet(data=\"objects.parquet\",\n", |
99 | | - " to_pack={\"ztf_sources\": \"ztf_sources.parquet\"})\n", |
| 80 | + "# Read in parquet data\n", |
| 81 | + "# nesting sources into objects\n", |
| 82 | + "nf = npd.read_parquet(data=\"objects.parquet\", to_pack={\"ztf_sources\": \"ztf_sources.parquet\"})\n", |
100 | 83 | "\n", |
101 | 84 | "# Filter on object\n", |
102 | 85 | "nf = nf.query(\"ra > 10.0\")\n", |
103 | 86 | "\n", |
104 | 87 | "# Count number of observations per photometric band and add it as a column\n", |
105 | | - "from nested_pandas.utils import count_nested # utility function of nested_pandas\n", |
| 88 | + "from nested_pandas.utils import count_nested # utility function of nested_pandas\n", |
| 89 | + "\n", |
106 | 90 | "nf = count_nested(nf, \"ztf_sources\", by=\"band\", join=True)\n", |
107 | 91 | "\n", |
108 | 92 | "# Filter on our nobs\n", |
|
0 commit comments