From 56f4571d1d0604bcc28a0094ab4b09258541354a Mon Sep 17 00:00:00 2001 From: Michael Morisi Date: Wed, 18 Sep 2024 10:35:03 -0400 Subject: [PATCH 1/6] DOCSP-43088: Aggregations --- source/aggregation.txt | 189 ++++++++++++++++++++++ source/includes/aggregation/aggregation.c | 77 +++++++++ source/index.txt | 14 +- 3 files changed, 271 insertions(+), 9 deletions(-) create mode 100644 source/aggregation.txt create mode 100644 source/includes/aggregation/aggregation.c diff --git a/source/aggregation.txt b/source/aggregation.txt new file mode 100644 index 00000000..10854689 --- /dev/null +++ b/source/aggregation.txt @@ -0,0 +1,189 @@ +.. _c-aggregation: + +==================================== +Transform Your Data with Aggregation +==================================== + +.. facet:: + :name: genre + :values: reference + +.. meta:: + :keywords: code example, transform, computed, pipeline + :description: Learn how to use the C driver to perform aggregation operations. + +.. contents:: On this page + :local: + :backlinks: none + :depth: 2 + :class: singlecol + +Overview +-------- + +In this guide, you can learn how to use the {+driver-short+} to perform +**aggregation operations**. + +You can use aggregation operations to process data in your MongoDB collections and +return computed results. The MongoDB Aggregation framework, which is +part of the Query API, is modeled on the concept of a data processing +pipeline. Documents enter a pipeline that contains one or more stages, +and each stage transforms the documents to output a final aggregated result. + +You can think of an aggregation operation as similar to a car factory. A car factory has +an assembly line, which contains assembly stations with specialized +tools to do specific jobs, like drills and welders. Raw parts enter the +factory, and then the assembly line transforms and assembles them into a +finished product. 
+ +The **aggregation pipeline** is the assembly line, **aggregation stages** are the +assembly stations, and **operator expressions** are the +specialized tools. + +Compare Aggregation and Find Operations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can use find operations to perform the following actions: + +- Select which documents to return +- Select which fields to return +- Sort the results + +You can use aggregation operations to perform the following actions: + +- Perform find operations +- Rename fields +- Calculate fields +- Summarize data +- Group values + +Limitations +~~~~~~~~~~~ + +The following limitations apply when using aggregation operations: + +- Returned documents must not violate the + :manual:`BSON document size limit ` + of 16 megabytes. +- Pipeline stages have a memory limit of 100 megabytes by default. You can exceed this + limit by setting the ``allowDiskUse`` option to ``true``. + +.. important:: $graphLookup exception + + The :manual:`$graphLookup + ` stage has a strict + memory limit of 100 megabytes and ignores the ``allowDiskUse`` option. + +Aggregation Example +------------------- + +The examples in this section use the ``restaurants`` collection in the ``sample_restaurants`` +database from the :atlas:`Atlas sample datasets `. To learn how to create a +free MongoDB Atlas cluster and load the sample datasets, see the +:atlas:`Get Started with Atlas ` guide. + +Build and Execute an Aggregation Pipeline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To perform an aggregation on the documents in a collection, pass a ``bson_t`` structure +that represents the pipeline stages to the ``mongoc_collection_aggregate()`` function. + +This example outputs a count of the number of bakeries in each borough +of New York City. The following code creates aggregation pipeline that contains the +following stages: + +- A :manual:`$match ` stage to filter for documents + in which the value of the ``cuisine`` field is ``"Bakery"``. 
+ +- A :manual:`$group ` stage to group the matching + documents by the ``borough`` field, producing a count of documents for each distinct + value of that field. + +.. io-code-block:: + + .. input:: /includes/aggregation/aggregation.c + :language: c + :start-after: start-aggregation-pipeline + :end-before: end-aggregation-pipeline + :dedent: + + .. output:: + :visible: false + + { "_id" : "Queens", "count" : { "$numberInt" : "204" } } + { "_id" : "Staten Island", "count" : { "$numberInt" : "20" } } + { "_id" : "Missing", "count" : { "$numberInt" : "2" } } + { "_id" : "Bronx", "count" : { "$numberInt" : "71" } } + { "_id" : "Brooklyn", "count" : { "$numberInt" : "173" } } + { "_id" : "Manhattan", "count" : { "$numberInt" : "221" } } + +Explain an Aggregation +~~~~~~~~~~~~~~~~~~~~~~ + +To view information about how MongoDB executes your operation, you can +enable the ``explain`` option on your pipeline. When MongoDB explains an +operation, it returns **execution plans** and performance statistics. An execution +plan is a potential way MongoDB can complete an operation. +When you instruct MongoDB to explain an operation, it returns both the +plan MongoDB selected for the operation and any rejected execution plans. + +The following code example runs the same aggregation shown in the preceding section, but +uses the ``mongoc_client_command_simple()`` function to run the ``explain`` operation to +output the operation details: + +.. io-code-block:: + + .. input:: /includes/aggregation/aggregation.c + :language: c + :start-after: start-aggregation-explain + :end-before: end-aggregation-explain + :dedent: + + .. 
output:: + :visible: false + + { + "explainVersion": "2", + "queryPlanner": { + "namespace": "sample_restaurants.restaurants", + "indexFilterSet": false, + "parsedQuery": { + "cuisine": {"$eq": "Bakery"} + }, + "queryHash": "865F14C3", + "planCacheKey": "0697561B", + "optimizedPipeline": true, + "maxIndexedOrSolutionsReached": false, + "maxIndexedAndSolutionsReached": false, + "maxScansToExplodeReached": false, + "winningPlan": { ... }, + "rejectedPlans": [] + ... + } + ... + } + +Additional Information +---------------------- + +To view a full list of expression operators, see :manual:`Aggregation +Operators ` in the {+mdb-server+} manual. + +To learn about assembling an aggregation pipeline and view examples, see +:manual:`Aggregation Pipeline ` in the {+mdb-server+} manual. + +To learn more about creating pipeline stages, see :manual:`Aggregation +Stages ` in the {+mdb-server+} manual. + +To learn more about explaining MongoDB operations, see +:manual:`Explain Output ` and +:manual:`Query Plans ` in the {+mdb-server+} manual. 
+ +API Documentation +~~~~~~~~~~~~~~~~~ + +For more information about executing aggregation operations with the {+driver-short+}, +see the following API documentation: + +- `mongoc_collection_aggregate() <{+api-libmongoc+}/mongoc_collection_aggregate.html>`__ +- `mongoc_client_command_simple() <{+api-libmongoc+}/mongoc_client_command_simple.html>`__ \ No newline at end of file diff --git a/source/includes/aggregation/aggregation.c b/source/includes/aggregation/aggregation.c new file mode 100644 index 00000000..4747580c --- /dev/null +++ b/source/includes/aggregation/aggregation.c @@ -0,0 +1,77 @@ +#include +#include +#include + +int +main (int argc, char *argv[]) +{ + mongoc_client_t *client; + mongoc_collection_t *collection; + mongoc_init (); + + client = + mongoc_client_new (""); + collection = mongoc_client_get_collection (client, "sample_restaurants", "restaurants"); + + { + // Executes an aggregation pipeline containing the $match and $group stages and prints the results + // start-aggregation-pipeline + const bson_t *doc; + bson_t *pipeline = BCON_NEW ("pipeline", + "[", + "{", "$match", "{", "cuisine", BCON_UTF8 ("Bakery"), "}", "}", + "{", "$group", "{", + "_id", BCON_UTF8 ("$borough"), "count", "{", "$sum", BCON_INT32 (1), "}", "}", + "}", + "]"); + + mongoc_cursor_t *results = + mongoc_collection_aggregate (collection, MONGOC_QUERY_NONE, pipeline, NULL, NULL); + + while (mongoc_cursor_next (results, &doc)) { + char *str = bson_as_canonical_extended_json (doc, NULL); + printf ("%s\n", str); + bson_free (str); + } + + bson_destroy (pipeline); + mongoc_cursor_destroy (results); + // end-aggregation-pipeline + } + + { + // Runs a command to explain the logic behind the aggregation + // start-aggregation-explain + bson_t reply; + bson_error_t error; + + bson_t *command = BCON_NEW ( + "aggregate", BCON_UTF8 ("restaurants"), + "explain", BCON_BOOL(true), + "pipeline", + "[", + "{", "$match", "{", "cuisine", BCON_UTF8("Bakery"), "}", "}", + "{", "$group", "{", + 
"_id", BCON_UTF8("$borough"), "count", "{", "$sum", BCON_INT32(1), "}", "}", + "}", + "]"); + + if (mongoc_client_command_simple (client, "sample_restaurants", command, NULL, &reply, &error)) { + char *str = bson_as_canonical_extended_json (&reply, NULL); + printf ("%s\n", str); + bson_free (str); + } else { + fprintf (stderr, "Command failed: %s\n", error.message); + } + + bson_destroy (command); + bson_destroy (&reply); + // end-aggregation-explain + } + + mongoc_collection_destroy (collection); + mongoc_client_destroy (client); + mongoc_cleanup (); + + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/source/index.txt b/source/index.txt index dfb494f9..51857a48 100644 --- a/source/index.txt +++ b/source/index.txt @@ -9,6 +9,7 @@ /get-started /databases-collections /read + /aggregation /whats-new libbson API Documentation <{+api-libbson+}> libmongoc API Documentation <{+api-libmongoc+}> @@ -65,16 +66,11 @@ Learn how you can retrieve data from MongoDB in the :ref:`c-read` section. .. Learn how to work with common types of indexes in the :ref:`c-indexes` .. section. -.. TODO -.. Transform Your Data with Aggregation -.. ------------------------------------ - -.. Learn how to use the {+driver-short+} to perform aggregation operations in the -.. :ref:`c-aggregation` section. - -.. Learn how to use aggregation expression operations to build -.. aggregation stages in the :ref:`c-aggregation-expression-operations` section. +Transform Your Data with Aggregation +------------------------------------ +Learn how to use the {+driver-short+} to perform aggregation operations in the +:ref:`c-aggregation` section. .. TODO: .. 
FAQ From 0c528a38c8460026f7dd5b7407e6921f85c0e40a Mon Sep 17 00:00:00 2001 From: Michael Morisi Date: Wed, 18 Sep 2024 10:49:18 -0400 Subject: [PATCH 2/6] Fixes --- source/aggregation.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/source/aggregation.txt b/source/aggregation.txt index 10854689..4e8bf0ec 100644 --- a/source/aggregation.txt +++ b/source/aggregation.txt @@ -121,15 +121,14 @@ Explain an Aggregation ~~~~~~~~~~~~~~~~~~~~~~ To view information about how MongoDB executes your operation, you can -enable the ``explain`` option on your pipeline. When MongoDB explains an +run the ``explain`` operation on your pipeline. When MongoDB explains an operation, it returns **execution plans** and performance statistics. An execution plan is a potential way MongoDB can complete an operation. When you instruct MongoDB to explain an operation, it returns both the plan MongoDB selected for the operation and any rejected execution plans. The following code example runs the same aggregation shown in the preceding section, but -uses the ``mongoc_client_command_simple()`` function to run the ``explain`` operation to -output the operation details: +uses the ``mongoc_client_command_simple()`` function to explain the operation details: .. io-code-block:: From 253bb87989e8ccdc65902c12075366f05e510454 Mon Sep 17 00:00:00 2001 From: Michael Morisi Date: Wed, 18 Sep 2024 11:11:47 -0400 Subject: [PATCH 3/6] Fix --- source/get-started.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/get-started.txt b/source/get-started.txt index 812efb9b..e5097534 100644 --- a/source/get-started.txt +++ b/source/get-started.txt @@ -1,8 +1,8 @@ .. _c-get-started: -============================= -Get Started with the {+driver-short+} -============================= +=========== +Get Started +=========== .. 
contents:: On this page :local: From b61b0eaa4f23a7f61b1f3012924651b4cdcfdfc0 Mon Sep 17 00:00:00 2001 From: Michael Morisi Date: Wed, 18 Sep 2024 11:14:35 -0400 Subject: [PATCH 4/6] Fix --- source/get-started.txt | 6 +++--- source/index.txt | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/source/get-started.txt b/source/get-started.txt index e5097534..ade9b7fa 100644 --- a/source/get-started.txt +++ b/source/get-started.txt @@ -1,8 +1,8 @@ .. _c-get-started: -=========== -Get Started -=========== +============================= +Get Started with the C Driver +============================= .. contents:: On this page :local: diff --git a/source/index.txt b/source/index.txt index 51857a48..6db08da4 100644 --- a/source/index.txt +++ b/source/index.txt @@ -6,7 +6,7 @@ :titlesonly: :maxdepth: 1 - /get-started + Get Started /databases-collections /read /aggregation From 4a9aa05d663af2c9042115dcc415307c6f24e141 Mon Sep 17 00:00:00 2001 From: Michael Morisi Date: Wed, 18 Sep 2024 11:51:50 -0400 Subject: [PATCH 5/6] JS feedback --- source/aggregation.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/aggregation.txt b/source/aggregation.txt index 4e8bf0ec..3c0d7fd8 100644 --- a/source/aggregation.txt +++ b/source/aggregation.txt @@ -89,7 +89,7 @@ To perform an aggregation on the documents in a collection, pass a ``bson_t`` st that represents the pipeline stages to the ``mongoc_collection_aggregate()`` function. This example outputs a count of the number of bakeries in each borough -of New York City. The following code creates aggregation pipeline that contains the +of New York City. 
The following code creates an aggregation pipeline that contains the following stages: - A :manual:`$match ` stage to filter for documents From 9770111c8722e031a3ad3198f2b00e70939538cd Mon Sep 17 00:00:00 2001 From: Michael Morisi Date: Wed, 18 Sep 2024 15:59:10 -0400 Subject: [PATCH 6/6] Address KA feedback --- source/includes/aggregation/aggregation.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/source/includes/aggregation/aggregation.c b/source/includes/aggregation/aggregation.c index 4747580c..7ebbafd1 100644 --- a/source/includes/aggregation/aggregation.c +++ b/source/includes/aggregation/aggregation.c @@ -28,10 +28,16 @@ main (int argc, char *argv[]) mongoc_cursor_t *results = mongoc_collection_aggregate (collection, MONGOC_QUERY_NONE, pipeline, NULL, NULL); - while (mongoc_cursor_next (results, &doc)) { - char *str = bson_as_canonical_extended_json (doc, NULL); - printf ("%s\n", str); - bson_free (str); + bson_error_t error; + if (mongoc_cursor_error (results, &error)) + { + fprintf (stderr, "Aggregate failed: %s\n", error.message); + } else { + while (mongoc_cursor_next (results, &doc)) { + char *str = bson_as_canonical_extended_json (doc, NULL); + printf ("%s\n", str); + bson_free (str); + } } bson_destroy (pipeline);