From 001c51c1e02ac559130889826b32e873df164ccc Mon Sep 17 00:00:00 2001
From: KhaledBousrih <khaled.bousrih@netsach.fr>
Date: Tue, 14 Sep 2021 18:44:22 +0200
Subject: [PATCH 1/3] Add possibility to group stat results by field names

---
 CHANGELOG.md                               |   2 +-
 concrete_datastore/api/v1/views.py         |  42 +++++
 docs/api-routes.md                         | 158 +++++++++++++++++-
 tests/tests_api_v1_1/test_api_v1_1_CRUD.py | 176 ++++++++++++++++++++-
 4 files changed, 368 insertions(+), 10 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1d351260..51ac1194 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,7 @@
 
 ### Added
 
-- nothing added
+- Add possibility to group stat results by field names
 
 ### Changed
 
diff --git a/concrete_datastore/api/v1/views.py b/concrete_datastore/api/v1/views.py
index c71c8e15..e9187242 100644
--- a/concrete_datastore/api/v1/views.py
+++ b/concrete_datastore/api/v1/views.py
@@ -6,6 +6,7 @@
 import sys
 import re
 import os
+from collections import Counter
 from urllib.parse import urljoin, unquote, urlparse, urlunparse
 from importlib import import_module
 from itertools import chain
@@ -1315,6 +1316,9 @@ def get_stats(self, request, timestamp_start=None, timestamp_end=None):
 
         dict_pages = dict()
 
+        url = remove_query_param(url, 'group_by')
+        url = remove_query_param(url, 'combine')
+
         for page_number in range(1, _num_pages + 1):
             if page_number == 1:
                 dict_pages['page{}'.format(page_number)] = unquote(
@@ -1335,6 +1339,44 @@ def get_stats(self, request, timestamp_start=None, timestamp_end=None):
             ],
             'page_urls': dict_pages,
         }
+        group_by_fields = request.GET.get('group_by', None)
+        combine_results = request.GET.get('combine', 'false').lower()
+        if combine_results not in ('true', 'false'):
+            return Response(
+                data={
+                    'message': f'"combine" value ("{combine_results}") is not valid. Should be either "true" or "false"',
+                    '_errors': ['INVALID_QUERY'],
+                },
+                status=HTTP_400_BAD_REQUEST,
+            )
+        if group_by_fields is not None:
+            group_by_fields = group_by_fields.split(',')
+            if any(map(lambda x: x not in self.fields, group_by_fields)):
+                return Response(
+                    data={
+                        'message': 'lookup query is not a valid field',
+                        '_errors': ['INVALID_QUERY'],
+                    },
+                    status=HTTP_400_BAD_REQUEST,
+                )
+
+            grouped_data = {}
+            if combine_results == 'false':
+                for field_name in group_by_fields:
+                    grouped_data[field_name] = dict(
+                        Counter(queryset.values_list(field_name, flat=True))
+                    )
+            else:
+                grouped_data[','.join(group_by_fields)] = dict(
+                    Counter(
+                        map(
+                            lambda x: ','.join(map(str, x)),
+                            queryset.values_list(*group_by_fields),
+                        )
+                    )
+                )
+            data.update({"results": grouped_data})
+
         return Response(data)
 
     def _get_queryset_filtered_since_timestamp(
diff --git a/docs/api-routes.md b/docs/api-routes.md
index e29e7854..2ed95494 100644
--- a/docs/api-routes.md
+++ b/docs/api-routes.md
@@ -14,7 +14,7 @@ For each model, Concrete Datastore exposes two routes accepting different method
 
 #### List all instances of model MyModel
 
-A `GET` on the root url of the model MyModel will retrieve all instances of this model.
+A `GET` on the root url of the model MyModel will retrieve all instances of this model. This endpoint accepts filtering (see [API Authentication](authentication.md) for more information)
 
 - **Method**: `GET`
 
@@ -120,7 +120,7 @@ curl \
 
 #### Update a specific instance of model MyModel by its UID
 
-#### Update some of the fields with `PATCH`
+##### Update some of the fields with `PATCH`
 
 A `PATCH` on the url of a given instance of model MyModel will update the fields of this given instance.
 
@@ -142,7 +142,7 @@ curl \
 
 **Response**: with status code HTTP `200 (OK)`, the response body is a JSON containing all the fields of the given instance, updated.
 
-#### Update all the fields with `PUT`
+##### Update all the fields with `PUT`
 
 A `PUT` on the url of a given instance of model MyModel will update the fields of this given instance.
 
@@ -187,6 +187,158 @@ curl \
 
 This operation could fail. If the instance is related to a protected instance, it cannot be deleted. In this case, the HTTP status code is `412 (PRECONDITION FAILED)` with the error code `"PROTECTED_RELATION"` in the response.
 
+#### Get stats on model MyModel
+
+A `GET` on the endpoint `https://<webapp>/api/v1.1/my-model/stats/` allows to get stats on the given model.
+
+- **Method**: `GET`
+
+- **ENDPOINT**: `https://<webapp>/api/v1.1/my-model/stats/`
+
+- **Example**: 
+
+**Request**
+```shell
+curl \
+  -H "Authorization: Token <auth_token>" \
+  "https://<webapp>/api/v1.1/my-model/stats/"
+```
+
+**Response**: with status code HTTP `200 (OK)`, the response body is a JSON containing stats on the instances of the model such as:
+
+- `objects_count`: the number of instances of this model.
+
+- `num_total_pages`: the number of total pages.
+ 
+- `page_urls`: An object containing all the pages.
+
+Example of a response:
+
+```json
+{
+  "objects_count": 125,
+  "num_total_pages": 2,
+  "max_allowed_objects_per_page": 100,
+  "timestamp_start": 0.0,
+  "timestamp_end": 701184650.0,
+  "page_urls": {
+    "page1": "https://<webapp>/api/v1.1/my-model/",
+    "page2": "https://<webapp>/api/v1.1/my-model/?page=2"
+  }
+}
+```
+
+##### Optional parameters for the stats endpoint
+
+- **Path parameters:** The stats endpoint accepts two path parameters: `timestamp_start` and `timestamp_end` to get stats between two timestamps. You can either specify only a `timestamp_start` (the end will be the current timestamp) or both start and end:
+
+```shell
+curl \
+  -H "Authorization: Token <auth_token>" \
+  "https://<webapp>/api/v1.1/my-model/stats/timestamp_start=123456789.123"
+```
+
+```shell
+curl \
+  -H "Authorization: Token <auth_token>" \
+  "https://<webapp>/api/v1.1/my-model/stats/timestamp_start=123456789.0-timestamp_end:123456832.0"
+```
+
+- **Query parameters:** Two additional query parameters are accepted for this endpoint: `group_by` and `combine`:
+
+  + `group_by`: Accepts one or more comma-separated field names. If the value of this param contains an invalid field name, the API responds with a `400 BAD REQUEST`. Used to group the stats by the field values. The results will appear as an object within a `results` field. Example:
+
+**Request:**
+```shell
+curl \
+  -H "Authorization: Token <auth_token>" \
+  "https://<webapp>/api/v1.1/my-model/stats/?group_by=status"
+```
+
+**Response:**
+```json
+{
+  "objects_count": 125,
+  "num_total_pages": 2,
+  "max_allowed_objects_per_page": 100,
+  "timestamp_start": 0.0,
+  "timestamp_end": 701184650.0,
+  "page_urls": {
+    "page1": "https://<webapp>/api/v1.1/my-model/",
+    "page2": "https://<webapp>/api/v1.1/my-model/?page=2"
+  },
+  "results":{
+    "status":{
+      "COMPLETED": 113,
+      "FAILED": 12
+    }
+  }
+}
+```
+
+  + `combine`: Accepts `true` or `false` (defaults to `false`). If any other value is given, the API responds with a `400 BAD REQUEST`. This param is used when more than one field name is given in the `group_by` parameter, otherwise it is ignored. If `combine` is `true`, the results will be a combination of the possible values of the given fields. Otherwise the results will be given separately for each field.
+
+**Request with combine to false:**
+```shell
+curl \
+  -H "Authorization: Token <auth_token>" \
+  "https://<webapp>/api/v1.1/my-model/stats/?group_by=status,archived"
+```
+
+**Response:**
+```json
+{
+  "objects_count": 125,
+  "num_total_pages": 2,
+  "max_allowed_objects_per_page": 100,
+  "timestamp_start": 0.0,
+  "timestamp_end": 701184650.0,
+  "page_urls": {
+    "page1": "https://<webapp>/api/v1.1/my-model/",
+    "page2": "https://<webapp>/api/v1.1/my-model/?page=2"
+  },
+  "results":{
+    "status":{
+      "COMPLETED": 113,
+      "FAILED": 12
+    },
+    "archived":{
+      "true": 53,
+      "false": 72
+    }
+  }
+}
+```
+
+**Request with combine to true:**
+```shell
+curl \
+  -H "Authorization: Token <auth_token>" \
+  "https://<webapp>/api/v1.1/my-model/stats/?group_by=status,archived&combine=true"
+```
+
+**Response:**
+```json
+{
+  "objects_count": 125,
+  "num_total_pages": 2,
+  "max_allowed_objects_per_page": 100,
+  "timestamp_start": 0.0,
+  "timestamp_end": 701184650.0,
+  "page_urls": {
+    "page1": "https://<webapp>/api/v1.1/my-model/",
+    "page2": "https://<webapp>/api/v1.1/my-model/?page=2"
+  },
+  "results":{
+    "status,archived":{
+      "COMPLETED,true": 45,
+      "COMPLETED,false": 68,
+      "FAILED,true": 8
+      "FAILED,false": 4
+    }
+  }
+}
+```
 
 ### Specific API endpoints
 
diff --git a/tests/tests_api_v1_1/test_api_v1_1_CRUD.py b/tests/tests_api_v1_1/test_api_v1_1_CRUD.py
index 40fc789c..a7ecbee9 100644
--- a/tests/tests_api_v1_1/test_api_v1_1_CRUD.py
+++ b/tests/tests_api_v1_1/test_api_v1_1_CRUD.py
@@ -3,11 +3,7 @@
 from rest_framework.test import APITestCase
 from django.conf import settings
 from rest_framework import status
-from concrete_datastore.concrete.models import (
-    User,
-    UserConfirmation,
-    Project,
-)
+from concrete_datastore.concrete.models import User, UserConfirmation, Project
 from django.test import override_settings
 
 
@@ -26,7 +22,7 @@ def setUp(self):
         self.confirmation.save()
         url = '/api/v1.1/auth/login/'
         resp = self.client.post(
-            url, {"email": "johndoe@netsach.org", "password": "plop",},
+            url, {"email": "johndoe@netsach.org", "password": "plop"}
         )
         self.token = resp.data['token']
 
@@ -156,6 +152,174 @@ def test_stats_endpoint(self):
         }
         self.assertDictEqual(resp.data['page_urls'], pages_dict)
 
+    @override_settings(API_MAX_PAGINATION_SIZE_NESTED=10)
+    def test_grouped_stats_bad_request(self):
+        url = '/api/v1.1/project/stats/?group_by=fake_field_name'
+        resp = self.client.get(
+            url, {}, HTTP_AUTHORIZATION='Token {}'.format(self.token)
+        )
+        self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertDictEqual(
+            resp.data,
+            {
+                'message': 'lookup query is not a valid field',
+                '_errors': ['INVALID_QUERY'],
+            },
+        )
+        url = '/api/v1.1/project/stats/?group_by=name,archived&combine=toto'
+        resp = self.client.get(
+            url, {}, HTTP_AUTHORIZATION='Token {}'.format(self.token)
+        )
+        self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertDictEqual(
+            resp.data,
+            {
+                'message': '"combine" value ("toto") is not valid. Should be either "true" or "false"',
+                '_errors': ['INVALID_QUERY'],
+            },
+        )
+
+    @override_settings(API_MAX_PAGINATION_SIZE_NESTED=10)
+    def test_grouped_stats(self):
+        Project.objects.create(name="A", archived=False, created_by=self.user)
+        Project.objects.create(name="A", archived=False, created_by=self.user)
+        Project.objects.create(name="A", archived=False, created_by=self.user)
+        Project.objects.create(name="A", archived=True, created_by=self.user)
+        Project.objects.create(name="B", archived=False, created_by=self.user)
+        Project.objects.create(name="B", archived=False, created_by=self.user)
+        Project.objects.create(name="B", archived=True, created_by=self.user)
+        Project.objects.create(name="B", archived=True, created_by=self.user)
+
+        url = '/api/v1.1/project/stats/?group_by=name'
+        resp = self.client.get(
+            url, {}, HTTP_AUTHORIZATION='Token {}'.format(self.token)
+        )
+        self.assertEqual(resp.status_code, status.HTTP_200_OK)
+        self.assertIn('objects_count', resp.data)
+        self.assertIn('timestamp_start', resp.data)
+        self.assertIn('timestamp_end', resp.data)
+        self.assertIn('num_total_pages', resp.data)
+        self.assertIn('max_allowed_objects_per_page', resp.data)
+        self.assertIn('page_urls', resp.data)
+        self.assertIn('results', resp.data)
+        self.assertEqual(resp.data["objects_count"], 8)
+        self.assertEqual(resp.data['timestamp_start'], 0)
+        self.assertEqual(resp.data['max_allowed_objects_per_page'], 10)
+        pages_dict = {'page1': 'http://testserver/api/v1.1/project/'}
+        self.assertDictEqual(resp.data['page_urls'], pages_dict)
+
+        results = resp.data['results']
+
+        self.assertIn('name', results)
+
+        self.assertIn('A', results['name'])
+        self.assertEqual(4, results['name']['A'])
+
+        self.assertIn('B', results['name'])
+        self.assertEqual(4, results['name']['B'])
+
+        #: URL with filters
+        url = '/api/v1.1/project/stats/?archived=false&group_by=name'
+        resp = self.client.get(
+            url, {}, HTTP_AUTHORIZATION='Token {}'.format(self.token)
+        )
+        self.assertEqual(resp.status_code, status.HTTP_200_OK)
+        self.assertIn('objects_count', resp.data)
+        self.assertIn('timestamp_start', resp.data)
+        self.assertIn('timestamp_end', resp.data)
+        self.assertIn('num_total_pages', resp.data)
+        self.assertIn('max_allowed_objects_per_page', resp.data)
+        self.assertIn('page_urls', resp.data)
+        self.assertIn('results', resp.data)
+        self.assertEqual(resp.data["objects_count"], 5)
+        self.assertEqual(resp.data['timestamp_start'], 0)
+        self.assertEqual(resp.data['max_allowed_objects_per_page'], 10)
+        pages_dict = {
+            'page1': 'http://testserver/api/v1.1/project/?archived=false'
+        }
+        self.assertDictEqual(resp.data['page_urls'], pages_dict)
+
+        results = resp.data['results']
+
+        self.assertIn('name', results)
+
+        self.assertIn('A', results['name'])
+        self.assertEqual(3, results['name']['A'])
+
+        self.assertIn('B', results['name'])
+        self.assertEqual(2, results['name']['B'])
+
+        # Multiple group by separated
+        url = '/api/v1.1/project/stats/?group_by=name,archived'
+        resp = self.client.get(
+            url, {}, HTTP_AUTHORIZATION='Token {}'.format(self.token)
+        )
+        self.assertEqual(resp.status_code, status.HTTP_200_OK)
+        self.assertIn('objects_count', resp.data)
+        self.assertIn('timestamp_start', resp.data)
+        self.assertIn('timestamp_end', resp.data)
+        self.assertIn('num_total_pages', resp.data)
+        self.assertIn('max_allowed_objects_per_page', resp.data)
+        self.assertIn('page_urls', resp.data)
+        self.assertIn('results', resp.data)
+        self.assertEqual(resp.data["objects_count"], 8)
+        self.assertEqual(resp.data['timestamp_start'], 0)
+        self.assertEqual(resp.data['max_allowed_objects_per_page'], 10)
+        pages_dict = {'page1': 'http://testserver/api/v1.1/project/'}
+        self.assertDictEqual(resp.data['page_urls'], pages_dict)
+
+        results = resp.data['results']
+
+        self.assertIn('name', results)
+        self.assertIn('archived', results)
+
+        self.assertIn('A', results['name'])
+        self.assertEqual(4, results['name']['A'])
+
+        self.assertIn('B', results['name'])
+        self.assertEqual(4, results['name']['B'])
+
+        self.assertIn(True, results['archived'])
+        self.assertEqual(3, results['archived'][True])
+
+        self.assertIn(False, results['archived'])
+        self.assertEqual(5, results['archived'][False])
+
+        # Multiple group by combined
+        url = '/api/v1.1/project/stats/?group_by=name,archived&combine=true'
+        resp = self.client.get(
+            url, {}, HTTP_AUTHORIZATION='Token {}'.format(self.token)
+        )
+        self.assertEqual(resp.status_code, status.HTTP_200_OK)
+        self.assertIn('objects_count', resp.data)
+        self.assertIn('timestamp_start', resp.data)
+        self.assertIn('timestamp_end', resp.data)
+        self.assertIn('num_total_pages', resp.data)
+        self.assertIn('max_allowed_objects_per_page', resp.data)
+        self.assertIn('page_urls', resp.data)
+        self.assertIn('results', resp.data)
+        self.assertEqual(resp.data["objects_count"], 8)
+        self.assertEqual(resp.data['timestamp_start'], 0)
+        self.assertEqual(resp.data['max_allowed_objects_per_page'], 10)
+        pages_dict = {'page1': 'http://testserver/api/v1.1/project/'}
+        self.assertDictEqual(resp.data['page_urls'], pages_dict)
+
+        results = resp.data['results']
+
+        self.assertIn('name,archived', results)
+
+        self.assertIn('A,True', results['name,archived'])
+        self.assertEqual(1, results['name,archived']['A,True'])
+
+        self.assertIn('A,False', results['name,archived'])
+        self.assertEqual(3, results['name,archived']['A,False'])
+
+        self.assertIn('B,True', results['name,archived'])
+        self.assertEqual(2, results['name,archived']['B,True'])
+
+        self.assertIn('B,False', results['name,archived'])
+        self.assertEqual(2, results['name,archived']['B,False'])
+
     @override_settings(API_MAX_PAGINATION_SIZE_NESTED=10)
     def test_stats_endpoint_with_start(self):
         for i in range(20):

From 8470ce3a08cf8bd0513a69d3f26cc1120fd615f4 Mon Sep 17 00:00:00 2001
From: KhaledBousrih <khaled.bousrih@netsach.fr>
Date: Tue, 14 Sep 2021 18:56:01 +0200
Subject: [PATCH 2/3] Add more details about timestamp_start and timestamp_end
 for stats

---
 docs/api-routes.md | 8 +++++---
 docs/filters.md    | 1 +
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/docs/api-routes.md b/docs/api-routes.md
index 2ed95494..400d953d 100644
--- a/docs/api-routes.md
+++ b/docs/api-routes.md
@@ -14,7 +14,7 @@ For each model, Concrete Datastore exposes two routes accepting different method
 
 #### List all instances of model MyModel
 
-A `GET` on the root url of the model MyModel will retrieve all instances of this model. This endpoint accepts filtering (see [API Authentication](authentication.md) for more information)
+A `GET` on the root url of the model MyModel will retrieve all instances of this model. This endpoint accepts filtering (see [filters section](filters.md) for more information)
 
 - **Method**: `GET`
 
@@ -189,7 +189,9 @@ This operation could fail. If the instance is related to a protected instance, i
 
 #### Get stats on model MyModel
 
-A `GET` on the endpoint `https://<webapp>/api/v1.1/my-model/stats/` allows to get stats on the given model.
+A `GET` on the endpoint `https://<webapp>/api/v1.1/my-model/stats/` allows to get stats on the given model. This endpoint accepts filtering (see [filters section](filters.md) for more information)
+
+*N.B.* Some query parameters in the [filters section](filters.md) are not needed for this endpoint (hence ignored) such as `c_resp_page_size`, `page` and `c_resp_nested`. As for the `timestamp_start` and `timestamp_end`, they are used differently for this endpoint. (Please refer to the [optional parameters section](#OptionalParameters) to see more information about the `timestamp_start` and `timestamp_end`, as well as the introduction of two other query parameters)
 
 - **Method**: `GET`
 
@@ -228,7 +230,7 @@ Example of a response:
 }
 ```
 
-##### Optional parameters for the stats endpoint
+##### <a name="OptionalParameters"></a>Optional parameters for the stats endpoint
 
 - **Path parameters:** The stats endpoint accepts two path parameters: `timestamp_start` and `timestamp_end` to get stats between two timestamps. You can either specify only a `timestamp_start` (the end will be the current timestamp) or both start and end:
 
diff --git a/docs/filters.md b/docs/filters.md
index 62c81c61..8c5ec59e 100644
--- a/docs/filters.md
+++ b/docs/filters.md
@@ -22,6 +22,7 @@ example:
 example: `?creation_date__range=2018-01-01,2018-12-31` returns all objects with creation date is between 1st Jan 2018 and 31st Dec 2018
 
 - `c_resp_page_size`: The API also features pagination by the use of the query parameter `c_resp_page_size` that takes an integer representing the number of results per page that sould be returned
+- `page`: If this query param is specified, it returns the results of the requested page. Returns `404 NOT FOUND` if the page is not found
 - `c_resp_nested`: If there are relation between objects, by default the API shows the relation completely, it is nested.
 Example:
 

From adf44955ea082b17d668acadba2a0c9afe74b01b Mon Sep 17 00:00:00 2001
From: Khaled Bousrih <60177005+KhaledBousrih@users.noreply.github.com>
Date: Wed, 13 Oct 2021 15:23:35 +0200
Subject: [PATCH 3/3] Apply suggestions from code review

---
 docs/api-routes.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/api-routes.md b/docs/api-routes.md
index 400d953d..37d49987 100644
--- a/docs/api-routes.md
+++ b/docs/api-routes.md
@@ -335,7 +335,7 @@ curl \
     "status,archived":{
       "COMPLETED,true": 45,
       "COMPLETED,false": 68,
-      "FAILED,true": 8
+      "FAILED,true": 8,
       "FAILED,false": 4
     }
   }