From 001c51c1e02ac559130889826b32e873df164ccc Mon Sep 17 00:00:00 2001 From: KhaledBousrih Date: Tue, 14 Sep 2021 18:44:22 +0200 Subject: [PATCH 1/3] Add possibily to group stat results by field names --- CHANGELOG.md | 2 +- concrete_datastore/api/v1/views.py | 42 +++++ docs/api-routes.md | 158 +++++++++++++++++- tests/tests_api_v1_1/test_api_v1_1_CRUD.py | 176 ++++++++++++++++++++- 4 files changed, 368 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d351260..51ac1194 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ ### Added -- nothing added +- Add possibily to group stat results by field names ### Changed diff --git a/concrete_datastore/api/v1/views.py b/concrete_datastore/api/v1/views.py index c71c8e15..e9187242 100644 --- a/concrete_datastore/api/v1/views.py +++ b/concrete_datastore/api/v1/views.py @@ -6,6 +6,7 @@ import sys import re import os +from collections import Counter from urllib.parse import urljoin, unquote, urlparse, urlunparse from importlib import import_module from itertools import chain @@ -1315,6 +1316,9 @@ def get_stats(self, request, timestamp_start=None, timestamp_end=None): dict_pages = dict() + url = remove_query_param(url, 'group_by') + url = remove_query_param(url, 'combine') + for page_number in range(1, _num_pages + 1): if page_number == 1: dict_pages['page{}'.format(page_number)] = unquote( @@ -1335,6 +1339,44 @@ def get_stats(self, request, timestamp_start=None, timestamp_end=None): ], 'page_urls': dict_pages, } + group_by_fields = request.GET.get('group_by', None) + combine_results = request.GET.get('combine', 'false').lower() + if combine_results not in ('true', 'false'): + return Response( + data={ + 'message': f'"combine" value ("{combine_results}") is not valid. 
Should be either "true" or "false"', + '_errors': ['INVALID_QUERY'], + }, + status=HTTP_400_BAD_REQUEST, + ) + if group_by_fields is not None: + group_by_fields = group_by_fields.split(',') + if any(map(lambda x: x not in self.fields, group_by_fields)): + return Response( + data={ + 'message': 'lookup query is not a valid field', + '_errors': ['INVALID_QUERY'], + }, + status=HTTP_400_BAD_REQUEST, + ) + + grouped_data = {} + if combine_results == 'false': + for field_name in group_by_fields: + grouped_data[field_name] = dict( + Counter(queryset.values_list(field_name, flat=True)) + ) + else: + grouped_data[','.join(group_by_fields)] = dict( + Counter( + map( + lambda x: ','.join(map(str, x)), + queryset.values_list(*group_by_fields), + ) + ) + ) + data.update({"results": grouped_data}) + return Response(data) def _get_queryset_filtered_since_timestamp( diff --git a/docs/api-routes.md b/docs/api-routes.md index e29e7854..2ed95494 100644 --- a/docs/api-routes.md +++ b/docs/api-routes.md @@ -14,7 +14,7 @@ For each model, Concrete Datastore exposes two routes accepting different method #### List all instances of model MyModel -A `GET` on the root url of the model MyModel will retrieve all instances of this model. +A `GET` on the root url of the model MyModel will retrieve all instances of this model. This endpoint accepts filtering (see [API Authentication](authentication.md) for more information) - **Method**: `GET` @@ -120,7 +120,7 @@ curl \ #### Update a specific instance of model MyModel by its UID -#### Update some of the fields with `PATCH` +##### Update some of the fields with `PATCH` A `PATCH` on the url of a given instance of model MyModel will update the fields of this given instance. @@ -142,7 +142,7 @@ curl \ **Response**: with status code HTTP `200 (OK)`, the response body is a JSON containing all the fields of the given instance, updated. 
-#### Update all the fields with `PUT` +##### Update all the fields with `PUT` A `PUT` on the url of a given instance of model MyModel will update the fields of this given instance. @@ -187,6 +187,158 @@ curl \ This operation could fail. If the instance is related to a protected instance, it cannot be deleted. In this case, the HTTP status code is `412 (PRECONDITION FAILED)` with the error code `"PROTECTED_RELATION"` in the response. +#### Get stats on model MyModel + +A `GET` on the endpoint `https:///api/v1.1/my-model/stats/` allows to get stats on the given model. + +- **Method**: `GET` + +- **ENDPOINT**: `https:///api/v1.1/my-model/stats/` + +- **Example**: + +**Request** +```shell +curl \ + -H "Authorization: Token " \ + "https:///api/v1.1/my-model/stats/" +``` + +**Response**:with status code HTTP `200 (OK)`, the response body is a JSON containing stats on the instances of the model such as: + +- `objects_count`: the number of instances of this model. + +- `num_total_pages`: the number of total pages. + +- `page_urls`: An object containing all the pages. + +Example of a response: + +```json +{ + "objects_count": 125, + "num_total_pages": 2, + "max_allowed_objects_per_page": 100, + "timestamp_start": 0.0, + "timestamp_end": 701184650.0, + "page_urls": { + "page1": "https:///api/v1.1/my-model/", + "page2": "https:///api/v1.1/my-model/?page=2" + } +} +``` + +##### Optional parameters for the stats endpoint + +- **Path parameters:** The stats endpoint accepts two path parameters: `timestamp_start` and `timestamp_end` to get stats between two timestamps. 
You can either specify only a `timestamp_start` (the end will be the current timestamp) or both start and end: + +```shell +curl \ + -H "Authorization: Token " \ + "https:///api/v1.1/my-model/stats/timestamp_start=123456789.123" +``` + +```shell +curl \ + -H "Authorization: Token " \ + "https:///api/v1.1/my-model/stats/timestamp_start=123456789.0-timestamp_end:123456832.0" +``` + +- **Query parameters:** Two additional query parameters are accepted for this endpoint: `group_by` and `combine`: + + + `group_by`: Accepts one or more comma-separated field names. If the value of this param contains an invalid field name, the API responds with a `400 BAD REQUEST`. Used to group the stats by the field values. The results will appear as an object within a `results` field. Example: + +**Request:** +```shell +curl \ + -H "Authorization: Token " \ + "https:///api/v1.1/my-model/stats/?group_by=status" +``` + +**Response:** +```json +{ + "objects_count": 125, + "num_total_pages": 2, + "max_allowed_objects_per_page": 100, + "timestamp_start": 0.0, + "timestamp_end": 701184650.0, + "page_urls": { + "page1": "https:///api/v1.1/my-model/", + "page2": "https:///api/v1.1/my-model/?page=2" + }, + "results":{ + "status":{ + "COMPLETED": 113, + "FAILED": 12 + } + } +} +``` + + + `combine`: Accepts `true` or `false` (defaults to `false`). If any other value is given, the API responds with a `400 BAD REQUEST`. This param is used when more than one field name is given in the `group_by` parameter, otherwise it is ignored. If `combine` is `true`, the results will be a combination of the possible values of the two fields. 
Otherwise the results will be given separately for each field + +**Request with combine to false:** +```shell +curl \ + -H "Authorization: Token " \ + "https:///api/v1.1/my-model/stats/?group_by=status,archived" +``` + +**Response:** +```json +{ + "objects_count": 125, + "num_total_pages": 2, + "max_allowed_objects_per_page": 100, + "timestamp_start": 0.0, + "timestamp_end": 701184650.0, + "page_urls": { + "page1": "https:///api/v1.1/my-model/", + "page2": "https:///api/v1.1/my-model/?page=2" + }, + "results":{ + "status":{ + "COMPLETED": 113, + "FAILED": 12 + }, + "archived":{ + "true": 53, + "false": 72 + } + } +} +``` + +**Request with combine to true:** +```shell +curl \ + -H "Authorization: Token " \ + "https:///api/v1.1/my-model/stats/?group_by=status,archived&combine=true" +``` + +**Response:** +```json +{ + "objects_count": 125, + "num_total_pages": 2, + "max_allowed_objects_per_page": 100, + "timestamp_start": 0.0, + "timestamp_end": 701184650.0, + "page_urls": { + "page1": "https:///api/v1.1/my-model/", + "page2": "https:///api/v1.1/my-model/?page=2" + }, + "results":{ + "status,archived":{ + "COMPLETED,true": 45, + "COMPLETED,false": 68, + "FAILED,true": 8 + "FAILED,false": 4 + } + } +} +``` ### Specific API endpoints diff --git a/tests/tests_api_v1_1/test_api_v1_1_CRUD.py b/tests/tests_api_v1_1/test_api_v1_1_CRUD.py index 40fc789c..a7ecbee9 100644 --- a/tests/tests_api_v1_1/test_api_v1_1_CRUD.py +++ b/tests/tests_api_v1_1/test_api_v1_1_CRUD.py @@ -3,11 +3,7 @@ from rest_framework.test import APITestCase from django.conf import settings from rest_framework import status -from concrete_datastore.concrete.models import ( - User, - UserConfirmation, - Project, -) +from concrete_datastore.concrete.models import User, UserConfirmation, Project from django.test import override_settings @@ -26,7 +22,7 @@ def setUp(self): self.confirmation.save() url = '/api/v1.1/auth/login/' resp = self.client.post( - url, {"email": "johndoe@netsach.org", "password": "plop",}, 
+ url, {"email": "johndoe@netsach.org", "password": "plop"} ) self.token = resp.data['token'] @@ -156,6 +152,174 @@ def test_stats_endpoint(self): } self.assertDictEqual(resp.data['page_urls'], pages_dict) + @override_settings(API_MAX_PAGINATION_SIZE_NESTED=10) + def test_grouped_stats_bad_request(self): + url = '/api/v1.1/project/stats/?group_by=fake_field_name' + resp = self.client.get( + url, {}, HTTP_AUTHORIZATION='Token {}'.format(self.token) + ) + self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual( + resp.data, + { + 'message': 'lookup query is not a valid field', + '_errors': ['INVALID_QUERY'], + }, + ) + url = '/api/v1.1/project/stats/?group_by=name,archived&combine=toto' + resp = self.client.get( + url, {}, HTTP_AUTHORIZATION='Token {}'.format(self.token) + ) + self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual( + resp.data, + { + 'message': '"combine" value ("toto") is not valid. Should be either "true" or "false"', + '_errors': ['INVALID_QUERY'], + }, + ) + + @override_settings(API_MAX_PAGINATION_SIZE_NESTED=10) + def test_grouped_stats(self): + Project.objects.create(name="A", archived=False, created_by=self.user) + Project.objects.create(name="A", archived=False, created_by=self.user) + Project.objects.create(name="A", archived=False, created_by=self.user) + Project.objects.create(name="A", archived=True, created_by=self.user) + Project.objects.create(name="B", archived=False, created_by=self.user) + Project.objects.create(name="B", archived=False, created_by=self.user) + Project.objects.create(name="B", archived=True, created_by=self.user) + Project.objects.create(name="B", archived=True, created_by=self.user) + + url = '/api/v1.1/project/stats/?group_by=name' + resp = self.client.get( + url, {}, HTTP_AUTHORIZATION='Token {}'.format(self.token) + ) + self.assertEqual(resp.status_code, status.HTTP_200_OK) + self.assertIn('objects_count', resp.data) + 
self.assertIn('timestamp_start', resp.data) + self.assertIn('timestamp_end', resp.data) + self.assertIn('num_total_pages', resp.data) + self.assertIn('max_allowed_objects_per_page', resp.data) + self.assertIn('page_urls', resp.data) + self.assertIn('results', resp.data) + self.assertEqual(resp.data["objects_count"], 8) + self.assertEqual(resp.data['timestamp_start'], 0) + self.assertEqual(resp.data['max_allowed_objects_per_page'], 10) + pages_dict = {'page1': 'http://testserver/api/v1.1/project/'} + self.assertDictEqual(resp.data['page_urls'], pages_dict) + + results = resp.data['results'] + + self.assertIn('name', results) + + self.assertIn('A', results['name']) + self.assertEqual(4, results['name']['A']) + + self.assertIn('B', results['name']) + self.assertEqual(4, results['name']['B']) + + #: URL with filters + url = '/api/v1.1/project/stats/?archived=false&group_by=name' + resp = self.client.get( + url, {}, HTTP_AUTHORIZATION='Token {}'.format(self.token) + ) + self.assertEqual(resp.status_code, status.HTTP_200_OK) + self.assertIn('objects_count', resp.data) + self.assertIn('timestamp_start', resp.data) + self.assertIn('timestamp_end', resp.data) + self.assertIn('num_total_pages', resp.data) + self.assertIn('max_allowed_objects_per_page', resp.data) + self.assertIn('page_urls', resp.data) + self.assertIn('results', resp.data) + self.assertEqual(resp.data["objects_count"], 5) + self.assertEqual(resp.data['timestamp_start'], 0) + self.assertEqual(resp.data['max_allowed_objects_per_page'], 10) + pages_dict = { + 'page1': 'http://testserver/api/v1.1/project/?archived=false' + } + self.assertDictEqual(resp.data['page_urls'], pages_dict) + + results = resp.data['results'] + + self.assertIn('name', results) + + self.assertIn('A', results['name']) + self.assertEqual(3, results['name']['A']) + + self.assertIn('B', results['name']) + self.assertEqual(2, results['name']['B']) + + # Multiple group by separated + url = '/api/v1.1/project/stats/?group_by=name,archived' + 
resp = self.client.get( + url, {}, HTTP_AUTHORIZATION='Token {}'.format(self.token) + ) + self.assertEqual(resp.status_code, status.HTTP_200_OK) + self.assertIn('objects_count', resp.data) + self.assertIn('timestamp_start', resp.data) + self.assertIn('timestamp_end', resp.data) + self.assertIn('num_total_pages', resp.data) + self.assertIn('max_allowed_objects_per_page', resp.data) + self.assertIn('page_urls', resp.data) + self.assertIn('results', resp.data) + self.assertEqual(resp.data["objects_count"], 8) + self.assertEqual(resp.data['timestamp_start'], 0) + self.assertEqual(resp.data['max_allowed_objects_per_page'], 10) + pages_dict = {'page1': 'http://testserver/api/v1.1/project/'} + self.assertDictEqual(resp.data['page_urls'], pages_dict) + + results = resp.data['results'] + + self.assertIn('name', results) + self.assertIn('archived', results) + + self.assertIn('A', results['name']) + self.assertEqual(4, results['name']['A']) + + self.assertIn('B', results['name']) + self.assertEqual(4, results['name']['B']) + + self.assertIn(True, results['archived']) + self.assertEqual(3, results['archived'][True]) + + self.assertIn(False, results['archived']) + self.assertEqual(5, results['archived'][False]) + + # Multiple group by combined + url = '/api/v1.1/project/stats/?group_by=name,archived&combine=true' + resp = self.client.get( + url, {}, HTTP_AUTHORIZATION='Token {}'.format(self.token) + ) + self.assertEqual(resp.status_code, status.HTTP_200_OK) + self.assertIn('objects_count', resp.data) + self.assertIn('timestamp_start', resp.data) + self.assertIn('timestamp_end', resp.data) + self.assertIn('num_total_pages', resp.data) + self.assertIn('max_allowed_objects_per_page', resp.data) + self.assertIn('page_urls', resp.data) + self.assertIn('results', resp.data) + self.assertEqual(resp.data["objects_count"], 8) + self.assertEqual(resp.data['timestamp_start'], 0) + self.assertEqual(resp.data['max_allowed_objects_per_page'], 10) + pages_dict = {'page1': 
'http://testserver/api/v1.1/project/'} + self.assertDictEqual(resp.data['page_urls'], pages_dict) + + results = resp.data['results'] + + self.assertIn('name,archived', results) + + self.assertIn('A,True', results['name,archived']) + self.assertEqual(1, results['name,archived']['A,True']) + + self.assertIn('A,False', results['name,archived']) + self.assertEqual(3, results['name,archived']['A,False']) + + self.assertIn('B,True', results['name,archived']) + self.assertEqual(2, results['name,archived']['B,True']) + + self.assertIn('B,False', results['name,archived']) + self.assertEqual(2, results['name,archived']['B,False']) + @override_settings(API_MAX_PAGINATION_SIZE_NESTED=10) def test_stats_endpoint_with_start(self): for i in range(20): From 8470ce3a08cf8bd0513a69d3f26cc1120fd615f4 Mon Sep 17 00:00:00 2001 From: KhaledBousrih Date: Tue, 14 Sep 2021 18:56:01 +0200 Subject: [PATCH 2/3] Add more details about timestamp_start and timestamp_end for stats --- docs/api-routes.md | 8 +++++--- docs/filters.md | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/api-routes.md b/docs/api-routes.md index 2ed95494..400d953d 100644 --- a/docs/api-routes.md +++ b/docs/api-routes.md @@ -14,7 +14,7 @@ For each model, Concrete Datastore exposes two routes accepting different method #### List all instances of model MyModel -A `GET` on the root url of the model MyModel will retrieve all instances of this model. This endpoint accepts filtering (see [API Authentication](authentication.md) for more information) +A `GET` on the root url of the model MyModel will retrieve all instances of this model. This endpoint accepts filtering (see [filters section](filters.md) for more information) - **Method**: `GET` @@ -189,7 +189,9 @@ This operation could fail. If the instance is related to a protected instance, i #### Get stats on model MyModel -A `GET` on the endpoint `https:///api/v1.1/my-model/stats/` allows to get stats on the given model. 
+A `GET` on the endpoint `https:///api/v1.1/my-model/stats/` allows you to get stats on the given model. This endpoint accepts filtering (see [filters section](filters.md) for more information) + +*N.B* Some query parameters in the [filters section](filters.md) are not needed for this endpoint (hence ignored) such as `c_resp_page_size`, `page` and `c_resp_nested`. As for the `timestamp_start` and `timestamp_end`, they are used differently for this endpoint. (Please refer to the [optional parameters section](#OptionalParameters) to see more information about the `timestamp_start` and `timestamp_end`, as well as the introduction of two other query parameters) - **Method**: `GET` @@ -228,7 +230,7 @@ Example of a response: } ``` -##### Optional parameters for the stats endpoint +##### Optional parameters for the stats endpoint - **Path parameters:** The stats endpoint accepts two path parameters: `timestamp_start` and `timestamp_end` to get stats between two timestamps. You can either specify only a `timestamp_start` (the end will be the current timestamp) or both start and end: diff --git a/docs/filters.md b/docs/filters.md index 62c81c61..8c5ec59e 100644 --- a/docs/filters.md +++ b/docs/filters.md @@ -22,6 +22,7 @@ example: example: `?creation_date__range=2018-01-01,2018-12-31` returns all objects with creation date is between 1st Jan 2018 and 31st Dec 2018 - `c_resp_page_size`: The API also features pagination by the use of the query parameter `c_resp_page_size` that takes an integer representing the number of results per page that sould be returned +- `page`: If this query param is specified, it returns the results of the requested page. Returns `404 NOT FOUND` if the page is not found - `c_resp_nested`: If there are relation between objects, by default the API shows the relation completely, it is nested. 
Example: From adf44955ea082b17d668acadba2a0c9afe74b01b Mon Sep 17 00:00:00 2001 From: Khaled Bousrih <60177005+KhaledBousrih@users.noreply.github.com> Date: Wed, 13 Oct 2021 15:23:35 +0200 Subject: [PATCH 3/3] Apply suggestions from code review --- docs/api-routes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api-routes.md b/docs/api-routes.md index 400d953d..37d49987 100644 --- a/docs/api-routes.md +++ b/docs/api-routes.md @@ -335,7 +335,7 @@ curl \ "status,archived":{ "COMPLETED,true": 45, "COMPLETED,false": 68, - "FAILED,true": 8 + "FAILED,true": 8, "FAILED,false": 4 } }