Skip to content

Commit fac5132

Browse files
Adding the server side code for the analysis tab view (#434)
1 parent ec328cf commit fac5132

File tree

6 files changed

+442
-3
lines changed

6 files changed

+442
-3
lines changed

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -129,3 +129,6 @@ dmypy.json
129129
jupyterhub-proxy.pid
130130
jupyterhub.sqlite
131131
jupyterhub_cookie_secret
132+
133+
# IDE files
134+
.idea/

.mailmap

+2-1
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,6 @@ Sadie Bartholomew <[email protected]> Sadie L. Bartholom
66
77
88
9-
9+
10+
Christopher Bennett <[email protected]>
1011
github-actions[bot] <[email protected]> <41898282+github-actions[bot]@users.noreply.github.com>

CHANGES.md

+5
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ ones in. -->
1515

1616
<!-- [Updated cylc-ui to x.y.z](https://github.com/cylc/cylc-ui/blob/master/CHANGES.md) -->
1717

18+
### Enhancements
19+
20+
[#434](https://github.com/cylc/cylc-uiserver/pull/434) - Added the server-side
21+
code for the analysis view in the UI
22+
1823
### Fixes
1924

2025
[#431](https://github.com/cylc/cylc-uiserver/pull/431) - Adds an additional

CONTRIBUTING.md

+1
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ below.
5151
- Ronnie Dutta
5252
- David Matthews
5353
- Mel Hall
54+
- Christopher Bennett
5455
<!-- end-shortlog -->
5556

5657
(All contributors are identifiable with email addresses in the git version

cylc/uiserver/schema.py

+228-2
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,17 @@
3030
CyclePoint,
3131
GenericResponse,
3232
ID,
33+
SortArgs,
34+
Task,
3335
Mutations,
3436
Queries,
37+
process_resolver_info,
38+
STRIP_NULL_DEFAULT,
3539
Subscriptions,
3640
WorkflowID,
3741
_mut_field,
38-
sstrip
42+
sstrip,
43+
get_nodes_all
3944
)
4045
from cylc.uiserver.resolvers import Resolvers
4146

@@ -254,6 +259,227 @@ class Arguments:
254259
result = GenericScalar()
255260

256261

262+
async def get_jobs(root, info, **kwargs):
    """Resolver for job/task queries on the analysis view.

    If the ``live`` argument is set, delegate to the data-store resolver
    (``get_nodes_all``); otherwise normalise the query arguments into
    Tokens and read job statistics out of the workflow run databases
    via ``list_jobs``.

    Args:
        root: GraphQL resolver root object.
        info: GraphQL resolve info (used to extract requested field IDs).
        **kwargs: Query arguments (workflows, ids, exids, live, ...).

    Returns:
        A list of job/task stat dictionaries (or live store nodes).
    """
    if kwargs['live']:
        return await get_nodes_all(root, info, **kwargs)

    _, field_ids = process_resolver_info(root, info, kwargs)

    # NOTE: use membership, not hasattr() — kwargs is a dict, so
    # hasattr(kwargs, 'id') is always False and the original branch
    # could never fire.
    if 'id' in kwargs:
        kwargs['ids'] = [kwargs.get('id')]
    if field_ids:
        if isinstance(field_ids, str):
            field_ids = [field_ids]
        elif isinstance(field_ids, dict):
            field_ids = list(field_ids)
        kwargs['ids'] = field_ids
    elif field_ids == []:
        # The query requested no fields: nothing to resolve.
        return []

    for arg in ('ids', 'exids'):
        # live objects can be represented by a universal ID
        kwargs[arg] = [Tokens(n_id, relative=True) for n_id in kwargs[arg]]
    kwargs['workflows'] = [
        Tokens(w_id) for w_id in kwargs['workflows']]
    kwargs['exworkflows'] = [
        Tokens(w_id) for w_id in kwargs['exworkflows']]

    return await list_jobs(kwargs)
288+
289+
290+
async def list_jobs(args):
    """Collect job statistics from the run DBs of the requested workflows.

    Args:
        args: Resolver arguments; ``args['workflows']`` must hold at
            least one workflow Tokens object.

    Returns:
        A list of per-task job statistic dictionaries.

    Raises:
        Exception: If no workflows were specified.
    """
    if not args['workflows']:
        raise Exception('At least one workflow must be provided.')
    # Deferred imports: these cylc.flow modules are only needed on this
    # (non-live) code path.
    from cylc.flow.rundb import CylcWorkflowDAO
    from cylc.flow.pathutil import get_workflow_run_dir
    from cylc.flow.workflow_files import WorkflowFiles
    results = []
    for tokens in args['workflows']:
        # The public DB lives under the workflow's log directory.
        db_path = get_workflow_run_dir(
            tokens['workflow'], WorkflowFiles.LogDir.DIRNAME, "db"
        )
        with CylcWorkflowDAO(db_path, is_public=True) as dao:
            results += make_query(dao.connect(), tokens)
    return results
307+
308+
309+
def make_query(conn, workflow):
    """Compute per-task job timing statistics from a workflow run DB.

    Args:
        conn: An open sqlite3 connection to the workflow's (public)
            run database.
        workflow: Workflow Tokens; used to build the task/job IDs via
            ``.duplicate()``.

    Returns:
        A list of dictionaries, one per task name, each holding
        min/mean/max/std-dev and quartile statistics for queue, run and
        total times of successfully completed jobs (``run_status = 0``).
    """
    # TODO: support all arguments including states
    # https://github.com/cylc/cylc-uiserver/issues/440

    def _std_dev(mean_of_squares, mean):
        # Var = E[X²] − E[X]²; clamp tiny negative values caused by
        # floating-point cancellation, which would otherwise make
        # ``** 0.5`` return a complex number.
        return max(0.0, mean_of_squares - mean ** 2) ** 0.5

    def _quartiles(q1, q2, q3):
        # With fewer than 2 (resp. 3) samples NTILE leaves the upper
        # quartiles NULL; fall back on the first quartile.
        return [
            q1,
            q1 if q2 is None else q2,
            q1 if q3 is None else q3,
        ]

    tasks = []
    for row in conn.execute('''
        SELECT
            name,
            cycle,
            submit_num,
            submit_status,
            time_run,
            time_run_exit,
            job_id,
            platform_name,
            time_submit,

            -- Calculate Queue time stats
            MIN(queue_time) AS min_queue_time,
            AVG(queue_time) AS mean_queue_time,
            MAX(queue_time) AS max_queue_time,
            AVG(queue_time * queue_time) AS mean_squares_queue_time,
            MAX(CASE WHEN queue_time_quartile = 1 THEN queue_time END)
                queue_quartile_1,
            MAX(CASE WHEN queue_time_quartile = 2 THEN queue_time END)
                queue_quartile_2,
            MAX(CASE WHEN queue_time_quartile = 3 THEN queue_time END)
                queue_quartile_3,

            -- Calculate Run time stats
            MIN(run_time) AS min_run_time,
            AVG(run_time) AS mean_run_time,
            MAX(run_time) AS max_run_time,
            AVG(run_time * run_time) AS mean_squares_run_time,
            MAX(CASE WHEN run_time_quartile = 1 THEN run_time END)
                run_quartile_1,
            MAX(CASE WHEN run_time_quartile = 2 THEN run_time END)
                run_quartile_2,
            MAX(CASE WHEN run_time_quartile = 3 THEN run_time END)
                run_quartile_3,

            -- Calculate Total time stats
            MIN(total_time) AS min_total_time,
            AVG(total_time) AS mean_total_time,
            MAX(total_time) AS max_total_time,
            AVG(total_time * total_time) AS mean_squares_total_time,
            MAX(CASE WHEN total_time_quartile = 1 THEN total_time END)
                total_quartile_1,
            MAX(CASE WHEN total_time_quartile = 2 THEN total_time END)
                total_quartile_2,
            MAX(CASE WHEN total_time_quartile = 3 THEN total_time END)
                total_quartile_3,

            COUNT(*) AS n

        FROM
            (SELECT
                *,
                NTILE (4) OVER (PARTITION BY name ORDER BY queue_time)
                    queue_time_quartile,
                NTILE (4) OVER (PARTITION BY name ORDER BY run_time)
                    run_time_quartile,
                NTILE (4) OVER (PARTITION BY name ORDER BY total_time)
                    total_time_quartile
            FROM
                (SELECT
                    *,
                    STRFTIME('%s', time_run_exit) -
                    STRFTIME('%s', time_submit) AS total_time,
                    STRFTIME('%s', time_run_exit) -
                    STRFTIME('%s', time_run) AS run_time,
                    STRFTIME('%s', time_run) -
                    STRFTIME('%s', time_submit) AS queue_time
                FROM
                    task_jobs))
        WHERE
            run_status = 0
        GROUP BY
            name;
    '''):
        tasks.append({
            'id': workflow.duplicate(
                cycle=row[1],
                task=row[0],
                job=row[2]
            ),
            'name': row[0],
            'cycle_point': row[1],
            'submit_num': row[2],
            'state': row[3],
            'started_time': row[4],
            'finished_time': row[5],
            'job_ID': row[6],
            'platform': row[7],
            'submitted_time': row[8],
            # Queue time stats (columns 9-15)
            'min_queue_time': row[9],
            'mean_queue_time': row[10],
            'max_queue_time': row[11],
            'std_dev_queue_time': _std_dev(row[12], row[10]),
            'queue_quartiles': _quartiles(row[13], row[14], row[15]),
            # Run time stats (columns 16-22)
            'min_run_time': row[16],
            'mean_run_time': row[17],
            'max_run_time': row[18],
            'std_dev_run_time': _std_dev(row[19], row[17]),
            'run_quartiles': _quartiles(row[20], row[21], row[22]),
            # Total time stats (columns 23-29)
            'min_total_time': row[23],
            'mean_total_time': row[24],
            'max_total_time': row[25],
            'std_dev_total_time': _std_dev(row[26], row[24]),
            'total_quartiles': _quartiles(row[27], row[28], row[29]),

            'count': row[30]
        })

    return tasks
430+
431+
432+
class UISTask(Task):
    """Task type extended with job timing statistics for the analysis view.

    The statistic fields are populated by the ``get_jobs`` resolver from
    the workflow run database (see ``make_query``).
    """

    platform = graphene.String()
    min_total_time = graphene.Int()
    mean_total_time = graphene.Int()
    max_total_time = graphene.Int()
    std_dev_total_time = graphene.Int()
    # NOTE: the resolver supplies three values per list (the first,
    # second and third quartiles); missing upper quartiles fall back on
    # the first. The original descriptions said "forth [sic] quartile".
    queue_quartiles = graphene.List(
        graphene.Int,
        description=sstrip('''
            List containing the first, second and
            third quartile queue times.'''))
    min_queue_time = graphene.Int()
    mean_queue_time = graphene.Int()
    max_queue_time = graphene.Int()
    std_dev_queue_time = graphene.Int()
    run_quartiles = graphene.List(
        graphene.Int,
        description=sstrip('''
            List containing the first, second and
            third quartile run times.'''))
    min_run_time = graphene.Int()
    mean_run_time = graphene.Int()
    max_run_time = graphene.Int()
    std_dev_run_time = graphene.Int()
    total_quartiles = graphene.List(
        graphene.Int,
        description=sstrip('''
            List containing the first, second and
            third quartile total times.'''))
    count = graphene.Int()
463+
464+
465+
class UISQueries(Queries):
    # Extends the cylc-flow Queries with the analysis-view `tasks` query.
    # `live=True` (the default) routes the resolver to the data store;
    # `live=False` reads stats from the workflow run databases instead
    # (see get_jobs).

    tasks = graphene.List(
        UISTask,
        description=Task._meta.description,
        live=graphene.Boolean(default_value=True),
        strip_null=STRIP_NULL_DEFAULT,
        resolver=get_jobs,
        workflows=graphene.List(ID, default_value=[]),
        exworkflows=graphene.List(ID, default_value=[]),
        ids=graphene.List(ID, default_value=[]),
        exids=graphene.List(ID, default_value=[]),
        # NOTE(review): mindepth/maxdepth/sort are accepted but do not
        # appear to be applied on the non-live path yet (see the TODO in
        # make_query) — confirm against the resolver.
        mindepth=graphene.Int(default_value=-1),
        maxdepth=graphene.Int(default_value=-1),
        sort=SortArgs(default_value=None),
    )
481+
482+
257483
class UISSubscriptions(Subscriptions):
258484
# Example graphiql workflow log subscription:
259485
# subscription {
@@ -320,7 +546,7 @@ class UISMutations(Mutations):
320546

321547

322548
schema = graphene.Schema(
323-
query=Queries,
549+
query=UISQueries,
324550
subscription=UISSubscriptions,
325551
mutation=UISMutations
326552
)

0 commit comments

Comments
 (0)