Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions data_utils/dashboard_copy/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Dashboard copy
The goal of this script is to be able to :
* first copy a dashboard to another collection
* then to update each card in the dashboard to use another source database

The copy of the dashboard is done using the dashboard copy API endpoint.

The update of the source database is done using the card update API endpoint.
To do so we need to:
* Get the list of dashcards in the dashboard by calling the get dashboard API endpoint
* Then we filter only the dashcards that need to be updated: those which contain an actual card
* For each card, we need to update the database id, the table ids, and the field ids

## Limitations
- The script is not currently designed to work with :
  - dashboards with cards that have multiple databases as sources (e.g. one card uses data from the Lyon database and another one uses data from the Angers database)
- cards that use Metabase Models as data source

## How to
1. Before running script
- Source the main `.env` file at the root of `data_utils` (cf. [README at the root of project](./../../README.md))
- Get the *id of the dashboard* you wish to copy
- Get the *id of the collection* you wish to copy it to
2. Run `poetry run dashboard_copy`
3. Once you copied the dashboard
- Get the *id of the dashboard* you just copied
- Get the *id of the database* you want your dashboard to use as source database
- Get the database schema you want to use : Usually 'prod' or 'dev'
4. Run `poetry run replace_dashboard_source_db` 🎉


## Possible improvements
- Some work could be done to limit the api calls to Metabase
  - The script currently calls the Metabase API for each occurrence of any field it needs to replace; a cache system could be added to avoid calling the API when the data has already been retrieved
- Add more logging
- Improve error handling (e.g. dashboards with multiple source databases, cards based on Metabase Models)
Empty file.
203 changes: 203 additions & 0 deletions data_utils/dashboard_copy/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
from ..utils import MTB, modify_dict
import re
import json
import logging

# Setup basic configuration for logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Interactive prompts shown by the two CLI entry points below
# (dashboard_copy and replace_dashboard_source_db).
MESSAGE_DASHBOARD_ID = 'Enter dashboard id to be copied: '
MESSAGE_COLLECTION_ID = 'Enter the collection id where the dashboard will be copied to: '
MESSAGE_DASHBOARD_NAME = 'Enter the new dashboard name: '
MESSAGE_DASHBOARD_DESCRIPTION = 'Optional - Enter the new dashboard description: '
MESSAGE_DASHBOARD_ID_REPLACE_DB = 'Enter dashboard id: '
MESSAGE_NEW_DB = "Enter the id of the database: "
MESSAGE_SCHEMA = "Enter the schema: "


def get_dashboard(dashboard_id):
    """Fetch the full dashboard payload for *dashboard_id* from Metabase."""
    endpoint = f'/api/dashboard/{dashboard_id}'
    return MTB.get(endpoint)

def get_database_info(database_id):
    """Fetch a database's metadata from Metabase, including its tables list."""
    endpoint = f'/api/database/{database_id}?include=tables'
    return MTB.get(endpoint)

def get_field_info(field_id):
    """Fetch a single field's metadata from Metabase."""
    endpoint = f'/api/field/{field_id}'
    return MTB.get(endpoint)

def get_all_db_ids(dashboard):
    """Return the unique source database ids used by the cards of *dashboard*.

    Dashcards without an embedded card (e.g. text cards) are ignored.
    Returns an empty list when the dashboard has no dashcard list.
    """
    dashcards = dashboard.get('dashcards')
    if not isinstance(dashcards, list):
        return []
    # Set comprehension dedupes ids directly (no intermediate list).
    return list({
        dashcard['card']['database_id']
        for dashcard in dashcards
        if 'card' in dashcard and 'database_id' in dashcard['card']
    })

def get_tables_info(database_id, schema=None):
    """Return [{'table_id', 'table_name', 'schema'}, ...] for a database.

    When *schema* is truthy, only tables of that schema are returned;
    otherwise all tables are returned.
    """
    database_info = get_database_info(database_id)
    # Single comprehension instead of two duplicated branches; `not schema`
    # preserves the original truthiness semantics (None or '' -> no filter).
    return [
        {'table_id': table['id'], 'table_name': table['name'], 'schema': table['schema']}
        for table in database_info['tables']
        if not schema or table['schema'] == schema
    ]

def extract_field_integers(json_str):
    """Return the distinct integer ids found in '"field", <id>' patterns."""
    matches = re.findall(r'"field",\s*(\d+)', json_str)
    return list({int(m) for m in matches})

def get_new_field_id(old_field_id, table_id_mapping):
    """Map a field id from the old database to the matching field id in the new one.

    The match is done by field *name*: the field's table is looked up in
    *table_id_mapping*, then a field with the same name is searched for in the
    new table. Returns None (and logs an error) when the mapping cannot be
    resolved.
    """
    field = get_field_info(old_field_id)
    old_table_id = field['table']['id']
    new_table_id = table_id_mapping.get(old_table_id)
    if new_table_id is None:
        # Bug fix: the original raised a KeyError here when the old table had
        # no counterpart in the new schema.
        logging.error(f"No table mapping found for table {old_table_id}")
        return None
    old_table = MTB.get(f'/api/table/{old_table_id}/query_metadata')
    new_table = MTB.get(f'/api/table/{new_table_id}/query_metadata')
    if not old_table:
        logging.error(f"Failed to fetch table metadata for table {old_table_id}")
        return None
    if not new_table:
        logging.error(f"Failed to fetch table metadata for table {new_table_id}")
        return None
    # Name-based lookup tables; loop vars renamed so they no longer shadow
    # the outer `field` variable.
    field_name_to_new_id = {f['name']: f['id'] for f in new_table['fields']}
    field_id_to_name = {f['id']: f['name'] for f in old_table['fields']}
    old_name = field_id_to_name.get(old_field_id)
    if old_name is not None and old_name in field_name_to_new_id:
        return field_name_to_new_id[old_name]
    return None

def update_object_fields(object, table_id_mapping):
    """Replace every '"field", <id>' reference in *object* with its new-db field id.

    *object* is any JSON-serializable structure (a card, a parameter target...).
    Field ids that cannot be mapped are left untouched and logged.
    NOTE: the parameter name shadows the builtin `object`; kept for interface
    compatibility with existing callers.
    """
    object_json_str = json.dumps(object)
    field_ids_to_replace = extract_field_integers(object_json_str)
    for field_id in field_ids_to_replace:
        new_field_id = get_new_field_id(field_id, table_id_mapping)
        if new_field_id is None:
            # Bug fix: substituting a missing mapping wrote the literal text
            # 'None' into the JSON string ('"field", None'), which made the
            # json.loads below raise. Keep the old id and log instead.
            logging.warning(f"No replacement found for field {field_id}; keeping old id")
            continue
        object_json_str = re.sub(rf'"field",\s*{field_id}', f'"field", {new_field_id}', object_json_str)
        object = json.loads(object_json_str)
    return object

def update_card_db(card_id, db_id, table_id_mapping):
    """Rewrite a single card so it queries database *db_id* instead of its current one.

    Handles both GUI ("query") cards — source table, joins and field references —
    and native SQL cards, whose template tags may reference fields.
    Errors are logged rather than raised so one bad card does not stop the run.
    """
    try:
        card_to_update = MTB.get(f'/api/card/{card_id}')
        if not card_to_update:
            logging.error(f"Failed to fetch card {card_id}")
            return

        # Point the card's query at the new database.
        modify_dict(card_to_update, ['dataset_query', 'database'], db_id)

        # We handle the cards that work with non SQL query
        if card_to_update["query_type"] == "query":
            old_table_id = card_to_update["table_id"]
            # NOTE(review): raises KeyError (caught below) if the table has no
            # counterpart in the new schema.
            new_table_id = table_id_mapping[old_table_id]
            modify_dict(card_to_update, ['dataset_query', 'query', 'source-table'], new_table_id)
            # If the cards work with joins :
            if card_to_update["dataset_query"]["query"].get("joins"):
                for join in card_to_update["dataset_query"]["query"]["joins"]:
                    old_table_id = join["source-table"]
                    new_table_id = table_id_mapping[old_table_id]
                    modify_dict(join, ['source-table'], new_table_id)
            # Remap every '"field", <id>' reference anywhere in the card payload.
            card_to_update = update_object_fields(card_to_update, table_id_mapping)

        # We handle the cards that work with a SQL query
        elif card_to_update["query_type"] == "native":
            # Only template tags are remapped — table names inside the raw SQL
            # text are presumably identical across schemas (TODO confirm).
            card_to_update["dataset_query"]["native"]["template-tags"] = update_object_fields(card_to_update["dataset_query"]["native"]["template-tags"], table_id_mapping)

        response = MTB.put(f'/api/card/{card_id}', json=card_to_update)
        if response:
            logging.info(f"Card {card_id} updated successfully.")
        else:
            logging.error(f"Failed to update card {card_id}")
    except Exception as e:
        logging.error(f"Unexpected error during card update: {str(e)}")

def update_dashcard_filters(dashcard, table_id_mapping):
    """Point a dashcard's filter parameter mappings at the new database's fields.

    Mutates *dashcard* in place and returns it for convenience.
    """
    # `or []` covers both a missing key and an explicit None value, so the
    # previous redundant outer `if` guard is no longer needed.
    for mapping in dashcard.get("parameter_mappings") or []:
        mapping["target"] = update_object_fields(mapping["target"], table_id_mapping)
    return dashcard

def update_dashboard(dashboard_id, dashboard):
    """Persist *dashboard* through the Metabase dashboard update endpoint."""
    result = MTB.put(f'/api/dashboard/{dashboard_id}', json=dashboard)
    if not result:
        logging.error(f"Failed to update dashboard {dashboard_id}")
    else:
        logging.info(f"Dashboard {dashboard_id} updated successfully.")

def replace_dashboard_source_db():
    """CLI entry point: repoint every card of a dashboard at a new source database.

    Prompts for a dashboard id, a new database id and a schema, builds an
    old-table-id -> new-table-id mapping by matching (name, schema), then
    updates each card (once) and each dashcard's filter mappings.
    Errors are logged rather than raised, consistent with dashboard_copy.
    """
    try:
        dashboard_id = input(MESSAGE_DASHBOARD_ID_REPLACE_DB).strip()
        if not dashboard_id.isdigit():
            raise ValueError("The Dashboard ID must be a numeric value.")
        dashboard_id = int(dashboard_id)

        dashboard = get_dashboard(dashboard_id)

        old_db_ids = get_all_db_ids(dashboard)
        if not old_db_ids:
            # Bug fix: the original indexed old_db_ids[0] unconditionally and
            # crashed with an IndexError on dashboards without any card.
            logging.error("No source database found in the dashboard.")
            return
        if len(old_db_ids) > 1:
            logging.error("Multiple database IDs found. This script does not support multiple source databases.")
            return

        old_db_id = old_db_ids[0]
        old_tables = get_tables_info(old_db_id)

        new_db_id = input(MESSAGE_NEW_DB).strip()
        if not new_db_id.isdigit():
            raise ValueError("The Database ID must be a numeric value.")
        new_db_id = int(new_db_id)

        schema_name = input(MESSAGE_SCHEMA).strip()
        if not schema_name:
            raise ValueError("Schema Name is required.")

        # We build the tables mapping between new and old tables id,
        # matching on table name AND schema.
        new_tables = get_tables_info(new_db_id, schema_name)
        table_id_mapping = {}
        for old_table in old_tables:
            for new_table in new_tables:
                if old_table['table_name'] == new_table['table_name'] and old_table['schema'] == new_table['schema']:
                    table_id_mapping[old_table['table_id']] = new_table['table_id']

        # If there are some dashcards in the dashboard, we update them
        if 'dashcards' in dashboard and isinstance(dashboard['dashcards'], list):
            updated_cards = []
            for dashcard in dashboard['dashcards']:
                # Check if the dashcard contains a card
                if 'card' in dashcard and 'database_id' in dashcard['card']:
                    card = dashcard["card"]
                    # A card can appear on several dashcards: update it only once.
                    if card["id"] not in updated_cards:
                        update_card_db(card["id"], new_db_id, table_id_mapping)
                        updated_cards.append(card["id"])
                    dashcard = update_dashcard_filters(dashcard, table_id_mapping)
        update_dashboard(dashboard_id, dashboard)
    except ValueError as ve:
        logging.error(f"Input error: {str(ve)}")
    except Exception as e:
        logging.error(f"Unexpected error: {str(e)}")


def dashboard_copy():
    """CLI entry point: deep-copy a dashboard into another collection."""
    try:
        raw_dashboard_id = input(MESSAGE_DASHBOARD_ID).strip()
        if not raw_dashboard_id.isdigit():
            raise ValueError("Dashboard ID must be a numeric value.")
        dashboard_id = int(raw_dashboard_id)

        raw_collection_id = input(MESSAGE_COLLECTION_ID).strip()
        if not raw_collection_id.isdigit():
            raise ValueError("Collection ID must be a numeric value.")

        name = input(MESSAGE_DASHBOARD_NAME).strip()
        if not name:
            raise ValueError("Dashboard Name is required.")

        description = input(MESSAGE_DASHBOARD_DESCRIPTION).strip()

        # is_deep_copy makes Metabase copy the cards too, not just the layout.
        payload = {
            "name": name,
            "description": description,
            "collection_id": int(raw_collection_id),
            "is_deep_copy": True,
        }

        if MTB.post(f'/api/dashboard/{dashboard_id}/copy', json=payload):
            logging.info("Dashboard copied successfully.")
        else:
            logging.error(f"Failed to copy dashboard: {dashboard_id}")
    except ValueError as ve:
        logging.error(f"Input error: {str(ve)}")
    except Exception as e:
        logging.error(f"Unexpected error: {str(e)}")

Loading