bug/too-many-partitions #131

Merged · 16 commits · Oct 16, 2024
25 changes: 25 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,28 @@
# dbt_jira v0.18.0
[PR #131](https://github.com/fivetran/dbt_jira/pull/131) contains the following updates:
## Breaking Changes
> Since the following changes are breaking, a `--full-refresh` will be required after upgrading.

- Changed the partitioning from days to weeks in the following models for BigQuery and Databricks All Purpose Cluster destinations (see the configuration sketch following this list):
  - `int_jira__pivot_daily_field_history`
    - Added field `valid_starting_at_week` for use with the new weekly partition logic.
  - `jira__daily_issue_field_history`
    - Added field `date_week` for use with the new weekly partition logic.
  - This adjustment reduces the total number of partitions, helping avoid partition limit issues in certain warehouses.
- For Databricks All Purpose Cluster destinations, updated the `file_format` to `delta` for improved performance.
- Updated the default materialization of `int_jira__issue_calendar_spine` from incremental to ephemeral to improve performance and maintainability.
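
As a hedged reference, here is a minimal sketch of the new weekly partition configuration, mirroring the pivot-model config changes shown later in this diff; the real models also keep a table materialization fallback for Databricks SQL warehouses, which is omitted here for brevity:

```sql
{{
    config(
        materialized = 'incremental',
        -- weekly partition column introduced in this release
        partition_by = {'field': 'valid_starting_at_week', 'data_type': 'date'}
            if target.type not in ['spark', 'databricks'] else ['valid_starting_at_week'],
        cluster_by = ['valid_starting_at_week'],
        unique_key = 'issue_day_id',
        incremental_strategy = 'insert_overwrite' if target.type in ('bigquery', 'databricks', 'spark') else 'delete+insert',
        -- Databricks All Purpose Cluster destinations now use delta
        file_format = 'delta'
    )
}}

-- placeholder body for illustration only; see the full model diffs below
select *
from {{ ref('int_jira__issue_field_history') }}
```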

## Documentation Update
- Updated [README](https://github.com/fivetran/dbt_jira/blob/main/README.md#lookback-window) with the new default of 1 week for the `lookback_window` variable.

## Under the Hood
- Replaced the deprecated `dbt.current_timestamp_backcompat()` function with `dbt.current_timestamp()` to ensure all timestamps are captured in UTC for the following models (see the snippet after this list):
  - `int_jira__issue_calendar_spine`
  - `int_jira__issue_join`
  - `jira__issue_enhanced`
- Updated `int_jira__issue_calendar_spine` so its introspective date query only runs during `dbt run` and `dbt build` invocations (gated behind `execute and flags.WHICH in ('run', 'build')`), preventing errors during compilation.
- Added consistency tests for the `jira__daily_issue_field_history` and `jira__issue_enhanced` models.
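
For illustration only (not a model in this PR), the macro swap amounts to the following; the replacement compiles to the adapter's current timestamp, which the models above rely on for UTC capture:

```sql
-- before: deprecated cross-version helper
-- select {{ dbt.current_timestamp_backcompat() }} as captured_at

-- after: current dbt-core macro used by the models listed above
select {{ dbt.current_timestamp() }} as captured_at
```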

# dbt_jira v0.17.0
[PR #127](https://github.com/fivetran/dbt_jira/pull/127) contains the following updates:

6 changes: 3 additions & 3 deletions README.md
@@ -66,7 +66,7 @@ Include the following jira package version in your `packages.yml` file:
```yaml
packages:
- package: fivetran/jira
version: [">=0.17.0", "<0.18.0"]
version: [">=0.18.0", "<0.19.0"]

```
### Step 3: Define database and schema variables
@@ -142,14 +142,14 @@ vars:
```

#### Lookback Window
Records from the source can sometimes arrive late. Since several of the models in this package are incremental, by default we look back 3 days to ensure late arrivals are captured while avoiding the need for frequent full refreshes. While the frequency can be reduced, we still recommend running `dbt --full-refresh` periodically to maintain data quality of the models.
Records from the source may occasionally arrive late. To handle this, we implement a one-week lookback in our incremental models to capture late arrivals without requiring frequent full refreshes. The lookback is structured in weekly increments, as the incremental logic is based on weekly periods. While the frequency of full refreshes can be reduced, we still recommend running `dbt --full-refresh` periodically to maintain data quality of the models.

To change the default lookback window, add the following variable to your `dbt_project.yml` file:

```yml
vars:
jira:
lookback_window: number_of_days # default is 3
lookback_window: number_of_weeks # default is 1
```

### (Optional) Step 6: Orchestrate your models with Fivetran Transformations for dbt Core™
2 changes: 1 addition & 1 deletion dbt_project.yml
@@ -1,5 +1,5 @@
name: 'jira'
version: '0.17.0'
version: '0.18.0'
config-version: 2
require-dbt-version: [">=1.3.0", "<2.0.0"]
vars:
2 changes: 1 addition & 1 deletion docs/catalog.json

Large diffs are not rendered by default.

47 changes: 10 additions & 37 deletions docs/index.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/manifest.json

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion docs/run_results.json

This file was deleted.

4 changes: 3 additions & 1 deletion integration_tests/dbt_project.yml
@@ -1,5 +1,5 @@
name: 'jira_integration_tests'
version: '0.17.0'
version: '0.18.0'
config-version: 2
profile: 'integration_tests'

@@ -28,11 +28,13 @@ vars:
jira_user_identifier: "user"
jira_version_identifier: "version"

# Comment out the below when generating docs
issue_field_history_columns: ['summary', 'story points', 'components']

models:
jira:
+schema: "{{ 'jira_integrations_tests_sqlw' if target.name == 'databricks-sql' else 'jira' }}"
# +schema: "jira_{{ var('directed_schema','dev') }}"

seeds:
jira_integration_tests:
@@ -0,0 +1,58 @@

{{ config(
tags="fivetran_validations",
enabled=var('fivetran_validation_tests_enabled', false)
) }}

with prod as (
select
date_day,
issue_id,
status,
status_id,
sprint,
issue_day_id
from {{ target.schema }}_jira_prod.jira__daily_issue_field_history
),

dev as (
select
date_day,
issue_id,
status,
status_id,
sprint,
issue_day_id
from {{ target.schema }}_jira_dev.jira__daily_issue_field_history
),

prod_not_in_dev as (
-- rows from prod not found in dev
select * from prod
except distinct
select * from dev
),

dev_not_in_prod as (
-- rows from dev not found in prod
select * from dev
except distinct
select * from prod
),

final as (
select
*,
'from prod' as source
from prod_not_in_dev

union all -- union since we only care if rows are produced

select
*,
'from dev' as source
from dev_not_in_prod
)

select *
from final
46 changes: 46 additions & 0 deletions integration_tests/tests/consistency/consistency_issue_enhanced.sql
@@ -0,0 +1,46 @@

{{ config(
tags="fivetran_validations",
enabled=var('fivetran_validation_tests_enabled', false)
) }}

with prod as (
select *
from {{ target.schema }}_jira_prod.jira__issue_enhanced
),

dev as (
select *
from {{ target.schema }}_jira_dev.jira__issue_enhanced
),

prod_not_in_dev as (
-- rows from prod not found in dev
select * from prod
except distinct
select * from dev
),

dev_not_in_prod as (
-- rows from dev not found in prod
select * from dev
except distinct
select * from prod
),

final as (
select
*,
'from prod' as source
from prod_not_in_dev

union all -- union since we only care if rows are produced

select
*,
'from dev' as source
from dev_not_in_prod
)

select *
from final
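
As a usage sketch outside this diff: both consistency tests are gated behind the `fivetran_validation_tests_enabled` variable in their `config` blocks, so they could presumably be enabled with a local override such as:

```yml
# hypothetical local override (e.g. in integration_tests/dbt_project.yml, or passed via --vars)
vars:
  fivetran_validation_tests_enabled: true
```

They could then be selected by their tag, e.g. `dbt test --select tag:fivetran_validations`.
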
@@ -1,43 +1,32 @@
{{
config(
materialized='table' if jira.jira_is_databricks_sql_warehouse() else 'incremental',
partition_by = {'field': 'date_day', 'data_type': 'date'}
if target.type not in ['spark', 'databricks'] else ['date_day'],
cluster_by = ['date_day', 'issue_id'],
unique_key='issue_day_id',
incremental_strategy = 'insert_overwrite' if target.type in ('bigquery', 'databricks', 'spark') else 'delete+insert',
file_format='delta' if jira.jira_is_databricks_sql_warehouse() else 'parquet'
)
}}

with spine as (

{% if execute %}
{% if execute and flags.WHICH in ('run', 'build') %}
{% set first_date_query %}
-- start at the first created issue
select min( created ) as min_date from {{ source('jira','issue') }}
select
coalesce(
min(cast(created as date)),
cast({{ dbt.dateadd("month", -1, "current_date") }} as date)
) as min_date
from {{ source('jira','issue') }}
{% endset %}
{% set first_date = run_query(first_date_query).columns[0][0]|string %}

{%- set first_date = dbt_utils.get_single_value(first_date_query) %}

{% else %} {% set first_date = "2016-01-01" %}
{% endif %}

select *
select
cast(date_day as date) as date_day
from (
{{
dbt_utils.date_spine(
datepart = "day",
start_date = "cast('" ~ first_date[0:10] ~ "' as date)",
end_date = dbt.dateadd("week", 1, dbt.current_timestamp_in_utc_backcompat())
start_date = "cast('" ~ first_date ~ "' as date)",
end_date = dbt.dateadd("week", 1, dbt.current_timestamp())
)
}}
}}
) as date_spine

{% if is_incremental() %}
-- compare to the earliest possible open_until date so that if a resolved issue is updated after a long period of inactivity, we don't need a full refresh
-- essentially we need to be able to backfill
where cast( date_day as date) >= (select min(earliest_open_until_date) from {{ this }} )
{% endif %}
),

issue_history_scd as (
@@ -53,7 +42,9 @@ issue_dates as (
cast( {{ dbt.date_trunc('day', 'issue.created_at') }} as date) as created_on,
-- resolved_at will become null if an issue is marked as un-resolved. if this sorta thing happens often, you may want to run full-refreshes of the field_history models often
-- if it's not resolved include everything up to today. if it is, look at the last time it was updated
cast({{ dbt.date_trunc('day', 'case when issue.resolved_at is null then ' ~ dbt.current_timestamp_in_utc_backcompat() ~ ' else cast(issue_history_scd.valid_starting_on as ' ~ dbt.type_timestamp() ~ ') end') }} as date) as open_until
cast({{ dbt.date_trunc('day',
'case when issue.resolved_at is null then ' ~ dbt.current_timestamp() ~ ' else cast(issue_history_scd.valid_starting_on as ' ~ dbt.type_timestamp() ~ ') end') }}
as date) as open_until
from issue_history_scd
left join {{ var('issue') }} as issue
on issue_history_scd.issue_id = issue.issue_id
@@ -62,7 +53,7 @@ issue_dates as (
issue_spine as (

select
cast(spine.date_day as date) as date_day,
spine.date_day,
issue_dates.issue_id,
-- will take the table-wide min of this in the incremental block at the top of this model
min(issue_dates.open_until) as earliest_open_until_date
@@ -72,13 +63,6 @@ issue_spine as (
issue_dates.created_on <= spine.date_day
and {{ dbt.dateadd('month', var('jira_issue_history_buffer', 1), 'issue_dates.open_until') }} >= spine.date_day
-- if we cut off issues, we're going to have to do a full refresh to catch issues that have been un-resolved

{% if is_incremental() %}
-- This is necessary to insert only new rows during an incremental run. The above operations require more rows for backfilling purposes.
where spine.date_day >=
{{ jira.jira_lookback(from_date='max(date_day)', datepart='day', interval=var('lookback_window', 3)) }}
{% endif %}

group by 1,2
),

@@ -88,11 +72,12 @@ surrogate_key as (
date_day,
issue_id,
{{ dbt_utils.generate_surrogate_key(['date_day','issue_id']) }} as issue_day_id,
earliest_open_until_date
earliest_open_until_date,
cast({{ dbt.date_trunc('week', 'earliest_open_until_date') }} as date) as earliest_open_until_week

from issue_spine

where date_day <= cast( {{ dbt.date_trunc('day',dbt.current_timestamp_in_utc_backcompat()) }} as date)
where date_day <= cast( {{ dbt.current_timestamp() }} as date)
)

select *
@@ -1,12 +1,12 @@
{{
config(
materialized='table' if jira.jira_is_databricks_sql_warehouse() else 'incremental',
partition_by = {'field': 'valid_starting_on', 'data_type': 'date'}
if target.type not in ['spark','databricks'] else ['valid_starting_on'],
cluster_by = ['valid_starting_on', 'issue_id'],
partition_by = {'field': 'valid_starting_at_week', 'data_type': 'date'}
if target.type not in ['spark','databricks'] else ['valid_starting_at_week'],
cluster_by = ['valid_starting_at_week'],
unique_key='issue_day_id',
incremental_strategy = 'insert_overwrite' if target.type in ('bigquery', 'databricks', 'spark') else 'delete+insert',
file_format='delta' if jira.jira_is_databricks_sql_warehouse() else 'parquet'
file_format='delta'
)
}}

@@ -20,8 +20,8 @@ with issue_field_history as (
from {{ ref('int_jira__issue_field_history') }}

{% if is_incremental() %}
{% set max_valid_starting_on = jira.jira_lookback(from_date='max(valid_starting_on)', datepart='day', interval=var('lookback_window', 3)) %}
where cast(updated_at as date) >= {{ max_valid_starting_on }}
{% set max_valid_starting_at_week = jira.jira_lookback(from_date='max(valid_starting_on)', datepart='week', interval=var('lookback_window', 1)) %}
where cast(updated_at as date) >= {{ max_valid_starting_at_week }}
{% endif %}
),

@@ -32,7 +32,7 @@ issue_multiselect_history as (
from {{ ref('int_jira__issue_multiselect_history') }}

{% if is_incremental() %}
where cast(updated_at as date) >= {{ max_valid_starting_on }}
where cast(updated_at as date) >= {{ max_valid_starting_at_week }}
{% endif %}
),

@@ -151,8 +151,9 @@ pivot_out as (
-- only days on which a field value was actively changed will have a non-null value. the nulls will need to
-- be backfilled in the final jira__daily_issue_field_history model
select
valid_starting_on,
valid_starting_on,
issue_id,
cast({{ dbt.date_trunc('week', 'valid_starting_at') }} as date) as valid_starting_at_week,
max(case when lower(field_id) = 'status' then field_value end) as status,
max(case when lower(field_name) = 'sprint' then field_value end) as sprint

Expand All @@ -163,7 +164,7 @@ pivot_out as (

from int_jira__daily_field_history

group by 1,2
{{ dbt_utils.group_by(3) }}
),

final as (
12 changes: 6 additions & 6 deletions models/intermediate/int_jira__issue_join.sql
@@ -60,9 +60,9 @@ issue_comments as (
{% endif %}

issue_assignments_and_resolutions as (
select *
from {{ ref('int_jira__issue_assign_resolution')}}

select *
from {{ ref('int_jira__issue_assign_resolution')}}

),

@@ -95,9 +95,9 @@ join_issue as (
,issue_sprint.sprint_started_at
,issue_sprint.sprint_ended_at
,issue_sprint.sprint_completed_at
,coalesce(issue_sprint.sprint_started_at <= {{ dbt.current_timestamp_backcompat() }}
and coalesce(issue_sprint.sprint_completed_at, {{ dbt.current_timestamp_backcompat() }}) >= {{ dbt.current_timestamp_backcompat() }}
, false) as is_active_sprint -- If sprint doesn't have a start date, default to false. If it does have a start date, but no completed date, this means that the sprint is active. The ended_at timestamp is irrelevant here.
,coalesce(issue_sprint.sprint_started_at <= {{ dbt.current_timestamp() }}
and coalesce(issue_sprint.sprint_completed_at, {{ dbt.current_timestamp() }}) >= {{ dbt.current_timestamp() }}
, false) as is_active_sprint -- If sprint doesn't have a start date, default to false. If it does have a start date, but no completed date, this means that the sprint is active. The ended_at timestamp is irrelevant here.
{% endif %}

,issue_assignments_and_resolutions.first_assigned_at
2 changes: 2 additions & 0 deletions models/jira.yml
@@ -13,6 +13,8 @@ models:
columns:
- name: date_day
description: Date on which the issue had these field values.
- name: date_week
description: The start date of the week corresponding to `date_day`.
- name: issue_id
description: Foreign key referencing the ID of the `issue` with these values.
- name: issue_day_id