Merge pull request #34 from fivetran/feature/extend-history

fivetran-jamie · web-flow · commit df506504f901 · 2021-08-02T13:42:08.000-07:00
Feature/extend history
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -25,17 +25,6 @@ jobs:
             pip install -r integration_tests/requirements.txt
             mkdir -p ~/.dbt
             cp integration_tests/ci/sample.profiles.yml ~/.dbt/profiles.yml
-      - run:
-          name: "Run Tests - Spark"
-          command: |
-            . venv/bin/activate
-            echo `pwd`
-            cd integration_tests
-            dbt deps
-            dbt seed --target spark --full-refresh
-            dbt run --target spark --full-refresh
-            dbt run --target spark
-            dbt test --target spark
       - run:
           name: "Run Tests - Postgres"
           command: |
diff --git a/README.md b/README.md
@@ -1,3 +1,4 @@
+[![Apache License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) ![dbt logo and version](https://img.shields.io/static/v1?logo=dbt&label=dbt-version&message=0.20.x&color=orange)
 # Jira ([docs](https://fivetran-dbt-jira.netlify.app/#!/overview))
 
 This package models Jira data from [Fivetran's connector](https://fivetran.com/docs/applications/jira). It uses data in the format described by [this ERD](https://fivetran.com/docs/applications/jira/#schemainformation).
@@ -28,7 +29,7 @@ Check [dbt Hub](https://hub.getdbt.com/) for the latest installation instruction
 # packages.yml
 packages:
   - package: fivetran/jira
-    version: [">=0.3.0", "<0.4.0"]
+    version: [">=0.4.0", "<0.5.0"]
 ```
 
 ## Configuration
@@ -61,10 +62,26 @@ vars:
     issue_field_history_columns: ['the', 'list', 'of', 'field', 'IDs']
 ```
 
-> Note: `sprint` and `status` will always be tracked, as they are necessary for creating common agile reports. 
+> Note: `sprint` and `status` will always be tracked, as they are necessary for creating common agile reports.
 
-### Disabling models
+### Extending an Issue's History Period
+This package will create a row in `jira__daily_issue_field_history` for each day that an issue is open or being updated. For currently open issues, the latest date will be the current date. For closed issues, the latest date will be when the issue was last resolved or updated in any way, plus a _buffer period_ that is by default equal to 1 month. This buffer exists for two reasons:
+1. The daily issue field history model is materialized incrementally, and if your closed issues are being reopened or updated often, this will avoid requiring a full refresh to catch these changes.
+2. You may want to create a longer timeline of issues, regardless of their status, for easier reporting.
 
+If you would like to extend this buffer period to longer than 1 month, add the following configuration to your `dbt_project.yml` file:
+
+```yml
+# dbt_project.yml
+
+...
+config-version: 2
+
+vars:
+  jira_issue_history_buffer: integer_number_of_months # default is an interval of 1 month
+```
+
+### Disabling Models
 It's possible that your Jira connector does not sync every table that this package expects. If your syncs exclude certain tables, it is because you either don't use that functionality in Jira or actively excluded some tables from your syncs. To disable the corresponding functionality in the package, you must add the relevant variables. By default, all variables are assumed to be `true`. Add variables for only the tables you would like to disable:  
 
 ```yml
@@ -99,7 +116,7 @@ and running the package? If so, we highly encourage and welcome contributions to
 Please create issues or open PRs against `master`. Check out [this post](https://discourse.getdbt.com/t/contributing-to-a-dbt-package/657) on the best workflow for contributing to a package.
 
 ## Database Support
-This package has been tested on BigQuery, Snowflake and Redshift.
+This package has been tested on BigQuery, Snowflake, Redshift, and Postgres.
 
 ## Resources:
 - Provide [feedback](https://www.surveymonkey.com/r/DQ7K7WW) on our existing dbt packages or what you'd like to see next
diff --git a/dbt_project.yml b/dbt_project.yml
@@ -1,7 +1,7 @@
 name: 'jira'
-version: '0.3.1'
+version: '0.4.0'
 config-version: 2
-require-dbt-version: [">=0.18.0", "<0.20.0"]
+require-dbt-version: ">=0.20.0"
 
 
 vars:
@@ -23,11 +23,11 @@ vars:
     field: "{{ ref('stg_jira__field') }}"
     sprint: "{{ ref('stg_jira__sprint') }}"
     version: "{{ ref('stg_jira__version') }}"
+
     jira_include_comments: true  # this package aggregates issue comments so that you have a single view of all your comments in the jira__issue_enhanced table. This can cause limit errors if you have a large dataset. Disable to remove this functionality.
     jira_using_sprints: true # disable if you are not using sprints in Jira
+    jira_issue_history_buffer: 1 # in months
 
-  dbt_utils_dispatch_list: [spark_utils, fivetran_utils]
-  fivetran_utils_dispatch_list: [spark_utils]
   
 models:
   jira:
diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml
@@ -1,5 +1,5 @@
 name: 'jira_integration_tests'
-version: '0.3.1'
+version: '0.4.0'
 config-version: 2
 profile: 'integration_tests'
 
diff --git a/integration_tests/requirements.txt b/integration_tests/requirements.txt
@@ -1,3 +1,3 @@
-dbt==0.19.1
-dbt-spark==0.19.1
-dbt-spark[PyHive]==0.19.1
+dbt~=0.20.0
+dbt-spark~=0.20.0
+dbt-spark[PyHive]~=0.20.0
diff --git a/models/intermediate/field_history/int_jira__issue_calendar_spine.sql b/models/intermediate/field_history/int_jira__issue_calendar_spine.sql
@@ -65,7 +65,7 @@ issue_spine as (
     from spine 
     join issue_dates on
         issue_dates.created_on <= spine.date_day
-        and {{ dbt_utils.dateadd('month', 1, 'issue_dates.open_until') }} >= spine.date_day
+        and {{ dbt_utils.dateadd('month', var('jira_issue_history_buffer', 1), 'issue_dates.open_until') }} >= spine.date_day
         -- if we cut off issues, we're going to have to do a full refresh to catch issues that have been un-resolved
 
     group by 1,2
diff --git a/models/jira.yml b/models/jira.yml
@@ -5,8 +5,10 @@ models:
     description: >
       Each record represents a snapshot of an issue's selected fields (as specified in
       the `issue_history_columns` variable + sprint and status by default) on a given day.
-      Each issue will have a record for every day in which it was open (plus a month afterward,
-      in case issues are reopened -- if they are reopened even later, you'll need to perform a full refresh). 
+      Each issue will have a record for every day in which it was open and/or being updated.
+      For currently open issues, the latest date will be the current date, and for closed issues,
+      this will be the last time the issue was resolved OR updated in any way (plus a configurable buffer afterward,
+      set to a month by default).
       Note: this is an incremental model.
     columns:
       - name: date_day
diff --git a/models/jira__issue_enhanced.sql b/models/jira__issue_enhanced.sql
@@ -10,7 +10,7 @@ with issue as (
 {%- set issue_data_columns_clean = [] -%}
 
 {%- for k in issue_data_columns -%}
-    {{ issue_data_columns_clean.append(k.name)|default("", True)  }}
+    {{ issue_data_columns_clean.append(k.name|lower)|default("", True)  }}
 {%- endfor -%}
 
 daily_issue_field_history as (
diff --git a/packages.yml b/packages.yml
@@ -1,3 +1,3 @@
 packages:
   - package: fivetran/jira_source
-    version: [">=0.2.0","<0.3.0"]
+    version: [">=0.3.0","<0.4.0"]