diff --git a/dags/data_utils/doc_helpscout.py b/dags/data_utils/doc_helpscout.py
new file mode 100644
index 0000000..c1a4458
--- /dev/null
+++ b/dags/data_utils/doc_helpscout.py
@@ -0,0 +1,111 @@
+"""Export Help Scout Docs articles to a Grist document."""
+
+import pandas as pd
+import requests
+from airflow.hooks.base import BaseHook
+from airflow.models import Variable
+from requests.auth import HTTPBasicAuth
+
+from .grist.grist_helper import osp_grist_api
+
+HELPSCOUT_DOCS_API = "https://docsapi.helpscout.net/v1"
+# Seconds to wait on a Help Scout call, so a stalled connection cannot
+# block the Airflow worker forever.
+REQUEST_TIMEOUT = 30
+
+connection_helpscout = BaseHook.get_connection("helpscout")
+if connection_helpscout.login is None:
+    # Explicit raise rather than `assert`, which is stripped under `python -O`.
+    raise ValueError("Airflow connection 'helpscout' has no login")
+# The connection login is the basic-auth username; the password is a placeholder.
+basic = HTTPBasicAuth(connection_helpscout.login, "X")
+
+grist_doc_id = Variable.get("helpscout_documentation_grist_doc_id")
+
+
+def _fetch_all_items(path, envelope):
+    """Return every item of a paginated Help Scout Docs listing.
+
+    Args:
+        path: API path below the versioned root, e.g. "categories/<id>/articles".
+        envelope: Top-level key of the JSON response ("categories" or "articles").
+
+    Returns:
+        A list holding the items of all pages, in page order.
+
+    Raises:
+        requests.HTTPError: If Help Scout answers with a 4xx/5xx status.
+    """
+    items = []
+    page = 1
+    while True:
+        response = requests.get(
+            f"{HELPSCOUT_DOCS_API}/{path}",
+            params={"page": page},
+            auth=basic,
+            timeout=REQUEST_TIMEOUT,
+        )
+        # Fail on HTTP errors here instead of a confusing KeyError below.
+        response.raise_for_status()
+        listing = response.json()[envelope]
+        items.extend(listing["items"])
+        # "pages" is the total page count; a missing key means a single page.
+        if page >= listing.get("pages", 1):
+            return items
+        page += 1
+
+
+def get_categories(collection_id):
+    """Return the categories of a Help Scout collection as a DataFrame."""
+    return pd.DataFrame(
+        _fetch_all_items(f"collections/{collection_id}/categories", "categories")
+    )
+
+
+def get_articles(category_id):
+    """Return a category's articles as a DataFrame with a `category_id` column."""
+    result = pd.DataFrame(
+        _fetch_all_items(f"categories/{category_id}/articles", "articles")
+    )
+    result["category_id"] = category_id
+    return result
+
+
+def dump_helpscout_collection_to_grist(collection_id):
+    """Sync all articles of a Help Scout collection to the Grist "Articles" table.
+
+    Each article row is joined with the name of its category and gets a boolean
+    `published` column derived from its `status`.
+
+    Args:
+        collection_id: Id of the Help Scout Docs collection to export.
+    """
+    categories = get_categories(collection_id)
+
+    all_pages = pd.concat([get_articles(c_id) for c_id in categories["id"]])
+    final_table = pd.merge(
+        all_pages,
+        categories[["id", "name"]],
+        left_on="category_id",
+        right_on="id",
+        # Article columns keep their names; the clashing category columns
+        # become "id_category" and "name_category".
+        suffixes=("", "_category"),
+    )
+    final_table["published"] = final_table["status"] == "published"
+
+    # Use the document id read from the Airflow Variable, not a literal string.
+    api = osp_grist_api(grist_doc_id)
+
+    api.sync_table(
+        "Articles",
+        final_table.itertuples(),
+        [("helpscout_id", "id")],
+        [
+            ("Category", "name_category"),
+            ("updated", "updatedAt"),
+            ("Published", "published"),
+            ("Url", "publicUrl"),
+            ("Name", "name"),
+        ],
+    )
diff --git a/dags/doc_helpscout.py b/dags/doc_helpscout.py
new file mode 100644
index 0000000..b2342bd
--- /dev/null
+++ b/dags/doc_helpscout.py
@@ -0,0 +1,31 @@
+"""Daily DAG exporting a Help Scout Docs collection to Grist."""
+
+import pendulum
+from airflow import DAG
+from airflow.operators.python import PythonOperator
+from data_utils.alerting.alerting import task_failed
+from data_utils.doc_helpscout import dump_helpscout_collection_to_grist
+
+# id of the collection to export. See here:
+# https://developer.helpscout.com/docs-api/articles/list/
+helpscout_collection = "5d1c770a04286369ad8d1458"
+
+with DAG(
+    # NOTE(review): "fecth" looks like a typo for "fetch"; left unchanged because
+    # renaming a dag_id makes Airflow treat it as a different DAG.
+    dag_id="fecth_helpscout_doc",
+    default_args={"owner": "airflow"},
+    # Every day at 00:50 (cron syntax).
+    schedule="50 0 * * *",
+    start_date=pendulum.datetime(2024, 11, 15, tz="UTC"),
+    catchup=False,
+) as dag:
+    dump_articles = PythonOperator(
+        task_id="fetch_and_dump_helpscout_articles",
+        python_callable=dump_helpscout_collection_to_grist,
+        op_args=[helpscout_collection],
+        dag=dag,
+        on_failure_callback=task_failed,
+    )
+
+    dump_articles