Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions dags/data_utils/doc_helpscout.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import pandas as pd
import requests
from airflow.hooks.base import BaseHook
from airflow.models import Variable
from requests.auth import HTTPBasicAuth

from .grist.grist_helper import osp_grist_api

# Credentials for the Help Scout Docs API come from the Airflow connection
# named "helpscout".
connection_helpscout = BaseHook.get_connection("helpscout")
# Validate with an explicit exception rather than `assert`: assertions are
# stripped when Python runs with -O, which would let a missing login through
# and only fail later, at request time, with a far less obvious error.
if connection_helpscout.login is None:
    raise ValueError('Airflow connection "helpscout" has no login configured')
# The connection login is used as the basic-auth username with the fixed
# password "X".
# NOTE(review): presumably the login holds the Docs API key and "X" is the
# dummy password the API expects — confirm against the Help Scout docs.
basic = HTTPBasicAuth(connection_helpscout.login, "X")

# Value of the Airflow Variable "helpscout_documentation_grist_doc_id";
# presumably the id of the Grist document that receives the export.
grist_doc_id = Variable.get("helpscout_documentation_grist_doc_id")


def get_categories(collection_id):
    """Return the categories of a Help Scout Docs collection as a DataFrame.

    All pages of the listing are fetched, so a collection whose categories
    span several pages is not silently truncated to the first one.

    Args:
        collection_id: Identifier of the Help Scout Docs collection.

    Returns:
        A ``pandas.DataFrame`` with one row per item of the ``categories``
        listing returned by the API.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
        requests.Timeout: If the API does not answer within the timeout.
    """
    url = (
        "https://docsapi.helpscout.net/v1/collections/"
        f"{collection_id}/categories"
    )
    items = []
    page = 1
    while True:
        # A timeout keeps a stalled connection from hanging the Airflow task.
        response = requests.get(
            url,
            auth=basic,
            params={"page": page},
            timeout=30,
        )
        # Fail on HTTP errors here rather than with a KeyError further down.
        response.raise_for_status()
        listing = response.json()["categories"]
        items.extend(listing["items"])
        # The listing envelope is expected to report the total page count in
        # "pages"; when it is absent, treat the response as a single page.
        if page >= listing.get("pages", 1):
            break
        page += 1
    return pd.DataFrame(items)


def get_articles(category_id):
    """Return the articles of a Help Scout Docs category as a DataFrame.

    All pages of the listing are fetched, so a category holding more articles
    than fit on one page is not silently truncated to the first one.

    Args:
        category_id: Identifier of the Help Scout Docs category.

    Returns:
        A ``pandas.DataFrame`` with one row per item of the ``articles``
        listing returned by the API, plus a ``category_id`` column set to
        the requested category.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
        requests.Timeout: If the API does not answer within the timeout.
    """
    url = f"https://docsapi.helpscout.net/v1/categories/{category_id}/articles"
    items = []
    page = 1
    while True:
        # A timeout keeps a stalled connection from hanging the Airflow task.
        response = requests.get(
            url,
            auth=basic,
            params={"page": page},
            timeout=30,
        )
        # Fail on HTTP errors here rather than with a KeyError further down.
        response.raise_for_status()
        listing = response.json()["articles"]
        items.extend(listing["items"])
        # The listing envelope is expected to report the total page count in
        # "pages"; when it is absent, treat the response as a single page.
        if page >= listing.get("pages", 1):
            break
        page += 1
    result = pd.DataFrame(items)
    # Remember which category each article came from so callers can join
    # the articles back to their category.
    result["category_id"] = category_id
    return result


def dump_helpscout_collection_to_grist(collection_id):
    """Export the articles of a Help Scout Docs collection to Grist.

    Fetches the collection's categories, then the articles of each category,
    joins each article with its category name and syncs the result into the
    "Articles" table of the Grist document, keyed on the article id.

    Args:
        collection_id: Identifier of the Help Scout Docs collection.

    Returns:
        None. Nothing is synced when the collection has no categories or no
        articles.
    """
    categories = get_categories(collection_id)
    # An empty frame has no "id" column, and pd.concat([]) raises ValueError,
    # so stop early when there is nothing to export.
    if categories.empty:
        return

    all_pages = pd.concat([get_articles(c_id) for c_id in categories["id"]])
    if all_pages.empty:
        return

    # Attach the category name to every article. Overlapping columns from
    # the categories frame get the "_category" suffix ("name_category").
    final_table = pd.merge(
        all_pages,
        categories[["id", "name"]],
        left_on="category_id",
        right_on="id",
        suffixes=("", "_category"),
    )
    final_table["published"] = final_table["status"] == "published"

    # Pass the configured document id (module-level variable), not the
    # literal string "grist_doc_id".
    api = osp_grist_api(grist_doc_id)

    api.sync_table(
        "Articles",
        final_table.itertuples(),
        # Key column: Grist "helpscout_id" <- article "id".
        [("helpscout_id", "id")],
        # Remaining columns: (Grist column, DataFrame column).
        [
            ("Category", "name_category"),
            ("updated", "updatedAt"),
            ("Published", "published"),
            ("Url", "publicUrl"),
            ("Name", "name"),
        ],
    )
26 changes: 26 additions & 0 deletions dags/doc_helpscout.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import pendulum
from airflow import DAG
from airflow.operators.python import PythonOperator
from data_utils.alerting.alerting import task_failed
from data_utils.doc_helpscout import dump_helpscout_collection_to_grist

# Identifier of the Help Scout Docs collection to export. See:
# https://developer.helpscout.com/docs-api/articles/list/
helpscout_collection = "5d1c770a04286369ad8d1458"

# Single-task DAG, scheduled with the cron expression "50 0 * * *" (every day
# at 00:50), with no backfill of past runs.
# NOTE(review): "fecth" in the dag_id looks like a typo for "fetch". It is
# kept as-is because the dag_id identifies the DAG in Airflow.
with DAG(
    dag_id="fecth_helpscout_doc",
    start_date=pendulum.datetime(2024, 11, 15, tz="UTC"),
    schedule="50 0 * * *",
    catchup=False,
    default_args={"owner": "airflow"},
) as dag:
    # The operator is attached to the DAG by the enclosing `with` block.
    dump_articles = PythonOperator(
        task_id="fetch_and_dump_helpscout_articles",
        python_callable=dump_helpscout_collection_to_grist,
        op_kwargs={"collection_id": helpscout_collection},
        on_failure_callback=task_failed,
    )