From 1973ff098266ffc77ea17c1245c960e184fd4d46 Mon Sep 17 00:00:00 2001 From: Oleg Ovcharuk Date: Wed, 18 Sep 2024 19:43:45 +0300 Subject: [PATCH] Add YDBScanQueryOperator docs --- docs/apache-airflow-providers-ydb/index.rst | 2 +- .../operators/ydb_operator_howto_guide.rst | 26 ++++++++++++++++--- tests/system/providers/ydb/example_ydb.py | 11 +++++++- 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/docs/apache-airflow-providers-ydb/index.rst b/docs/apache-airflow-providers-ydb/index.rst index 25edc87386c4..45cbd2b00b7a 100644 --- a/docs/apache-airflow-providers-ydb/index.rst +++ b/docs/apache-airflow-providers-ydb/index.rst @@ -34,7 +34,7 @@ :caption: Guides Connection types - YDBExecuteQueryOperator types + Operator types .. toctree:: diff --git a/docs/apache-airflow-providers-ydb/operators/ydb_operator_howto_guide.rst b/docs/apache-airflow-providers-ydb/operators/ydb_operator_howto_guide.rst index 3b7aaab44c2b..894be8101d5b 100644 --- a/docs/apache-airflow-providers-ydb/operators/ydb_operator_howto_guide.rst +++ b/docs/apache-airflow-providers-ydb/operators/ydb_operator_howto_guide.rst @@ -17,7 +17,7 @@ .. _howto/operators:ydb: -How-to Guide for YDB using YDBExecuteQueryOperator +How-to Guide for using YDB Operators ================================================== Introduction @@ -29,7 +29,7 @@ workflow. Airflow is essentially a graph (Directed Acyclic Graph) made up of tas A task defined or implemented by a operator is a unit of work in your data pipeline. The purpose of this guide is to define tasks involving interactions with a YDB database with -the :class:`~airflow.providers.ydb.operators.YDBExecuteQueryOperator`. +the :class:`~airflow.providers.ydb.operators.ydb.YDBExecuteQueryOperator` and :class:`~airflow.providers.ydb.operators.ydb.YDBScanQueryOperator`. Common database operations with YDBExecuteQueryOperator ------------------------------------------------------- @@ -162,6 +162,26 @@ by creating a sql file. 
) +Executing Scan Queries with YDBScanQueryOperator +------------------------------------------------------- + +YDBScanQueryOperator executes YDB Scan Queries, which are designed primarily for running analytical ad hoc queries. Parameters of the operator are: + +- ``sql`` - string with query; +- ``conn_id`` - YDB connection id. Default value is ``ydb_default``; +- ``params`` - parameters to be injected into query if it is a Jinja template, more details about :doc:`params <apache-airflow:core-concepts/params>`. + +Example of using YDBScanQueryOperator: + +.. code-block:: python + + get_birth_date_scan = YDBScanQueryOperator( + task_id="get_birth_date_scan", + sql="sql/birth_date.sql", + params={"begin_date": "2020-01-01", "end_date": "2020-12-31"}, + ) + + The complete YDB Operator DAG ----------------------------- @@ -176,7 +196,7 @@ When we put everything together, our DAG should look like this: Conclusion ---------- -In this how-to guide we explored the Apache Airflow YDBExecuteQueryOperator to connect to YDB database. Let's quickly highlight the key takeaways. +In this how-to guide we explored the Apache Airflow YDBExecuteQueryOperator and YDBScanQueryOperator to connect to a YDB database. Let's quickly highlight the key takeaways. It is best practice to create subdirectory called ``sql`` in your ``dags`` directory where you can store your sql files. This will make your code more elegant and more maintainable. And finally, we looked at the templated version of sql script and usage of ``params`` attribute. 
diff --git a/tests/system/providers/ydb/example_ydb.py b/tests/system/providers/ydb/example_ydb.py index 8d43b6199abb..2938f4c467e8 100644 --- a/tests/system/providers/ydb/example_ydb.py +++ b/tests/system/providers/ydb/example_ydb.py @@ -24,7 +24,7 @@ from airflow import DAG from airflow.decorators import task from airflow.providers.ydb.hooks.ydb import YDBHook -from airflow.providers.ydb.operators.ydb import YDBExecuteQueryOperator +from airflow.providers.ydb.operators.ydb import YDBExecuteQueryOperator, YDBScanQueryOperator # [START ydb_operator_howto_guide] @@ -101,12 +101,21 @@ def populate_pet_table_via_bulk_upsert(): ) # [END ydb_operator_howto_guide_get_birth_date] + # [START ydb_operator_howto_guide_get_birth_date_scan] + get_birth_date_scan = YDBScanQueryOperator( + task_id="get_birth_date_scan", + sql="sql/birth_date.sql", + params={"begin_date": "2020-01-01", "end_date": "2020-12-31"}, + ) + # [END ydb_operator_howto_guide_get_birth_date_scan] + ( create_pet_table >> populate_pet_table >> populate_pet_table_via_bulk_upsert() >> get_all_pets >> get_birth_date + >> get_birth_date_scan ) # [END ydb_operator_howto_guide]