Skip to content

Commit f14458e

Browse files
authoredApr 13, 2018
Include scheduler speed improvements (#18)
* Don't redo nginx conf modification on restart * Use new airflow customized for scheduler speedup * Bump up concurrency limits * Let DAG import run longer just in case, and limit task insertion time
1 parent 68e804b commit f14458e

File tree

5 files changed

+97
-88
lines changed

5 files changed

+97
-88
lines changed
 

‎config/airflow.cfg

+11-4
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,10 @@ dags_are_paused_at_creation = True
6969

7070
# When not using pools, tasks are run in the "default pool",
7171
# whose size is guided by this config element
72-
non_pooled_task_slot_count = 128
72+
non_pooled_task_slot_count = 1000
7373

7474
# The maximum number of active DAG runs per DAG
75-
max_active_runs_per_dag = 16
75+
max_active_runs_per_dag = 1000
7676

7777
# Whether to load the examples that ship with Airflow. It's good to
7878
# get started, but you probably want to set this to False in a production
@@ -89,7 +89,7 @@ fernet_key = $FERNET_KEY
8989
donot_pickle = False
9090

9191
# How long before timing out a python file import while filling the DagBag
92-
dagbag_import_timeout = 30
92+
dagbag_import_timeout = 300
9393

9494
# The class to use for running task instances in a subprocess
9595
task_runner = BashTaskRunner
@@ -330,7 +330,7 @@ catchup_by_default = False
330330
# This changes the batch size of queries in the scheduling main loop.
331331
# This depends on query length limits and how long you are willing to hold locks.
332332
# 0 for no limit
333-
max_tis_per_query = 500
333+
max_tis_per_query = 5000
334334

335335
# Statsd (https://github.com/etsy/statsd) integration settings
336336
statsd_on = False
@@ -345,6 +345,13 @@ max_threads = 8
345345

346346
authenticate = False
347347

348+
# Time limit in seconds for inserting task instances into the DB.
349+
# Verify integrity is called to ensure that all tasks in the dag are saved in the db.
350+
# If set, this limits the time (in seconds) that this function is allowed to spend inserting.
351+
# Because verify integrity is run in a loop by the scheduler, the function becomes a
352+
# "best effort" operation. Use -1 for no limit.
353+
verify_integrity_insert_time_limit = 60
354+
348355
[ldap]
349356
# set this to ldaps://<your.ldap.server>:<port>
350357
uri =

‎deploy/docker-compose-CeleryExecutor.yml

+80-78
Original file line numberDiff line numberDiff line change
@@ -219,85 +219,87 @@ services:
219219
ssl on;\" ||
220220
SSL_BLOCK=\"ssl off;\" &&
221221
echo $$SSL_BLOCK &&
222-
sed -i -e \"s/include \\/etc\\/nginx\\/conf\\.d\\/\\*\.conf;//\"
223-
-e \"\\$$s/}/
224-
error_log \\/var\\/log\\/error.log;\\n
225-
gzip on;\\n
226-
gzip_vary on;\\n
227-
gzip_min_length 10240;\\n
228-
gzip_proxied expired no-cache no-store private auth;\\n
229-
gzip_types text\\/plain text\\/css text\\/xml text\\/javascript application\\/x-javascript application\\/xml;\\n
230-
gzip_disable \\\"MSIE [1-6]\.\\\";\\n
231-
server {\\n
232-
listen 80;\\n
233-
listen 443;\\n
234-
auth_basic $${AUTH_BASIC_STRING};\\n
235-
auth_basic_user_file \\/etc\\/nginx\\/\.htpasswd;\\n
236-
$${SSL_BLOCK}\\n
237-
proxy_set_header Host \\$$host;\\n
238-
proxy_set_header X-Real-IP \\$$remote_addr;\\n
239-
proxy_set_header X-Forwarded-For \\$$proxy_add_x_forwarded_for;\\n
240-
if (\\$$http_referer ~* \\\"\\/airflow\\/*.*\\\") {\\n
241-
set \\$$is_airflow \\\"A\\\";\\n
222+
if ! grep airflow /etc/nginx/nginx.conf; then
223+
sed -i -e \"s/include \\/etc\\/nginx\\/conf\\.d\\/\\*\.conf;//\"
224+
-e \"\\$$s/}/
225+
error_log \\/var\\/log\\/error.log;\\n
226+
gzip on;\\n
227+
gzip_vary on;\\n
228+
gzip_min_length 10240;\\n
229+
gzip_proxied expired no-cache no-store private auth;\\n
230+
gzip_types text\\/plain text\\/css text\\/xml text\\/javascript application\\/x-javascript application\\/xml;\\n
231+
gzip_disable \\\"MSIE [1-6]\.\\\";\\n
232+
server {\\n
233+
listen 80;\\n
234+
listen 443;\\n
235+
auth_basic $${AUTH_BASIC_STRING};\\n
236+
auth_basic_user_file \\/etc\\/nginx\\/\.htpasswd;\\n
237+
$${SSL_BLOCK}\\n
238+
proxy_set_header Host \\$$host;\\n
239+
proxy_set_header X-Real-IP \\$$remote_addr;\\n
240+
proxy_set_header X-Forwarded-For \\$$proxy_add_x_forwarded_for;\\n
241+
if (\\$$http_referer ~* \\\"\\/airflow\\/*.*\\\") {\\n
242+
set \\$$is_airflow \\\"A\\\";\\n
243+
}\\n
244+
if (\\$$request_uri !~* ^\\/airflow\\/.*) {\\n
245+
set \\$$is_airflow \\\"\\$${is_airflow}B\\\";\\n
246+
}\\n
247+
if (\\$$is_airflow ~* AB) { \\n
248+
rewrite ^\\/*(.*) \\/airflow\\/\\$$1 permanent;\\n
249+
}\\n
250+
if (\\$$http_referer ~* \\\".*\\/flower\\/*.*\\\") {\\n
251+
set \\$$is_flower \\\"A\\\";\\n
252+
}\\n
253+
if (\\$$request_uri !~* ^\\/flower\\/.*) {\\n
254+
set \\$$is_flower \\\"\\$${is_flower}B\\\";\\n
255+
}\\n
256+
if (\\$$is_flower = AB) { \\n
257+
rewrite ^\\/*(.*) \\/flower\\/\\$$1 permanent;\\n
258+
}\\n
259+
if (\\$$http_referer ~* \\\".*\\/visualizer\\/*.*\\\") {\\n
260+
set \\$$is_visualizer \\\"A\\\";\\n
261+
}\\n
262+
if (\\$$request_uri !~* ^\\/visualizer\\/.*) {\\n
263+
set \\$$is_visualizer \\\"\\$${is_visualizer}B\\\";\\n
264+
}\\n
265+
if (\\$$is_visualizer = AB) { \\n
266+
rewrite ^\\/*(.*) \\/visualizer\\/\\$$1 permanent;\\n
267+
}\\n
268+
if (\\$$http_referer ~* \\\".*\\/rabbitmq\\/*.*\\\") {\\n
269+
set \\$$is_rabbitmq \\\"A\\\";\\n
270+
}\\n
271+
if (\\$$request_uri !~* ^\\/rabbitmq\\/.*) {\\n
272+
set \\$$is_rabbitmq \\\"\\$${is_rabbitmq}B\\\";\\n
273+
}\\n
274+
if (\\$$is_rabbitmq = AB) { \\n
275+
rewrite ^\\/*(.*) \\/rabbitmq\\/\\$$1 permanent;\\n
276+
}\\n
277+
location \\/ {\\n
278+
rewrite . \\/airflow\\/admin\\/ permanent;\\n
279+
}\\n
280+
location ^~ \\/airflow\\/ {\\n
281+
proxy_redirect http:\\/\\/\\$$host \\$$scheme:\\/\\/\\$$host;\\n
282+
proxy_pass http:\\/\\/webserver:8080\\/;\\n
283+
}\\n
284+
location ^~ \\/flower\\/ {\\n
285+
sub_filter_last_modified on;\\n
286+
sub_filter_once off;\\n
287+
sub_filter_types application\\/javascript;\\n
288+
sub_filter \\\"url_prefix() +\\\" \\\"'\\/flower' +\\\";\\n
289+
proxy_redirect http:\\/\\/\\$$host \\$$scheme:\\/\\/\\$$host;\\n
290+
proxy_pass http:\\/\\/flower:5555\\/;\\n
291+
}\\n
292+
location ^~ \\/visualizer\\/ {\\n
293+
proxy_redirect http:\\/\\/\\$$host \\$$scheme:\\/\\/\\$$host;\\n
294+
proxy_pass http:\\/\\/visualizer:8080\\/;\\n
295+
}\\n
296+
location ^~ \\/rabbitmq\\/ {\\n
297+
proxy_pass http:\\/\\/rabbitmq:15672\\/;\\n
298+
proxy_redirect off;\\n
299+
}\\n
242300
}\\n
243-
if (\\$$request_uri !~* ^\\/airflow\\/.*) {\\n
244-
set \\$$is_airflow \\\"\\$${is_airflow}B\\\";\\n
245-
}\\n
246-
if (\\$$is_airflow ~* AB) { \\n
247-
rewrite ^\\/*(.*) \\/airflow\\/\\$$1 permanent;\\n
248-
}\\n
249-
if (\\$$http_referer ~* \\\".*\\/flower\\/*.*\\\") {\\n
250-
set \\$$is_flower \\\"A\\\";\\n
251-
}\\n
252-
if (\\$$request_uri !~* ^\\/flower\\/.*) {\\n
253-
set \\$$is_flower \\\"\\$${is_flower}B\\\";\\n
254-
}\\n
255-
if (\\$$is_flower = AB) { \\n
256-
rewrite ^\\/*(.*) \\/flower\\/\\$$1 permanent;\\n
257-
}\\n
258-
if (\\$$http_referer ~* \\\".*\\/visualizer\\/*.*\\\") {\\n
259-
set \\$$is_visualizer \\\"A\\\";\\n
260-
}\\n
261-
if (\\$$request_uri !~* ^\\/visualizer\\/.*) {\\n
262-
set \\$$is_visualizer \\\"\\$${is_visualizer}B\\\";\\n
263-
}\\n
264-
if (\\$$is_visualizer = AB) { \\n
265-
rewrite ^\\/*(.*) \\/visualizer\\/\\$$1 permanent;\\n
266-
}\\n
267-
if (\\$$http_referer ~* \\\".*\\/rabbitmq\\/*.*\\\") {\\n
268-
set \\$$is_rabbitmq \\\"A\\\";\\n
269-
}\\n
270-
if (\\$$request_uri !~* ^\\/rabbitmq\\/.*) {\\n
271-
set \\$$is_rabbitmq \\\"\\$${is_rabbitmq}B\\\";\\n
272-
}\\n
273-
if (\\$$is_rabbitmq = AB) { \\n
274-
rewrite ^\\/*(.*) \\/rabbitmq\\/\\$$1 permanent;\\n
275-
}\\n
276-
location \\/ {\\n
277-
rewrite . \\/airflow\\/admin\\/ permanent;\\n
278-
}\\n
279-
location ^~ \\/airflow\\/ {\\n
280-
proxy_redirect http:\\/\\/\\$$host \\$$scheme:\\/\\/\\$$host;\\n
281-
proxy_pass http:\\/\\/webserver:8080\\/;\\n
282-
}\\n
283-
location ^~ \\/flower\\/ {\\n
284-
sub_filter_last_modified on;\\n
285-
sub_filter_once off;\\n
286-
sub_filter_types application\\/javascript;\\n
287-
sub_filter \\\"url_prefix() +\\\" \\\"'\\/flower' +\\\";\\n
288-
proxy_redirect http:\\/\\/\\$$host \\$$scheme:\\/\\/\\$$host;\\n
289-
proxy_pass http:\\/\\/flower:5555\\/;\\n
290-
}\\n
291-
location ^~ \\/visualizer\\/ {\\n
292-
proxy_redirect http:\\/\\/\\$$host \\$$scheme:\\/\\/\\$$host;\\n
293-
proxy_pass http:\\/\\/visualizer:8080\\/;\\n
294-
}\\n
295-
location ^~ \\/rabbitmq\\/ {\\n
296-
proxy_pass http:\\/\\/rabbitmq:15672\\/;\\n
297-
proxy_redirect off;\\n
298-
}\\n
299-
}\\n
300-
}/\" /etc/nginx/nginx.conf &&
301+
}/\" /etc/nginx/nginx.conf;
302+
fi &&
301303
unset BASIC_AUTH_PASSWORD &&
302304
unset BASIC_AUTH_USERNAME &&
303305
unset SSL_CERTIFICATE &&

‎docker/Dockerfile

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
# VERSION 0.0.5-customAF
1+
# VERSION 0.0.6-customAF
22
# AUTHOR: Will Wong
33
# DESCRIPTION: Docker airflow with ECR registry and DooD (Docker outside of Docker)
44
# BUILD: docker build --rm -t wongwill86/air-tasks .
55
# SOURCE: https://github.com/wongwill86/air-tasks
66

77
ARG BASE_DIST=alpine
8-
ARG BASE_TAG=0.0.5-customAF
8+
ARG BASE_TAG=0.0.6-customAF
99
ARG BASE_TEST=
1010

1111
FROM wongwill86/air-tasks:base-${BASE_DIST}-${BASE_TAG}${BASE_TEST}

‎docker/base/Dockerfile.base-alpine

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# VERSION 0.0.5-customAF
1+
# VERSION 0.0.6-customAF
22
# AUTHOR: Will Wong
33
# DESCRIPTION: Alpine base image with dockerized airflow and ECR registry and DooD (Docker outside of Docker)
44
# BUILD: docker build --rm -t wongwill86/air-tasks:base-alpine -f /docker/base/Dockerfile.base-alpine .
@@ -34,7 +34,7 @@ RUN set -ex \
3434
&& mv docker/* /usr/local/bin/ \
3535
&& rm -rf docker docker-${DOCKER_VERSION}.tgz docker \
3636
# Temporarily use this performance branch of airflow instead of pip install apache-airflow[crypto,celery,postgres]==${AIRFLOW_VERSION}
37-
&& git clone https://github.com/wongwill86/incubator-airflow.git --depth 1 -b v1-9-stable-dag_import_speed \
37+
&& git clone https://github.com/wongwill86/incubator-airflow.git --depth 1 -b v1-9-stable-3-scheduler_speed \
3838
# Deleting this symlink not handled correctly by shutil.copy
3939
&& rm -rf incubator-airflow/airflow/www/static/docs \
4040
&& pip install incubator-airflow/[crypto,celery,postgres] \

‎docker/base/Dockerfile.base-slim

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# VERSION 0.0.5-customAF
1+
# VERSION 0.0.6-customAF
22
# AUTHOR: Will Wong
33
# DESCRIPTION: Slim base image with dockerized airflow and ECR registry and DooD (Docker outside of Docker)
44
# BUILD: docker build --rm -t wongwill86/air-tasks:base-slim -f /docker/base/Dockerfile.base-slim .
@@ -33,7 +33,7 @@ RUN apt-get update \
3333
&& apt-get update \
3434
&& apt-get install docker-ce -y \
3535
# Temporarily use this performance branch of airflow instead of pip install apache-airflow[crypto,celery,postgres]==${AIRFLOW_VERSION}
36-
&& git clone https://github.com/wongwill86/incubator-airflow.git --depth 1 -b v1-9-stable-dag_import_speed \
36+
&& git clone https://github.com/wongwill86/incubator-airflow.git --depth 1 -b v1-9-stable-3-scheduler_speed \
3737
&& pip install incubator-airflow/[crypto,celery,postgres] \
3838
&& rm -rf incubator-airflow \
3939
&& pip install docker-compose docker \

0 commit comments

Comments
 (0)
Please sign in to comment.