-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmedian.sql
84 lines (61 loc) · 1.43 KB
/
median.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
/*
Median (odd set of numbers) = ((n+1)/2)th term
Median (even set of numbers) = ((n/2)th term + ((n/2)+1)th term)/2
The median is also known as the 50th percentile
*/
-- Using Window Function
-- This is more accurate one in fraction
SELECT
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY column_name) AS median
FROM
table_name;
-- One more way to calculate median is by using the following query:
SELECT
percentile_disc(0.5) WITHIN GROUP (
ORDER BY temperature)
FROM city_data;
-- Because the query used percentile_disc(), the result is a value that exists in the dataset.
-- Calculating Multiple Percentiles
SELECT
device_id,
percentile_cont(0.25) WITHIN GROUP(
ORDER BY
humidity) AS percentile_25,
percentile_cont(0.50) WITHIN GROUP(
ORDER BY
humidity) AS percentile_50,
percentile_cont(0.75) WITHIN GROUP(
ORDER BY
humidity) AS percentile_75,
percentile_cont(0.95) WITHIN GROUP(
ORDER BY
humidity) AS percentile_95
FROM
conditions
GROUP BY
device_id;
-- Calculating a Series of Percentiles
SELECT
city,
percentile,
percentile_cont(p) WITHIN GROUP (
ORDER BY
temperature)
FROM
city_data,
generate_series(0.01, 1, 0.01) AS percentile
GROUP BY
city, percentile;
-- Without Window Function
WITH get_median AS (
SELECT
y, row_number() OVER(ORDER BY y ASC) AS rn_asc,
COUNT(*) OVER() AS ct
FROM dataset
)
SELECT
AVG(y) AS median
FROM
get_median
WHERE
rn_asc BETWEEN ct/2.0 AND ct / 2.0 + 1;