Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions .sqlfluff
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
[sqlfluff]

# Supported dialects https://docs.sqlfluff.com/en/stable/perma/dialects.html
# Or run 'sqlfluff dialects'
dialect = snowflake

# One of [raw|jinja|python|placeholder]
templater = jinja

# Comma separated list of rules to exclude, or None
# See https://docs.sqlfluff.com/en/stable/perma/rule_disabling.html
# AM04 (ambiguous.column_count) and ST06 (structure.column_order) are
# two of the more controversial rules included to illustrate usage.
exclude_rules = ambiguous.column_count, structure.column_order

# The standard max_line_length is 80 in line with the convention of
# other tools and several style guides. Many projects however prefer
# something a little longer.
# Set to zero or negative to disable checks.
max_line_length = 120

# CPU processes to use while linting.
# The default is "single threaded" to allow easy debugging, but this
# is often undesirable at scale.
# If positive, just implies number of processes.
# If negative or zero, implies number_of_cpus - specified_number.
# e.g. -1 means use all processors but one. 0 means all cpus.
processes = -1

[sqlfluff:indentation]
# While implicit indents are not enabled by default, many of the
# SQLFluff maintainers do use them in their projects.
allow_implicit_indents = True

[sqlfluff:rules:aliasing.length]
min_alias_length = 2

# The default configuration for capitalisation rules is "consistent"
# which will auto-detect the setting from the rest of the file. This
# is less desirable in a new project and you may find this (slightly
# more strict) setting more useful.
# Typically we find users rely on syntax highlighting rather than
# capitalisation to distinguish between keywords and identifiers.
# Clearly, if your organisation has already settled on uppercase
# formatting for any of these syntax elements then set them to "upper".
# See https://stackoverflow.com/questions/608196/why-should-i-capitalize-my-sql-keywords-is-there-a-good-reason
[sqlfluff:rules:capitalisation.keywords]
capitalisation_policy = upper
[sqlfluff:rules:capitalisation.identifiers]
extended_capitalisation_policy = lower
[sqlfluff:rules:capitalisation.functions]
extended_capitalisation_policy = upper
[sqlfluff:rules:capitalisation.literals]
capitalisation_policy = upper
[sqlfluff:rules:capitalisation.types]
extended_capitalisation_policy = upper

# The default configuration for the not equal convention rule is "consistent"
# which will auto-detect the setting from the rest of the file. This
# is less desirable in a new project and you may find this (slightly
# more strict) setting more useful.
[sqlfluff:rules:convention.not_equal]
# Default to preferring the "c_style" (i.e. `!=`)
preferred_not_equal_style = c_style
18 changes: 18 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
appdirs==1.4.4
chardet==5.2.0
click==8.1.7
colorama==0.4.6
diff_cover==9.2.0
iniconfig==2.0.0
Jinja2==3.1.4
MarkupSafe==3.0.2
packaging==24.1
pathspec==0.12.1
pluggy==1.5.0
Pygments==2.18.0
pytest==8.3.3
PyYAML==6.0.2
regex==2024.11.6
sqlfluff==3.2.5
tblib==3.0.0
tqdm==4.67.0
96 changes: 96 additions & 0 deletions sql_scripts/create_dimensions.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
-- 0. Work inside the dev database.
USE DATABASE dev;

-- 0.1) Make analytics the current schema, so unqualified table names in the
--      statements that follow resolve to dev.analytics.
USE SCHEMA analytics;

-- 1. Create dim_artist: an artist name keyed by a generated UUID string.
CREATE OR REPLACE TABLE dim_artist (
    artist_id VARCHAR(36) PRIMARY KEY DEFAULT UUID_STRING(),
    artist_name VARCHAR(256)
);

-- 1.1) Insert one row per distinct leading artist, i.e. the first
--      comma-separated entry of artist_names. artist_id is not listed, so it
--      is filled by the column default.
INSERT INTO dim_artist (artist_name)
SELECT SPLIT_PART(artist_names, ',', 1) AS artist_name
FROM raw_data.music_daily
GROUP BY SPLIT_PART(artist_names, ',', 1);



-- 2. Create dim_weather_code from the distinct (weather_code, wmo) pairs
--    present in the raw daily weather data.
CREATE OR REPLACE TABLE dim_weather_code AS
SELECT DISTINCT
    weather_code AS weather_code_id,
    wmo AS description
FROM raw_data.weather_daily
ORDER BY weather_code_id;

-- 2.1) Declare weather_code_id as the primary key.
ALTER TABLE dim_weather_code
ADD CONSTRAINT dim_weather_code_pk PRIMARY KEY (weather_code_id);



-- 3. Create dim_location.
--    Uses CREATE OR REPLACE, like the other dimension tables in this script,
--    so the script can be re-run: a plain CREATE TABLE fails once the table
--    exists. Replacing the table also keeps the seed INSERT below from
--    appending the same rows a second time.
--    NOTE: location_id is generated by UUID_STRING(), so every run produces
--    new ids; tables that store location_id must be rebuilt afterwards.
CREATE OR REPLACE TABLE dim_location (
    location_id VARCHAR(36) PRIMARY KEY DEFAULT UUID_STRING(),
    country_code VARCHAR(2),
    country_name VARCHAR(100),
    city VARCHAR(100),
    longitude FLOAT,
    latitude FLOAT
);

-- 3.1) Seed the supported locations. The target is written exactly like the
--      CREATE above, so both statements always resolve to the same table.
--      Value order per row: country_code, country_name, city, longitude,
--      latitude.
INSERT INTO dim_location (country_code, country_name, city, longitude, latitude)
VALUES
('kr', 'korea', 'seoul', 126.978, 37.5665),
('jp', 'japan', 'tokyo', 139.8395, 35.6528),
('us', 'usa', 'washington', -77.0363, 38.8951),
('gb', 'uk', 'london', -0.1278, 51.5074),
('au', 'australia', 'canberra', 149.1281, -35.2835),
('br', 'brazil', 'brasilia', -47.8828, -15.7939);



-- 4. Create dim_date: one row per distinct date in the raw weather data.
--    "date" is declared DATE in raw_data.weather_daily, so it is used
--    directly rather than being passed through TO_DATE again.
CREATE OR REPLACE TABLE dim_date AS
SELECT
    "date" AS date_id,
    EXTRACT(YEAR FROM "date") AS year,
    EXTRACT(MONTH FROM "date") AS month,
    EXTRACT(DAY FROM "date") AS day,
    -- NOTE(review): in Snowflake the numeric day of week follows the
    -- session's WEEK_START parameter; confirm the 0 = Sunday convention holds
    -- wherever this script runs.
    EXTRACT(DOW FROM "date") AS day_of_week_num,
    -- DAYNAME returns the three-letter abbreviation ('Sun' ... 'Sat') straight
    -- from the date, so the label no longer depends on a hand-written mapping
    -- of day numbers that silently yields NULL for any unmapped value.
    DAYNAME("date") AS day_of_week_name
FROM raw_data.weather_daily
-- Group on the source column itself; every selected expression is derived
-- from it, so this yields exactly one row per date.
GROUP BY "date"
ORDER BY date_id;

-- 4.1) Declare date_id as the primary key.
ALTER TABLE dim_date
ADD CONSTRAINT dim_date_pk PRIMARY KEY (date_id);



-- 5. Create dim_track: audio features from raw_data.track plus a track name
--    looked up from raw_data.music_daily.
--    music_daily also carries rank, country and "date", so it can hold many
--    rows per track_id. It is collapsed to a single name per track_id before
--    the join. Joining it directly yields one row per distinct
--    (track_id, track_name) pair, and DISTINCT cannot merge rows whose names
--    differ (including NULL vs. non-NULL), which would leave duplicate
--    track_id values under the primary key declared below.
CREATE OR REPLACE TABLE analytics.dim_track AS (
    -- DISTINCT is kept so exact duplicate rows in raw_data.track still collapse.
    SELECT DISTINCT
        track.track_id,
        track_names.track_name,
        track.duration_ms,
        track.tempo,
        track.danceability,
        track.energy,
        track.valence
    FROM raw_data.track AS track
    LEFT JOIN (
        -- MAX gives a deterministic pick and ignores NULL names.
        SELECT
            track_id,
            MAX(track_name) AS track_name
        FROM raw_data.music_daily
        GROUP BY track_id
    ) AS track_names
        ON track.track_id = track_names.track_id
);


-- 5.1) Declare track_id as the primary key.
ALTER TABLE analytics.dim_track
ADD CONSTRAINT pk_dim_track_temp PRIMARY KEY (track_id);
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

EOF(End Of File) 챙겨주세요.
hint: 저 빨간 동그라미 안에 줄이 그어져 있는 것이 무슨 의미인지 확인해보세요

62 changes: 62 additions & 0 deletions sql_scripts/create_facts.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
-- 0. Select the database.
USE DATABASE dev;

-- 1. Build the daily music fact table.
--    Each raw chart row is kept only when it matches a track, an artist (by
--    the first comma-separated entry of artist_names) and a location (by
--    country name); all three joins are inner joins.
CREATE OR REPLACE TABLE analytics.fact_music_daily AS
SELECT
    md.track_id,
    md."date" AS date_id,
    md.rank,
    md.days_on_chart,
    tr.duration_ms,
    ar.artist_id,
    loc.location_id
FROM raw_data.music_daily AS md
INNER JOIN raw_data.track AS tr
    ON md.track_id = tr.track_id
INNER JOIN analytics.dim_artist AS ar
    ON SPLIT_PART(md.artist_names, ',', 1) = ar.artist_name
INNER JOIN analytics.dim_location AS loc
    ON md.country = loc.country_name;

-- 1.1) Add FK: artist_id -> dim_artist.
ALTER TABLE analytics.fact_music_daily
ADD CONSTRAINT fk_artist
FOREIGN KEY (artist_id) REFERENCES analytics.dim_artist (artist_id);

-- 1.2) Add FK: location_id -> dim_location.
ALTER TABLE analytics.fact_music_daily
ADD CONSTRAINT fk_location
FOREIGN KEY (location_id) REFERENCES analytics.dim_location (location_id);

-- 2. analytics.fact_weather_daily: daily weather fact table.
--    The join to dim_location is a LEFT JOIN, so weather rows whose country
--    has no matching location are kept with a NULL location_id.
--    Measures are rounded to 4 decimal places.
CREATE OR REPLACE TABLE analytics.fact_weather_daily AS
SELECT
    loc.location_id,
    wd."date" AS date_id,
    wd.weather_code AS weather_code_id,
    ROUND(wd.temperature_2m_max, 4) AS temperature_max,
    ROUND(wd.temperature_2m_min, 4) AS temperature_min,
    ROUND(wd.precipitation_sum, 4) AS precipitation_sum,
    ROUND(wd.wind_speed_10m_max, 4) AS wind_speed_max
FROM raw_data.weather_daily AS wd
LEFT JOIN analytics.dim_location AS loc
    ON wd.country = loc.country_name;

-- 2.1) Add FK: location_id -> dim_location.
ALTER TABLE analytics.fact_weather_daily
ADD CONSTRAINT fk_weather_daily_location_id
FOREIGN KEY (location_id) REFERENCES analytics.dim_location (location_id);

-- 2.2) Add FK: date_id -> dim_date.
ALTER TABLE analytics.fact_weather_daily
ADD CONSTRAINT fk_weather_daily_date_id
FOREIGN KEY (date_id) REFERENCES analytics.dim_date (date_id);

-- 2.3) Add FK: weather_code_id -> dim_weather_code.
ALTER TABLE analytics.fact_weather_daily
ADD CONSTRAINT fk_weather_daily_weather_code_id
FOREIGN KEY (weather_code_id)
REFERENCES analytics.dim_weather_code (weather_code_id);
8 changes: 8 additions & 0 deletions sql_scripts/create_schemas.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- 0. Select the dev database.
USE DATABASE dev;

-- 1. Create the four schemas; IF NOT EXISTS leaves an existing schema as is.
CREATE SCHEMA IF NOT EXISTS dev.raw_data;
CREATE SCHEMA IF NOT EXISTS dev.analytics;
CREATE SCHEMA IF NOT EXISTS dev.reporting;
CREATE SCHEMA IF NOT EXISTS dev.test;
60 changes: 60 additions & 0 deletions sql_scripts/load_raw_data.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
-- 0. Select the database that the raw_data.* names below resolve against.
USE DATABASE dev;
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

데이터베이스 이름만 보고도 무엇을 담고 있는 것인지 알 수 있게 작성해주세요.


/*
1. Create the tables under the raw_data schema
*/
-- Daily weather observations.
-- The table carries a country column and is joined to locations by country
-- downstream, so "date" alone does not identify a row. The primary key is
-- therefore declared on ("date", country) instead of "date".
-- NOTE(review): assumes at most one observation per country per day, and that
-- every loaded row has a country (primary-key columns are treated as
-- NOT NULL) - confirm against the feed.
-- Column order is unchanged because the CSV load is positional.
CREATE OR REPLACE TABLE raw_data.weather_daily (
    "date" DATE,
    country VARCHAR(32),
    weather_code FLOAT,
    temperature_2m_max FLOAT,
    temperature_2m_min FLOAT,
    precipitation_sum FLOAT,
    wind_speed_10m_max FLOAT,
    wmo VARCHAR(32),
    CONSTRAINT weather_daily_pk PRIMARY KEY ("date", country)
);

-- Per-track attributes, keyed by track_id.
CREATE OR REPLACE TABLE raw_data.track (
    track_id VARCHAR(128) PRIMARY KEY,
    duration_ms INTEGER,
    tempo FLOAT,
    danceability FLOAT,
    energy FLOAT,
    valence FLOAT
);

-- Daily chart entries.
-- Rows carry "date", country and rank alongside track_id, so track_id alone
-- does not identify a row. The primary key is therefore declared on
-- ("date", country, track_id).
-- NOTE(review): assumes a track appears at most once per country per day, and
-- that "date" and country are always present (primary-key columns are treated
-- as NOT NULL) - confirm against the feed.
-- track_id is widened to VARCHAR(128) to match raw_data.track.track_id, the
-- column it is joined to.
-- Column order is unchanged because the CSV load is positional.
CREATE OR REPLACE TABLE raw_data.music_daily (
    rank INTEGER,
    track_id VARCHAR(128),
    artist_names VARCHAR(256),
    track_name VARCHAR(128),
    source VARCHAR(128),
    peak_rank INTEGER,
    previous_rank INTEGER,
    days_on_chart INTEGER,
    "streams" INTEGER,
    country VARCHAR(32),
    "date" DATE,
    CONSTRAINT music_daily_pk PRIMARY KEY ("date", country, track_id)
);

/*
2. Load data into the raw_data tables (COPY from S3)
   All three loads read CSV files, skip one header line and accept fields
   optionally enclosed in double quotes.
   NOTE(review): the <AWS_...> tokens look like placeholders to substitute
   before running - confirm how they are filled in.
*/
COPY INTO raw_data.weather_daily
FROM 's3://<AWS_S3_BUCKET>/weather/daily/'
CREDENTIALS = (
    AWS_KEY_ID = '<AWS_KEY_ID>'
    AWS_SECRET_KEY = '<AWS_SECRET_KEY>'
)
FILE_FORMAT = (
    TYPE = 'CSV'
    SKIP_HEADER = 1
    FIELD_OPTIONALLY_ENCLOSED_BY = '"'
);

COPY INTO raw_data.track
FROM 's3://<AWS_S3_BUCKET>/music/track_data/'
CREDENTIALS = (
    AWS_KEY_ID = '<AWS_KEY_ID>'
    AWS_SECRET_KEY = '<AWS_SECRET_KEY>'
)
FILE_FORMAT = (
    TYPE = 'CSV'
    SKIP_HEADER = 1
    FIELD_OPTIONALLY_ENCLOSED_BY = '"'
);

COPY INTO raw_data.music_daily
FROM 's3://<AWS_S3_BUCKET>/music/daily/'
CREDENTIALS = (
    AWS_KEY_ID = '<AWS_KEY_ID>'
    AWS_SECRET_KEY = '<AWS_SECRET_KEY>'
)
FILE_FORMAT = (
    TYPE = 'CSV'
    SKIP_HEADER = 1
    FIELD_OPTIONALLY_ENCLOSED_BY = '"'
);