with cte as (select Price, substring_index(replace(replace(replace(Station, "('", ' '), "'", ''), ")", ''), ', ', 1) as Gas_Station,
substring_index(replace(replace(replace(Station, "('", ' '), "'", ''), ")", ''), ', ', -1) as Gas_Station_Address,
Area,
substring_index(replace(replace(replace(Thanks, "('", ' '), "'", ''), ")", ''), ', ', 1) as Thanks_By,
substring_index(replace(replace(replace(Thanks, "('", ' '), "'", ''), ")", ''), ', ', -1) as thanks_last_updated,
last_updated_price
from Regular_Gas),
cte1 as (
select Price, substring_index(replace(replace(replace(Station, "('", ' '), "'", ''), ")", ''), ', ', 1) as Gas_Station,
substring_index(replace(replace(replace(Station, "('", ' '), "'", ''), ")", ''), ', ', -1) as Gas_Station_Address,
Area,
substring_index(replace(replace(replace(Thanks, "('", ' '), "'", ''), ")", ''), ', ', 1) as Thanks_By,
substring_index(replace(replace(replace(Thanks, "('", ' '), "'", ''), ")", ''), ', ', -1) as thanks_last_updated,
last_updated_price
from midgrade_Gas),
cte2 as (
select Price, substring_index(replace(replace(replace(Station, "('", ' '), "'", ''), ")", ''), ', ', 1) as Gas_Station,
substring_index(replace(replace(replace(Station, "('", ' '), "'", ''), ")", ''), ', ', -1) as Gas_Station_Address,
Area,
substring_index(replace(replace(replace(Thanks, "('", ' '), "'", ''), ")", ''), ', ', 1) as Thanks_By,
substring_index(replace(replace(replace(Thanks, "('", ' '), "'", ''), ")", ''), ', ', -1) as thanks_last_updated,
last_updated_price
from premium_Gas),
cte3 as (
select Price, substring_index(replace(replace(replace(Station, "('", ' '), "'", ''), ")", ''), ', ', 1) as Gas_Station,
substring_index(replace(replace(replace(Station, "('", ' '), "'", ''), ")", ''), ', ', -1) as Gas_Station_Address,
Area,
substring_index(replace(replace(replace(Thanks, "('", ' '), "'", ''), ")", ''), ', ', 1) as Thanks_By,
substring_index(replace(replace(replace(Thanks, "('", ' '), "'", ''), ")", ''), ', ', -1) as thanks_last_updated,
last_updated_price
from diesel_fuel_Gas)
select distinct cte.Price as regular_price, cte1.price as midgrade_price, cte2.price as premium_price, cte3.price as diesel_fuel_price,
cte.gas_station, cte.gas_station_address, cte.Area, cte.last_updated_price
from cte
join cte1 on cte.gas_station = cte1.gas_station and cte.gas_station_address = cte1.gas_station_address
join cte2 on cte.gas_station = cte2.gas_station and cte.gas_station_address = cte2.gas_station_address
join cte3 on cte.gas_station = cte3.gas_station and cte.gas_station_address = cte3.gas_station_address
order by cte.last_updated_price desc
limit 10
;
-> It looks nicer now, after cleaning the CSV files
select min(price), max(price) from Regular_Gas;
-> Highest regular gas price is $8.55, lowest is $3.69.
select min(price), max(price) from Midgrade_Gas;
-> Highest midgrade gas price is $7.23, lowest is $3.75.
select min(price), max(price) from Premium_Gas;
-> Highest premium gas price is $7.39, lowest is $3.85.
select min(price), max(price) from Diesel_Fuel_Gas;
-> Highest diesel fuel gas price is $8.55, lowest is $3.79.
min(price), max(price)
from Regular_Gas
group by Gas_Station
limit 3;
min(price), max(price)
from Midgrade_gas
group by Gas_Station
limit 3;
select distinct substring_index(replace(replace(replace(Station, "('", ' '), "'", ''), ")", ''), ', ', 1) as Gas_Station,
min(price), max(price)
from Premium_gas
group by Gas_Station
limit 3;
select distinct substring_index(replace(replace(replace(Station, "('", ' '), "'", ''), ")", ''), ', ', 1) as Gas_Station,
min(price), max(price)
from Diesel_Fuel_gas
group by Gas_Station;
with cte as (select date_format(last_updated_price, '%Y/%m/%d %H') as formatted_date, round(avg(price), 2) as average_price,
lead(round(avg(price), 2)) over(order by date_format(last_updated_price, '%Y/%m/%d %H')) as next_value,
((lead(round(avg(price), 2)) over(order by date_format(last_updated_price, '%Y/%m/%d %H')) - round(avg(price), 2) ) * 100)/round(avg(price), 2)
as price_difference
from regular_gas
group by formatted_date
order by formatted_date)
select formatted_date, average_price, round(lag(price_difference) over(order by formatted_date), 2) as price_difference
from cte
order by formatted_date
;
-> The price went up and down quite frequently, which see its peak on Jun 9 at 3:00 PM (it went down by 6.5%)
with cte as (select date_format(last_updated_price, '%Y/%m/%d %H') as formatted_date, round(avg(price), 2) as average_price,
lead(round(avg(price), 2)) over(order by date_format(last_updated_price, '%Y/%m/%d %H')) as next_value,
((lead(round(avg(price), 2)) over(order by date_format(last_updated_price, '%Y/%m/%d %H')) - round(avg(price), 2) ) * 100)/round(avg(price), 2)
as price_difference
from regular_gas
group by formatted_date
order by formatted_date)
select round(avg(price_difference), 2) as average_price_difference
from cte;
-> On average, the price went down by 0.12%.
select distinct Area, round(avg(price), 0) as average_price_per_Area from Regular_Gas
group by Area
order by average_price_per_Area desc
;
-> Essex has the highest average regular gas price
select distinct Area, round(avg(price), 0) as average_price_per_Area from Midgrade_Gas
group by Area
order by average_price_per_Area desc
;
-> Vidal Junction has the highest average midgrade gas price
select distinct Area, round(avg(price), 0) as average_price_per_Area from Premium_Gas
group by Area
order by average_price_per_Area desc
;
-> Ludlow has the highest average premium gas price
select distinct Area, round(avg(price), 0) as average_price_per_Area from Diesel_Fuel_Gas
group by Area
order by average_price_per_Area desc
;
-> Essex has the highest average diesel fuel gas price
select substring_index(replace(replace(replace(Station, "('", ' '), "'", ''), ")", ''), ', ', 1) as Gas_Station, round(avg(price), 2) as
average_price_per_gas_brand
from Regular_Gas
where last_updated_price > now() - interval 12 hour
group by Gas_Station
order by average_price_per_gas_brand
limit 2
;
-> Foxxy Gasoline and Diamond Gas & Mart each has the lowest average regular gas
select substring_index(replace(replace(replace(Station, "('", ' '), "'", ''), ")", ''), ', ', 1) as Gas_Station, round(avg(price), 2) as
average_price_per_gas_brand
from Midgrade_Gas
where last_updated_price > now() - interval 12 hour
group by Gas_Station
order by average_price_per_gas_brand
limit 1
;
-> Fastrip has the lowest average midgrade gas
select substring_index(replace(replace(replace(Station, "('", ' '), "'", ''), ")", ''), ', ', 1) as Gas_Station, round(avg(price), 2) as
average_price_per_gas_brand
from Premium_Gas
where last_updated_price > now() - interval 12 hour
group by Gas_Station
order by average_price_per_gas_brand
limit 1
;
-> Fastrip has the lowest average premium gas
select substring_index(replace(replace(replace(Station, "('", ' '), "'", ''), ")", ''), ', ', 1) as Gas_Station, round(avg(price), 2) as
average_price_per_gas_brand
from Diesel_Fuel_Gas
where last_updated_price > now() - interval 12 hour
group by Gas_Station
order by average_price_per_gas_brand
limit 1
;
-> Wintun Mini Market has the lowest average diesel fuel gas
6. Which part of California sees the cheapeast average regular gas price, most expensive average regular gas price? Which gas station address sees most regular gas price fluctuation (highest delta), and the most price changes (more changes in price) in the last 12 hours according to this dataset?
select Area, round(avg(price), 2) as
average_price_per_gas_brand
from Regular_Gas
where last_updated_price > now() - interval 12 hour
group by Area
order by average_price_per_gas_brand
limit 1
;
-> Livermore has the cheapest average regular gas price
select Area, round(avg(price), 2) as
average_price_per_gas_brand
from Regular_Gas
where last_updated_price > now() - interval 12 hour
group by Area
order by average_price_per_gas_brand desc
limit 1
;
-> Vidal Junction has the most expensive average regular gas price
most regular gas price fluctuation (highest delta, if there are multiple deltas per gas station address then we will get the average of the deltas) gas station address in the last 12 hours (zero does not count because it means there's no changes)
with cte2 as (with cte1 as (with cte as (select distinct last_updated_price, substring_index(replace(replace(replace(Station, "('", ' '), "'", ''), ")", ''), ', ', 1) as Gas_Station,
substring_index(replace(replace(replace(Station, "('", ' '), "'", ''), ")", ''), ', ', -1) as Gas_Station_Address, price
from regular_gas
where last_updated_price > now() - interval 12 hour
order by gas_station, gas_station_address, last_updated_price)
select *, round(lead(price) over (partition by Gas_Station, Gas_Station_Address order by last_updated_price), 2) as next_value
from cte)
select *, round(next_value-price, 2) as delta from cte1)
select Gas_Station, Gas_Station_Address, avg(delta) as average_delta
from cte2
where delta != 0 and delta != -0
group by Gas_Station, Gas_Station_Address
order by average_delta desc
;
-> Fastrip on 195 W Hermosa St & N Sweetbriar Ave has the highest delta, it went down by $0.04.
Gas station address with the most price changes (i.e. number of deltas per Gas Station Address) in the last 12 hours
with cte2 as (with cte1 as (with cte as (select distinct last_updated_price, substring_index(replace(replace(replace(Station, "('", ' '), "'", ''), ")", ''), ', ', 1) as Gas_Station,
substring_index(replace(replace(replace(Station, "('", ' '), "'", ''), ")", ''), ', ', -1) as Gas_Station_Address, price
from regular_gas
where last_updated_price > now() - interval 12 hour
order by gas_station, gas_station_address, last_updated_price)
select *, round(lead(price) over (partition by Gas_Station, Gas_Station_Address order by last_updated_price), 2) as next_value
from cte)
select *, round(next_value-price, 2) as delta from cte1)
select Gas_Station, Gas_Station_Address, count(delta) as number_of_deltas
from cte2
where delta != 0 and delta != -0
group by Gas_Station, Gas_Station_Address
order by number_of_deltas desc
;
-> In the last 12 hours, only Fastrip on 195 W Hermosa St & N Sweetbriar Ave has one delta.