|
| 1 | + /* |
| 2 | +
|
| 3 | + The following schema is a subset of a relational database of a grocery store |
| 4 | + chain. This chain sells many products of different product classes to its |
| 5 | + customers across its different stores. It also conducts many different |
| 6 | + promotion campaigns. |
| 7 | + |
| 8 | + The relationship between the four tables we want to analyze is depicted below: |
| 9 | + |
| 10 | + # sales # products |
| 11 | + +------------------+---------+ +---------------------+---------+ |
| 12 | + | product_id | INTEGER |>--------| product_id | INTEGER | |
| 13 | + | store_id | INTEGER | +---<| product_class_id | INTEGER | |
| 14 | + | customer_id | INTEGER | | | brand_name | VARCHAR | |
| 15 | + +---<| promotion_id | INTEGER | | | product_name | VARCHAR | |
| 16 | + | | store_sales | DECIMAL | | | is_low_fat_flg | TINYINT | |
| 17 | + | | store_cost | DECIMAL | | | is_recyclable_flg | TINYINT | |
| 18 | + | | units_sold | DECIMAL | | | gross_weight | DECIMAL | |
| 19 | + | | transaction_date | DATE | | | net_weight | DECIMAL | |
| 20 | + | +------------------+---------+ | +---------------------+---------+ |
| 21 | + | | |
| 22 | + | # promotions | # product_classes |
| 23 | + | +------------------+---------+ | +---------------------+---------+ |
| 24 | + +----| promotion_id | INTEGER | +----| product_class_id | INTEGER | |
| 25 | + | promotion_name | VARCHAR | | product_subcategory | VARCHAR | |
| 26 | + | media_type | VARCHAR | | product_category | VARCHAR | |
| 27 | + | cost | DECIMAL | | product_department | VARCHAR | |
| 28 | + | start_date | DATE | | product_family | VARCHAR | |
| 29 | + | end_date | DATE | +---------------------+---------+ |
| 30 | + +------------------+---------+ |
| 31 | +
|
| 32 | + Question 1 |
| 33 | +
|
| 34 | + -- What percent of all products in the grocery chain's catalog |
| 35 | + -- are both low fat and recyclable? |
| 36 | + |
| 37 | +
|
| 38 | + EXPECTED OUTPUT: |
| 39 | + Note: Please use the column name(s) specified in the expected output in your solution. |
| 40 | + +----------------------------+ |
| 41 | + | pct_low_fat_and_recyclable | |
| 42 | + +----------------------------+ |
| 43 | + | 15.384615384615385 | |
| 44 | + +----------------------------+ |
| 45 | +
|
| 46 | +Question 2 |
| 47 | +
|
| 48 | + -- What are the top five (ranked in decreasing order) |
| 49 | + -- single-channel media types that correspond to the most money |
| 50 | + -- the grocery chain had spent on its promotional campaigns? |
| 51 | +
|
| 52 | + Single Media Channel Types are promotions that contain only one media type. |
| 53 | +
|
| 54 | + EXPECTED OUTPUT: |
| 55 | + Note: Please use the column name(s) specified in the expected output in your solution. |
| 56 | + +---------------------------+------------+ |
| 57 | + | single_channel_media_type | total_cost | |
| 58 | + +---------------------------+------------+ |
| 59 | + | In-Store Coupon | 70800.0000 | |
| 60 | + | Street Handout | 70627.0000 | |
| 61 | + | Radio | 60192.0000 | |
| 62 | + | Sunday Paper | 56994.0000 | |
| 63 | + | Product Attachment | 50815.0000 | |
| 64 | + +---------------------------+------------+ |
| 65 | +
|
| 66 | +Question 3 |
| 67 | +
|
| 68 | + -- Of sales that had a valid promotion, the VP of marketing |
| 69 | + -- wants to know what % of transactions occur on either |
| 70 | + -- the very first day or the very last day of a promotion campaign. |
| 71 | + |
| 72 | + |
| 73 | + EXPECTED OUTPUT: |
| 74 | + Note: Please use the column name(s) specified in the expected output in your solution. |
| 75 | + +-------------------------------------------------------------+ |
| 76 | + | pct_of_transactions_on_first_or_last_day_of_valid_promotion | |
| 77 | + +-------------------------------------------------------------+ |
| 78 | + | 41.9047619047619048 | |
| 79 | + +-------------------------------------------------------------+ |
| 80 | +
|
| 81 | +
|
| 82 | +Question 4 |
| 83 | +
|
| 84 | +-- The CMO is interested in understanding how the sales of different |
| 85 | + -- product families are affected by promotional campaigns. |
| 86 | + -- To do so, for each of the available product families, |
| 87 | + -- show the total number of units sold, |
| 88 | + -- as well as the ratio of units sold that had a valid promotion |
| 89 | + -- to units sold without a promotion, |
| 90 | + -- ordered by increasing order of total units sold. |
| 91 | + |
| 92 | + |
| 93 | + EXPECTED OUTPUT: |
| 94 | + Note: Please use the column name(s) specified in the expected output in your solution. |
| 95 | + +----------------+------------------+--------------------------------------------------+ |
| 96 | + | product_family | total_units_sold | ratio_units_sold_with_promo_to_sold_without_promo| |
| 97 | + +----------------+------------------+--------------------------------------------------+ |
| 98 | + | Drink | 43.0000 | 0.79166666666666666667 | |
| 99 | + | Non-Consumable | 176.0000 | 0.76000000000000000000 | |
| 100 | + | Food | 564.0000 | 0.75155279503105590062 | |
| 101 | + +----------------+------------------+--------------------------------------------------+ |
| 102 | +
|
| 103 | +
|
| 104 | +Question 5 |
| 105 | +
|
| 106 | +-- The VP of Sales feels that some product categories don't sell |
| 107 | + -- and can be completely removed from the inventory. |
| 108 | + -- As a first pass analysis, they want you to find what percentage |
| 109 | + -- of product categories have never been sold. |
| 110 | + |
| 111 | + EXPECTED OUTPUT: |
| 112 | + Note: Please use the column name(s) specified in the expected output in your solution. |
| 113 | + +-----------------------------------+ |
| 114 | + | pct_product_categories_never_sold | |
| 115 | + +-----------------------------------+ |
| 116 | + | 13.8888888888888889 | |
| 117 | + +-----------------------------------+ |
| 118 | +
|
| 119 | + */ |
| 120 | + |
| 121 | + |
-- 1. Percentage of all products in the catalog that are both low fat
--    and recyclable.
--    The CASE has no ELSE, so non-matching rows yield NULL and are not
--    counted; multiplying by 100.0 forces non-integer division.
--    NULLIF turns an empty products table into a NULL result instead of a
--    division-by-zero error on engines that raise one.
select
    count(case when is_low_fat_flg = 1 and is_recyclable_flg = 1 then 1 end) * 100.0
        / nullif(count(*), 0) as pct_low_fat_and_recyclable
from products;
| 125 | + |
-- 2. Top five single-channel media types by total promotion cost,
--    highest spend first.
--    A media_type value containing a comma is treated as a combination of
--    several channels and is excluded.
select
    media_type as single_channel_media_type,
    sum(cost) as total_cost
from promotions
where media_type not like '%,%'
group by media_type
order by
    total_cost desc,
    single_channel_media_type  -- tie-break so the top five is deterministic
limit 5;
| 133 | + |
-- 3. Of the sales that match a promotion, the percentage whose
--    transaction_date equals that promotion's start_date or end_date.
--    The inner join keeps only sales whose promotion_id exists in promotions.
--    NOTE(review): query 4 in this file treats promotion_id = 0 as
--    "no promotion". If promotions also contains a row with promotion_id 0,
--    those sales are counted here as promoted -- confirm against the data.
--    NULLIF yields NULL rather than a division-by-zero error when no sale
--    matches any promotion.
select
    count(
        case
            when s.transaction_date = p.start_date
              or s.transaction_date = p.end_date
            then 1
        end
    ) * 100.0
        / nullif(count(*), 0) as pct_of_transactions_on_first_or_last_day_of_valid_promotion
from sales as s
inner join promotions as p
    on s.promotion_id = p.promotion_id;
| 138 | + |
| 139 | + |
-- 4. For each product family: total units sold, and the ratio of units sold
--    with a promotion to units sold without one, ordered by increasing
--    total units sold.
--    promotion_id = 0 is used as the "no promotion" marker; rows with a NULL
--    promotion_id fall into neither side of the ratio.
--    NULLIF yields a NULL ratio (instead of a division-by-zero error) for a
--    family whose unpromoted units sum to zero.
select
    pc.product_family,
    sum(s.units_sold) as total_units_sold,
    sum(case when s.promotion_id <> 0 then s.units_sold end) * 1.0
        / nullif(sum(case when s.promotion_id = 0 then s.units_sold end), 0)
        as ratio_units_sold_with_promo_to_sold_without_promo
from sales as s
inner join products as p
    on s.product_id = p.product_id
inner join product_classes as pc
    on pc.product_class_id = p.product_class_id
group by pc.product_family
order by total_units_sold;
| 147 | + |
| 148 | + |
-- 5. Percentage of product categories that never appear in any sale.
--    Each derived table returns exactly one row, so the cross join yields a
--    single result row:
--      catalog: number of distinct categories in product_classes
--      sold:    number of distinct categories reachable from sales through
--               products and product_classes
--    Both derived tables are aliased and no bare column is mixed with an
--    aggregate, so the statement is valid on engines that enforce those rules.
--    With no sales at all, sold_categories is 0 and the result is 100.
select
    (catalog.total_categories - sold.sold_categories) * 100.0
        / nullif(catalog.total_categories, 0) as pct_product_categories_never_sold
from (
    select count(distinct product_category) as total_categories
    from product_classes
) as catalog
cross join (
    select count(distinct pc.product_category) as sold_categories
    from sales as s
    inner join products as p
        on s.product_id = p.product_id
    inner join product_classes as pc
        on pc.product_class_id = p.product_class_id
) as sold;
0 commit comments