diff --git a/02_activities/assignments/Assignment2.md b/02_activities/assignments/Assignment2.md
index 5cbb4e70f..5655d8f75 100644
--- a/02_activities/assignments/Assignment2.md
+++ b/02_activities/assignments/Assignment2.md
@@ -54,7 +54,10 @@ The store wants to keep customer addresses. Propose two architectures for the CU
 **HINT:** search type 1 vs type 2 slowly changing dimensions.
 
 ```
-Your answer...
+Type 1 Slowly Changing Dimension (SCD Type 1) - overwrites history.
+Type 2 Slowly Changing Dimension (SCD Type 2) - retains history.
+SCD Type 1 (overwrite): Keep only the most recent address, update in place.
+SCD Type 2 (history): Keep all past addresses with start/end dates (or a current flag).
 ```
 
 ***
diff --git a/02_activities/assignments/ERD_1.png b/02_activities/assignments/ERD_1.png
new file mode 100644
index 000000000..d24f10e45
Binary files /dev/null and b/02_activities/assignments/ERD_1.png differ
diff --git a/02_activities/assignments/ERD_2.png b/02_activities/assignments/ERD_2.png
new file mode 100644
index 000000000..3acbec676
Binary files /dev/null and b/02_activities/assignments/ERD_2.png differ
diff --git a/02_activities/assignments/assignment2.sql b/02_activities/assignments/assignment2.sql
index 5ad40748a..7bdd514c2 100644
--- a/02_activities/assignments/assignment2.sql
+++ b/02_activities/assignments/assignment2.sql
@@ -20,6 +20,10 @@ The `||` values concatenate the columns into strings.
 Edit the appropriate columns -- you're making two edits -- and the NULL rows will be fixed.
 All the other rows will remain the same.) */
 
+-- COALESCE swaps a NULL product_size for '' and a NULL product_qty_type for 'unit'.
+SELECT
+    product_name || ', ' || COALESCE(product_size, '') || ' (' || COALESCE(product_qty_type, 'unit') || ')'
+FROM product;
 
 
 --Windowed Functions
@@ -32,17 +36,73 @@ each new market date for each customer, or select only the unique market dates p
 (without purchase details) and number those visits.
 HINT: One of these approaches uses ROW_NUMBER() and one uses DENSE_RANK(). */
 
+/* Both approaches from the prompt are included below. */
+
+-- Approach A: keep every purchase row. DENSE_RANK() gives rows that share a
+-- market_date the same number, so the counter advances only on a new market date.
+SELECT
+    customer_id,
+    market_date,
+    DENSE_RANK() OVER (PARTITION BY customer_id ORDER BY market_date) AS visit_number
+FROM customer_purchases;
+
+-- Approach B: collapse to one row per customer and market_date, then number those
+-- unique visits with ROW_NUMBER().
+SELECT
+    customer_id,
+    market_date,
+    ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY market_date) AS visit_number
+FROM customer_purchases
+GROUP BY customer_id, market_date;
+
 
 
 /* 2. Reverse the numbering of the query from a part so each customer’s most recent visit is labeled 1,
 then write another query that uses this one as a subquery (or temp table) and filters the results to
 only the customer’s most recent visit. */
 
+-- Same numbering as Approach A, but newest market_date first so the latest visit is 1.
+SELECT
+    customer_id,
+    market_date,
+    DENSE_RANK() OVER (
+        PARTITION BY customer_id
+        ORDER BY market_date DESC
+    ) AS visit_number
+FROM customer_purchases;
+
+-- Wrap the reversed numbering in a subquery and keep only visit_number = 1,
+-- i.e. the rows from each customer's most recent market date.
+SELECT
+    customer_id,
+    market_date,
+    visit_number
+FROM (
+    SELECT
+        customer_id,
+        market_date,
+        DENSE_RANK() OVER (
+            PARTITION BY customer_id
+            ORDER BY market_date DESC
+        ) AS visit_number
+    FROM customer_purchases
+) AS reversed_visits
+WHERE visit_number = 1;
 
 
 /* 3. Using a COUNT() window function, include a value along with each row of the
 customer_purchases table that indicates how many different times that customer has purchased that product_id. */
-
+-- No ORDER BY in the window, so every row of a customer/product pair carries the pair's full row count.
+SELECT
+    customer_id,
+    product_id,
+    market_date,
+    quantity,
+    cost_to_customer_per_qty,
+    COUNT(*) OVER (
+        PARTITION BY customer_id, product_id
+    ) AS times_purchased
+FROM customer_purchases;
 
 
 -- String manipulations
@@ -57,11 +117,26 @@ Remove any trailing or leading whitespaces. Don't just use a case statement for
 
 Hint: you might need to use INSTR(product_name,'-') to find the hyphens. INSTR will help split the column. */
 
+-- Text after the first hyphen, trimmed. The CASE has no ELSE, so a product_name
+-- without a hyphen yields NULL.
+SELECT
+    product_name,
+    TRIM(
+        CASE
+            WHEN INSTR(product_name, '-') > 0 THEN SUBSTR(product_name, INSTR(product_name, '-') + 1)
+        END
+    ) AS description
+FROM product;
 
 
 /* 2. Filter the query to show any product_size value that contain a number with REGEXP. */
 
 
+-- Rows whose product_size contains at least one digit.
+SELECT *
+FROM product
+WHERE product_size REGEXP '[0-9]';
+
 
 -- UNION
 /* 1. Using a UNION, write a query that displays the market dates with the highest and lowest total sales.
@@ -73,6 +148,37 @@ HINT: There are a possibly a few ways to do this query, but if you're struggling
 3) Query the second temp table twice, once for the best day, once for the worst day,
 with a UNION binding them. */
 
+-- Step 1: Aggregate sales by market_date
+WITH total_sales_per_day AS (
+    SELECT
+        market_date,
+        SUM(quantity * cost_to_customer_per_qty) AS total_sales
+    FROM customer_purchases
+    GROUP BY market_date
+),
+
+-- Step 2: Rank the days by total_sales
+ranked_sales AS (
+    SELECT
+        market_date,
+        total_sales,
+        RANK() OVER (ORDER BY total_sales DESC) AS rank_desc, -- highest sales first
+        RANK() OVER (ORDER BY total_sales ASC) AS rank_asc    -- lowest sales first
+    FROM total_sales_per_day
+)
+
+-- Step 3: Select best and worst days and bind them with a UNION.
+-- UNION ALL is enough: the two branches carry different day_type labels, so no row
+-- can appear twice and the de-duplication pass of a plain UNION would do nothing.
+SELECT market_date, total_sales, 'Best Day' AS day_type
+FROM ranked_sales
+WHERE rank_desc = 1
+
+UNION ALL
+
+SELECT market_date, total_sales, 'Worst Day' AS day_type
+FROM ranked_sales
+WHERE rank_asc = 1;
 
 
 
@@ -90,6 +196,30 @@ How many customers are there (y).
 Before your final group by you should have the product of those two queries (x*y). */
 
 
+-- vendor_inventory carries a market_date column, so a vendor/product pair presumably
+-- appears once per market date. It is reduced to distinct vendor/product/price rows
+-- first; joining the raw table would count a product once per date it was stocked.
+-- NOTE(review): assumes a vendor's original_price for a product does not change
+-- between market dates -- confirm against the data.
+-- The SUM runs across every customer row produced by the CROSS JOIN, so the result
+-- is the vendor's total for that product over all customers.
+SELECT
+    v.vendor_name,
+    p.product_name,
+    SUM(5 * vi.original_price) AS potential_revenue_per_customer
+FROM (
+    SELECT DISTINCT
+        vendor_id,
+        product_id,
+        original_price
+    FROM vendor_inventory
+) AS vi
+INNER JOIN vendor AS v ON vi.vendor_id = v.vendor_id
+INNER JOIN product AS p ON vi.product_id = p.product_id
+CROSS JOIN customer AS c -- every vendor/product sold to every customer
+GROUP BY
+    v.vendor_name,
+    p.product_name;
 
 -- INSERT
 /*1. Create a new table "product_units".
@@ -97,11 +227,45 @@ This table will contain only products where the `product_qty_type = 'unit'`.
 It should use all of the columns from the product table, as well as a new column for the `CURRENT_TIMESTAMP`.
 Name the timestamp column `snapshot_timestamp`. */
 
+-- Drop any earlier copy so the script can be re-run from the top.
+DROP TABLE IF EXISTS product_units;
+
+-- Snapshot of the 'unit' products, stamped with the time the snapshot was taken.
+CREATE TABLE product_units AS
+SELECT
+    product_id,
+    product_name,
+    product_size,
+    product_category_id,
+    product_qty_type,
+    CURRENT_TIMESTAMP AS snapshot_timestamp
+FROM product
+WHERE product_qty_type = 'unit';
+
 
 
 /*2. Using `INSERT`, add a new row to the product_units table (with an updated timestamp).
 This can be any product you desire (e.g. add another record for Apple Pie). */
 
+-- NOTE(review): product_id, size and category are hard-coded here; confirm they are
+-- the intended values for this 'Apple Pie' record.
+INSERT INTO product_units (
+    product_id,
+    product_name,
+    product_size,
+    product_category_id,
+    product_qty_type,
+    snapshot_timestamp
+)
+VALUES (
+    101,
+    'Apple Pie',
+    '1 Pie',
+    5,
+    'unit',
+    CURRENT_TIMESTAMP -- capture the timestamp of insertion
+);
+
 
 
 -- DELETE
@@ -109,7 +273,16 @@ This can be any product you desire (e.g. add another record for Apple Pie). */
 
 HINT: If you don't specify a WHERE clause, you are going to have a bad time.*/
 
-
+-- Remove every 'Apple Pie' row older than the newest one.
+-- NOTE(review): the comparison is strict, so rows sharing the latest
+-- snapshot_timestamp all survive.
+DELETE FROM product_units
+WHERE product_name = 'Apple Pie'
+  AND snapshot_timestamp < (
+      SELECT MAX(snapshot_timestamp)
+      FROM product_units
+      WHERE product_name = 'Apple Pie'
+  );
 
 -- UPDATE
 /* 1.We want to add the current_quantity to the product_units table.
@@ -129,5 +302,26 @@ Finally, make sure you have a WHERE statement to update the right row,
 When you have all of these components, you can run the update statement. */
 
 
+-- Step 1: Add the new column
+ALTER TABLE product_units
+ADD current_quantity INT;
+
+-- Step 2: Set current_quantity to the quantity on the product's latest market_date.
+-- COALESCE wraps the whole scalar subquery: when a product has no vendor_inventory
+-- row the subquery yields NULL, and that NULL becomes 0.
+-- No WHERE clause on purpose: every product_units row is recalculated.
+UPDATE product_units
+SET current_quantity = COALESCE(
+    (
+        SELECT vi.quantity
+        FROM vendor_inventory AS vi
+        WHERE vi.product_id = product_units.product_id
+        ORDER BY vi.market_date DESC
+        LIMIT 1
+    ),
+    0
+);
+
+SELECT * FROM product_units;
 
 