Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion 02_activities/assignments/Assignment2.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,11 @@ The store wants to keep customer addresses. Propose two architectures for the CU
**HINT:** search type 1 vs type 2 slowly changing dimensions.

```
Your answer...
Architecture 1: Overwrite Changes (Type 1 SCD)
When a customer's address changes, the existing record is updated in place, overwriting the old data, so no history is retained. This architecture uses customer_id and address_id to identify the current address. It is ideal for operational needs such as shipping, or for other cases where you want the design to be storage-efficient and simple.

Architecture 2: Retain Changes (Type 2 SCD)
Each address change creates a new record, preserving history. This architecture uses customer_id and address_id, but also includes fields like effective_start, effective_end, and is_address_current to keep track of changes. This is more complex but keeps a history of records which might be useful for analytics, auditing, etc.
```

***
Expand Down
Binary file not shown.
120 changes: 102 additions & 18 deletions 02_activities/assignments/assignment2.sql
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@ HINT: keep the syntax the same, but edited the correct components with the strin
The `||` values concatenate the columns into strings.
Edit the appropriate columns -- you're making two edits -- and the NULL rows will be fixed.
All the other rows will remain the same.) */

-- Build one display string per product: "<name>,<size> (<quantity type>)".
-- `||` returns NULL as soon as any operand is NULL, so every nullable column
-- must be wrapped in COALESCE -- including product_qty_type.
-- NOTE(review): a blank for a missing size and 'unit' for a missing quantity
-- type are assumed from the assignment's "two edits" wording -- confirm.
SELECT
    COALESCE(product_name, '') || ',' ||
    COALESCE(product_size, '') || ' (' ||
    COALESCE(product_qty_type, 'unit') || ')' AS product_label
FROM product;


--Windowed Functions
Expand All @@ -31,18 +35,31 @@ You can either display all rows in the customer_purchases table, with the counte
each new market date for each customer, or select only the unique market dates per customer
(without purchase details) and number those visits.
HINT: One of these approaches uses ROW_NUMBER() and one uses DENSE_RANK(). */

-- Show every purchase row together with a per-customer visit counter.
-- DENSE_RANK gives all purchases made on the same market_date the same number
-- and increments by one on each new date, oldest visit = 1.
SELECT
    cp.*,
    DENSE_RANK() OVER (
        PARTITION BY cp.customer_id
        ORDER BY cp.market_date ASC
    ) AS visit_number
FROM customer_purchases AS cp;


/* 2. Reverse the numbering of the query from a part so each customer’s most recent visit is labeled 1,
then write another query that uses this one as a subquery (or temp table) and filters the results to
only the customer’s most recent visit. */


-- Step 1: number each customer's visits newest-first, so the most recent
-- market_date is labeled 1.
SELECT
    cp.*,
    DENSE_RANK() OVER (
        PARTITION BY cp.customer_id
        ORDER BY cp.market_date DESC
    ) AS visit_number
FROM customer_purchases AS cp;

-- Step 2: reuse the query above as a subquery and keep only the most recent visit.
-- DENSE_RANK (not ROW_NUMBER) is used so that every purchase made on the latest
-- market_date is kept; ROW_NUMBER would keep a single arbitrary row per customer.
SELECT ranked.*
FROM (
    SELECT
        cp.*,
        DENSE_RANK() OVER (
            PARTITION BY cp.customer_id
            ORDER BY cp.market_date DESC
        ) AS visit_number
    FROM customer_purchases AS cp
) AS ranked
WHERE ranked.visit_number = 1;

/* 3. Using a COUNT() window function, include a value along with each row of the
customer_purchases table that indicates how many different times that customer has purchased that product_id. */

-- Attach to every purchase row the number of purchase rows that share the
-- same customer_id and product_id.
-- COUNT(*) is the standard spelling; the zero-argument COUNT() form is not portable.
SELECT
    cp.*,
    COUNT(*) OVER (
        PARTITION BY cp.customer_id, cp.product_id
    ) AS times_purchased
FROM customer_purchases AS cp;


-- String manipulations
Expand All @@ -56,11 +73,19 @@ Remove any trailing or leading whitespaces. Don't just use a case statement for
| Habanero Peppers - Organic | Organic |

Hint: you might need to use INSTR(product_name,'-') to find the hyphens. INSTR will help split the column. */


-- Split product_name on its first hyphen and expose the trailing part as
-- "description"; names without a hyphen get NULL.
-- SUBSTR(..., INSTR(...) + 1) takes everything after the hyphen, and TRIM
-- removes the surrounding whitespace.
SELECT
    p.*,
    CASE
        WHEN INSTR(p.product_name, '-') > 0
            THEN TRIM(SUBSTR(p.product_name, INSTR(p.product_name, '-') + 1))
        ELSE NULL
    END AS description
FROM product AS p;

/* 2. Filter the query to show any product_size value that contain a number with REGEXP. */

-- Same hyphen split as the previous question, restricted to products whose
-- product_size contains at least one digit.
-- NOTE(review): REGEXP is not built into core SQLite; this relies on the client
-- (e.g. DB Browser for SQLite) registering a regexp() function -- confirm.
SELECT
    p.*,
    CASE
        WHEN INSTR(p.product_name, '-') > 0
            THEN TRIM(SUBSTR(p.product_name, INSTR(p.product_name, '-') + 1))
        ELSE NULL
    END AS description
FROM product AS p
WHERE p.product_size REGEXP '[0-9]';


-- UNION
Expand All @@ -72,8 +97,34 @@ HINT: There are a possibly a few ways to do this query, but if you're struggling
"best day" and "worst day";
3) Query the second temp table twice, once for the best day, once for the worst day,
with a UNION binding them. */


-- Step 1: total sales per market date.
-- Only the grouping key and the aggregate are selected; selecting * alongside
-- GROUP BY would carry arbitrary per-row values into the temp table.
DROP TABLE IF EXISTS temp.market_total_sales;
CREATE TABLE temp.market_total_sales AS
SELECT
    market_date,
    SUM(quantity * cost_to_customer_per_qty) AS total_sales
FROM customer_purchases
GROUP BY market_date;

-- Step 2: rank the dates from both ends so rank 1 marks the extremes.
-- RANK keeps ties, so several dates can share best_day = 1 or worst_day = 1.
DROP TABLE IF EXISTS temp.market_extreme_dates;
CREATE TABLE temp.market_extreme_dates AS
SELECT
    market_date,
    total_sales,
    RANK() OVER (ORDER BY total_sales DESC) AS best_day,
    RANK() OVER (ORDER BY total_sales ASC) AS worst_day
FROM temp.market_total_sales;

-- Step 3: stack the best and worst day(s) into one labeled result.
-- UNION ALL is sufficient: the extreme_type label differs between the two
-- branches, so there is nothing for UNION to de-duplicate.
SELECT
    market_date,
    total_sales,
    'best day' AS extreme_type
FROM temp.market_extreme_dates
WHERE best_day = 1

UNION ALL

SELECT
    market_date,
    total_sales,
    'worst day' AS extreme_type
FROM temp.market_extreme_dates
WHERE worst_day = 1;


/* SECTION 3 */
Expand All @@ -88,27 +139,43 @@ Remember, CROSS JOIN will explode your table rows, so CROSS JOIN should likely b
Think a bit about the row counts: how many distinct vendors, product names are there (x)?
How many customers are there (y).
Before your final group by you should have the product of those two queries (x*y). */


-- Revenue each vendor would make per product if every customer bought 5 units.
-- vendor_inventory holds one row per product per market date, so it is reduced
-- to distinct vendor/product/price rows BEFORE the CROSS JOIN; otherwise each
-- product is counted once per market date and the total is inflated.
-- NOTE(review): assumes original_price is constant per vendor/product; a price
-- that varies by date would still yield several rows per product -- confirm.
-- NOTE(review): the assignment hint mentions product names; this keeps the
-- original id columns -- confirm whether names should be joined in.
SELECT
    x.vendor_id,
    x.product_id,
    SUM(5 * x.original_price) AS total_revenue
FROM (
    SELECT
        vp.vendor_id,
        vp.product_id,
        vp.original_price
    FROM (
        SELECT DISTINCT
            vendor_id,
            product_id,
            original_price
        FROM vendor_inventory
    ) AS vp
    CROSS JOIN customer AS c
) AS x
GROUP BY x.vendor_id, x.product_id;

-- INSERT
/*1. Create a new table "product_units".
This table will contain only products where the `product_qty_type = 'unit'`.
It should use all of the columns from the product table, as well as a new column for the `CURRENT_TIMESTAMP`.
Name the timestamp column `snapshot_timestamp`. */

-- Snapshot of the product table restricted to unit-based products, with the
-- creation time recorded in snapshot_timestamp.
-- An equality test is used because the requirement is product_qty_type = 'unit';
-- LIKE '%unit%' would also match any other value that merely contains "unit".
DROP TABLE IF EXISTS product_units;
CREATE TABLE product_units AS
SELECT
    p.*,
    CURRENT_TIMESTAMP AS snapshot_timestamp
FROM product AS p
WHERE p.product_qty_type = 'unit';


/*2. Using `INSERT`, add a new row to the product_units table (with an updated timestamp).
This can be any product you desire (e.g. add another record for Apple Pie). */

-- Add one new row to product_units, stamped with the current time.
-- Values are positional: the five columns copied from product followed by
-- snapshot_timestamp.
-- NOTE(review): the first and fourth values are presumably numeric id columns,
-- so they are written as numeric literals instead of quoted strings -- confirm
-- against the product table definition.
INSERT INTO product_units
VALUES (28, 'Bubble Tea', 'cup', 8, 'unit', CURRENT_TIMESTAMP);


-- DELETE
/* 1. Delete the older record for the whatever product you added.

HINT: If you don't specify a WHERE clause, you are going to have a bad time.*/

-- Delete only the OLDER record(s) of the added product.
-- Rows are removed when their snapshot_timestamp is earlier than the newest
-- snapshot for that product, so the most recent record always survives.
-- Filtering on product_name alone would delete every record, including the newest.
DELETE FROM product_units
WHERE product_name = 'Bubble Tea'
  AND snapshot_timestamp < (
      SELECT MAX(newest.snapshot_timestamp)
      FROM product_units AS newest
      WHERE newest.product_name = 'Bubble Tea'
  );


-- UPDATE
Expand All @@ -128,6 +195,23 @@ Finally, make sure you have a WHERE statement to update the right row,
you'll need to use product_units.product_id to refer to the correct row within the product_units table.
When you have all of these components, you can run the update statement. */

-- Extend product_units with an integer column for the latest known quantity.
ALTER TABLE product_units ADD COLUMN current_quantity INT;



-- Set current_quantity for every row of product_units to the quantity recorded
-- on that product's most recent market_date in vendor_inventory.
-- No WHERE clause on the UPDATE: every row is intentionally refreshed; the
-- correlation on product_units.product_id picks the right value per row.
-- COALESCE wraps the whole scalar subquery so that a product with no
-- vendor_inventory rows gets 0; a COALESCE inside the subquery cannot help
-- when the subquery returns no row at all.
-- ORDER BY ... LIMIT 1 makes the choice deterministic when several vendors
-- have a row for the product on the same latest date.
UPDATE product_units
SET current_quantity = COALESCE(
    (
        SELECT vi.quantity
        FROM vendor_inventory AS vi
        WHERE vi.product_id = product_units.product_id
        ORDER BY vi.market_date DESC, vi.vendor_id ASC
        LIMIT 1
    ),
    0
);
Binary file modified 05_src/sql/farmersmarket.db
Binary file not shown.