-
Notifications
You must be signed in to change notification settings - Fork 0
UofT-DSI | SQL - Assignment 2 #2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
ac5914f
e2baf21
4eccbf3
11b5111
f005c34
b7cf3ec
7022cc8
782bab6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,144 @@ | ||
| erDiagram | ||
| %% Sales: customers and employees are attached to orders; each order breaks down into per-book sales lines. | ||
| CUSTOMER ||--o{ "ORDER" : places | ||
| EMPLOYEE ||--o{ "ORDER" : handles | ||
| "ORDER" ||--o{ SALES : contains | ||
| BOOK ||--o{ SALES : "sold in" | ||
| DATE_DIM ||--o{ "ORDER" : "order date" | ||
|
|
||
| %% Catalogue: every book belongs to one category. | ||
| BOOK_CATEGORY ||--o{ BOOK : "categorizes" | ||
|
|
||
| %% Sourcing: BOOK_SUPPLIER links books to suppliers. | ||
| BOOK ||--o{ BOOK_SUPPLIER : "is supplied by" | ||
| SUPPLIER ||--o{ BOOK_SUPPLIER : "supplies" | ||
|
|
||
| %% Inventory: INVENTORY_TRANSACTION.order_id and .supplier_id are only filled for some txn types, | ||
| %% so the "ORDER" and SUPPLIER ends are zero-or-one. | ||
| BOOK ||--o{ INVENTORY_TRANSACTION : "movement" | ||
| DATE_DIM ||--o{ INVENTORY_TRANSACTION : "txn date" | ||
| "ORDER" |o--o{ INVENTORY_TRANSACTION : "sale movement" | ||
| SUPPLIER |o--o{ INVENTORY_TRANSACTION : "receipt source" | ||
|
|
||
| %% Scheduling: an employee is assigned to a shift on a calendar date. | ||
| EMPLOYEE ||--o{ EMPLOYEE_SHIFT : "assigned" | ||
| SHIFT ||--o{ EMPLOYEE_SHIFT : "shift info" | ||
| DATE_DIM ||--o{ EMPLOYEE_SHIFT : "shift date" | ||
|
|
||
| %% CUSTOMER: customer contact and address details, keyed by customer_id; "ORDER".customer_id refers back to it. | ||
| CUSTOMER { | ||
| int customer_id PK | ||
| string first_name | ||
| string last_name | ||
| string email | ||
| string phone | ||
| string loyalty_number | ||
| string address_line1 | ||
| string address_line2 | ||
| string city | ||
| string province_state | ||
| string postal_code | ||
| date created_at | ||
| } | ||
|
|
||
| %% EMPLOYEE: staff record keyed by employee_id; referenced by "ORDER".employee_id and EMPLOYEE_SHIFT.employee_id. | ||
| EMPLOYEE { | ||
| int employee_id PK | ||
| string first_name | ||
| string last_name | ||
| string role | ||
| date hire_date | ||
| string email | ||
| string phone | ||
| boolean is_active | ||
| } | ||
|
|
||
| %% SHIFT: shift definition (name plus start and end time); referenced by EMPLOYEE_SHIFT.shift_id. | ||
| SHIFT { | ||
| int shift_id PK | ||
| string shift_name "Morning, Evening" | ||
| time start_time | ||
| time end_time | ||
| } | ||
|
|
||
| %% EMPLOYEE_SHIFT: assignment of one employee to one shift on one DATE_DIM date (employee_id, shift_id, date_key). | ||
| EMPLOYEE_SHIFT { | ||
| int emp_shift_id PK | ||
| int employee_id FK | ||
| int shift_id FK | ||
| int date_key FK | ||
| boolean is_confirmed | ||
| string notes | ||
| } | ||
|
|
||
| %% "ORDER": order header linking a customer, the handling employee and an order date key, plus order-level amounts. | ||
| %% NOTE(review): the name is quoted throughout, presumably because ORDER is a reserved word -- confirm. | ||
| "ORDER" { | ||
| int order_id PK | ||
| int customer_id FK | ||
| int employee_id FK | ||
| int order_date_key FK | ||
| string order_status | ||
| string payment_method | ||
| decimal order_subtotal | ||
| decimal order_tax | ||
| decimal order_total | ||
| } | ||
|
|
||
| %% SALES: order line -- one book within one order, with its quantity, unit price, discount and line total. | ||
| SALES { | ||
| int sales_id PK | ||
| int order_id FK | ||
| int book_id FK | ||
| int quantity | ||
| decimal unit_price | ||
| decimal discount_amount | ||
| decimal line_total | ||
| } | ||
|
|
||
| %% BOOK: catalogue entry keyed by book_id; category_id refers to BOOK_CATEGORY. | ||
| BOOK { | ||
| int book_id PK | ||
| string isbn | ||
| string title | ||
| string author | ||
| int category_id FK | ||
| decimal list_price | ||
| date published_date | ||
| boolean is_active | ||
| } | ||
|
|
||
| %% BOOK_CATEGORY: category lookup referenced by BOOK.category_id. | ||
| BOOK_CATEGORY { | ||
| int category_id PK | ||
| string category_name | ||
| string description | ||
| } | ||
|
|
||
| %% SUPPLIER: supplier contact and address details; referenced by the supplier_id columns of BOOK_SUPPLIER and INVENTORY_TRANSACTION. | ||
| SUPPLIER { | ||
| int supplier_id PK | ||
| string supplier_name | ||
| string contact_name | ||
| string email | ||
| string phone | ||
| string address_line1 | ||
| string address_line2 | ||
| string city | ||
| string province_state | ||
| string postal_code | ||
| } | ||
|
|
||
| %% BOOK_SUPPLIER: junction between BOOK and SUPPLIER; the (book_id, supplier_id) pair is the composite primary key. | ||
| BOOK_SUPPLIER { | ||
| int book_id PK, FK | ||
| int supplier_id PK, FK | ||
| decimal supplier_price | ||
| int lead_time_days | ||
| boolean is_primary | ||
| } | ||
|
|
||
| %% INVENTORY_TRANSACTION: one stock movement for a book on a DATE_DIM date. | ||
| %% Per the attribute notes, order_id applies to sales and supplier_id (optional) to receipts. | ||
| INVENTORY_TRANSACTION { | ||
| int txn_id PK | ||
| int book_id FK | ||
| int date_key FK | ||
| string txn_type "RECEIPT, SALE, RETURN, ADJUSTMENT" | ||
| int quantity_change "positive or negative" | ||
| int order_id FK "for sales" | ||
| int supplier_id FK "for receipts (optional)" | ||
| string notes | ||
| } | ||
|
|
||
| %% DATE_DIM: calendar dimension keyed by date_key; referenced by "ORDER".order_date_key and the date_key columns of INVENTORY_TRANSACTION and EMPLOYEE_SHIFT. | ||
| DATE_DIM { | ||
| int date_key PK | ||
| date full_date | ||
| int year | ||
| int quarter | ||
| int month | ||
| string month_name | ||
| int day | ||
| string day_name | ||
| boolean is_weekend | ||
| boolean is_holiday | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,7 +20,15 @@ The `||` values concatenate the columns into strings. | |
| Edit the appropriate columns -- you're making two edits -- and the NULL rows will be fixed. | ||
| All the other rows will remain the same.) */ | ||
|
|
||
|
|
||
| -- Build "name, size (qty_type)" for every product. Each part is wrapped in COALESCE because | ||
| -- concatenating a NULL with || would make the whole string NULL. | ||
| SELECT | ||
|     COALESCE(p.product_name, '') | ||
|         || ', ' || COALESCE(p.product_size, '') | ||
|         || ' (' || COALESCE(p.product_qty_type, 'unit') || ')' AS product_details | ||
| FROM product AS p; | ||
|
|
||
| --Windowed Functions | ||
| /* 1. Write a query that selects from the customer_purchases table and numbers each customer’s | ||
|
|
@@ -32,18 +40,50 @@ each new market date for each customer, or select only the unique market dates p | |
| (without purchase details) and number those visits. | ||
| HINT: One of these approaches uses ROW_NUMBER() and one uses DENSE_RANK(). */ | ||
|
|
||
|
|
||
| -- Number each customer's distinct market dates in chronological order (earliest visit = 1). | ||
| WITH customer_visits AS ( | ||
|     -- One row per customer per market date, regardless of how many items were bought. | ||
|     SELECT DISTINCT | ||
|         cp.customer_id, | ||
|         cp.market_date | ||
|     FROM customer_purchases AS cp | ||
| ) | ||
| SELECT | ||
|     cv.customer_id, | ||
|     cv.market_date, | ||
|     DENSE_RANK() OVER ( | ||
|         PARTITION BY cv.customer_id | ||
|         ORDER BY cv.market_date | ||
|     ) AS visit_number | ||
| FROM customer_visits AS cv; | ||
|
|
||
| /* 2. Reverse the numbering of the query from the previous part so each customer’s most recent visit is labeled 1, | ||
| then write another query that uses this one as a subquery (or temp table) and filters the results to | ||
| only the customer’s most recent visit. */ | ||
|
|
||
|
|
||
| -- Rebuild the temp table on every run so the script can be re-executed in the same session. | ||
| DROP TABLE IF EXISTS temp.temp_customer_visits; | ||
|
|
||
| -- Number each customer's visits newest-first (most recent visit = 1). | ||
| -- The inner DISTINCT collapses multiple purchases on the same market date into one visit, | ||
| -- so visit_number counts visits rather than purchase rows. | ||
| CREATE TEMP TABLE temp_customer_visits AS | ||
| SELECT | ||
|     v.customer_id, | ||
|     v.market_date, | ||
|     ROW_NUMBER() OVER ( | ||
|         PARTITION BY v.customer_id | ||
|         ORDER BY v.market_date DESC | ||
|     ) AS visit_number | ||
| FROM ( | ||
|     SELECT DISTINCT customer_id, market_date | ||
|     FROM customer_purchases | ||
| ) AS v; | ||
|
|
||
| -- Keep only each customer's most recent visit. | ||
| SELECT | ||
|     customer_id, | ||
|     market_date | ||
| FROM temp_customer_visits | ||
| WHERE visit_number = 1; | ||
|
|
||
| /* 3. Using a COUNT() window function, include a value along with each row of the | ||
| customer_purchases table that indicates how many different times that customer has purchased that product_id. */ | ||
|
|
||
|
|
||
| -- Every customer_purchases row, plus the product name and the number of purchase rows | ||
| -- that share the same customer_id / product_id pair. | ||
| -- customer_id is not listed separately: cp.* already contains it, and selecting it twice | ||
| -- would produce two result columns with the same name. | ||
| SELECT | ||
|     cp.*, | ||
|     p.product_name, | ||
|     COUNT(*) OVER ( | ||
|         PARTITION BY cp.customer_id, cp.product_id | ||
|     ) AS times_customer_bought_product | ||
| FROM customer_purchases AS cp | ||
| INNER JOIN product AS p | ||
|     ON cp.product_id = p.product_id; | ||
|
|
||
| -- String manipulations | ||
| /* 1. Some product names in the product table have descriptions like "Jar" or "Organic". | ||
|
|
@@ -57,11 +97,16 @@ Remove any trailing or leading whitespaces. Don't just use a case statement for | |
|
|
||
| Hint: you might need to use INSTR(product_name,'-') to find the hyphens. INSTR will help split the column. */ | ||
|
|
||
|
|
||
| -- description = the text after the first hyphen in product_name, with surrounding whitespace trimmed. | ||
| -- INSTR returns 0 when there is no hyphen; NULLIF turns that 0 into NULL, and in SQLite both | ||
| -- NULL + 1 and SUBSTR(x, NULL) evaluate to NULL, so names without a hyphen get a NULL description. | ||
| SELECT | ||
| product_name, | ||
| TRIM(SUBSTR(product_name, NULLIF(INSTR(product_name, '-'), 0) + 1)) AS description | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the product name has no hyphen, student formula incorrectly extracts the entire product_name instead of returning NULL, producing wrong descriptions. |
||
| FROM product; | ||
|
|
||
| /* 2. Filter the query to show any product_size values that contain a number, using REGEXP. */ | ||
|
|
||
|
|
||
| -- Products whose product_size text contains at least one digit. | ||
| SELECT p.* | ||
| FROM product AS p | ||
| WHERE p.product_size REGEXP '[0-9]'; | ||
|
|
||
| -- UNION | ||
| /* 1. Using a UNION, write a query that displays the market dates with the highest and lowest total sales. | ||
|
|
@@ -72,8 +117,36 @@ HINT: There are a possibly a few ways to do this query, but if you're struggling | |
| "best day" and "worst day"; | ||
| 3) Query the second temp table twice, once for the best day, once for the worst day, | ||
| with a UNION binding them. */ | ||
|
|
||
|
|
||
| -- Market dates with the highest and lowest total sales, labelled 'Best day' / 'Worst day'. | ||
| -- Step 1: total sales per market date. | ||
| WITH daily_sales AS ( | ||
|     SELECT | ||
|         market_date, | ||
|         SUM(quantity * cost_to_customer_per_qty) AS total_sales | ||
|     FROM customer_purchases | ||
|     GROUP BY market_date | ||
| ), | ||
| -- Step 2: rank every date from both ends. RANK() gives tied dates the same rank, | ||
| -- so all dates tied for best or worst are returned. | ||
| ranked_sales AS ( | ||
|     SELECT | ||
|         market_date, | ||
|         total_sales, | ||
|         RANK() OVER (ORDER BY total_sales DESC) AS best_rank, | ||
|         RANK() OVER (ORDER BY total_sales ASC) AS worst_rank | ||
|     FROM daily_sales | ||
| ) | ||
| -- Step 3: pick rank 1 from each end and bind the two result sets together. | ||
| SELECT | ||
|     market_date, | ||
|     total_sales, | ||
|     'Best day' AS day_type | ||
| FROM ranked_sales | ||
| WHERE best_rank = 1 | ||
| UNION | ||
| SELECT | ||
|     market_date, | ||
|     total_sales, | ||
|     'Worst day' AS day_type | ||
| FROM ranked_sales | ||
| WHERE worst_rank = 1; | ||
|
|
||
|
|
||
| /* SECTION 3 */ | ||
|
|
@@ -88,27 +161,49 @@ Remember, CROSS JOIN will explode your table rows, so CROSS JOIN should likely b | |
| Think a bit about the row counts: how many distinct vendors, product names are there (x)? | ||
| How many customers are there (y)? | ||
| Before your final group by you should have the product of those two queries (x*y). */ | ||
|
|
||
|
|
||
| -- Revenue each vendor would make per product if every customer bought 5 units of it. | ||
| -- The distinct vendor/product/price rows are cross joined with one row per customer, | ||
| -- then summed per vendor and product. | ||
| SELECT | ||
|     v.vendor_name, | ||
|     p.product_name, | ||
|     SUM(5 * vi.original_price) AS total_revenue | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SUM(5 * vi.original_price) AS total_revenue |
||
| FROM ( | ||
|     SELECT DISTINCT | ||
|         vendor_id, | ||
|         product_id, | ||
|         original_price | ||
|     FROM vendor_inventory | ||
| ) AS vi | ||
| -- One row per customer, so the join multiplies every vendor/product row by the customer count. | ||
| CROSS JOIN ( | ||
|     SELECT DISTINCT customer_id | ||
|     FROM customer | ||
| ) AS c | ||
| INNER JOIN vendor AS v | ||
|     ON vi.vendor_id = v.vendor_id | ||
| INNER JOIN product AS p | ||
|     ON vi.product_id = p.product_id | ||
| GROUP BY | ||
|     v.vendor_name, | ||
|     p.product_name; | ||
|
|
||
| -- INSERT | ||
| /*1. Create a new table "product_units". | ||
| This table will contain only products where the `product_qty_type = 'unit'`. | ||
| It should use all of the columns from the product table, as well as a new column for the `CURRENT_TIMESTAMP`. | ||
| Name the timestamp column `snapshot_timestamp`. */ | ||
|
|
||
|
|
||
| -- Snapshot of every product sold by the 'unit', stamped with the time the table is created. | ||
| CREATE TABLE product_units AS | ||
| SELECT | ||
|     p.*, | ||
|     CURRENT_TIMESTAMP AS snapshot_timestamp | ||
| FROM product AS p | ||
| WHERE p.product_qty_type = 'unit'; | ||
|
|
||
| /*2. Using `INSERT`, add a new row to the product_units table (with an updated timestamp). | ||
| This can be any product you desire (e.g. add another record for Apple Pie). */ | ||
|
|
||
| -- Add one extra 'unit' product row, stamped with the time of the insert. | ||
| INSERT INTO product_units ( | ||
|     product_id, | ||
|     product_name, | ||
|     product_size, | ||
|     product_category_id, | ||
|     product_qty_type, | ||
|     snapshot_timestamp | ||
| ) | ||
| VALUES ( | ||
|     9999, | ||
|     'Test Product', | ||
|     '1 unit', | ||
|     1, | ||
|     'unit', | ||
|     CURRENT_TIMESTAMP | ||
| ); | ||
|
|
||
|
|
||
| -- DELETE | ||
| /* 1. Delete the older record for whatever product you added. | ||
|
|
||
| HINT: If you don't specify a WHERE clause, you are going to have a bad time.*/ | ||
|
|
||
| -- Remove the row(s) for the test product (product_id 9999). | ||
| -- The terminating semicolon is required so the next statement in the script parses. | ||
| DELETE FROM product_units | ||
| WHERE product_id = 9999; | ||
|
|
||
|
|
||
| -- UPDATE | ||
|
|
@@ -128,6 +223,25 @@ Finally, make sure you have a WHERE statement to update the right row, | |
| you'll need to use product_units.product_id to refer to the correct row within the product_units table. | ||
| When you have all of these components, you can run the update statement. */ | ||
|
|
||
| -- Add the column that will hold each product's latest inventory quantity. | ||
| ALTER TABLE product_units | ||
|     ADD COLUMN current_quantity INT; | ||
|
|
||
| -- Rank each product's vendor_inventory rows newest-first by market_date, then copy the | ||
| -- quantity of the newest row into product_units. The UPDATE has no WHERE clause of its own, | ||
| -- so every row is set; the correlated subquery selects the matching product, and COALESCE | ||
| -- stores 0 when no quantity is found. | ||
| WITH ranked_inventory AS ( | ||
|     SELECT | ||
|         vi.product_id, | ||
|         vi.quantity, | ||
|         ROW_NUMBER() OVER ( | ||
|             PARTITION BY vi.product_id | ||
|             ORDER BY vi.market_date DESC | ||
|         ) AS recency_rank | ||
|     FROM vendor_inventory AS vi | ||
| ) | ||
| UPDATE product_units | ||
| SET current_quantity = COALESCE( | ||
|     ( | ||
|         SELECT ri.quantity | ||
|         FROM ranked_inventory AS ri | ||
|         WHERE ri.product_id = product_units.product_id | ||
|           AND ri.recency_rank = 1 | ||
|     ), | ||
|     0 | ||
| ); | ||
|
|
||
| -- Reference query: show the final contents of product_units. | ||
| SELECT pu.* | ||
| FROM product_units AS pu; | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good attempt. The question requires reversing the numbering based on the distinct visit list from above. Using the raw customer_purchases table here may produce duplicate rows and incorrect visit numbering. Consider this:
-- One row per customer per market date.
WITH distinct_visits AS (
    SELECT DISTINCT
        customer_id,
        market_date
    FROM customer_purchases
),
-- Number each customer's visits newest-first (most recent visit = 1).
visits AS (
    SELECT
        dv.customer_id,
        dv.market_date,
        ROW_NUMBER() OVER (
            PARTITION BY dv.customer_id
            ORDER BY dv.market_date DESC
        ) AS visit_number
    FROM distinct_visits AS dv
)
SELECT
    customer_id,
    market_date
FROM visits
WHERE visit_number = 1;