# The anagram logic lives in a function so it can be re-used by the check cells below.
def anagram_checker(word_a: str, word_b: str) -> bool:
    """Report whether two words or phrases are anagrams of each other.

    Two inputs are anagrams when one can be rearranged into the other using
    every original letter exactly once. The comparison ignores letter case
    and space characters (only ' ' is dropped; other whitespace is kept).

    Args:
        word_a: First word or phrase.
        word_b: Second word or phrase.

    Returns:
        True if the inputs are anagrams of each other, otherwise False.
    """
    # Normalise both inputs the same way (drop spaces, then lowercase) and
    # reduce each one to its sorted list of characters.
    letters_a, letters_b = (
        sorted("".join(text.split(" ")).lower()) for text in (word_a, word_b)
    )
    return letters_a == letters_b


# Run your code to check using the words below:
anagram_checker("Silent", "listen")
"anagram_checker(\"Silent\", \"Night\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "anagram_checker(\"night\", \"Thing\")" ] @@ -99,10 +148,42 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "def anagram_checker(word_a, word_b, is_case_sensitive):\n", - " # Modify your existing code here\n", + "# def anagram_checker(word_a, word_b, is_case_sensitive):\n", + "# # Modify your existing code here\n", + "\n", + "# This is a function, which we will learn more about next week. For testing purposes, we will write our code in the function\n", + "def anagram_checker(word_a: str, word_b: str, is_case_sensitive: bool) -> bool:\n", + " # Modify your existing code here\n", + " '''\n", + " - This function checks if word_a and word_b are anagrams of each other\n", + " - Anagrams are words or phrases that can be rearranged to make an entirely different word or phrase.\n", + " - with all the original letters \n", + " Args:\n", + " word_a : str, a word or phrase\n", + " word_b : str, words or phrases that can be rearranged to make an entirely different \n", + " is_case_sensitive: boolean, If True, the comparison is case-sensitive. 
\n", + " returns:\n", + " boolean: if word_a and word_b are anagrams returns True otherwise False \n", + " \n", + " '''\n", + " # convert the upper case to lower case\n", + "\n", + " if not is_case_sensitive:\n", + " word_a, word_b = word_a.replace(' ', '').lower(), word_b.replace(' ', '').lower()\n", + " return sorted(word_a) == sorted(word_b)\n", "\n", "# Run your code to check using the words below:\n", "anagram_checker(\"Silent\", \"listen\", False) # True" @@ -110,9 +191,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "anagram_checker(\"Silent\", \"Listen\", True) # False" ] @@ -130,7 +222,7 @@ ], "metadata": { "kernelspec": { - "display_name": "new-learner", + "display_name": "dsi_participant", "language": "python", "name": "python3" }, @@ -144,7 +236,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.9.15" } }, "nbformat": 4, diff --git a/02_activities/assignments/assignment_2.ipynb b/02_activities/assignments/assignment_2.ipynb index 482ac53d1..c3c846cad 100644 --- a/02_activities/assignments/assignment_2.ipynb +++ b/02_activities/assignments/assignment_2.ipynb @@ -1,339 +1,2221 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "QmzeljDgm4Ll" - }, - "source": [ - "# Assignment #2: Efficacy Analysis of a Hypothetical Arthritis Drug\n", - "\n", - "**Objective**: In this assignment, your task is to utilize Python programming skills to evaluate the effectiveness of a fictional medication designed to reduce inflammation caused by arthritis flare-ups.\n", - "\n", - "**Background**: Imagine a clinical trial where 60 patients were administered a new drug for arthritis. Data from this trial has been recorded in a series of CSV files. 
Evaluate the effectiveness of a fictional medication designed to reduce inflammation caused by arthritis flare-ups.\n", - "\n", - "**Data Structure**:\n", - "- Each CSV file corresponds to a specific check-in session with the patients.\n", - "- There are 12 such CSV files, reflecting 12 different sessions where patients reported their experiences.\n", - "- Inside each file:\n", - " - Rows: Each of the 60 rows represents a unique patient.\n", - " - Columns: Each of the 40 columns corresponds to a day, detailing the number of inflammation flare-ups the patient experienced on that day.\n", - "\n", - "**Your Role**: Analyze this data to determine how effective the new drug has been in managing arthritis inflammation across the trial period." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Submission Information\n", - "\n", - "🚨 **Please review our [Assignment Submission Guide](https://github.com/UofT-DSI/onboarding/blob/main/onboarding_documents/submissions.md)** 🚨 for detailed instructions on how to format, branch, and submit your work. Following these guidelines is crucial for your submissions to be evaluated correctly.\n", - "\n", - "### Submission Parameters:\n", - "* Submission Due Date: `11:59 PM - Dec 8, 2024`\n", - "* The branch name for your repo should be: `assignment-2`\n", - "* What to submit for this assignment:\n", - " * This Jupyter Notebook (assignment_2.ipynb) should be populated and should be the only change in your pull request.\n", - "* What the pull request link should look like for this assignment: `https://github.com//python/pull/`\n", - " * Open a private window in your browser. Copy and paste the link to your pull request into the address bar. Make sure you can see your pull request properly. 
This helps the technical facilitator and learning support staff review your submission easily.\n", - "\n", - "Checklist:\n", - "- [ ] Created a branch with the correct naming convention.\n", - "- [ ] Ensured that the repository is public.\n", - "- [ ] Reviewed the PR description guidelines and adhered to them.\n", - "- [ ] Verify that the link is accessible in a private browser window.\n", - "\n", - "If you encounter any difficulties or have questions, please don't hesitate to reach out to our team via our Slack at `#cohort-3-help`. Our Technical Facilitators and Learning Support staff are here to help you navigate any challenges." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yMsC2qsiKNP8" - }, - "source": [ - "**The file is located under `../05_src/data/assignment_2_data/`.**\n", - "\n", - "The filtered list has been made for you:\n", - "\n", - "```python\n", - "all_paths = [\n", - " \"../05_src/data/assignment_2_data/inflammation_01.csv\",\n", - " \"../05_src/data/assignment_2_data/inflammation_02.csv\",\n", - " \"../05_src/data/assignment_2_data/inflammation_03.csv\",\n", - " \"../05_src/data/assignment_2_data/inflammation_04.csv\",\n", - " \"../05_src/data/assignment_2_data/inflammation_05.csv\",\n", - " \"../05_src/data/assignment_2_data/inflammation_06.csv\",\n", - " \"../05_src/data/assignment_2_data/inflammation_07.csv\",\n", - " \"../05_src/data/assignment_2_data/inflammation_08.csv\",\n", - " \"../05_src/data/assignment_2_data/inflammation_09.csv\",\n", - " \"../05_src/data/assignment_2_data/inflammation_10.csv\",\n", - " \"../05_src/data/assignment_2_data/inflammation_11.csv\",\n", - " \"../05_src/data/assignment_2_data/inflammation_12.csv\"\n", - "]\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JhJAJb1m-nkn" - }, - "source": [ - "## 1. 
Reading and Displaying Data from the First File\n", - "\n", - "With the list of the relevant `inflammation_xx.csv` file paths above, write a program to read the `inflammation_xx.csv` files, and display the contents of the first file in this list.\n", - "\n", - "**Hint**: Remember to use appropriate Python file handling and data reading methods. If you need guidance on how to handle CSV files in Python, refer to the relevant sections in your Python learning resources." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "n0m48JsS-nMC" - }, - "outputs": [], - "source": [ - "with open(all_paths[0], 'r') as f:\n", - " # YOUR CODE HERE: Use the readline() or readlines() method to read the .csv file into 'contents'\n", - " \n", - " # YOUR CODE HERE: Iterate through 'contents' using a for loop and print each row for inspection" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sacozX7oB1VP" - }, - "source": [ - "## 2. Data Summarization Function: `patient_summary`\n", - "\n", - "Your next step is to create a function named `patient_summary` that will compute summary statistics for each patient's data over a 40-day period.\n", - "\n", - "**Function Specifications**:\n", - "- **Function Name**: `patient_summary`\n", - "- **Parameters**:\n", - " 1. `file_path`: A string representing the path to the CSV file containing the patient data.\n", - " 2. `operation`: A string specifying the type of summary operation to perform. Acceptable values are \"mean\", \"max\", or \"min\". 
This will determine whether the function calculates the average, maximum, or minimum number of flare-ups for each patient over the 40 days.\n", - "\n", - "**Expected Behavior**:\n", - "- Your function should read the data from the file at `file_path`.\n", - "- Perform the specified `operation` (mean, max, or min) to summarize the flare-ups data for each of the 60 patients.\n", - "- Return an array with 60 elements, each element being the result of the summary operation for a corresponding patient.\n", - "\n", - "**Expected Output**:\n", - "- The output should be an array with a length of 60, aligning with the number of patients in the study.\n", - "\n", - "**Hints for Implementation**:\n", - "1. **Utilizing NumPy**: For efficient data manipulation and computation, consider using NumPy, as discussed in the `10_numpy` slides.\n", - "2. **Output Shape**: Ensure that the shape of your output data matches the number of patients, which is 60." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "82-bk4CBB1w4" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "def patient_summary(file_path, operation):\n", - " # load the data from the file\n", - " data = np.loadtxt(fname=file_path, delimiter=',')\n", - " ax = 1 # this specifies that the operation should be done for each row (patient)\n", - "\n", - " # implement the specific operation based on the 'operation' argument\n", - " if operation == 'mean':\n", - " # YOUR CODE HERE: calculate the mean (average) number of flare-ups for each patient\n", - "\n", - " elif operation == 'max':\n", - " # YOUR CODE HERE: calculate the maximum number of flare-ups experienced by each patient\n", - "\n", - " elif operation == 'min':\n", - " # YOUR CODE HERE: calculate the minimum number of flare-ups experienced by each patient\n", - "\n", - " else:\n", - " # if the operation is not one of the expected values, raise an error\n", - " raise ValueError(\"Invalid operation. 
Please choose 'mean', 'max', or 'min'.\")\n", - "\n", - " return summary_values" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "3TYo0-1SDLrd" - }, - "outputs": [], - "source": [ - "# test it out on the data file we read in and make sure the size is what we expect i.e., 60\n", - "# Your output for the first file should be 60\n", - "data_min = patient_summary(all_paths[0], 'min')\n", - "print(len(data_min))" - ] - }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "QmzeljDgm4Ll" + }, + "source": [ + "# Assignment #2: Efficacy Analysis of a Hypothetical Arthritis Drug\n", + "\n", + "**Objective**: In this assignment, your task is to utilize Python programming skills to evaluate the effectiveness of a fictional medication designed to reduce inflammation caused by arthritis flare-ups.\n", + "\n", + "**Background**: Imagine a clinical trial where 60 patients were administered a new drug for arthritis. Data from this trial has been recorded in a series of CSV files. Evaluate the effectiveness of a fictional medication designed to reduce inflammation caused by arthritis flare-ups.\n", + "\n", + "**Data Structure**:\n", + "- Each CSV file corresponds to a specific check-in session with the patients.\n", + "- There are 12 such CSV files, reflecting 12 different sessions where patients reported their experiences.\n", + "- Inside each file:\n", + " - Rows: Each of the 60 rows represents a unique patient.\n", + " - Columns: Each of the 40 columns corresponds to a day, detailing the number of inflammation flare-ups the patient experienced on that day.\n", + "\n", + "**Your Role**: Analyze this data to determine how effective the new drug has been in managing arthritis inflammation across the trial period." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Submission Information\n", + "\n", + "🚨 **Please review our [Assignment Submission Guide](https://github.com/UofT-DSI/onboarding/blob/main/onboarding_documents/submissions.md)** 🚨 for detailed instructions on how to format, branch, and submit your work. Following these guidelines is crucial for your submissions to be evaluated correctly.\n", + "\n", + "### Submission Parameters:\n", + "* Submission Due Date: `11:59 PM - Dec 8, 2024`\n", + "* The branch name for your repo should be: `assignment-2`\n", + "* What to submit for this assignment:\n", + " * This Jupyter Notebook (assignment_2.ipynb) should be populated and should be the only change in your pull request.\n", + "* What the pull request link should look like for this assignment: `https://github.com//python/pull/`\n", + " * Open a private window in your browser. Copy and paste the link to your pull request into the address bar. Make sure you can see your pull request properly. This helps the technical facilitator and learning support staff review your submission easily.\n", + "\n", + "Checklist:\n", + "- [ ] Created a branch with the correct naming convention.\n", + "- [ ] Ensured that the repository is public.\n", + "- [ ] Reviewed the PR description guidelines and adhered to them.\n", + "- [ ] Verify that the link is accessible in a private browser window.\n", + "\n", + "If you encounter any difficulties or have questions, please don't hesitate to reach out to our team via our Slack at `#cohort-3-help`. Our Technical Facilitators and Learning Support staff are here to help you navigate any challenges." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yMsC2qsiKNP8" + }, + "source": [ + "**The file is located under `../05_src/data/assignment_2_data/`.**\n", + "\n", + "The filtered list has been made for you:\n", + "\n", + "```python\n", + "all_paths = [\n", + " \"../05_src/data/assignment_2_data/inflammation_01.csv\",\n", + " \"../05_src/data/assignment_2_data/inflammation_02.csv\",\n", + " \"../05_src/data/assignment_2_data/inflammation_03.csv\",\n", + " \"../05_src/data/assignment_2_data/inflammation_04.csv\",\n", + " \"../05_src/data/assignment_2_data/inflammation_05.csv\",\n", + " \"../05_src/data/assignment_2_data/inflammation_06.csv\",\n", + " \"../05_src/data/assignment_2_data/inflammation_07.csv\",\n", + " \"../05_src/data/assignment_2_data/inflammation_08.csv\",\n", + " \"../05_src/data/assignment_2_data/inflammation_09.csv\",\n", + " \"../05_src/data/assignment_2_data/inflammation_10.csv\",\n", + " \"../05_src/data/assignment_2_data/inflammation_11.csv\",\n", + " \"../05_src/data/assignment_2_data/inflammation_12.csv\"\n", + "]\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "all_paths = [\n", + " \"/05_src/data/assignment_2_data/inflammation_01.csv\",\n", + " \"/05_src/data/assignment_2_data/inflammation_02.csv\",\n", + " \"/05_src/data/assignment_2_data/inflammation_03.csv\",\n", + " \"/05_src/data/assignment_2_data/inflammation_04.csv\",\n", + " \"/05_src/data/assignment_2_data/inflammation_05.csv\",\n", + " \"/05_src/data/assignment_2_data/inflammation_06.csv\",\n", + " \"/05_src/data/assignment_2_data/inflammation_07.csv\",\n", + " \"/05_src/data/assignment_2_data/inflammation_08.csv\",\n", + " \"/05_src/data/assignment_2_data/inflammation_09.csv\",\n", + " \"/05_src/data/assignment_2_data/inflammation_10.csv\",\n", + " \"/05_src/data/assignment_2_data/inflammation_11.csv\",\n", + " \"/05_src/data/assignment_2_data/inflammation_12.csv\"\n", + "]" + ] + }, + { 
+ "cell_type": "markdown", + "metadata": { + "id": "JhJAJb1m-nkn" + }, + "source": [ + "## 1. Reading and Displaying Data from the First File\n", + "\n", + "With the list of the relevant `inflammation_xx.csv` file paths above, write a program to read the `inflammation_xx.csv` files, and display the contents of the first file in this list.\n", + "\n", + "**Hint**: Remember to use appropriate Python file handling and data reading methods. If you need guidance on how to handle CSV files in Python, refer to the relevant sections in your Python learning resources." + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "id": "n0m48JsS-nMC" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "I-5m0RvxFx4J" - }, - "source": [ - "## 3. Error Detection in Patient Data\n", - "\n", - "Your final task is to develop a function named `detect_problems` that identifies any irregularities in the patient data, specifically focusing on detecting patients with a mean inflammation score of 0.\n", - "\n", - "**Function Specifications**:\n", - "- **Function Name**: `detect_problems`\n", - "- **Parameter**:\n", - " - `file_path`: A string that specifies the path to the CSV file containing patient data.\n", - "\n", - "**Expected Behavior**:\n", - "- The function should read the patient data from the file at `file_path`.\n", - "- Utilize the previously defined `patient_summary()` function to calculate the mean inflammation for each patient.\n", - "- Employ an additional helper function `check_zeros(x)` (provided) to determine if there are any zero values in the array of mean inflammations.\n", - "- The `detect_problems()` function should return `True` if there is at least one patient with a mean inflammation score of 0, and `False` otherwise.\n", - "\n", - "**Hints for Implementation**:\n", - "1. 
Call `patient_summary(file_path, 'mean')` to get the mean inflammation scores for all patients.\n", - "2. Use `check_zeros()` to evaluate the mean scores. This helper function takes an array as input and returns `True` if it finds zero values in the array.\n", - "3. Based on the output from `check_zeros()`, the `detect_problems()` function should return `True` (indicating an issue) if any mean inflammation scores of 0 are found, or `False` if none are found.\n", - "\n", - "**Note**: This function is crucial for identifying potential data entry errors, such as healthy individuals being mistakenly included in the dataset or other data-related issues." - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "0,0,1,3,1,2,4,7,8,3,3,3,10,5,7,4,7,7,12,18,6,13,11,11,7,7,4,6,8,8,4,4,5,7,3,4,2,3,0,0\n", + "0,1,2,1,2,1,3,2,2,6,10,11,5,9,4,4,7,16,8,6,18,4,12,5,12,7,11,5,11,3,3,5,4,4,5,5,1,1,0,1\n", + "0,1,1,3,3,2,6,2,5,9,5,7,4,5,4,15,5,11,9,10,19,14,12,17,7,12,11,7,4,2,10,5,4,2,2,3,2,2,1,1\n", + "0,0,2,0,4,2,2,1,6,7,10,7,9,13,8,8,15,10,10,7,17,4,4,7,6,15,6,4,9,11,3,5,6,3,3,4,2,3,2,1\n", + "0,1,1,3,3,1,3,5,2,4,4,7,6,5,3,10,8,10,6,17,9,14,9,7,13,9,12,6,7,7,9,6,3,2,2,4,2,0,1,1\n", + "0,0,1,2,2,4,2,1,6,4,7,6,6,9,9,15,4,16,18,12,12,5,18,9,5,3,10,3,12,7,8,4,7,3,5,4,4,3,2,1\n", + "0,0,2,2,4,2,2,5,5,8,6,5,11,9,4,13,5,12,10,6,9,17,15,8,9,3,13,7,8,2,8,8,4,2,3,5,4,1,1,1\n", + "0,0,1,2,3,1,2,3,5,3,7,8,8,5,10,9,15,11,18,19,20,8,5,13,15,10,6,10,6,7,4,9,3,5,2,5,3,2,2,1\n", + "0,0,0,3,1,5,6,5,5,8,2,4,11,12,10,11,9,10,17,11,6,16,12,6,8,14,6,13,10,11,4,6,4,7,6,3,2,1,0,0\n", + "0,1,1,2,1,3,5,3,5,8,6,8,12,5,13,6,13,8,16,8,18,15,16,14,12,7,3,8,9,11,2,5,4,5,1,4,1,2,0,0\n", + "0,1,0,0,4,3,3,5,5,4,5,8,7,10,13,3,7,13,15,18,8,15,15,16,11,14,12,4,10,10,4,3,4,5,5,3,3,2,2,1\n", + "0,1,0,0,3,4,2,7,8,5,2,8,11,5,5,8,14,11,6,11,9,16,18,6,12,5,4,3,5,7,8,3,5,4,5,5,4,0,1,1\n", + "0,0,2,1,4,3,6,4,6,7,9,9,3,11,6,12,4,17,13,15,13,12,8,7,4,7,12,9,5,6,5,4,7,3,5,4,2,3,0,1\n", + 
"0,0,0,0,1,3,1,6,6,5,5,6,3,6,13,3,10,13,9,16,15,9,11,4,6,4,11,11,12,3,5,8,7,4,6,4,1,3,0,0\n", + "0,1,2,1,1,1,4,1,5,2,3,3,10,7,13,5,7,17,6,9,12,13,10,4,12,4,6,7,6,10,8,2,5,1,3,4,2,0,2,0\n", + "0,1,1,0,1,2,4,3,6,4,7,5,5,7,5,10,7,8,18,17,9,8,12,11,11,11,14,6,11,2,10,9,5,6,5,3,4,2,2,0\n", + "0,0,0,0,2,3,6,5,7,4,3,2,10,7,9,11,12,5,12,9,13,19,14,17,5,13,8,11,5,10,9,8,7,5,3,1,4,0,2,1\n", + "0,0,0,1,2,1,4,3,6,7,4,2,12,6,12,4,14,7,8,14,13,19,6,9,12,6,4,13,6,7,2,3,6,5,4,2,3,0,1,0\n", + "0,0,2,1,2,5,4,2,7,8,4,7,11,9,8,11,15,17,11,12,7,12,7,6,7,4,13,5,7,6,6,9,2,1,1,2,2,0,1,0\n", + "0,1,2,0,1,4,3,2,2,7,3,3,12,13,11,13,6,5,9,16,9,19,16,11,8,9,14,12,11,9,6,6,6,1,1,2,4,3,1,1\n", + "0,1,1,3,1,4,4,1,8,2,2,3,12,12,10,15,13,6,5,5,18,19,9,6,11,12,7,6,3,6,3,2,4,3,1,5,4,2,2,0\n", + "0,0,2,3,2,3,2,6,3,8,7,4,6,6,9,5,12,12,8,5,12,10,16,7,14,12,5,4,6,9,8,5,6,6,1,4,3,0,2,0\n", + "0,0,0,3,4,5,1,7,7,8,2,5,12,4,10,14,5,5,17,13,16,15,13,6,12,9,10,3,3,7,4,4,8,2,6,5,1,0,1,0\n", + "0,1,1,1,1,3,3,2,6,3,9,7,8,8,4,13,7,14,11,15,14,13,5,13,7,14,9,10,5,11,5,3,5,1,1,4,4,1,2,0\n", + "0,1,1,1,2,3,5,3,6,3,7,10,3,8,12,4,12,9,15,5,17,16,5,10,10,15,7,5,3,11,5,5,6,1,1,1,1,0,2,1\n", + "0,0,2,1,3,3,2,7,4,4,3,8,12,9,12,9,5,16,8,17,7,11,14,7,13,11,7,12,12,7,8,5,7,2,2,4,1,1,1,0\n", + "0,0,1,2,4,2,2,3,5,7,10,5,5,12,3,13,4,13,7,15,9,12,18,14,16,12,3,11,3,2,7,4,8,2,2,1,3,0,1,1\n", + "0,0,1,1,1,5,1,5,2,2,4,10,4,8,14,6,15,6,12,15,15,13,7,17,4,5,11,4,8,7,9,4,5,3,2,5,4,3,2,1\n", + "0,0,2,2,3,4,6,3,7,6,4,5,8,4,7,7,6,11,12,19,20,18,9,5,4,7,14,8,4,3,7,7,8,3,5,4,1,3,1,0\n", + "0,0,0,1,4,4,6,3,8,6,4,10,12,3,3,6,8,7,17,16,14,15,17,4,14,13,4,4,12,11,6,9,5,5,2,5,2,1,0,1\n", + "0,1,1,0,3,2,4,6,8,6,2,3,11,3,14,14,12,8,8,16,13,7,6,9,15,7,6,4,10,8,10,4,2,6,5,5,2,3,2,1\n", + "0,0,2,3,3,4,5,3,6,7,10,5,10,13,14,3,8,10,9,9,19,15,15,6,8,8,11,5,5,7,3,6,6,4,5,2,2,3,0,0\n", + "0,1,2,2,2,3,6,6,6,7,6,3,11,12,13,15,15,10,14,11,11,8,6,12,10,5,12,7,7,11,5,8,5,2,5,5,2,0,2,1\n", + 
"0,0,2,1,3,5,6,7,5,8,9,3,12,10,12,4,12,9,13,10,10,6,10,11,4,15,13,7,3,4,2,9,7,2,4,2,1,2,1,1\n", + "0,0,1,2,4,1,5,5,2,3,4,8,8,12,5,15,9,17,7,19,14,18,12,17,14,4,13,13,8,11,5,6,6,2,3,5,2,1,1,1\n", + "0,0,0,3,1,3,6,4,3,4,8,3,4,8,3,11,5,7,10,5,15,9,16,17,16,3,8,9,8,3,3,9,5,1,6,5,4,2,2,0\n", + "0,1,2,2,2,5,5,1,4,6,3,6,5,9,6,7,4,7,16,7,16,13,9,16,12,6,7,9,10,3,6,4,5,4,6,3,4,3,2,1\n", + "0,1,1,2,3,1,5,1,2,2,5,7,6,6,5,10,6,7,17,13,15,16,17,14,4,4,10,10,10,11,9,9,5,4,4,2,1,0,1,0\n", + "0,1,0,3,2,4,1,1,5,9,10,7,12,10,9,15,12,13,13,6,19,9,10,6,13,5,13,6,7,2,5,5,2,1,1,1,1,3,0,1\n", + "0,1,1,3,1,1,5,5,3,7,2,2,3,12,4,6,8,15,16,16,15,4,14,5,13,10,7,10,6,3,2,3,6,3,3,5,4,3,2,1\n", + "0,0,0,2,2,1,3,4,5,5,6,5,5,12,13,5,7,5,11,15,18,7,9,10,14,12,11,9,10,3,2,9,6,2,2,5,3,0,0,1\n", + "0,0,1,3,3,1,2,1,8,9,2,8,10,3,8,6,10,13,11,17,19,6,4,11,6,12,7,5,5,4,4,8,2,6,6,4,2,2,0,0\n", + "0,1,1,3,4,5,2,1,3,7,9,6,10,5,8,15,11,12,15,6,12,16,6,4,14,3,12,9,6,11,5,8,5,5,6,1,2,1,2,0\n", + "0,0,1,3,1,4,3,6,7,8,5,7,11,3,6,11,6,10,6,19,18,14,6,10,7,9,8,5,8,3,10,2,5,1,5,4,2,1,0,1\n", + "0,1,1,3,3,4,4,6,3,4,9,9,7,6,8,15,12,15,6,11,6,18,5,14,15,12,9,8,3,6,10,6,8,7,2,5,4,3,1,1\n", + "0,1,2,2,4,3,1,4,8,9,5,10,10,3,4,6,7,11,16,6,14,9,11,10,10,7,10,8,8,4,5,8,4,4,5,2,4,1,1,0\n", + "0,0,2,3,4,5,4,6,2,9,7,4,9,10,8,11,16,12,15,17,19,10,18,13,15,11,8,4,7,11,6,7,6,5,1,3,1,0,0,0\n", + "0,1,1,3,1,4,6,2,8,2,10,3,11,9,13,15,5,15,6,10,10,5,14,15,12,7,4,5,11,4,6,9,5,6,1,1,2,1,2,1\n", + "0,0,1,3,2,5,1,2,7,6,6,3,12,9,4,14,4,6,12,9,12,7,11,7,16,8,13,6,7,6,10,7,6,3,1,5,4,3,0,0\n", + "0,0,1,2,3,4,5,7,5,4,10,5,12,12,5,4,7,9,18,16,16,10,15,15,10,4,3,7,5,9,4,6,2,4,1,4,2,2,2,1\n", + "0,1,2,1,1,3,5,3,6,3,10,10,11,10,13,10,13,6,6,14,5,4,5,5,9,4,12,7,7,4,7,9,3,3,6,3,4,1,2,0\n", + "0,1,2,2,3,5,2,4,5,6,8,3,5,4,3,15,15,12,16,7,20,15,12,8,9,6,12,5,8,3,8,5,4,1,3,2,1,3,1,0\n", + "0,0,0,2,4,4,5,3,3,3,10,4,4,4,14,11,15,13,10,14,11,17,9,11,11,7,10,12,10,10,10,8,7,5,2,2,4,1,2,1\n", + 
"0,0,2,1,1,4,4,7,2,9,4,10,12,7,6,6,11,12,9,15,15,6,6,13,5,12,9,6,4,7,7,6,5,4,1,4,2,2,2,1\n", + "0,1,2,1,1,4,5,4,4,5,9,7,10,3,13,13,8,9,17,16,16,15,12,13,5,12,10,9,11,9,4,5,5,2,2,5,1,0,0,1\n", + "0,0,1,3,2,3,6,4,5,7,2,4,11,11,3,8,8,16,5,13,16,5,8,8,6,9,10,10,9,3,3,5,3,5,4,5,3,3,0,1\n", + "0,1,1,2,2,5,1,7,4,2,5,5,4,6,6,4,16,11,14,16,14,14,8,17,4,14,13,7,6,3,7,7,5,6,3,4,2,2,1,1\n", + "0,1,1,1,4,1,6,4,6,3,6,5,6,4,14,13,13,9,12,19,9,10,15,10,9,10,10,7,5,6,8,6,6,4,3,5,2,1,1,1\n", + "0,0,0,1,4,5,6,3,8,7,9,10,8,6,5,12,15,5,10,5,8,13,18,17,14,9,13,4,10,11,10,8,8,6,5,5,2,0,2,0\n", + "0,0,1,0,3,2,5,4,8,2,9,3,3,10,12,9,14,11,13,8,6,18,11,9,13,11,8,5,5,2,8,5,3,5,4,1,3,1,1,0\n" + ] }, { - "cell_type": "markdown", - "metadata": { - "id": "pb9EugDCJA4c" - }, - "source": [ - "**Understanding the `check_zeros(x)` Helper Function**\n", - "\n", - "The `check_zeros(x)` function is provided as a tool to assist with your data analysis. While you do not need to modify or fully understand the internal workings of this function, it's important to grasp its input, output, and what the output signifies:\n", - "\n", - "1. **Input**:\n", - " - **Parameter `x`**: This function takes an array of numbers as its input. In the context of your assignment, this array will typically represent a set of data points from your patient data, such as mean inflammation scores.\n", - "\n", - "2. **Output**:\n", - " - The function returns a boolean value: either `True` or `False`.\n", - "\n", - "3. **Interpreting the Output**:\n", - " - **Output is `True`**: This indicates that the array `x` contains at least one zero value. In the context of your analysis, this means that at least one patient has a mean inflammation score of 0, signaling a potential issue or anomaly in the data.\n", - " - **Output is `False`**: This signifies that there are no zero values in the array `x`. 
# Locate the repository root in an OS-independent way: step two directories
# up from the current working directory.
# NOTE(review): assumes the kernel's working directory is the notebook's own
# folder (02_activities/assignments/) - confirm when running from elsewhere.
main_dir = os.path.dirname(os.path.dirname(os.getcwd()))

# Entries in all_paths start with "/", so plain concatenation yields
# "<repo root>/05_src/data/assignment_2_data/inflammation_01.csv".
with open(main_dir + all_paths[0], 'r') as f:
    # Read the whole .csv file into 'contents', one string per row.
    contents = f.readlines()

# Print each row for inspection and collect its values as integers.
rows = []
for row in contents:
    row_strip = row.strip()
    if not row_strip:
        # Ignore blank lines (e.g. a trailing newline at the end of the file).
        continue
    print(row_strip)
    rows.append([int(value) for value in row_strip.split(',')])

# One DataFrame row per line of the file, one column per comma-separated value.
df = pd.DataFrame(rows)
df.shape
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...30313233343536373839
00013124783...4457342300
10121213226...3544551101
20113326259...10542232211
30020422167...3563342321
40113313524...9632242011
50012242164...8473544321
60022422558...8842354111
70012312353...4935253221
80003156558...4647632100
90112135358...2545141200
100100433554...4345533221
110100342785...8354554011
120021436467...5473542301
130000131665...5874641300
140121114152...8251342020
150110124364...10956534220
160000236574...9875314021
170001214367...2365423010
180021254278...6921122010
190120143227...6661124311
200113144182...3243154220
210023232638...8566143020
220003451778...4482651010
230111133263...5351144120
240111235363...5561111021
250021332744...8572241110
260012422357...7482213011
270011151522...9453254321
280022346376...7783541310
290001446386...6955252101
300110324686...10426552321
310023345367...3664522300
320122236667...5852552021
330021356758...2972421211
340012415523...5662352111
350003136434...3951654220
360122255146...6454634321
370112315122...9954421010
380103241159...5521111301
390113115537...2363354321
400002213455...2962253001
410013312189...4826642200
420113452137...5855612120
430013143678...10251542101
440113344634...10687254311
450122431489...5844524110
460023454629...6765131000
470113146282...6956112121
480013251276...10763154300
490012345754...4624142221
500121135363...7933634120
510122352456...8541321310
520002445333...10875224121
530021144729...7654142221
540121145445...4552251001
550013236457...3535453301
560112251742...7756342211
570111416463...8664352111
580001456387...10886552020
590010325482...8535413110
\n", + "

60 rows × 40 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 9 ... 30 31 32 33 34 35 36 \\\n", + "0 0 0 1 3 1 2 4 7 8 3 ... 4 4 5 7 3 4 2 \n", + "1 0 1 2 1 2 1 3 2 2 6 ... 3 5 4 4 5 5 1 \n", + "2 0 1 1 3 3 2 6 2 5 9 ... 10 5 4 2 2 3 2 \n", + "3 0 0 2 0 4 2 2 1 6 7 ... 3 5 6 3 3 4 2 \n", + "4 0 1 1 3 3 1 3 5 2 4 ... 9 6 3 2 2 4 2 \n", + "5 0 0 1 2 2 4 2 1 6 4 ... 8 4 7 3 5 4 4 \n", + "6 0 0 2 2 4 2 2 5 5 8 ... 8 8 4 2 3 5 4 \n", + "7 0 0 1 2 3 1 2 3 5 3 ... 4 9 3 5 2 5 3 \n", + "8 0 0 0 3 1 5 6 5 5 8 ... 4 6 4 7 6 3 2 \n", + "9 0 1 1 2 1 3 5 3 5 8 ... 2 5 4 5 1 4 1 \n", + "10 0 1 0 0 4 3 3 5 5 4 ... 4 3 4 5 5 3 3 \n", + "11 0 1 0 0 3 4 2 7 8 5 ... 8 3 5 4 5 5 4 \n", + "12 0 0 2 1 4 3 6 4 6 7 ... 5 4 7 3 5 4 2 \n", + "13 0 0 0 0 1 3 1 6 6 5 ... 5 8 7 4 6 4 1 \n", + "14 0 1 2 1 1 1 4 1 5 2 ... 8 2 5 1 3 4 2 \n", + "15 0 1 1 0 1 2 4 3 6 4 ... 10 9 5 6 5 3 4 \n", + "16 0 0 0 0 2 3 6 5 7 4 ... 9 8 7 5 3 1 4 \n", + "17 0 0 0 1 2 1 4 3 6 7 ... 2 3 6 5 4 2 3 \n", + "18 0 0 2 1 2 5 4 2 7 8 ... 6 9 2 1 1 2 2 \n", + "19 0 1 2 0 1 4 3 2 2 7 ... 6 6 6 1 1 2 4 \n", + "20 0 1 1 3 1 4 4 1 8 2 ... 3 2 4 3 1 5 4 \n", + "21 0 0 2 3 2 3 2 6 3 8 ... 8 5 6 6 1 4 3 \n", + "22 0 0 0 3 4 5 1 7 7 8 ... 4 4 8 2 6 5 1 \n", + "23 0 1 1 1 1 3 3 2 6 3 ... 5 3 5 1 1 4 4 \n", + "24 0 1 1 1 2 3 5 3 6 3 ... 5 5 6 1 1 1 1 \n", + "25 0 0 2 1 3 3 2 7 4 4 ... 8 5 7 2 2 4 1 \n", + "26 0 0 1 2 4 2 2 3 5 7 ... 7 4 8 2 2 1 3 \n", + "27 0 0 1 1 1 5 1 5 2 2 ... 9 4 5 3 2 5 4 \n", + "28 0 0 2 2 3 4 6 3 7 6 ... 7 7 8 3 5 4 1 \n", + "29 0 0 0 1 4 4 6 3 8 6 ... 6 9 5 5 2 5 2 \n", + "30 0 1 1 0 3 2 4 6 8 6 ... 10 4 2 6 5 5 2 \n", + "31 0 0 2 3 3 4 5 3 6 7 ... 3 6 6 4 5 2 2 \n", + "32 0 1 2 2 2 3 6 6 6 7 ... 5 8 5 2 5 5 2 \n", + "33 0 0 2 1 3 5 6 7 5 8 ... 2 9 7 2 4 2 1 \n", + "34 0 0 1 2 4 1 5 5 2 3 ... 5 6 6 2 3 5 2 \n", + "35 0 0 0 3 1 3 6 4 3 4 ... 3 9 5 1 6 5 4 \n", + "36 0 1 2 2 2 5 5 1 4 6 ... 6 4 5 4 6 3 4 \n", + "37 0 1 1 2 3 1 5 1 2 2 ... 9 9 5 4 4 2 1 \n", + "38 0 1 0 3 2 4 1 1 5 9 ... 
5 5 2 1 1 1 1 \n", + "39 0 1 1 3 1 1 5 5 3 7 ... 2 3 6 3 3 5 4 \n", + "40 0 0 0 2 2 1 3 4 5 5 ... 2 9 6 2 2 5 3 \n", + "41 0 0 1 3 3 1 2 1 8 9 ... 4 8 2 6 6 4 2 \n", + "42 0 1 1 3 4 5 2 1 3 7 ... 5 8 5 5 6 1 2 \n", + "43 0 0 1 3 1 4 3 6 7 8 ... 10 2 5 1 5 4 2 \n", + "44 0 1 1 3 3 4 4 6 3 4 ... 10 6 8 7 2 5 4 \n", + "45 0 1 2 2 4 3 1 4 8 9 ... 5 8 4 4 5 2 4 \n", + "46 0 0 2 3 4 5 4 6 2 9 ... 6 7 6 5 1 3 1 \n", + "47 0 1 1 3 1 4 6 2 8 2 ... 6 9 5 6 1 1 2 \n", + "48 0 0 1 3 2 5 1 2 7 6 ... 10 7 6 3 1 5 4 \n", + "49 0 0 1 2 3 4 5 7 5 4 ... 4 6 2 4 1 4 2 \n", + "50 0 1 2 1 1 3 5 3 6 3 ... 7 9 3 3 6 3 4 \n", + "51 0 1 2 2 3 5 2 4 5 6 ... 8 5 4 1 3 2 1 \n", + "52 0 0 0 2 4 4 5 3 3 3 ... 10 8 7 5 2 2 4 \n", + "53 0 0 2 1 1 4 4 7 2 9 ... 7 6 5 4 1 4 2 \n", + "54 0 1 2 1 1 4 5 4 4 5 ... 4 5 5 2 2 5 1 \n", + "55 0 0 1 3 2 3 6 4 5 7 ... 3 5 3 5 4 5 3 \n", + "56 0 1 1 2 2 5 1 7 4 2 ... 7 7 5 6 3 4 2 \n", + "57 0 1 1 1 4 1 6 4 6 3 ... 8 6 6 4 3 5 2 \n", + "58 0 0 0 1 4 5 6 3 8 7 ... 10 8 8 6 5 5 2 \n", + "59 0 0 1 0 3 2 5 4 8 2 ... 
8 5 3 5 4 1 3 \n", + "\n", + " 37 38 39 \n", + "0 3 0 0 \n", + "1 1 0 1 \n", + "2 2 1 1 \n", + "3 3 2 1 \n", + "4 0 1 1 \n", + "5 3 2 1 \n", + "6 1 1 1 \n", + "7 2 2 1 \n", + "8 1 0 0 \n", + "9 2 0 0 \n", + "10 2 2 1 \n", + "11 0 1 1 \n", + "12 3 0 1 \n", + "13 3 0 0 \n", + "14 0 2 0 \n", + "15 2 2 0 \n", + "16 0 2 1 \n", + "17 0 1 0 \n", + "18 0 1 0 \n", + "19 3 1 1 \n", + "20 2 2 0 \n", + "21 0 2 0 \n", + "22 0 1 0 \n", + "23 1 2 0 \n", + "24 0 2 1 \n", + "25 1 1 0 \n", + "26 0 1 1 \n", + "27 3 2 1 \n", + "28 3 1 0 \n", + "29 1 0 1 \n", + "30 3 2 1 \n", + "31 3 0 0 \n", + "32 0 2 1 \n", + "33 2 1 1 \n", + "34 1 1 1 \n", + "35 2 2 0 \n", + "36 3 2 1 \n", + "37 0 1 0 \n", + "38 3 0 1 \n", + "39 3 2 1 \n", + "40 0 0 1 \n", + "41 2 0 0 \n", + "42 1 2 0 \n", + "43 1 0 1 \n", + "44 3 1 1 \n", + "45 1 1 0 \n", + "46 0 0 0 \n", + "47 1 2 1 \n", + "48 3 0 0 \n", + "49 2 2 1 \n", + "50 1 2 0 \n", + "51 3 1 0 \n", + "52 1 2 1 \n", + "53 2 2 1 \n", + "54 0 0 1 \n", + "55 3 0 1 \n", + "56 2 1 1 \n", + "57 1 1 1 \n", + "58 0 2 0 \n", + "59 1 1 0 \n", + "\n", + "[60 rows x 40 columns]" ] - }, + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# pRINT SAMPLE DATAFRAME\n", + "pd.DataFrame(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "LEYPM5v4JT0i" - }, - "outputs": [], - "source": [ - "# Define your function `detect_problems` here\n", - "\n", - "def detect_problems(file_path):\n", - " #YOUR CODE HERE: use patient_summary() to get the means and check_zeros() to check for zeros in the means\n", - "\n", - " return" - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "inflammation_ 01 shape is (60, 40)\n", + "inflammation_ 02 shape is (60, 40)\n", + "inflammation_ 03 shape is (60, 40)\n", + "inflammation_ 04 shape is (60, 40)\n", + "inflammation_ 05 shape is (60, 40)\n", + 
"inflammation_ 06 shape is (60, 40)\n", + "inflammation_ 07 shape is (60, 40)\n", + "inflammation_ 08 shape is (60, 40)\n", + "inflammation_ 09 shape is (60, 40)\n", + "inflammation_ 10 shape is (60, 40)\n", + "inflammation_ 11 shape is (60, 40)\n", + "inflammation_ 12 shape is (60, 40)\n" + ] + } + ], + "source": [ + "# reading all together and converting into dataframe\n", + "df_all = []\n", + "for i in range(len(all_paths)):\n", + " df = []\n", + " with open(main_dir + all_paths[0], 'r') as f:\n", + " # YOUR CODE HERE: Use the readline() or readlines() method to read the .csv file into 'contents'\n", + " contents = f.readlines()\n", + " # YOUR CODE HERE: Iterate through 'contents' using a for loop and print each row for inspection\n", + " for row in contents:\n", + " row_strip = row.strip()\n", + " #print(row_strip)\n", + " df.append(map(int, row_strip.split(',')))\n", + "\n", + " df = pd.DataFrame(df)\n", + " df_all.append(df)\n", + "\n", + "for i, df in enumerate(df_all):\n", + " print(f'inflammation_ {i+1:02} shape is {df.shape}')\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sacozX7oB1VP" + }, + "source": [ + "## 2. Data Summarization Function: `patient_summary`\n", + "\n", + "Your next step is to create a function named `patient_summary` that will compute summary statistics for each patient's data over a 40-day period.\n", + "\n", + "**Function Specifications**:\n", + "- **Function Name**: `patient_summary`\n", + "- **Parameters**:\n", + " 1. `file_path`: A string representing the path to the CSV file containing the patient data.\n", + " 2. `operation`: A string specifying the type of summary operation to perform. Acceptable values are \"mean\", \"max\", or \"min\". 
This will determine whether the function calculates the average, maximum, or minimum number of flare-ups for each patient over the 40 days.\n", + "\n", + "**Expected Behavior**:\n", + "- Your function should read the data from the file at `file_path`.\n", + "- Perform the specified `operation` (mean, max, or min) to summarize the flare-ups data for each of the 60 patients.\n", + "- Return an array with 60 elements, each element being the result of the summary operation for a corresponding patient.\n", + "\n", + "**Expected Output**:\n", + "- The output should be an array with a length of 60, aligning with the number of patients in the study.\n", + "\n", + "**Hints for Implementation**:\n", + "1. **Utilizing NumPy**: For efficient data manipulation and computation, consider using NumPy, as discussed in the `10_numpy` slides.\n", + "2. **Output Shape**: Ensure that the shape of your output data matches the number of patients, which is 60." + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "id": "82-bk4CBB1w4" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "def patient_summary(file_path, operation):\n", + " # load the data from the file\n", + " data = np.loadtxt(fname=file_path, delimiter=',')\n", + " ax = 1 # this specifies that the operation should be done for each row (patient)\n", + "\n", + " # implement the specific operation based on the 'operation' argument\n", + " if operation == 'mean':\n", + " # YOUR CODE HERE: calculate the mean (average) number of flare-ups for each patient\n", + " summary_values = np.mean(data, axis = ax)\n", + "\n", + " elif operation == 'max':\n", + " # YOUR CODE HERE: calculate the maximum number of flare-ups experienced by each patient\n", + " summary_values = np.max(data, axis = ax)\n", + "\n", + " elif operation == 'min':\n", + " # YOUR CODE HERE: calculate the minimum number of flare-ups experienced by each patient\n", + " summary_values = np.min(data, axis = ax)\n", + "\n", + " 
else:\n", + " # if the operation is not one of the expected values, raise an error\n", + " raise ValueError(\"Invalid operation. Please choose 'mean', 'max', or 'min'.\")\n", + "\n", + " return summary_values" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": { + "id": "3TYo0-1SDLrd" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Test out your code here\n", - "# Your output for the first file should be False\n", - "print(detect_problems(all_paths[0]))" - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "60\n" + ] + } + ], + "source": [ + "# test it out on the data file we read in and make sure the size is what we expect i.e., 60\n", + "# Your output for the first file should be 60\n", + "data_min = patient_summary(main_dir + all_paths[0], 'min')\n", + "print(len(data_min))" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "j9SUzhiGuHhS" - }, - "source": [ - "| Criteria | Complete Criteria | Incomplete Criteria |\n", - "|------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------|\n", - "| **General Criteria** | | |\n", - "| Code Execution | All code cells execute without errors. | Any code cell produces an error upon execution. |\n", - "| Code Quality | Code is well-organized, concise, and includes necessary comments for clarity. | Code is unorganized, verbose, or lacks necessary comments. |\n", - "| Data Handling | Data files are correctly handled and processed. | Data files are not handled or processed correctly. 
|\n", - "| Adherence to Instructions | Follows all instructions and requirements as per the assignment. | Misses or incorrectly implements one or more of the assignment requirements. |\n", - "| **Specific Criteria** | | |\n", - "| 1. Reading in our files | Correctly prints out information from the first file. | Fails to print out information from the first file. |\n", - "| 2. Summarizing our data | Correctly defines `patient_summary()` function. Function processes data as per `operation` and outputs correctly shaped data (60 entries). | Incomplete or incorrect definition of `patient_summary()`. Incorrect implementation of operation or wrong output shape.|\n", - "| 3. Checking for Errors | Correctly defines `detect_problems()` function. Function uses `patient_summary()` and `check_zeros()` to identify mean inflammation of 0 accurately. | Incorrect definition or implementation of `detect_problems()` function. Fails to accurately identify mean inflammation of 0.|\n", - "| **Overall Assessment** | Meets all the general and specific criteria, indicating a strong understanding of the assignment objectives. 
| Fails to meet one or more of the general or specific criteria, indicating a need for further learning or clarification.|\n" - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "the inflammation_ 01 file length is 60\n", + "the inflammation_ 02 file length is 60\n", + "the inflammation_ 03 file length is 60\n", + "the inflammation_ 04 file length is 60\n", + "the inflammation_ 05 file length is 60\n", + "the inflammation_ 06 file length is 60\n", + "the inflammation_ 07 file length is 60\n", + "the inflammation_ 08 file length is 60\n", + "the inflammation_ 09 file length is 60\n", + "the inflammation_ 10 file length is 60\n", + "the inflammation_ 11 file length is 60\n", + "the inflammation_ 12 file length is 60\n" + ] + } + ], + "source": [ + "# Testing out all the datafiles\n", + "for i in range(len(all_paths)):\n", + " data_min = patient_summary(main_dir + all_paths[i], 'min')\n", + " print(f'the inflammation_ {i+1:02} file length is {len(data_min)}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "I-5m0RvxFx4J" + }, + "source": [ + "## 3. 
Error Detection in Patient Data\n", + "\n", + "Your final task is to develop a function named `detect_problems` that identifies any irregularities in the patient data, specifically focusing on detecting patients with a mean inflammation score of 0.\n", + "\n", + "**Function Specifications**:\n", + "- **Function Name**: `detect_problems`\n", + "- **Parameter**:\n", + " - `file_path`: A string that specifies the path to the CSV file containing patient data.\n", + "\n", + "**Expected Behavior**:\n", + "- The function should read the patient data from the file at `file_path`.\n", + "- Utilize the previously defined `patient_summary()` function to calculate the mean inflammation for each patient.\n", + "- Employ an additional helper function `check_zeros(x)` (provided) to determine if there are any zero values in the array of mean inflammations.\n", + "- The `detect_problems()` function should return `True` if there is at least one patient with a mean inflammation score of 0, and `False` otherwise.\n", + "\n", + "**Hints for Implementation**:\n", + "1. Call `patient_summary(file_path, 'mean')` to get the mean inflammation scores for all patients.\n", + "2. Use `check_zeros()` to evaluate the mean scores. This helper function takes an array as input and returns `True` if it finds zero values in the array.\n", + "3. Based on the output from `check_zeros()`, the `detect_problems()` function should return `True` (indicating an issue) if any mean inflammation scores of 0 are found, or `False` if none are found.\n", + "\n", + "**Note**: This function is crucial for identifying potential data entry errors, such as healthy individuals being mistakenly included in the dataset or other data-related issues." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pb9EugDCJA4c" + }, + "source": [ + "**Understanding the `check_zeros(x)` Helper Function**\n", + "\n", + "The `check_zeros(x)` function is provided as a tool to assist with your data analysis. 
While you do not need to modify or fully understand the internal workings of this function, it's important to grasp its input, output, and what the output signifies:\n", + "\n", + "1. **Input**:\n", + " - **Parameter `x`**: This function takes an array of numbers as its input. In the context of your assignment, this array will typically represent a set of data points from your patient data, such as mean inflammation scores.\n", + "\n", + "2. **Output**:\n", + " - The function returns a boolean value: either `True` or `False`.\n", + "\n", + "3. **Interpreting the Output**:\n", + " - **Output is `True`**: This indicates that the array `x` contains at least one zero value. In the context of your analysis, this means that at least one patient has a mean inflammation score of 0, signaling a potential issue or anomaly in the data.\n", + " - **Output is `False`**: This signifies that there are no zero values in the array `x`. For your patient data, it means no patient has a mean inflammation score of 0, and thus no apparent anomalies of this type were detected.\n", + "\n", + "**Usage in Your Analysis**:\n", + "When using `check_zeros(x)` in conjunction with your `patient_summary()` function in the `detect_problems()` function, you'll be checking whether any patient in your dataset has an average (mean) inflammation score of 0." + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "id": "_svDiRkdIwiT" + }, + "outputs": [], + "source": [ + "# Run this cell so you can use this helper function\n", + "\n", + "def check_zeros(x):\n", + " '''\n", + " Given an array, x, check whether any values in x equal 0.\n", + " Return True if any values found, else returns False.\n", + " '''\n", + " # np.where() checks every value in x against the condition (x == 0) and returns a tuple of indices where it was True (i.e. x was 0)\n", + " flag = np.where(x == 0)[0]\n", + "\n", + " # Checks if there are any objects in flag (i.e. 
not empty)\n", + " # If not empty, it found at least one zero so flag is True, and vice-versa.\n", + " return len(flag) > 0" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + "id": "LEYPM5v4JT0i" + }, + "outputs": [], + "source": [ + "# Define your function `detect_problems` here\n", + "\n", + "def detect_problems(file_path):\n", + " #YOUR CODE HERE: use patient_summary() to get the means and check_zeros() to check for zeros in the means\n", + " # Calculate the mean inflammation values for each row\n", + " avg_values = patient_summary(file_path, 'mean')\n", + " # Check for the zero values\n", + " avg_is_zero = check_zeros(avg_values)\n", + "\n", + " return avg_is_zero" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "TY_ppBzHvdak" - }, - "source": [ - "## References\n", - "\n", - "### Data Sources\n", - "- Software Carpentry. _Python Novice Inflammation Data_. http://swcarpentry.github.io/python-novice-inflammation/data/python-novice-inflammation-data.zip\n" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "False\n" + ] } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.8" + ], + "source": [ + "# Test out your code here\n", + "# Your output for the first file should be False\n", + "print(detect_problems(main_dir + all_paths[0]))" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The output for the 1 is False\n", + "The output for the 2 is False\n", + "The output for the 3 is True\n", + "The 
output for the 4 is False\n", + "The output for the 5 is False\n", + "The output for the 6 is False\n", + "The output for the 7 is False\n", + "The output for the 8 is True\n", + "The output for the 9 is False\n", + "The output for the 10 is False\n", + "The output for the 11 is True\n", + "The output for the 12 is False\n" + ] } + ], + "source": [ + "# To outputs of all the files are\n", + "for i in range(len(all_paths)):\n", + " print(f'The output for the {i+1} is {detect_problems(main_dir + all_paths[i])}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j9SUzhiGuHhS" + }, + "source": [ + "| Criteria | Complete Criteria | Incomplete Criteria |\n", + "|------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------|\n", + "| **General Criteria** | | |\n", + "| Code Execution | All code cells execute without errors. | Any code cell produces an error upon execution. |\n", + "| Code Quality | Code is well-organized, concise, and includes necessary comments for clarity. | Code is unorganized, verbose, or lacks necessary comments. |\n", + "| Data Handling | Data files are correctly handled and processed. | Data files are not handled or processed correctly. |\n", + "| Adherence to Instructions | Follows all instructions and requirements as per the assignment. | Misses or incorrectly implements one or more of the assignment requirements. |\n", + "| **Specific Criteria** | | |\n", + "| 1. Reading in our files | Correctly prints out information from the first file. | Fails to print out information from the first file. |\n", + "| 2. Summarizing our data | Correctly defines `patient_summary()` function. Function processes data as per `operation` and outputs correctly shaped data (60 entries). 
| Incomplete or incorrect definition of `patient_summary()`. Incorrect implementation of operation or wrong output shape.|\n", + "| 3. Checking for Errors | Correctly defines `detect_problems()` function. Function uses `patient_summary()` and `check_zeros()` to identify mean inflammation of 0 accurately. | Incorrect definition or implementation of `detect_problems()` function. Fails to accurately identify mean inflammation of 0.|\n", + "| **Overall Assessment** | Meets all the general and specific criteria, indicating a strong understanding of the assignment objectives. | Fails to meet one or more of the general or specific criteria, indicating a need for further learning or clarification.|\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TY_ppBzHvdak" + }, + "source": [ + "## References\n", + "\n", + "### Data Sources\n", + "- Software Carpentry. _Python Novice Inflammation Data_. http://swcarpentry.github.io/python-novice-inflammation/data/python-novice-inflammation-data.zip\n" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "dsi_participant", + "language": "python", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.15" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/12_03_2024.ipynb b/12_03_2024.ipynb new file mode 100644 index 000000000..26cb10aa2 --- /dev/null +++ b/12_03_2024.ipynb @@ -0,0 +1,334 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# list [] -> mutable and ordered\n", + "# tuples () -> immutable and ordered\n", + "# sets {} -> mutable, unordered and distinct\n", + "\n", + "# dictionaries {} -> mutable, ordered, key:value pairs, every 
key is unique, \n", + "# keys can be any immutable data type, values can be any data type\n", + "\n", + "capitals = {'Canada': 'Ottawa',\n", + " 'United States': 'Washington, D.C.',\n", + " 'Mexico': 'Mexico City'}" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Canada': 'Ottawa',\n", + " 'United States': 'Washington, D.C.',\n", + " 'Mexico': 'Mexico City'}" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "capitals" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{2020: 'Tokiyo', 2016: 'Reo de Janiero', 2012: 'London'}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "olympic_cities = {2020: \"Tokiyo\",\n", + " 2016: 'Reo de Janiero',\n", + " 2012 : 'London'}\n", + "olympic_cities" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'summer': {2020: 'Tokiyo', 2016: 'Reo de Janiero', 2021: 'London'},\n", + " 'winter': {2022: 'Beijing', 2018: 'Pyneangchanged'}}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_olympic_hosts = {'summer': olympic_cities,\n", + " 'winter': {2022: 'Beijing',\n", + " 2018: 'Pyneangchanged'}}\n", + "all_olympic_hosts" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "empty_dictionary = {} # this is the conventional method" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "still_empty = dict() # this also works" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 8, + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Reo de Janiero'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "olympic_cities[2016]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'London'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "olympic_cities[2012]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "2014", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0molympic_cities\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m2014\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;31mKeyError\u001b[0m: 2014" + ] + } + ], + "source": [ + "olympic_cities[2014]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{2020: 'Tokiyo', 2016: 'Reo de Janiero', 2021: 'London'}" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_olympic_hosts['summer']" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Tokiyo'" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_olympic_hosts['summer'][2020]" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{2020: 'Tokiyo', 2016: 'Reo de Janiero', 2021: 
'London'}" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_olympic_hosts.get('summer', 2020)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Athens'" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "olympic_cities.get(2004, 'Athens')" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "olympic_cities.get(2004)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "2016 in olympic_cities " + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "'Reo de Janiero' in olympic_cities" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dsi_participant", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.15" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Csestudy/2.png b/Csestudy/2.png new file mode 100644 index 000000000..451671603 Binary files /dev/null and b/Csestudy/2.png differ diff --git a/Csestudy/one.png b/Csestudy/one.png new file mode 100644 index 000000000..4a46b32cf Binary files /dev/null and b/Csestudy/one.png differ