add notebook w/ examples

balapriyac · web-flow · commit 8fa134e23852 · 2025-07-06T21:19:12.000+05:30
diff --git a/optimize-python-code/optimize_python_code.ipynb b/optimize-python-code/optimize_python_code.ipynb
@@ -0,0 +1,385 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": 5,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Nwd80bTrGYRD",
+        "outputId": "b6dc558c-fe9a-4bf2-c547-05dec0f3987f"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Loop time: 0.0840 seconds\n"
+          ]
+        }
+      ],
+      "source": [
+        "import time\n",
+        "\n",
+        "def square_numbers_loop(numbers):\n",
+        "    result = []\n",
+        "    for num in numbers:\n",
+        "        result.append(num ** 2)\n",
+        "    return result\n",
+        "\n",
+        "# Let's test this with 1,000,000 numbers to see the performance\n",
+        "test_numbers = list(range(1000000))\n",
+        "\n",
+        "start_time = time.time()\n",
+        "squared_loop = square_numbers_loop(test_numbers)\n",
+        "loop_time = time.time() - start_time\n",
+        "print(f\"Loop time: {loop_time:.4f} seconds\")\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def square_numbers_comprehension(numbers):\n",
+        "    return [num ** 2 for num in numbers]  # Create the entire list in one line\n",
+        "\n",
+        "start_time = time.time()\n",
+        "squared_comprehension = square_numbers_comprehension(test_numbers)\n",
+        "comprehension_time = time.time() - start_time\n",
+        "print(f\"Comprehension time: {comprehension_time:.4f} seconds\")\n",
+        "print(f\"Improvement: {loop_time / comprehension_time:.2f}x faster\")\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Gjd_ofARGaSb",
+        "outputId": "e6459964-6439-46e4-ff11-1ea7c715e75d"
+      },
+      "execution_count": 6,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Comprehension time: 0.0736 seconds\n",
+            "Improvement: 1.14x faster\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def find_common_elements_list(list1, list2):\n",
+        "    common = []\n",
+        "    for item in list1:  # Go through each item in the first list\n",
+        "        if item in list2:  # Check if it exists in the second list\n",
+        "            common.append(item)  # If yes, add it to our common list\n",
+        "    return common\n",
+        "\n",
+        "# Test with reasonably large lists\n",
+        "large_list1 = list(range(10000))\n",
+        "large_list2 = list(range(5000, 15000))\n",
+        "\n",
+        "start_time = time.time()\n",
+        "common_list = find_common_elements_list(large_list1, large_list2)\n",
+        "list_time = time.time() - start_time\n",
+        "print(f\"List approach time: {list_time:.4f} seconds\")\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "1vQ_oPOcGgXn",
+        "outputId": "9c334eeb-a4b4-4529-f2a8-bc1b7b18d10b"
+      },
+      "execution_count": 7,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "List approach time: 0.8478 seconds\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def find_common_elements_set(list1, list2):\n",
+        "    set2 = set(list2)  # Convert list to a set (one-time cost)\n",
+        "    return [item for item in list1 if item in set2]  # Check membership in set\n",
+        "\n",
+        "start_time = time.time()\n",
+        "common_set = find_common_elements_set(large_list1, large_list2)\n",
+        "set_time = time.time() - start_time\n",
+        "print(f\"Set approach time: {set_time:.4f} seconds\")\n",
+        "print(f\"Improvement: {list_time / set_time:.2f}x faster\")\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "lTAADi1gHHOq",
+        "outputId": "e7f77a7e-8cd6-49b4-cfb7-e41905812be6"
+      },
+      "execution_count": 8,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Set approach time: 0.0010 seconds\n",
+            "Improvement: 863.53x faster\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def calculate_sum_manual(numbers):\n",
+        "    total = 0\n",
+        "    for num in numbers:\n",
+        "        total += num\n",
+        "    return total\n",
+        "\n",
+        "def find_max_manual(numbers):\n",
+        "    max_val = numbers[0]\n",
+        "    for num in numbers[1:]:\n",
+        "        if num > max_val:\n",
+        "            max_val = num\n",
+        "    return max_val\n",
+        "\n",
+        "test_numbers = list(range(1000000))\n",
+        "\n",
+        "start_time = time.time()\n",
+        "manual_sum = calculate_sum_manual(test_numbers)\n",
+        "manual_max = find_max_manual(test_numbers)\n",
+        "manual_time = time.time() - start_time\n",
+        "print(f\"Manual approach time: {manual_time:.4f} seconds\")\n",
+        "\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "G3vjwOPVHL2p",
+        "outputId": "d995cc1e-f1f7-422e-f6fe-8372879d2fe7"
+      },
+      "execution_count": 9,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Manual approach time: 0.0805 seconds\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "start_time = time.time()\n",
+        "builtin_sum = sum(test_numbers)\n",
+        "builtin_max = max(test_numbers)\n",
+        "builtin_time = time.time() - start_time\n",
+        "print(f\"Built-in approach time: {builtin_time:.4f} seconds\")\n",
+        "print(f\"Improvement: {manual_time / builtin_time:.2f}x faster\")\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "nokYMyTZHUPX",
+        "outputId": "e6b20016-1531-46ad-91e0-723e0401cfa1"
+      },
+      "execution_count": 10,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Built-in approach time: 0.0413 seconds\n",
+            "Improvement: 1.95x faster\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def create_csv_plus(data):\n",
+        "    result = \"\"  # Start with an empty string\n",
+        "    for row in data:  # Go through each row of data\n",
+        "        for i, item in enumerate(row):  # Go through each item in the row\n",
+        "            result += str(item)  # Add the item to our result string\n",
+        "            if i < len(row) - 1:  # If it's not the last item\n",
+        "                result += \",\"     # Add a comma\n",
+        "        result += \"\\n\"  # Add a newline after each row\n",
+        "    return result\n",
+        "\n",
+        "# Test data: 1000 rows with 10 columns each\n",
+        "test_data = [[f\"item_{i}_{j}\" for j in range(10)] for i in range(1000)]\n",
+        "\n",
+        "start_time = time.time()\n",
+        "csv_plus = create_csv_plus(test_data)\n",
+        "plus_time = time.time() - start_time\n",
+        "print(f\"String concatenation time: {plus_time:.4f} seconds\")\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "YlSwN9aKHXn9",
+        "outputId": "5d4199cf-e185-49ee-dc87-bc610e084e81"
+      },
+      "execution_count": 13,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "String concatenation time: 0.0043 seconds\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def create_csv_join(data):\n",
+        "    # For each row, join the items with commas, then join all rows with newlines\n",
+        "    return \"\\n\".join(\",\".join(str(item) for item in row) for row in data)\n",
+        "\n",
+        "start_time = time.time()\n",
+        "csv_join = create_csv_join(test_data)\n",
+        "join_time = time.time() - start_time\n",
+        "print(f\"Join method time: {join_time:.4f} seconds\")\n",
+        "print(f\"Improvement: {plus_time / join_time:.2f}x faster\")\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "RZo5MaMmHcgg",
+        "outputId": "5e6012df-5365-4c54-8439-a179f91cf9d1"
+      },
+      "execution_count": 14,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Join method time: 0.0022 seconds\n",
+            "Improvement: 1.94x faster\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import sys\n",
+        "\n",
+        "def process_large_dataset_list(n):\n",
+        "    processed_data = []\n",
+        "    for i in range(n):\n",
+        "        # Simulate some data processing\n",
+        "        processed_value = i ** 2 + i * 3 + 42\n",
+        "        processed_data.append(processed_value)  # Store each processed value\n",
+        "    return processed_data\n",
+        "\n",
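+        "# Test with 100,000 items (sys.getsizeof is shallow: it counts the list's own pointer array, not the int objects it references)\n",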
+        "n = 100000\n",
+        "list_result = process_large_dataset_list(n)\n",
+        "list_memory = sys.getsizeof(list_result)\n",
+        "print(f\"List memory usage: {list_memory:,} bytes\")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "AgYsuPQlHgYP",
+        "outputId": "4c832fb5-4a62-4bbd-c5d4-d0999d1948aa"
+      },
+      "execution_count": 16,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "List memory usage: 800,984 bytes\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def process_large_dataset_generator(n):\n",
+        "    for i in range(n):\n",
+        "        # Simulate some data processing\n",
+        "        processed_value = i ** 2 + i * 3 + 42\n",
+        "        yield processed_value  # Yield each value instead of storing it\n",
+        "\n",
+        "# Create the generator (this doesn't process anything yet!)\n",
+        "gen_result = process_large_dataset_generator(n)\n",
+        "gen_memory = sys.getsizeof(gen_result)\n",
+        "print(f\"Generator memory usage: {gen_memory:,} bytes\")\n",
+        "print(f\"Memory improvement: {list_memory / gen_memory:.0f}x less memory\")\n",
+        "\n",
+        "# Now we can process items one at a time\n",
+        "total = 0\n",
+        "for value in process_large_dataset_generator(n):\n",
+        "    total += value\n",
+        "    # Each value is processed on-demand and can be garbage collected\n",
+        "\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Q6WaCaRGHo2D",
+        "outputId": "708cc09f-176c-4aa0-c9d9-3eb019957ef8"
+      },
+      "execution_count": 17,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Generator memory usage: 224 bytes\n",
+            "Memory improvement: 3576x less memory\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "cNe54bSyHwwW"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}