Skip to content

Commit 8fa134e

Browse files
authored
add notebook w/ examples
1 parent 742a1fe commit 8fa134e

1 file changed

Lines changed: 385 additions & 0 deletions

File tree

Lines changed: 385 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,385 @@
1+
{
2+
"nbformat": 4,
3+
"nbformat_minor": 0,
4+
"metadata": {
5+
"colab": {
6+
"provenance": []
7+
},
8+
"kernelspec": {
9+
"name": "python3",
10+
"display_name": "Python 3"
11+
},
12+
"language_info": {
13+
"name": "python"
14+
}
15+
},
16+
"cells": [
17+
{
18+
"cell_type": "code",
19+
"execution_count": 5,
20+
"metadata": {
21+
"colab": {
22+
"base_uri": "https://localhost:8080/"
23+
},
24+
"id": "Nwd80bTrGYRD",
25+
"outputId": "b6dc558c-fe9a-4bf2-c547-05dec0f3987f"
26+
},
27+
"outputs": [
28+
{
29+
"output_type": "stream",
30+
"name": "stdout",
31+
"text": [
32+
"Loop time: 0.0840 seconds\n"
33+
]
34+
}
35+
],
36+
"source": [
37+
"import time\n",
38+
"\n",
39+
"def square_numbers_loop(numbers):\n",
40+
" result = []\n",
41+
" for num in numbers:\n",
42+
" result.append(num ** 2)\n",
43+
" return result\n",
44+
"\n",
45+
"# Let's test this with 1,000,000 numbers to see the performance\n",
46+
"test_numbers = list(range(1000000))\n",
47+
"\n",
48+
"start_time = time.time()\n",
49+
"squared_loop = square_numbers_loop(test_numbers)\n",
50+
"loop_time = time.time() - start_time\n",
51+
"print(f\"Loop time: {loop_time:.4f} seconds\")\n"
52+
]
53+
},
54+
{
55+
"cell_type": "code",
56+
"source": [
57+
"def square_numbers_comprehension(numbers):\n",
58+
" return [num ** 2 for num in numbers] # Create the entire list in one line\n",
59+
"\n",
60+
"start_time = time.time()\n",
61+
"squared_comprehension = square_numbers_comprehension(test_numbers)\n",
62+
"comprehension_time = time.time() - start_time\n",
63+
"print(f\"Comprehension time: {comprehension_time:.4f} seconds\")\n",
64+
"print(f\"Improvement: {loop_time / comprehension_time:.2f}x faster\")\n"
65+
],
66+
"metadata": {
67+
"colab": {
68+
"base_uri": "https://localhost:8080/"
69+
},
70+
"id": "Gjd_ofARGaSb",
71+
"outputId": "e6459964-6439-46e4-ff11-1ea7c715e75d"
72+
},
73+
"execution_count": 6,
74+
"outputs": [
75+
{
76+
"output_type": "stream",
77+
"name": "stdout",
78+
"text": [
79+
"Comprehension time: 0.0736 seconds\n",
80+
"Improvement: 1.14x faster\n"
81+
]
82+
}
83+
]
84+
},
85+
{
86+
"cell_type": "code",
87+
"source": [
88+
"def find_common_elements_list(list1, list2):\n",
89+
" common = []\n",
90+
" for item in list1: # Go through each item in the first list\n",
91+
" if item in list2: # Check if it exists in the second list\n",
92+
" common.append(item) # If yes, add it to our common list\n",
93+
" return common\n",
94+
"\n",
95+
"# Test with reasonably large lists\n",
96+
"large_list1 = list(range(10000))\n",
97+
"large_list2 = list(range(5000, 15000))\n",
98+
"\n",
99+
"start_time = time.time()\n",
100+
"common_list = find_common_elements_list(large_list1, large_list2)\n",
101+
"list_time = time.time() - start_time\n",
102+
"print(f\"List approach time: {list_time:.4f} seconds\")\n"
103+
],
104+
"metadata": {
105+
"colab": {
106+
"base_uri": "https://localhost:8080/"
107+
},
108+
"id": "1vQ_oPOcGgXn",
109+
"outputId": "9c334eeb-a4b4-4529-f2a8-bc1b7b18d10b"
110+
},
111+
"execution_count": 7,
112+
"outputs": [
113+
{
114+
"output_type": "stream",
115+
"name": "stdout",
116+
"text": [
117+
"List approach time: 0.8478 seconds\n"
118+
]
119+
}
120+
]
121+
},
122+
{
123+
"cell_type": "code",
124+
"source": [
125+
"def find_common_elements_set(list1, list2):\n",
126+
" set2 = set(list2) # Convert list to a set (one-time cost)\n",
127+
" return [item for item in list1 if item in set2] # Check membership in set\n",
128+
"\n",
129+
"start_time = time.time()\n",
130+
"common_set = find_common_elements_set(large_list1, large_list2)\n",
131+
"set_time = time.time() - start_time\n",
132+
"print(f\"Set approach time: {set_time:.4f} seconds\")\n",
133+
"print(f\"Improvement: {list_time / set_time:.2f}x faster\")\n"
134+
],
135+
"metadata": {
136+
"colab": {
137+
"base_uri": "https://localhost:8080/"
138+
},
139+
"id": "lTAADi1gHHOq",
140+
"outputId": "e7f77a7e-8cd6-49b4-cfb7-e41905812be6"
141+
},
142+
"execution_count": 8,
143+
"outputs": [
144+
{
145+
"output_type": "stream",
146+
"name": "stdout",
147+
"text": [
148+
"Set approach time: 0.0010 seconds\n",
149+
"Improvement: 863.53x faster\n"
150+
]
151+
}
152+
]
153+
},
154+
{
155+
"cell_type": "code",
156+
"source": [
157+
"def calculate_sum_manual(numbers):\n",
158+
" total = 0\n",
159+
" for num in numbers:\n",
160+
" total += num\n",
161+
" return total\n",
162+
"\n",
163+
"def find_max_manual(numbers):\n",
164+
" max_val = numbers[0]\n",
165+
" for num in numbers[1:]:\n",
166+
" if num > max_val:\n",
167+
" max_val = num\n",
168+
" return max_val\n",
169+
"\n",
170+
"test_numbers = list(range(1000000))\n",
171+
"\n",
172+
"start_time = time.time()\n",
173+
"manual_sum = calculate_sum_manual(test_numbers)\n",
174+
"manual_max = find_max_manual(test_numbers)\n",
175+
"manual_time = time.time() - start_time\n",
176+
"print(f\"Manual approach time: {manual_time:.4f} seconds\")\n",
177+
"\n"
178+
],
179+
"metadata": {
180+
"colab": {
181+
"base_uri": "https://localhost:8080/"
182+
},
183+
"id": "G3vjwOPVHL2p",
184+
"outputId": "d995cc1e-f1f7-422e-f6fe-8372879d2fe7"
185+
},
186+
"execution_count": 9,
187+
"outputs": [
188+
{
189+
"output_type": "stream",
190+
"name": "stdout",
191+
"text": [
192+
"Manual approach time: 0.0805 seconds\n"
193+
]
194+
}
195+
]
196+
},
197+
{
198+
"cell_type": "code",
199+
"source": [
200+
"start_time = time.time()\n",
201+
"builtin_sum = sum(test_numbers)\n",
202+
"builtin_max = max(test_numbers)\n",
203+
"builtin_time = time.time() - start_time\n",
204+
"print(f\"Built-in approach time: {builtin_time:.4f} seconds\")\n",
205+
"print(f\"Improvement: {manual_time / builtin_time:.2f}x faster\")\n"
206+
],
207+
"metadata": {
208+
"colab": {
209+
"base_uri": "https://localhost:8080/"
210+
},
211+
"id": "nokYMyTZHUPX",
212+
"outputId": "e6b20016-1531-46ad-91e0-723e0401cfa1"
213+
},
214+
"execution_count": 10,
215+
"outputs": [
216+
{
217+
"output_type": "stream",
218+
"name": "stdout",
219+
"text": [
220+
"Built-in approach time: 0.0413 seconds\n",
221+
"Improvement: 1.95x faster\n"
222+
]
223+
}
224+
]
225+
},
226+
{
227+
"cell_type": "code",
228+
"source": [
229+
"def create_csv_plus(data):\n",
230+
" result = \"\" # Start with an empty string\n",
231+
" for row in data: # Go through each row of data\n",
232+
" for i, item in enumerate(row): # Go through each item in the row\n",
233+
" result += str(item) # Add the item to our result string\n",
234+
" if i < len(row) - 1: # If it's not the last item\n",
235+
" result += \",\" # Add a comma\n",
236+
" result += \"\\n\" # Add a newline after each row\n",
237+
" return result\n",
238+
"\n",
239+
"# Test data: 1000 rows with 10 columns each\n",
240+
"test_data = [[f\"item_{i}_{j}\" for j in range(10)] for i in range(1000)]\n",
241+
"\n",
242+
"start_time = time.time()\n",
243+
"csv_plus = create_csv_plus(test_data)\n",
244+
"plus_time = time.time() - start_time\n",
245+
"print(f\"String concatenation time: {plus_time:.4f} seconds\")\n"
246+
],
247+
"metadata": {
248+
"colab": {
249+
"base_uri": "https://localhost:8080/"
250+
},
251+
"id": "YlSwN9aKHXn9",
252+
"outputId": "5d4199cf-e185-49ee-dc87-bc610e084e81"
253+
},
254+
"execution_count": 13,
255+
"outputs": [
256+
{
257+
"output_type": "stream",
258+
"name": "stdout",
259+
"text": [
260+
"String concatenation time: 0.0043 seconds\n"
261+
]
262+
}
263+
]
264+
},
265+
{
266+
"cell_type": "code",
267+
"source": [
268+
"def create_csv_join(data):\n",
269+
" # For each row, join the items with commas, then join all rows with newlines (no trailing newline, unlike create_csv_plus)\n",
270+
" return \"\\n\".join(\",\".join(str(item) for item in row) for row in data)\n",
271+
"\n",
272+
"start_time = time.time()\n",
273+
"csv_join = create_csv_join(test_data)\n",
274+
"join_time = time.time() - start_time\n",
275+
"print(f\"Join method time: {join_time:.4f} seconds\")\n",
276+
"print(f\"Improvement: {plus_time / join_time:.2f}x faster\")\n"
277+
],
278+
"metadata": {
279+
"colab": {
280+
"base_uri": "https://localhost:8080/"
281+
},
282+
"id": "RZo5MaMmHcgg",
283+
"outputId": "5e6012df-5365-4c54-8439-a179f91cf9d1"
284+
},
285+
"execution_count": 14,
286+
"outputs": [
287+
{
288+
"output_type": "stream",
289+
"name": "stdout",
290+
"text": [
291+
"Join method time: 0.0022 seconds\n",
292+
"Improvement: 1.94x faster\n"
293+
]
294+
}
295+
]
296+
},
297+
{
298+
"cell_type": "code",
299+
"source": [
300+
"import sys\n",
301+
"\n",
302+
"def process_large_dataset_list(n):\n",
303+
" processed_data = []\n",
304+
" for i in range(n):\n",
305+
" # Simulate some data processing\n",
306+
" processed_value = i ** 2 + i * 3 + 42\n",
307+
" processed_data.append(processed_value) # Store each processed value\n",
308+
" return processed_data\n",
309+
"\n",
310+
"# Test with 100,000 items\n",
311+
"n = 100000\n",
312+
"list_result = process_large_dataset_list(n)\n",
313+
"list_memory = sys.getsizeof(list_result)\n",
314+
"print(f\"List memory usage: {list_memory:,} bytes\")"
315+
],
316+
"metadata": {
317+
"colab": {
318+
"base_uri": "https://localhost:8080/"
319+
},
320+
"id": "AgYsuPQlHgYP",
321+
"outputId": "4c832fb5-4a62-4bbd-c5d4-d0999d1948aa"
322+
},
323+
"execution_count": 16,
324+
"outputs": [
325+
{
326+
"output_type": "stream",
327+
"name": "stdout",
328+
"text": [
329+
"List memory usage: 800,984 bytes\n"
330+
]
331+
}
332+
]
333+
},
334+
{
335+
"cell_type": "code",
336+
"source": [
337+
"def process_large_dataset_generator(n):\n",
338+
" for i in range(n):\n",
339+
" # Simulate some data processing\n",
340+
" processed_value = i ** 2 + i * 3 + 42\n",
341+
" yield processed_value # Yield each value instead of storing it\n",
342+
"\n",
343+
"# Create the generator (this doesn't process anything yet!)\n",
344+
"gen_result = process_large_dataset_generator(n)\n",
345+
"gen_memory = sys.getsizeof(gen_result)\n",
346+
"print(f\"Generator memory usage: {gen_memory:,} bytes\")\n",
347+
"print(f\"Memory improvement: {list_memory / gen_memory:.0f}x less memory\")\n",
348+
"\n",
349+
"# Now we can process items one at a time\n",
350+
"total = 0\n",
351+
"for value in process_large_dataset_generator(n):\n",
352+
" total += value\n",
353+
" # Each value is processed on-demand and can be garbage collected\n",
354+
"\n"
355+
],
356+
"metadata": {
357+
"colab": {
358+
"base_uri": "https://localhost:8080/"
359+
},
360+
"id": "Q6WaCaRGHo2D",
361+
"outputId": "708cc09f-176c-4aa0-c9d9-3eb019957ef8"
362+
},
363+
"execution_count": 17,
364+
"outputs": [
365+
{
366+
"output_type": "stream",
367+
"name": "stdout",
368+
"text": [
369+
"Generator memory usage: 224 bytes\n",
370+
"Memory improvement: 3576x less memory\n"
371+
]
372+
}
373+
]
374+
},
375+
{
376+
"cell_type": "code",
377+
"source": [],
378+
"metadata": {
379+
"id": "cNe54bSyHwwW"
380+
},
381+
"execution_count": null,
382+
"outputs": []
383+
}
384+
]
385+
}

0 commit comments

Comments
 (0)