1+ {
2+ "nbformat" : 4 ,
3+ "nbformat_minor" : 0 ,
4+ "metadata" : {
5+ "colab" : {
6+ "provenance" : []
7+ },
8+ "kernelspec" : {
9+ "name" : " python3" ,
10+ "display_name" : " Python 3"
11+ },
12+ "language_info" : {
13+ "name" : " python"
14+ }
15+ },
16+ "cells" : [
17+ {
18+ "cell_type" : " code" ,
19+ "execution_count" : 5 ,
20+ "metadata" : {
21+ "colab" : {
22+ "base_uri" : " https://localhost:8080/"
23+ },
24+ "id" : " Nwd80bTrGYRD" ,
25+ "outputId" : " b6dc558c-fe9a-4bf2-c547-05dec0f3987f"
26+ },
27+ "outputs" : [
28+ {
29+ "output_type" : " stream" ,
30+ "name" : " stdout" ,
31+ "text" : [
32+ " Loop time: 0.0840 seconds\n "
33+ ]
34+ }
35+ ],
36+ "source" : [
37+ " import time\n " ,
38+ " \n " ,
39+ " def square_numbers_loop(numbers):\n " ,
40+ " result = []\n " ,
41+ " for num in numbers:\n " ,
42+ " result.append(num ** 2)\n " ,
43+ " return result\n " ,
44+ " \n " ,
" # Let's test this with 1,000,000 numbers to see the performance\n " ,
46+ " test_numbers = list(range(1000000))\n " ,
47+ " \n " ,
48+ " start_time = time.time()\n " ,
49+ " squared_loop = square_numbers_loop(test_numbers)\n " ,
50+ " loop_time = time.time() - start_time\n " ,
51+ " print(f\" Loop time: {loop_time:.4f} seconds\" )\n "
52+ ]
53+ },
54+ {
55+ "cell_type" : " code" ,
56+ "source" : [
57+ " def square_numbers_comprehension(numbers):\n " ,
58+ " return [num ** 2 for num in numbers] # Create the entire list in one line\n " ,
59+ " \n " ,
60+ " start_time = time.time()\n " ,
61+ " squared_comprehension = square_numbers_comprehension(test_numbers)\n " ,
62+ " comprehension_time = time.time() - start_time\n " ,
63+ " print(f\" Comprehension time: {comprehension_time:.4f} seconds\" )\n " ,
64+ " print(f\" Improvement: {loop_time / comprehension_time:.2f}x faster\" )\n "
65+ ],
66+ "metadata" : {
67+ "colab" : {
68+ "base_uri" : " https://localhost:8080/"
69+ },
70+ "id" : " Gjd_ofARGaSb" ,
71+ "outputId" : " e6459964-6439-46e4-ff11-1ea7c715e75d"
72+ },
73+ "execution_count" : 6 ,
74+ "outputs" : [
75+ {
76+ "output_type" : " stream" ,
77+ "name" : " stdout" ,
78+ "text" : [
79+ " Comprehension time: 0.0736 seconds\n " ,
80+ " Improvement: 1.14x faster\n "
81+ ]
82+ }
83+ ]
84+ },
85+ {
86+ "cell_type" : " code" ,
87+ "source" : [
88+ " def find_common_elements_list(list1, list2):\n " ,
89+ " common = []\n " ,
90+ " for item in list1: # Go through each item in the first list\n " ,
91+ " if item in list2: # Check if it exists in the second list\n " ,
92+ " common.append(item) # If yes, add it to our common list\n " ,
93+ " return common\n " ,
94+ " \n " ,
95+ " # Test with reasonably large lists\n " ,
96+ " large_list1 = list(range(10000))\n " ,
97+ " large_list2 = list(range(5000, 15000))\n " ,
98+ " \n " ,
99+ " start_time = time.time()\n " ,
100+ " common_list = find_common_elements_list(large_list1, large_list2)\n " ,
101+ " list_time = time.time() - start_time\n " ,
102+ " print(f\" List approach time: {list_time:.4f} seconds\" )\n "
103+ ],
104+ "metadata" : {
105+ "colab" : {
106+ "base_uri" : " https://localhost:8080/"
107+ },
108+ "id" : " 1vQ_oPOcGgXn" ,
109+ "outputId" : " 9c334eeb-a4b4-4529-f2a8-bc1b7b18d10b"
110+ },
111+ "execution_count" : 7 ,
112+ "outputs" : [
113+ {
114+ "output_type" : " stream" ,
115+ "name" : " stdout" ,
116+ "text" : [
117+ " List approach time: 0.8478 seconds\n "
118+ ]
119+ }
120+ ]
121+ },
122+ {
123+ "cell_type" : " code" ,
124+ "source" : [
125+ " def find_common_elements_set(list1, list2):\n " ,
126+ " set2 = set(list2) # Convert list to a set (one-time cost)\n " ,
127+ " return [item for item in list1 if item in set2] # Check membership in set\n " ,
128+ " \n " ,
129+ " start_time = time.time()\n " ,
130+ " common_set = find_common_elements_set(large_list1, large_list2)\n " ,
131+ " set_time = time.time() - start_time\n " ,
132+ " print(f\" Set approach time: {set_time:.4f} seconds\" )\n " ,
133+ " print(f\" Improvement: {list_time / set_time:.2f}x faster\" )\n "
134+ ],
135+ "metadata" : {
136+ "colab" : {
137+ "base_uri" : " https://localhost:8080/"
138+ },
139+ "id" : " lTAADi1gHHOq" ,
140+ "outputId" : " e7f77a7e-8cd6-49b4-cfb7-e41905812be6"
141+ },
142+ "execution_count" : 8 ,
143+ "outputs" : [
144+ {
145+ "output_type" : " stream" ,
146+ "name" : " stdout" ,
147+ "text" : [
148+ " Set approach time: 0.0010 seconds\n " ,
149+ " Improvement: 863.53x faster\n "
150+ ]
151+ }
152+ ]
153+ },
154+ {
155+ "cell_type" : " code" ,
156+ "source" : [
157+ " def calculate_sum_manual(numbers):\n " ,
158+ " total = 0\n " ,
159+ " for num in numbers:\n " ,
160+ " total += num\n " ,
161+ " return total\n " ,
162+ " \n " ,
163+ " def find_max_manual(numbers):\n " ,
164+ " max_val = numbers[0]\n " ,
165+ " for num in numbers[1:]:\n " ,
166+ " if num > max_val:\n " ,
167+ " max_val = num\n " ,
168+ " return max_val\n " ,
169+ " \n " ,
170+ " test_numbers = list(range(1000000))\n " ,
171+ " \n " ,
172+ " start_time = time.time()\n " ,
173+ " manual_sum = calculate_sum_manual(test_numbers)\n " ,
174+ " manual_max = find_max_manual(test_numbers)\n " ,
175+ " manual_time = time.time() - start_time\n " ,
176+ " print(f\" Manual approach time: {manual_time:.4f} seconds\" )\n " ,
177+ " \n "
178+ ],
179+ "metadata" : {
180+ "colab" : {
181+ "base_uri" : " https://localhost:8080/"
182+ },
183+ "id" : " G3vjwOPVHL2p" ,
184+ "outputId" : " d995cc1e-f1f7-422e-f6fe-8372879d2fe7"
185+ },
186+ "execution_count" : 9 ,
187+ "outputs" : [
188+ {
189+ "output_type" : " stream" ,
190+ "name" : " stdout" ,
191+ "text" : [
192+ " Manual approach time: 0.0805 seconds\n "
193+ ]
194+ }
195+ ]
196+ },
197+ {
198+ "cell_type" : " code" ,
199+ "source" : [
200+ " start_time = time.time()\n " ,
201+ " builtin_sum = sum(test_numbers)\n " ,
202+ " builtin_max = max(test_numbers)\n " ,
203+ " builtin_time = time.time() - start_time\n " ,
204+ " print(f\" Built-in approach time: {builtin_time:.4f} seconds\" )\n " ,
205+ " print(f\" Improvement: {manual_time / builtin_time:.2f}x faster\" )\n "
206+ ],
207+ "metadata" : {
208+ "colab" : {
209+ "base_uri" : " https://localhost:8080/"
210+ },
211+ "id" : " nokYMyTZHUPX" ,
212+ "outputId" : " e6b20016-1531-46ad-91e0-723e0401cfa1"
213+ },
214+ "execution_count" : 10 ,
215+ "outputs" : [
216+ {
217+ "output_type" : " stream" ,
218+ "name" : " stdout" ,
219+ "text" : [
220+ " Built-in approach time: 0.0413 seconds\n " ,
221+ " Improvement: 1.95x faster\n "
222+ ]
223+ }
224+ ]
225+ },
226+ {
227+ "cell_type" : " code" ,
228+ "source" : [
229+ " def create_csv_plus(data):\n " ,
230+ " result = \"\" # Start with an empty string\n " ,
231+ " for row in data: # Go through each row of data\n " ,
232+ " for i, item in enumerate(row): # Go through each item in the row\n " ,
233+ " result += str(item) # Add the item to our result string\n " ,
234+ " if i < len(row) - 1: # If it's not the last item\n " ,
235+ " result += \" ,\" # Add a comma\n " ,
236+ " result += \"\\ n\" # Add a newline after each row\n " ,
237+ " return result\n " ,
238+ " \n " ,
239+ " # Test data: 1000 rows with 10 columns each\n " ,
240+ " test_data = [[f\" item_{i}_{j}\" for j in range(10)] for i in range(1000)]\n " ,
241+ " \n " ,
242+ " start_time = time.time()\n " ,
243+ " csv_plus = create_csv_plus(test_data)\n " ,
244+ " plus_time = time.time() - start_time\n " ,
245+ " print(f\" String concatenation time: {plus_time:.4f} seconds\" )\n "
246+ ],
247+ "metadata" : {
248+ "colab" : {
249+ "base_uri" : " https://localhost:8080/"
250+ },
251+ "id" : " YlSwN9aKHXn9" ,
252+ "outputId" : " 5d4199cf-e185-49ee-dc87-bc610e084e81"
253+ },
254+ "execution_count" : 13 ,
255+ "outputs" : [
256+ {
257+ "output_type" : " stream" ,
258+ "name" : " stdout" ,
259+ "text" : [
260+ " String concatenation time: 0.0043 seconds\n "
261+ ]
262+ }
263+ ]
264+ },
265+ {
266+ "cell_type" : " code" ,
267+ "source" : [
268+ " def create_csv_join(data):\n " ,
269+ " # For each row, join the items with commas, then join all rows with newlines\n " ,
270+ " return \"\\ n\" .join(\" ,\" .join(str(item) for item in row) for row in data)\n " ,
271+ " \n " ,
272+ " start_time = time.time()\n " ,
273+ " csv_join = create_csv_join(test_data)\n " ,
274+ " join_time = time.time() - start_time\n " ,
275+ " print(f\" Join method time: {join_time:.4f} seconds\" )\n " ,
276+ " print(f\" Improvement: {plus_time / join_time:.2f}x faster\" )\n "
277+ ],
278+ "metadata" : {
279+ "colab" : {
280+ "base_uri" : " https://localhost:8080/"
281+ },
282+ "id" : " RZo5MaMmHcgg" ,
283+ "outputId" : " 5e6012df-5365-4c54-8439-a179f91cf9d1"
284+ },
285+ "execution_count" : 14 ,
286+ "outputs" : [
287+ {
288+ "output_type" : " stream" ,
289+ "name" : " stdout" ,
290+ "text" : [
291+ " Join method time: 0.0022 seconds\n " ,
292+ " Improvement: 1.94x faster\n "
293+ ]
294+ }
295+ ]
296+ },
297+ {
298+ "cell_type" : " code" ,
299+ "source" : [
300+ " import sys\n " ,
301+ " \n " ,
302+ " def process_large_dataset_list(n):\n " ,
303+ " processed_data = []\n " ,
304+ " for i in range(n):\n " ,
305+ " # Simulate some data processing\n " ,
306+ " processed_value = i ** 2 + i * 3 + 42\n " ,
307+ " processed_data.append(processed_value) # Store each processed value\n " ,
308+ " return processed_data\n " ,
309+ " \n " ,
310+ " # Test with 100,000 items\n " ,
311+ " n = 100000\n " ,
312+ " list_result = process_large_dataset_list(n)\n " ,
313+ " list_memory = sys.getsizeof(list_result)\n " ,
314+ " print(f\" List memory usage: {list_memory:,} bytes\" )"
315+ ],
316+ "metadata" : {
317+ "colab" : {
318+ "base_uri" : " https://localhost:8080/"
319+ },
320+ "id" : " AgYsuPQlHgYP" ,
321+ "outputId" : " 4c832fb5-4a62-4bbd-c5d4-d0999d1948aa"
322+ },
323+ "execution_count" : 16 ,
324+ "outputs" : [
325+ {
326+ "output_type" : " stream" ,
327+ "name" : " stdout" ,
328+ "text" : [
329+ " List memory usage: 800,984 bytes\n "
330+ ]
331+ }
332+ ]
333+ },
334+ {
335+ "cell_type" : " code" ,
336+ "source" : [
337+ " def process_large_dataset_generator(n):\n " ,
338+ " for i in range(n):\n " ,
339+ " # Simulate some data processing\n " ,
340+ " processed_value = i ** 2 + i * 3 + 42\n " ,
341+ " yield processed_value # Yield each value instead of storing it\n " ,
342+ " \n " ,
343+ " # Create the generator (this doesn't process anything yet!)\n " ,
344+ " gen_result = process_large_dataset_generator(n)\n " ,
345+ " gen_memory = sys.getsizeof(gen_result)\n " ,
346+ " print(f\" Generator memory usage: {gen_memory:,} bytes\" )\n " ,
347+ " print(f\" Memory improvement: {list_memory / gen_memory:.0f}x less memory\" )\n " ,
348+ " \n " ,
349+ " # Now we can process items one at a time\n " ,
350+ " total = 0\n " ,
351+ " for value in process_large_dataset_generator(n):\n " ,
352+ " total += value\n " ,
353+ " # Each value is processed on-demand and can be garbage collected\n " ,
354+ " \n "
355+ ],
356+ "metadata" : {
357+ "colab" : {
358+ "base_uri" : " https://localhost:8080/"
359+ },
360+ "id" : " Q6WaCaRGHo2D" ,
361+ "outputId" : " 708cc09f-176c-4aa0-c9d9-3eb019957ef8"
362+ },
363+ "execution_count" : 17 ,
364+ "outputs" : [
365+ {
366+ "output_type" : " stream" ,
367+ "name" : " stdout" ,
368+ "text" : [
369+ " Generator memory usage: 224 bytes\n " ,
370+ " Memory improvement: 3576x less memory\n "
371+ ]
372+ }
373+ ]
374+ },
375+ {
376+ "cell_type" : " code" ,
377+ "source" : [],
378+ "metadata" : {
379+ "id" : " cNe54bSyHwwW"
380+ },
381+ "execution_count" : null ,
382+ "outputs" : []
383+ }
384+ ]
385+ }