-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path02_counting_and_grouping.py
More file actions
103 lines (84 loc) · 3.07 KB
/
02_counting_and_grouping.py
File metadata and controls
103 lines (84 loc) · 3.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
"""
Example 2: Counting and Grouping Optimization
Shows efficient patterns for counting occurrences and grouping data.
"""
from collections import Counter, defaultdict
# ❌ BEFORE: Inefficient counting with nested loops
def count_words_slow(words):
"""Count word occurrences using list - O(n²) complexity"""
counts = []
for word in words:
found = False
for count_item in counts:
if count_item[0] == word:
count_item[1] += 1
found = True
break
if not found:
counts.append([word, 1])
return counts
# ✅ AFTER: Efficient counting with dict - O(n) complexity
def count_words_fast(words):
"""Count word occurrences using dict - O(n) complexity"""
counts = {}
for word in words:
counts[word] = counts.get(word, 0) + 1
return counts
# ✅ BEST: Using Counter from collections
def count_words_best(words):
"""Count word occurrences using Counter - O(n) and most readable"""
return Counter(words)
# Real-world example: Log analysis
# ❌ BEFORE: Inefficient grouping
def analyze_logs_slow(log_entries):
"""Group log entries by severity level - inefficient"""
grouped = []
for entry in log_entries:
level = entry['level']
found = False
for group in grouped:
if group['level'] == level:
group['entries'].append(entry)
found = True
break
if not found:
grouped.append({'level': level, 'entries': [entry]})
return grouped
# ✅ AFTER: Efficient grouping with defaultdict
def analyze_logs_fast(log_entries):
"""Group log entries by severity level - efficient O(n)"""
grouped = defaultdict(list)
for entry in log_entries:
grouped[entry['level']].append(entry)
return dict(grouped) # Convert to regular dict if needed
# ✅ ALTERNATIVE: Using dict.setdefault
def analyze_logs_alternative(log_entries):
"""Group log entries using setdefault - also efficient"""
grouped = {}
for entry in log_entries:
grouped.setdefault(entry['level'], []).append(entry)
return grouped
# Performance comparison
if __name__ == "__main__":
import time
import random
# Generate test data
words = [random.choice(['apple', 'banana', 'cherry', 'date', 'elderberry'])
for _ in range(10000)]
# Test slow version
start = time.perf_counter()
result_slow = count_words_slow(words)
slow_time = time.perf_counter() - start
# Test fast version
start = time.perf_counter()
result_fast = count_words_fast(words)
fast_time = time.perf_counter() - start
# Test Counter version
start = time.perf_counter()
result_best = count_words_best(words)
best_time = time.perf_counter() - start
print(f"Slow (list) approach: {slow_time:.6f}s")
print(f"Fast (dict) approach: {fast_time:.6f}s")
print(f"Best (Counter) approach: {best_time:.6f}s")
print(f"\nDict is {slow_time/fast_time:.1f}x faster than list")
print(f"Counter is {slow_time/best_time:.1f}x faster than list")