|
| 1 | +#!/usr/bin/env python |
| 2 | +# encoding: utf-8 |
| 3 | + |
| 4 | +""" |
| 5 | +@author: zhanghe |
| 6 | +@software: PyCharm |
| 7 | +@file: merge_sort.py |
| 8 | +@time: 2017/3/27 下午11:40 |
| 9 | +""" |
| 10 | + |
| 11 | +from collections import deque |
| 12 | +from copy import copy |
| 13 | + |
| 14 | + |
| 15 | +def merge_sort(lst): |
| 16 | + if len(lst) <= 1: |
| 17 | + return lst |
| 18 | + |
| 19 | + def merge(left, right): |
| 20 | + merged, left, right = deque(), deque(left), deque(right) |
| 21 | + while left and right: |
| 22 | + merged.append(left.popleft() if left[0] <= right[0] else right.popleft()) # deque popleft is also O(1) |
| 23 | + merged.extend(right if right else left) |
| 24 | + return list(merged) |
| 25 | + |
| 26 | + middle = int(len(lst) // 2) |
| 27 | + left_lst = merge_sort(lst[:middle]) |
| 28 | + right_lst = merge_sort(lst[middle:]) |
| 29 | + return merge(left_lst, right_lst) |
| 30 | + |
| 31 | + |
| 32 | +def test_merge_simple(): |
| 33 | + """ |
| 34 | + 测试简单归并 |
| 35 | + :return: |
| 36 | + """ |
| 37 | + test_list_a = [1, 3, 4, 5, 9] |
| 38 | + test_list_b = [1, 2, 3, 6, 8, 10, 11] |
| 39 | + test_list = copy(test_list_a) |
| 40 | + test_list.extend(test_list_b) |
| 41 | + |
| 42 | + print test_list_a |
| 43 | + print test_list_b |
| 44 | + print test_list |
| 45 | + print merge_sort(test_list) |
| 46 | + |
| 47 | + |
| 48 | +def g_next(g_l): |
| 49 | + """ |
| 50 | + 获取生成器元素 |
| 51 | + :param g_l: |
| 52 | + :return: |
| 53 | + """ |
| 54 | + try: |
| 55 | + r = int(g_l.next()) |
| 56 | + except StopIteration: |
| 57 | + r = None |
| 58 | + return r |
| 59 | + |
| 60 | + |
| 61 | +def merge_sort_t(m, n): |
| 62 | + """ |
| 63 | + 归并排序两个迭代器 |
| 64 | + :param m: |
| 65 | + :param n: |
| 66 | + :return: |
| 67 | + """ |
| 68 | + # 去除换行并转生成器 |
| 69 | + g_a = (a.strip() for a in m) |
| 70 | + g_b = (b.strip() for b in n) |
| 71 | + |
| 72 | + n_a = g_next(g_a) |
| 73 | + n_b = g_next(g_b) |
| 74 | + while 1: |
| 75 | + print n_a, n_b |
| 76 | + if not (n_a or n_b): |
| 77 | + return |
| 78 | + elif n_a is None and n_b is not None: |
| 79 | + yield '+', n_b |
| 80 | + n_b = g_next(g_b) |
| 81 | + elif n_b is None and n_a is not None: |
| 82 | + yield '-', n_a |
| 83 | + n_a = g_next(g_a) |
| 84 | + elif n_a <= n_b: |
| 85 | + print '-', n_a |
| 86 | + yield n_a |
| 87 | + n_a = g_next(g_a) |
| 88 | + else: |
| 89 | + print '+', n_b |
| 90 | + yield n_b |
| 91 | + n_b = g_next(g_b) |
| 92 | + |
| 93 | + |
| 94 | +def test_merge_big_file(): |
| 95 | + """ |
| 96 | + 测试大文件归并 |
| 97 | + :return: |
| 98 | + """ |
| 99 | + |
| 100 | + # 打开文件(文件本身就是迭代器) |
| 101 | + f_a = open('a.log', 'r') |
| 102 | + f_b = open('b.log', 'r') |
| 103 | + |
| 104 | + for i in merge_sort_t(f_a, f_b): |
| 105 | + print i |
| 106 | + |
| 107 | + # 关闭文件 |
| 108 | + f_a.close() |
| 109 | + f_b.close() |
| 110 | + |
| 111 | + |
| 112 | +if __name__ == '__main__': |
| 113 | + # test_merge_simple() |
| 114 | + test_merge_big_file() |
0 commit comments