-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathfile_sorted_join.py
More file actions
75 lines (67 loc) · 2.43 KB
/
file_sorted_join.py
File metadata and controls
75 lines (67 loc) · 2.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
"""
Module with classes for file management:
- File reader
- File writer
- Reader that joins several sorted files
"""
import os
from devoutils.fileio.file_reader import FileReader
class FileSortedJoin:
    """
    Merge several individually sorted files into a single sorted stream.

    The object is its own iterator: each call to ``next()`` returns the
    record that comes first among the current head records of all the
    files. A part file is deleted from disk as soon as its reader is
    exhausted.

    The arguments it receives are:
    - part_files: the list of files to join (each one already sorted)
    - reverse (keyword, default False): True if the files are sorted in
      reverse order
    - comp (keyword): the comparator to use, a function ``comp(a, b)``
      returning a negative, zero or positive number. Defaults to the
      natural ordering of the records.

    Every keyword argument is forwarded to ``FileReader`` and ``is_gzip``
    is always forced to True (presumably the part files are gzip
    compressed -- confirm against FileReader).
    """

    def __init__(self, part_files, **kwargs):
        self.__reverse = kwargs.get('reverse', False)
        # Multiplying the comparator result by -1 flips the ordering.
        self.__reverse_mod = -1 if self.__reverse else 1
        kwargs['is_gzip'] = True
        # Open every reader first, then prime them, so a failure while
        # opening happens before any record has been consumed.
        self.__files = [FileReader(tmp_file, **kwargs)
                        for tmp_file in part_files]
        # Sorted list of (head record, reader) pairs, one per live reader.
        self.__last_sorted_list = []
        self.__comparator = kwargs.get('comp', lambda a, b: (a > b) - (a < b))
        for reader in self.__files:
            # An empty part file is dropped here instead of aborting the
            # constructor with StopIteration.
            self.__advance(reader)

    def __advance(self, reader):
        """
        Read the next record of 'reader' and queue it in sorted position.
        If the reader has no more records, its file is removed from disk
        and nothing is queued.
        """
        try:
            record = reader.next()
        except StopIteration:
            os.remove(reader.file_name)
            return
        self.__in_sort(self.__last_sorted_list, (record, reader))

    def __in_sort(self, sorted_list, item):
        """
        Insert item in list 'sorted_list', and keep it sorted assuming
        'sorted_list' is sorted. If item is already in 'sorted_list',
        insert it to the right of the rightmost item.

        Items are (record, reader) pairs; only the record takes part in
        the comparison.
        """
        lowest = 0
        highest = len(sorted_list)
        # Binary search for the first position whose record sorts strictly
        # after the new one.
        while lowest < highest:
            mid = (lowest + highest) // 2
            order = self.__reverse_mod * self.__comparator(
                item[0], sorted_list[mid][0])
            if order < 0:
                highest = mid
            else:
                lowest = mid + 1
        sorted_list.insert(lowest, item)

    def __iter__(self):
        return self

    def next(self):
        """
        Iterate the elements of the files in order, returning the element
        that comes next in the ordering.
        When the end of all the files is reached it raises StopIteration.
        :return: next element
        """
        if not self.__last_sorted_list:
            raise StopIteration
        record, reader = self.__last_sorted_list.pop(0)
        # Replace the record just taken with the following one from the
        # same file (or delete the file if it is exhausted).
        self.__advance(reader)
        return record

    def __next__(self):
        return self.next()

    def __getitem__(self, position):
        # 'position' is ignored: indexing simply returns the next element.
        return self.next()