-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscratchdb.py
More file actions
348 lines (291 loc) · 10.7 KB
/
scratchdb.py
File metadata and controls
348 lines (291 loc) · 10.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
### A simple key-value database that supports set, get, and pop operations.
import ast
import os
import pickle
import struct
import sys
# The physical layer
class FileStorage(object):
INTEGER_FORMAT = '!Q'
INTEGER_LENGTH = 8 # in bytes, this has to be consistent with INTEGER_FORMAT, see https://docs.python.org/3.1/library/struct.html#format-characters
# INTEGER_FORMAT = '!H'
# INTEGER_LENGTH = 2
def __init__(self, filename):
# never truncate the file if it exists
try:
f = open(filename, 'bx+') # x mode raises an exception if the file exists, see https://docs.python.org/3/library/functions.html#open
except FileExistsError:
f = open(filename, 'r+b')
self._f = f
# ensure there are zero bytes at the end of the file
self._zero_end()
# methods that interact with the file itself ie the wrapper around it
def _tell(self):
"""Wrapper around File.tell(). Returns the current stream position."""
return self._f.tell()
def _seek(self, offset, whence=0):
"""
Wrapper around File.seek(). Moves the stream position to the
indicated offset.
"""
return self._f.seek(offset, whence)
def _seek_start(self):
"""
Moves the stream position to the beginning of the file and returns
that address.
"""
return self._f.seek(0, os.SEEK_SET)
def _seek_end(self):
"""
Moves the stream position to the end of file and returns that address.
"""
return self._f.seek(0, os.SEEK_END)
def _is_empty(self):
end_address = self._seek_end()
return end_address == 0
def _read(self, n):
"""
Wrapper around File.read() Reads n bytes starting from the current
stream position.
"""
return self._f.read(n)
def _write(self, bs):
"""
Wrapper around File.write(). Writes the data to the file which should
be an iterable of bytes.
"""
addr = self._f.write(bs)
self._f.flush()
return addr
def _is_closed(self):
return self._f.closed
# internal utility methods
def _zero_end(self):
"""
Writes zero bytes at the end of the file so that reading a 0 integer at
the end will succeed.
"""
if self._is_empty() or self._seek_end() < self.INTEGER_LENGTH:
self._seek_end()
self._write_integer(0)
else:
self._seek(-self.INTEGER_LENGTH, os.SEEK_END)
last_bytes = self._read(self.INTEGER_LENGTH)
if any([byte != b'\x00' for byte in last_bytes]):
self._seek_end()
self._write_integer(0)
def _read_integer(self):
"""Reads an integer from the file at the current stream position."""
bs = self._read(self.INTEGER_LENGTH)
return struct.unpack(self.INTEGER_FORMAT, bs)[0] # struct.unpack always returns a tuple
def _write_integer(self, n):
"""Writes an integer to the file at the current stream position."""
bs = struct.pack(self.INTEGER_FORMAT, n)
self._write(bs)
def _read_integer_and_rewind(self):
"""
Reads an integer from the file at the current stream position and
leaves the current stream position unchanged.
"""
n = self._read_integer()
self._seek(-self.INTEGER_LENGTH, os.SEEK_CUR)
return n
def _write_formatted(self, data):
"""
Writes an int with the length of the data followed by the data at the
current stream position.
"""
addr = self._tell()
length = len(data) + self.INTEGER_LENGTH
self._write_integer(length)
self._write(data)
return addr
def _seek_formatted_data_end(self, start_at=0):
"""
Moves the current stream position to the end of the data after start_at.
Assumes that start_at is the starting position of formatted data ie that
reading that address yields an integer with the length of the data that
follows.
"""
self._seek(start_at)
length = self._read_integer_and_rewind()
while length > 0:
self._seek(length, os.SEEK_CUR)
length = self._read_integer_and_rewind()
# the external api
def read(self, address):
"""
Reads the data at the given address.
Returns None if the address is past the end of the data on file.
"""
self._seek(address)
data_length = self._read_integer()
if data_length == 0:
return None
data = self._read(data_length - self.INTEGER_LENGTH)
return data
def append(self, data):
"""
Writes the data at the end of the file. Returns the address of the data
in the file.
"""
self._seek_formatted_data_end()
last_address = self._write_formatted(data)
self._zero_end()
return last_address
def close(self):
self._f.close()
@property
def is_open(self):
return not self.is_closed
@property
def is_closed(self):
return self._is_closed()
def next_address(self, address):
"""
Returns the address of the first piece of data after address or None if
there is no data past the address.
"""
self._seek_start()
length = self._read_integer_and_rewind()
read_address = 0
while read_address <= address and length > 0:
length = self._read_integer_and_rewind()
self._seek(length, os.SEEK_CUR)
read_address += length
if read_address == address:
return None
return read_address
# The logical layer
class Logical(object):
def __init__(self, dbname):
self._keys_storage = FileStorage(dbname + '.keys')
self._values_storage = FileStorage(dbname + '.values')
def _read_keys(self):
keys = []
address = 0
while address is not None:
key_data = self._keys_storage.read(address)
if key_data is not None:
key = pickle.loads(key_data)
keys.append(key)
address = self._keys_storage.next_address(address)
return keys
def _insert(self, key, value, for_deletion=False):
if not for_deletion:
value_data = pickle.dumps(value)
value_address = self._values_storage.append(value_data)
else:
value_address = None
key_tuple = (key, value_address)
key_data = pickle.dumps(key_tuple)
key_address = self._keys_storage.append(key_data)
def get(self, key):
# the key_data type is determined in _insert(), it's a tuple:
# (key, value_address)
# note that we do updates by inserting another copy of the key.
# this means that to retrieve the key we have to look at all of them
keys = self._read_keys()
value_data = None
for k, value_address in keys:
if k == key:
if value_address is None:
value_data = None
else:
value_data = self._values_storage.read(value_address)
if value_data is None:
raise KeyError('Key %s not found' % str(key))
return pickle.loads(value_data)
def set(self, key, value):
return self._insert(key, value, for_deletion=False)
def pop(self, key):
return self._insert(key, value=None, for_deletion=True)
def close_storage(self):
self._keys_storage.close()
self._values_storage.close()
# The database API
class ScratchDB(object):
def __init__(self, dbname):
self._ds = Logical(dbname)
def get(self, key):
return self._ds.get(key)
def set(self, key, value):
return self._ds.set(key, value)
def pop(self, key):
return self._ds.pop(key)
def close(self):
return self._ds.close_storage()
class QueryProcessor(object):
def __init__(self, db):
self._db = db
def _validate_cmd(self, s):
return s in ['set', 'get', 'pop']
def _to_python(self, s):
try:
pyval = ast.literal_eval(s)
except (SyntaxError, ValueError):
pyval = str(s)
return pyval
def _format(self, v):
return '<%s>: %s' % (type(v).__name__, v)
def _handle_get(self, key_string):
key = self._to_python(key_string)
try:
val = self._db.get(key)
except KeyError:
return 'Key not found: %s' % key_string
return self._format(val)
def _handle_set(self, key_string, args):
key = self._to_python(key_string)
value = self._to_python(''.join(args))
self._db.set(key, value)
return 'Set key %s to %s' % (self._format(key), self._format(value))
def _handle_pop(self, key_string):
key = self._to_python(key_string)
self._db.pop(key)
return 'Key popped: %s' % key_string
def execute(self, user_input):
"""
Accepts a string as provided by the user and returns the output that
should be displayed. The return value is a string with the result of
the query or an error message.
"""
cmd, key_string, *args = user_input.split()
if not self._validate_cmd(cmd):
return 'Invalid query. %s is not a ScratchDB command.' % cmd
if cmd == 'get':
if args:
return 'Invalid query. get should only have one argument, the key to get.'
return self._handle_get(key_string)
elif cmd == 'set':
if not args:
return 'Invalid query. set should have 2 arguments, the key to set and the value to set it to.'
return self._handle_set(key_string, args)
elif cmd == 'pop':
if args:
return 'Invalid query. pop should only have one argument, the key to pop.'
return self._handle_pop(key_string)
class Client(object):
def __init__(self):
args = sys.argv[1:]
if len(args) != 1:
self.print_usage()
sys.exit()
dbname = args[0]
self._db = ScratchDB(dbname)
self._qp = QueryProcessor(self._db)
def run_repl(self):
print('Use Ctrl-D to exit.')
while True:
try:
user_input = input('[scratchdb]=> ')
output = self._qp.execute(user_input)
print(' ' + output)
except (EOFError, KeyboardInterrupt):
sys.exit()
def print_usage(self):
print('Usage: python scratchdb <database name>.')
print('Necessary files will be created if they do not exist.')
if __name__ == '__main__':
client = Client()
client.run_repl()