-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathMaxLogic.BufferedFile.pas
More file actions
316 lines (264 loc) · 8.57 KB
/
MaxLogic.BufferedFile.pas
File metadata and controls
316 lines (264 loc) · 8.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
Unit MaxLogic.BufferedFile;
{
TBufferedFile is a single-direction read helper tailored for workloads that
consume streams byte-by-byte or in very small slices. It maintains an internal
block cache to amortize disk I/O while keeping cursor movement cheap.
Best use cases:
* Parsers or tokenizers that advance one byte/character at a time.
* Scenarios with frequent random peek/copy operations on small ranges.
Things it does NOT do:
* Writing or bidirectional buffering.
* Replacing RTL TFileStream for bulk reads (use >= 4 KB chunks if throughput
is the goal — TBufferedFile only wins when the caller cannot batch reads).
Construction:
* Create with a buffer size tuned to your smallest hot read.
* Call Open with a filename or stream. Position/Seek operate relative to the
underlying stream but remain read-only.
Keep usages simple:
* Iterate via Cursor/NextByte for sequential scans.
* Use copyBytes/CopyRawByteString for range access without disturbing the cursor.
}
{$IFNDEF DEBUG}
{$DEFINE USE_INLINE}
{$ENDIF}
Interface
Uses
sysUtils, classes;
Type
// Read-only buffered reader over a TStream. One block of up to fMaxBufferSize
// bytes is cached in fBuffer; Cursor points at the current byte inside that
// block and Position is the matching offset in the underlying stream.
TBufferedFile = Class
Public Const
// buffer size in bytes used when Create is called without an argument
cDefaultBlockSize = 16 * 1024;
Strict Private
// the stream being read; set by Open, reset to Nil by CleanUp
fFile: TStream;
// when True, CleanUp frees fFile
fOwnsStream: Boolean;
fMaxBufferSize: Integer; // maximum number of bytes read from the stream into the buffer per block
// storage for the cached block; sized once in Create
fBuffer: TBytes;
FCursor: pByte; // the byte currently being processed inside fBuffer
fBufferSize: Integer; // number of valid bytes currently held in the buffer (result of the last block read)
// address of the first byte of fBuffer; used to compute the cursor's offset in the block
fStartBuffer: pByte;
fFileSize: int64; // size of the underlying stream, captured when Open is called
fBufferOffsetInFile: int64; // stream offset of the first byte held in the buffer
// detaches the current stream, freeing it first when it is owned
Procedure CleanUp;
// loads the next block from the stream's current position and points the cursor at its first byte
Procedure readNextBlock; {$IFDEF USE_INLINE}Inline; {$ENDIF}
Private
// current offset in the stream (backing field of the Position property)
fPosition: int64;
// moves to the absolute offset Value by delegating to Seek
Procedure SetPosition(Const Value: int64);
Public
// aBufferSize is the size in bytes of the internal block buffer
Constructor Create(aBufferSize: Integer = cDefaultBlockSize);
Destructor Destroy; Override;
// opens aFileName for reading through a TFileStream owned by this object;
// aShareMode is forwarded to TFileStream.Create
Procedure Open(Const aFileName: String; aShareMode: Cardinal = fmShareDenyWrite); Overload;
// attaches an existing stream; when aTakeOwnerShipOfStream is True the stream
// is freed by this object once it is replaced or the object is destroyed
Procedure Open(aStream: TStream; aTakeOwnerShipOfStream: Boolean = False); Overload;
Procedure Reset; // go back to the start of the stream
// True when Position is at or beyond Size
Function EoF: Boolean; {$IFDEF USE_INLINE}Inline; {$ENDIF}
// advances by one byte; returns False if we are at EOF or no further byte could be read
Function NextByte: Boolean; {$IFDEF USE_INLINE}Inline; {$ENDIF}
// moves by aCount bytes relative to Position (the target is clamped to 0..Size);
// tries to move inside the buffer first, if that fails, it re-reads the buffer from the stream
Procedure Seek(Const aCount: int64);
// aStartIndex refers to the position in the file, not in the buffer
// note: this will try to read from the internal buffer first,
// but if that fails then the underlying stream will be accessed
// NOTE: this does not change the Position property
Function copyBytes(Const aStartIndex, aCount: int64): TBytes; Overload; {$IFDEF USE_INLINE}Inline; {$ENDIF}
// aStartIndex is relative to the file, not to the buffer; aBuffer is resized to aCount bytes
Procedure copyBytes(Const aStartIndex, aCount: int64; Var aBuffer: TBytes); Overload; {$IFDEF USE_INLINE}Inline; {$ENDIF}
// raw variant: aBuffer must point to at least aCount writable bytes
Procedure copyBytes(Const aStartIndex, aCount: int64; aBuffer: pointer); Overload; {$IFDEF USE_INLINE}Inline; {$ENDIF}
// same as copyBytes, but the bytes are returned as a RawByteString
Function CopyRawByteString(Const aStartIndex, aCount: int64): rawByteString; {$IFDEF USE_INLINE}Inline; {$ENDIF}
// the current byte, reinterpreted as an AnsiChar
Function CharCursor: AnsiChar; {$IFDEF USE_INLINE}Inline; {$ENDIF}
// the cursor pointer, reinterpreted as a pAnsiChar
Function pCharCursor: pAnsiChar; {$IFDEF USE_INLINE}Inline; {$ENDIF}
// pointer to the current byte inside the internal buffer
Property Cursor: pByte Read FCursor;
// the absolute position in the file; writing it seeks to that offset
Property Position: int64 Read fPosition Write SetPosition;
// the stream size as captured by Open
Property Size: int64 Read fFileSize;
End;
Implementation
{ TBufferedFile }
// Detaches the current stream. The stream object is destroyed only when this
// instance owns it; in both cases the reference is cleared afterwards.
Procedure TBufferedFile.CleanUp;
Begin
  // Nothing attached: nothing to release.
  If fFile = Nil Then
    Exit;

  If fOwnsStream Then
    fFile.Free;
  fFile := Nil;
End;
// Fills aBuffer with aCount bytes taken from absolute file offset aStartIndex.
// aBuffer is resized to exactly aCount bytes; a count of zero or less yields
// an empty array and no read is attempted.
Procedure TBufferedFile.copyBytes(Const aStartIndex, aCount: int64;
  Var aBuffer: TBytes);
Begin
  If aCount > 0 Then
  Begin
    SetLength(aBuffer, aCount);
    // hand the raw storage to the pointer-based overload, which does the work
    copyBytes(aStartIndex, aCount, @aBuffer[0]);
  End
  Else
    SetLength(aBuffer, 0);
End;
// Copies aCount bytes starting at absolute file offset aStartIndex into the
// memory aBuffer points to. Neither Position nor Cursor is changed.
//
// aStartIndex : offset relative to the start of the file (not the buffer).
// aCount      : number of bytes to copy; zero or less is a no-op.
// aBuffer     : destination; must provide at least aCount writable bytes.
//
// Raises EArgumentOutOfRangeException when aStartIndex is negative or the
// range extends past the file size, and EReadError when the stream stops
// delivering data before the range has been read completely.
Procedure TBufferedFile.copyBytes(Const aStartIndex, aCount: int64; aBuffer: pointer);
Var
  InBufferOffset: int64;
  SavedStreamPos: int64;
  Remaining: int64;
  ToRead: Integer;
  ReadCount: Integer;
  Dest: pByte;
Begin
  // Empty (or negative) requests are treated as "nothing to copy".
  If aCount <= 0 Then
    Exit;
  If aStartIndex < 0 Then
    raise EArgumentOutOfRangeException.Create('Requested range is invalid.');
  // Written as a subtraction so that aStartIndex + aCount cannot overflow.
  If aStartIndex > fFileSize - aCount Then
    raise EArgumentOutOfRangeException.Create('Requested range exceeds file size.');

  If (aStartIndex >= fBufferOffsetInFile) And
    (aStartIndex + aCount <= fBufferOffsetInFile + fBufferSize) Then
  Begin
    // The whole range is cached: translate the file offset to a buffer offset.
    InBufferOffset := aStartIndex - fBufferOffsetInFile;
    Move(fBuffer[InBufferOffset], aBuffer^, NativeInt(aCount));
    Exit;
  End;

  // Cache miss: read straight from the stream, then put the stream position
  // back so that the sequential block reads are not disturbed.
  SavedStreamPos := fFile.Position;
  Try
    fFile.Position := aStartIndex;
    Dest := pByte(aBuffer);
    Remaining := aCount;
    While Remaining > 0 Do
    Begin
      // TStream.Read takes a 32-bit count, so large ranges go in slices.
      If Remaining > High(Integer) Then
        ToRead := High(Integer)
      Else
        ToRead := Integer(Remaining);
      ReadCount := fFile.Read(Dest^, ToRead);
      // A stream may deliver fewer bytes than requested in one call; that is
      // not an error. Only a read that makes no progress is fatal.
      If ReadCount <= 0 Then
        raise EReadError.Create('Unable to read requested byte range from stream.');
      Inc(Dest, ReadCount);
      Dec(Remaining, ReadCount);
    End;
  Finally
    fFile.Position := SavedStreamPos;
  End;
End;
// Returns aCount bytes starting at absolute file offset aStartIndex as a new
// byte array. A count of zero or less returns an empty array, matching the
// other copyBytes overloads (a negative count used to reach SetLength with a
// negative length). Position and Cursor are not changed.
Function TBufferedFile.copyBytes(Const aStartIndex, aCount: int64): TBytes;
Begin
  If aCount <= 0 Then
  Begin
    Result := Nil;
    Exit;
  End;
  // The TBytes overload sizes Result itself, so no SetLength is needed here.
  copyBytes(aStartIndex, aCount, Result);
End;
// Creates the reader and allocates its block buffer.
//
// aBufferSize : size in bytes of the internal buffer; must be positive.
//
// Raises EArgumentOutOfRangeException for a zero or negative size. A zero
// size used to be accepted and left the buffer empty, which breaks every
// later block read (fBuffer[0] does not exist and nothing can be cached).
Constructor TBufferedFile.Create;
Begin
  Inherited Create;
  If aBufferSize <= 0 Then
    raise EArgumentOutOfRangeException.CreateFmt(
      'Buffer size must be greater than zero (got %d).', [aBufferSize]);
  fMaxBufferSize := aBufferSize;
  SetLength(fBuffer, aBufferSize);
End;
// Releases the attached stream (freeing it when owned) before the object
// itself is torn down.
Destructor TBufferedFile.Destroy;
Begin
  CleanUp;
  Inherited Destroy;
End;
// True once the current position has reached (or passed) the recorded size.
Function TBufferedFile.EoF: Boolean;
Begin
  Result := Not (fPosition < fFileSize);
End;
// Advances the cursor by one byte.
// Returns True when the cursor now points at a valid byte, False when the
// reader was already at EOF or the advance ran off the end of the data (in
// which case Position is set to the file size).
Function TBufferedFile.NextByte: Boolean;
Var
  NextOffset: NativeInt;
Begin
  Result := False;
  If fPosition >= fFileSize Then
    Exit;

  // Offset the cursor would have inside the cached block after the step.
  NextOffset := (NativeInt(FCursor) - NativeInt(fStartBuffer)) + 1;
  If NextOffset < fBufferSize Then
  Begin
    // Cheap path: the next byte is already cached.
    Inc(FCursor);
    Inc(fPosition);
    Result := True;
  End
  Else
  Begin
    // The cached block is used up: load the following one.
    readNextBlock;
    Result := fBufferSize <> 0;
    If Result Then
      fPosition := fBufferOffsetInFile
    Else
      fPosition := fFileSize;
  End;
End;
// Opens aFileName read-only and attaches it as an owned stream.
//
// aFileName  : path of the file to read.
// aShareMode : one of the fmShareXxx flags; it is OR-ed into the open mode.
//              (It used to be passed as the third argument of
//              TFileStream.Create, which is the Rights parameter, so the
//              requested share mode was not applied.)
//
// Exceptions from TFileStream.Create or from the initial block read are
// propagated; the file stream is not leaked in that case.
Procedure TBufferedFile.Open(Const aFileName: String; aShareMode: Cardinal = fmShareDenyWrite);
Var
  fs: TFileStream;
Begin
  fs := TFileStream.Create(aFileName, Word(fmOpenRead Or aShareMode));
  Try
    // Hand over ownership immediately so that a failure during the first
    // read still leaves the stream owned (and later freed) by this object.
    Open(fs, True);
  Except
    // If the failure happened before the stream was attached, free it here.
    If fFile <> fs Then
      fs.Free;
    Raise;
  End;
End;
// Loads up to fMaxBufferSize bytes from the stream's current position into
// the buffer. The block's file offset and Position are set to that stream
// position, and the cursor is placed on the first byte of the buffer.
Procedure TBufferedFile.readNextBlock;
Var
  BlockStart: int64;
Begin
  BlockStart := fFile.Position;
  fBufferOffsetInFile := BlockStart;
  fPosition := BlockStart;

  // fBufferSize becomes the number of bytes actually delivered (0 at the end).
  fBufferSize := fFile.Read(fBuffer[0], fMaxBufferSize);

  fStartBuffer := @fBuffer[0];
  FCursor := fStartBuffer;
End;
// Moves back to offset zero of the stream.
Procedure TBufferedFile.Reset;
Begin
  // Already at offset zero: leave buffer and cursor untouched.
  If fPosition = 0 Then
    Exit;
  Seek(-fPosition);
End;
// Moves Position by aCount bytes (negative values move backwards). The
// destination is clamped to the range 0..Size. When the destination lies in
// the cached block only the cursor is moved; otherwise the stream is
// repositioned and a fresh block is loaded starting at the destination.
Procedure TBufferedFile.Seek(Const aCount: int64);
Var
  Destination: int64;
Begin
  // Clamp the requested destination to the valid range.
  Destination := fPosition + aCount;
  If Destination < 0 Then
    Destination := 0
  Else If Destination > fFileSize Then
    Destination := fFileSize;

  // Destination outside the cached block: reload from the stream.
  If (Destination < fBufferOffsetInFile) Or
    (Destination >= fBufferOffsetInFile + fBufferSize) Then
  Begin
    fFile.Position := Destination;
    readNextBlock;
  End;

  fPosition := Destination;

  // Put the cursor on the destination byte when the block contains it;
  // otherwise (e.g. an empty block at EOF) leave it at the block start.
  FCursor := fStartBuffer;
  If (Destination >= fBufferOffsetInFile) And
    (Destination < fBufferOffsetInFile + fBufferSize) Then
    Inc(FCursor, Integer(Destination - fBufferOffsetInFile));
End;
// Setter of the Position property: converts the absolute offset Value into a
// relative move and lets Seek do the work (including clamping).
Procedure TBufferedFile.SetPosition(Const Value: int64);
Var
  Delta: int64;
Begin
  Delta := Value - fPosition;
  Seek(Delta);
End;
// Returns the byte under the cursor, reinterpreted as an AnsiChar.
Function TBufferedFile.CharCursor: AnsiChar;
Begin
  Result := pAnsiChar(FCursor)^;
End;
// Returns the cursor address typed as pAnsiChar; no data is copied.
Function TBufferedFile.pCharCursor: pAnsiChar;
Begin
  Result := pAnsiChar(pointer(FCursor));
End;
// Returns aCount bytes starting at absolute file offset aStartIndex as a
// RawByteString. A count of zero or less returns an empty string. Position
// and Cursor are not changed.
//
// A negative count used to pass the "<> 0" test and take the address of
// result[1] on an empty string; it is now handled like a zero count.
Function TBufferedFile.CopyRawByteString(Const aStartIndex, aCount: int64): rawByteString;
Begin
  If aCount <= 0 Then
  Begin
    Result := '';
    Exit;
  End;
  SetLength(Result, aCount);
  // Result holds at least one character here, so Result[1] is addressable.
  copyBytes(aStartIndex, aCount, @Result[1]);
End;
// Attaches aStream as the data source and loads the first block from the
// stream's current position.
//
// aStream                : stream to read from; must not be Nil.
// aTakeOwnerShipOfStream : when True the stream is freed by this object once
//                          it is replaced or the object is destroyed.
//
// Raises EArgumentNilException when aStream is Nil (before any previously
// attached stream is released).
Procedure TBufferedFile.Open(aStream: TStream; aTakeOwnerShipOfStream: Boolean = False);
Begin
  If aStream = Nil Then
    raise EArgumentNilException.Create('aStream must not be nil.');

  // Re-opening the stream that is already attached must not free it first,
  // otherwise an owned stream would be destroyed and then used again.
  If aStream <> fFile Then
    CleanUp;

  fOwnsStream := aTakeOwnerShipOfStream;
  fFile := aStream;
  fFileSize := fFile.Size;
  // readNextBlock replaces this value with the stream's current position.
  fPosition := 0;
  readNextBlock;
End;
End.