forked from sanbuphy/learn-coding-agent
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfiles.ts
More file actions
156 lines (151 loc) · 2.56 KB
/
files.ts
File metadata and controls
156 lines (151 loc) · 2.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
/**
 * File extensions treated as binary and therefore skipped by text-based
 * operations: such files cannot be meaningfully compared as text and are
 * often large.
 *
 * Every entry is lowercase and includes the leading dot.
 */
export const BINARY_EXTENSIONS: Set<string> = new Set([
  // Images
  '.png', '.jpg', '.jpeg', '.gif', '.bmp', '.ico', '.webp', '.tiff', '.tif',

  // Videos
  '.mp4', '.mov', '.avi', '.mkv', '.webm', '.wmv', '.flv', '.m4v', '.mpeg',
  '.mpg',

  // Audio
  '.mp3', '.wav', '.ogg', '.flac', '.aac', '.m4a', '.wma', '.aiff', '.opus',

  // Archives
  '.zip', '.tar', '.gz', '.bz2', '.7z', '.rar', '.xz', '.z', '.tgz', '.iso',

  // Executables/binaries
  '.exe', '.dll', '.so', '.dylib', '.bin', '.o', '.a', '.obj', '.lib', '.app',
  '.msi', '.deb', '.rpm',

  // Documents (PDF is here; FileReadTool excludes it at the call site)
  '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.odt', '.ods',
  '.odp',

  // Fonts
  '.ttf', '.otf', '.woff', '.woff2', '.eot',

  // Bytecode / VM artifacts
  '.pyc', '.pyo', '.class', '.jar', '.war', '.ear', '.node', '.wasm', '.rlib',

  // Database files
  '.sqlite', '.sqlite3', '.db', '.mdb', '.idx',

  // Design / 3D
  '.psd', '.ai', '.eps', '.sketch', '.fig', '.xd', '.blend', '.3ds', '.max',

  // Flash
  '.swf', '.fla',

  // Lock/profiling data
  '.lockb', '.dat', '.data',
])
/**
 * Report whether a path ends in one of the known binary extensions.
 *
 * The extension is everything from the last `.` in the path to its end,
 * compared case-insensitively against `BINARY_EXTENSIONS`.
 *
 * @param filePath - Path or file name to inspect.
 * @returns `true` if the lowercased suffix is in `BINARY_EXTENSIONS`.
 */
export function hasBinaryExtension(filePath: string): boolean {
  // With no dot in the path, lastIndexOf yields -1 and slice(-1) returns
  // just the final character, which never equals a dot-prefixed entry.
  const lastDot = filePath.lastIndexOf('.')
  const suffix = filePath.slice(lastDot)
  return BINARY_EXTENSIONS.has(suffix.toLowerCase())
}
/**
 * Upper bound on how many leading bytes are sampled when sniffing a buffer
 * for binary content.
 */
const BINARY_CHECK_SIZE = 8192

/**
 * Heuristically decide whether a buffer holds binary data.
 *
 * Only the first `BINARY_CHECK_SIZE` bytes (or the whole buffer, if shorter)
 * are examined. The buffer is classified as binary when that sample either
 * contains a null byte or consists of more than 10% control bytes.
 *
 * @param buffer - Raw file content to inspect.
 * @returns `true` if the sampled prefix looks binary, otherwise `false`.
 */
export function isBinaryContent(buffer: Buffer): boolean {
  const sampleLength = Math.min(buffer.length, BINARY_CHECK_SIZE)
  let controlBytes = 0

  // subarray gives a view over the same memory, so nothing is copied.
  for (const value of buffer.subarray(0, sampleLength)) {
    // A single null byte is treated as conclusive.
    if (value === 0) {
      return true
    }

    // Tab (9), newline (10) and carriage return (13) are ordinary text.
    const isTextWhitespace = value === 9 || value === 10 || value === 13

    // Only bytes below 32 are counted; anything from 32 upward (including
    // bytes above 126) is never tallied as non-printable.
    if (value < 32 && !isTextWhitespace) {
      controlBytes += 1
    }
  }

  // Strictly more than 10% control bytes means binary. For an empty buffer
  // this is 0 / 0 = NaN, and NaN > 0.1 is false, so the result is `false`.
  return controlBytes / sampleLength > 0.1
}