-
Notifications
You must be signed in to change notification settings - Fork 94
Expand file tree
/
Copy pathcheck-links.mjs
More file actions
290 lines (240 loc) · 8.39 KB
/
check-links.mjs
File metadata and controls
290 lines (240 loc) · 8.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
#!/usr/bin/env node
/**
* Dead link checker for MDX documentation files.
* Transparency note: this script is AI-generated. If it doesn't detect things correctly,
* please blame this script and use AI to improve its detection. It has proven massively
* helpful during larger docs refactors so far.
*
* Checks for:
* - Broken internal links (markdown and JSX/HTML style)
* - Missing anchor/heading references
* - Invalid file paths
*
* Usage: node dev/check-links.mjs [--check-anchors]
*/
import fs from 'fs';
import path from 'path';
import { glob } from 'glob';
import GithubSlugger from 'github-slugger';
import { fileURLToPath } from 'url';
// Recreate __filename/__dirname for this ES module from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// The docs/ folder is a sibling of the directory that contains this script.
const DOCS_DIR = path.join(path.dirname(__dirname), 'docs');
// Parse CLI flags
const args = process.argv.slice(2);
// Anchor (#heading) validation only runs when --check-anchors is passed.
const CHECK_ANCHORS = args.includes('--check-anchors');
// Regex patterns for extracting links.
// All three carry the /g flag, so exec() tracks its position in lastIndex between calls.
// Markdown link [text](target): group 1 is the text, group 2 the raw target.
const MARKDOWN_LINK_REGEX = /\[([^\]]*)\]\(([^)]+)\)/g;
// href="..." or href='...': group 1 is the attribute value.
const JSX_HREF_REGEX = /href=["']([^"']+)["']/g;
// src="..." or src='...': group 1 is the attribute value.
const SRC_ATTR_REGEX = /src=["']([^"']+)["']/g;
/**
 * Collect the anchor slugs for every Markdown heading (`#` to `######`) in an
 * MDX document.
 *
 * Fenced code blocks are removed first so that `#` lines inside code examples
 * are not mistaken for headings. Link markup inside a heading is reduced to
 * its visible text before slugging, so `## Using [Foo](/foo) with Bar` is
 * slugged from "Using Foo with Bar" rather than from "Foo" alone.
 *
 * @param {string} content - Raw MDX source of one file.
 * @returns {Set<string>} Slugs produced by GithubSlugger for each heading.
 */
function extractHeadings(content) {
  const slugger = new GithubSlugger();
  const headings = new Set();

  // Remove code blocks to avoid false positives
  const contentWithoutCode = content.replace(/```[\s\S]*?```/g, '');

  for (const [, rawTitle] of contentWithoutCode.matchAll(/^#{1,6}\s+(.+)$/gm)) {
    // Replace every [Text](/path) with Text, keeping the text around the links.
    const title = rawTitle.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1');
    headings.add(slugger.slug(title.trim()));
  }

  return headings;
}
/**
 * Scan DOCS_DIR for `.mdx` files and index them by route.
 *
 * Every file is registered under its extension-less route, with and without a
 * trailing slash. A nested `dir/index.mdx` is registered as `/dir`. The
 * top-level `index.mdx` is registered as `/`; `/index` and `/index/` are kept
 * as aliases so lookups that derive the route from the file name still hit.
 * Both returned maps share exactly the same keys.
 *
 * @returns {Promise<{pathMap: Map<string, string>, headingsMap: Map<string, Set<string>>}>}
 *   `pathMap` maps route -> absolute file path; `headingsMap` maps route ->
 *   heading slugs as returned by extractHeadings.
 */
async function buildPathMap() {
  const files = await glob('**/*.mdx', { cwd: DOCS_DIR });
  const pathMap = new Map();
  const headingsMap = new Map();

  for (const file of files) {
    const fullPath = path.join(DOCS_DIR, file);
    const content = fs.readFileSync(fullPath, 'utf-8');
    const headings = extractHeadings(content);

    // Routes always use forward slashes, whatever separator the file list uses.
    const withoutExtension = file.replace(/\\/g, '/').replace(/\.mdx$/, '');

    let routes;
    if (withoutExtension === 'index') {
      // Top-level index page: '/' is its route, '/index' is the file-name-derived alias.
      routes = ['/', '/index', '/index/'];
    } else {
      const routePath = '/' + withoutExtension.replace(/\/index$/, '');
      routes = [routePath, routePath + '/'];
    }

    for (const route of routes) {
      pathMap.set(route, fullPath);
      headingsMap.set(route, headings);
    }
  }

  return { pathMap, headingsMap };
}
/**
 * Tell whether `linkPath` exists inside the `public` folder that sits next to
 * this script's directory.
 *
 * @param {string} linkPath - Path to look up, joined onto the public folder.
 * @returns {boolean} True when something exists at that location.
 */
function checkPublicPath(linkPath) {
  const projectRoot = path.dirname(__dirname);
  return fs.existsSync(path.join(projectRoot, 'public', linkPath));
}
/**
 * Tell whether `linkPath` exists inside DOCS_DIR (used for images stored
 * alongside the docs).
 *
 * @param {string} linkPath - Path to look up, joined onto DOCS_DIR.
 * @returns {boolean} True when something exists at that location.
 */
function checkDocsPath(linkPath) {
  return fs.existsSync(path.join(DOCS_DIR, linkPath));
}
/**
 * Drop an optional quoted Markdown link title, e.g. `/path "Title"` -> `/path`.
 *
 * @param {string} target - Trimmed text found between the parentheses of a Markdown link.
 * @returns {string} The destination without its title, or `target` unchanged.
 */
function stripLinkTitle(target) {
  const titled = /^(\S+)\s+(?:"[^"]*"|'[^']*')$/.exec(target);
  return titled ? titled[1] : target;
}

/**
 * Find every link-like reference in one MDX document.
 *
 * Three kinds are collected, in this order: Markdown links `[text](url)`,
 * `href="..."` attributes and `src="..."` attributes. Anything inside fenced
 * code blocks is ignored.
 *
 * @param {string} content - Raw MDX source.
 * @param {string} filePath - Path of the file being scanned (not used by the
 *   extraction itself; kept for the existing call signature).
 * @returns {Array<{url: string, lineNumber: number, type: 'markdown'|'href'|'src'}>}
 *   One entry per match; `lineNumber` is 1-based.
 */
function extractLinks(content, filePath) {
  // Blank out code blocks but keep their newlines so line numbers stay aligned
  // with the original file.
  const contentWithoutCode = content.replace(/```[\s\S]*?```/g, (block) =>
    block.replace(/[^\n]/g, ' '));

  const collect = (regex, group, type, normalize = (value) => value) => {
    const found = [];
    let lineNumber = 1;
    let scannedUpTo = 0;
    let match;

    // The regexes are shared /g instances: always start from the beginning.
    regex.lastIndex = 0;
    while ((match = regex.exec(contentWithoutCode)) !== null) {
      // Matches arrive in increasing offset order, so count newlines
      // incrementally instead of re-splitting the whole prefix each time.
      for (let i = scannedUpTo; i < match.index; i++) {
        if (contentWithoutCode[i] === '\n') {
          lineNumber++;
        }
      }
      scannedUpTo = match.index;
      found.push({ url: normalize(match[group].trim()), lineNumber, type });
    }
    return found;
  };

  return [
    ...collect(MARKDOWN_LINK_REGEX, 2, 'markdown', stripLinkTitle),
    ...collect(JSX_HREF_REGEX, 1, 'href'),
    ...collect(SRC_ATTR_REGEX, 1, 'src'),
  ];
}
/**
 * Decide whether one extracted link points at something that exists.
 *
 * Resolution order for internal links: a known route in `pathMap`, then a
 * file under the public folder, then (for paths with a file extension) a file
 * under the docs folder. Anchors are compared against `headingsMap` only when
 * CHECK_ANCHORS is enabled; empty anchors are never reported.
 *
 * @param {{url: string}} link - Link record as produced by extractLinks.
 * @param {string} currentFile - Path of the MDX file that contains the link;
 *   used to resolve relative links and anchor-only links.
 * @param {Map<string, string>} pathMap - Route -> file path.
 * @param {Map<string, Set<string>>} headingsMap - Route -> heading slugs.
 * @returns {string|null} A human-readable error, or null when the link is
 *   valid or deliberately skipped.
 */
function validateLink(link, currentFile, pathMap, headingsMap) {
  const { url } = link;

  // Skip external links, mailto, tel, javascript, etc. '//' covers
  // protocol-relative URLs such as //cdn.example.com/lib.js.
  const skippedPrefixes = [
    'http://', 'https://', 'mailto:', 'tel:',
    'javascript:', 'data:', 'command:', '//',
  ];
  if (skippedPrefixes.some((prefix) => url.startsWith(prefix))) {
    return null;
  }

  // Skip angle-bracket URLs like <https://example.com> ('<http' also matches '<https').
  if (url.startsWith('<http')) {
    return null;
  }

  // Skip email addresses without mailto: prefix (common shorthand)
  if (url.includes('@') && !url.includes('/')) {
    return null;
  }

  // Handle anchor-only links (#heading)
  if (url.startsWith('#')) {
    const anchor = url.substring(1);
    // A bare '#' carries no heading to verify, matching how empty anchors are
    // ignored for path links below.
    if (!CHECK_ANCHORS || anchor === '') {
      return null;
    }
    const currentRoute = '/' + path.relative(DOCS_DIR, currentFile)
      .replace(/\\/g, '/')
      .replace(/\.mdx$/, '')
      .replace(/\/index$/, '');
    const headings = headingsMap.get(currentRoute);
    if (headings && !headings.has(anchor)) {
      return `Anchor "${anchor}" not found in current file`;
    }
    return null;
  }

  // Parse path and anchor; a query string is not part of the route.
  const [pathAndQuery, anchor] = url.split('#');
  const linkPath = pathAndQuery.split('?')[0];

  let resolvedPath;
  if (linkPath.startsWith('/')) {
    resolvedPath = linkPath;
  } else {
    // Relative path - resolve from current file's directory.
    // For index.mdx files, resolve from parent since the URL is the folder itself.
    let currentDir = path.dirname(currentFile);
    if (path.basename(currentFile) === 'index.mdx') {
      currentDir = path.dirname(currentDir);
    }
    const relativeToDocs = path.relative(DOCS_DIR, currentDir);
    resolvedPath = '/' + path.join(relativeToDocs, linkPath).replace(/\\/g, '/');
  }

  // Collapse '.' and '..' segments and keep the result rooted at '/'.
  resolvedPath = path.normalize(resolvedPath).replace(/\\/g, '/');
  if (!resolvedPath.startsWith('/')) {
    resolvedPath = '/' + resolvedPath;
  }

  // Check if it's a valid route
  if (pathMap.has(resolvedPath)) {
    // If there's an anchor, validate it (only when flag is enabled)
    if (CHECK_ANCHORS && anchor) {
      const headings = headingsMap.get(resolvedPath);
      if (headings && !headings.has(anchor)) {
        return `Anchor "${anchor}" not found in "${resolvedPath}"`;
      }
    }
    return null;
  }

  // Check if it's a public asset
  if (checkPublicPath(resolvedPath)) {
    return null;
  }

  // A file extension suggests an asset (.png, .pdf, ...): the public folder
  // was already ruled out above, so only the docs folder is left to try.
  if (path.extname(resolvedPath)) {
    if (checkDocsPath(resolvedPath)) {
      return null;
    }
    return `File not found: "${resolvedPath}"`;
  }

  return `Page not found: "${resolvedPath}"`;
}
/**
 * Run the link check over every MDX file under DOCS_DIR and print a report.
 *
 * Exits the process with code 0 when no problems were found, otherwise prints
 * each offending file with its line numbers and exits with code 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('🔍 Checking for dead links in MDX files...\n');

  const { pathMap, headingsMap } = await buildPathMap();
  const files = await glob('**/*.mdx', { cwd: DOCS_DIR });

  const errors = [];
  let totalErrors = 0;

  for (const file of files) {
    const absolutePath = path.join(DOCS_DIR, file);
    const source = fs.readFileSync(absolutePath, 'utf-8');

    // Gather the problems of this file before deciding whether to report it.
    const fileErrors = [];
    extractLinks(source, absolutePath).forEach((link) => {
      const error = validateLink(link, absolutePath, pathMap, headingsMap);
      if (error) {
        fileErrors.push({ line: link.lineNumber, url: link.url, error });
      }
    });

    if (fileErrors.length === 0) {
      continue;
    }
    errors.push({ file: `docs/${file}`, errors: fileErrors });
    totalErrors += fileErrors.length;
  }

  // Output results
  if (errors.length === 0) {
    console.log('✅ No dead links found!');
    process.exit(0);
  }

  console.log(`❌ Found ${totalErrors} dead link(s) in ${errors.length} file(s):\n`);
  errors.forEach(({ file, errors: fileErrors }) => {
    console.log(`\n📄 ${file}`);
    fileErrors.forEach(({ line, url, error }) => {
      console.log(` Line ${line}: ${url}`);
      console.log(` └─ ${error}`);
    });
  });
  console.log('\n');
  process.exit(1);
}
/**
 * Last-resort handler: log an unexpected failure of the checker itself and
 * exit with a non-zero status.
 *
 * @param {unknown} failure - Whatever main() rejected with.
 */
function reportFatalError(failure) {
  console.error('Error running link checker:', failure);
  process.exit(1);
}

main().catch(reportFatalError);