Skip to content

Commit 0f9a0a3

Browse files
feat: optional parse PDF metadata as JS dates (#44)
* feat: parse dates * overwrite date properties if parseDate option is true * perf: use Proxy object --------- Co-authored-by: Johann Schopplich <[email protected]>
1 parent e061987 commit 0f9a0a3

File tree

2 files changed

+90
-3
lines changed

2 files changed

+90
-3
lines changed

src/meta.ts

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,70 @@
11
import type { DocumentInitParameters, PDFDocumentProxy } from 'pdfjs-dist/types/src/display/api'
2-
import { getDocumentProxy, isPDFDocumentProxy } from './utils'
2+
import { getDocumentProxy, getResolvedPDFJS, isPDFDocumentProxy } from './utils'
3+
4+
/**
 * XMP metadata property names whose values are treated as dates.
 *
 * Both the `xmp:` prefix and the `xap:` prefix are listed. Names are
 * compared verbatim (lower-case) against the name passed to `metadata.get()`.
 * Declared `readonly` so the shared module-level list cannot be mutated by
 * accident.
 */
const XMP_DATE_PROPERTIES: readonly string[] = [
  'xmp:createdate',
  'xmp:modifydate',
  'xmp:metadatadate',
  'xap:createdate',
  'xap:modifydate',
  'xap:metadatadate',
]
312

413
/**
 * Reads the `/Info` dictionary and the XMP metadata of a PDF document.
 *
 * @param data - Raw PDF data, or an already loaded `PDFDocumentProxy`.
 * @param options - Optional settings.
 * @param options.parseDates - When `true`, `info.CreationDate` and
 * `info.ModDate` are converted with `PDFDateString.toDateObject`, and
 * `metadata.get(name)` returns a parsed `Date` for the XMP date properties
 * listed in `XMP_DATE_PROPERTIES`.
 * @returns An object with `info` (plain record, `{}` when absent) and
 * `metadata` (the document's metadata object, or `{}` when absent).
 */
export async function getMeta(
  data: DocumentInitParameters['data'] | PDFDocumentProxy,
  options: {
    parseDates?: boolean
  } = {},
) {
  const pdf = isPDFDocumentProxy(data) ? data : await getDocumentProxy(data)
  const meta = await pdf.getMetadata()

  // Work on a shallow copy and a local `metadata` binding instead of writing
  // into `meta`. NOTE(review): pdf.js appears to hand out the same
  // `getMetadata()` result on repeated calls for one document, so mutating it
  // would leak parsed dates into later calls – confirm against the pdf.js
  // version in use.
  const info = { ...meta?.info } as Record<string, any>
  let metadata = meta?.metadata

  if (options.parseDates) {
    const { PDFDateString } = await getResolvedPDFJS()

    // Primary date properties from the /Info dictionary
    if (info.CreationDate) {
      info.CreationDate = PDFDateString.toDateObject(info.CreationDate)
    }
    if (info.ModDate) {
      info.ModDate = PDFDateString.toDateObject(info.ModDate)
    }

    // Wrap the metadata object so that `get()` parses XMP date properties
    if (metadata) {
      metadata = new Proxy(metadata, {
        get(target, prop) {
          if (prop === 'get') {
            return (name: string) => {
              const value = target.get(name)

              if (XMP_DATE_PROPERTIES.includes(name) && value) {
                return parseISODateString(value)
              }

              return value
            }
          }

          // Forward everything else to the real object. Functions are bound
          // to `target` so they do not run with `this` set to the proxy,
          // which fails for classes that rely on private fields.
          const member = Reflect.get(target, prop, target)
          return typeof member === 'function' ? member.bind(target) : member
        },
      })
    }
  }

  return {
    info,
    metadata: metadata || {},
  }
}
61+
62+
/**
 * Parses a date string with `Date.parse` and returns it as a `Date`.
 *
 * @param isoDateString - The date string to parse.
 * @returns The parsed `Date`, or `undefined` when the input is empty, is not
 * a string, or cannot be parsed.
 */
function parseISODateString(isoDateString: string): Date | undefined {
  // The `string` annotation is erased at runtime, so also reject non-string
  // values instead of letting `Date.parse` coerce them.
  if (typeof isoDateString !== 'string' || !isoDateString)
    return undefined

  const timestamp = Date.parse(isoDateString)
  return Number.isNaN(timestamp) ? undefined : new Date(timestamp)
}

test/index.test.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,37 @@ describe('unpdf', () => {
119119

120120
expect(info.Creator).toMatchInlineSnapshot('"Writer"')
121121
})
122+
123+
it('parses PDF dates when parseDates option is enabled', async () => {
  const parseOptions = { parseDates: true }

  // sample.pdf: only CreationDate exists in the /Info dictionary
  const samplePdf = await getPDF()
  const { info: infoWithDates } = await getMeta(samplePdf, parseOptions)

  expect(infoWithDates.ModDate).toBeUndefined()
  expect(infoWithDates.CreationDate).toBeInstanceOf(Date)
  expect(infoWithDates.CreationDate.getFullYear()).toBe(2007)

  // links.pdf: passed as a document proxy, carries /Info and XMP dates
  const linksProxy = await getDocumentProxy(await getPDF('links.pdf'))
  const { info: infoLinks, metadata: linksMetadata } = await getMeta(linksProxy, parseOptions)

  // /Info dates become Date objects
  expect(infoLinks.CreationDate).toBeInstanceOf(Date)
  expect(infoLinks.ModDate).toBeInstanceOf(Date)
  expect(infoLinks.CreationDate.getFullYear()).toBe(2024)
  expect(infoLinks.ModDate.getFullYear()).toBe(2024)

  // XMP dates under the `xmp:` prefix become Date objects
  for (const key of ['xmp:createdate', 'xmp:modifydate', 'xmp:metadatadate']) {
    expect(linksMetadata.get(key)).toBeInstanceOf(Date)
  }
  expect(linksMetadata.get('xmp:createdate').getFullYear()).toBe(2024)

  // The `xap:` variants are expected to be null for links.pdf
  for (const key of ['xap:createdate', 'xap:modifydate', 'xap:metadatadate']) {
    expect(linksMetadata.get(key)).toBeNull()
  }
})
122153
})
123154

124155
async function getPDF(filename = 'sample.pdf') {

0 commit comments

Comments
 (0)