diff --git a/src/auth/webid-tls.js b/src/auth/webid-tls.js
index 046af7b..60d36d4 100644
--- a/src/auth/webid-tls.js
+++ b/src/auth/webid-tls.js
@@ -139,7 +139,7 @@ function parseKeyObject(keyObj) {
 async function fetchProfileKeys(webId) {
   const response = await fetchWithTimeout(webId, {
     headers: {
-      'Accept': 'application/ld+json, text/turtle, application/json'
+      'Accept': 'text/html'
     }
   });
 
@@ -157,7 +157,7 @@ async function fetchProfileKeys(webId) {
     jsonLd = await turtleToJsonLd(text, webId);
   } else if (contentType.includes('text/html')) {
     // Try to extract JSON-LD from HTML data island
-    const jsonLdMatch = text.match(/<script type="application\/ld\+json">([\s\S]*?)<\/script>/i);
+    const jsonLdMatch = text.match(/<script[^>]*type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/i);
     if (jsonLdMatch) {
       jsonLd = JSON.parse(jsonLdMatch[1]);
     } else {
diff --git a/test/webid-tls.test.js b/test/webid-tls.test.js
index 202a43e..c367fe4 100644
--- a/test/webid-tls.test.js
+++ b/test/webid-tls.test.js
@@ -94,6 +94,56 @@ describe('WebID-TLS', () => {
     });
   });
 
+  describe('HTML JSON-LD extraction regex', () => {
+    // Test the regex pattern used to extract JSON-LD from HTML profiles
+    const jsonLdRegex = /<script[^>]*type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/i;
+    it('should match basic script tag', () => {
+      const html = '<script type="application/ld+json">{"@id": "#me"}</script>';
+      const match = html.match(jsonLdRegex);
+      assert.ok(match, 'Should match basic script tag');
+      assert.strictEqual(match[1], '{"@id": "#me"}');
+    });
+
+    it('should match script tag with additional attributes', () => {
+      const html = '<script type="application/ld+json" id="me">{"@id": "#me"}</script>';
+      const match = html.match(jsonLdRegex);
+      assert.ok(match, 'Should match script tag with id attribute');
+      assert.strictEqual(match[1], '{"@id": "#me"}');
+    });
+
+    it('should match script tag with attributes before type', () => {
+      const html = '<script id="me" type="application/ld+json">{"@id": "#me"}</script>';
+      const match = html.match(jsonLdRegex);
+      assert.ok(match, 'Should match script tag with id before type');
+      assert.strictEqual(match[1], '{"@id": "#me"}');
+    });
+
+    it('should match script tag with single quotes', () => {
+      const html = "<script type='application/ld+json'>{}</script>";
+      const match = html.match(jsonLdRegex);
+      assert.ok(match, 'Should match script tag with single quotes');
+    });
+
+    it('should match script tag with newlines in content', () => {
+      const html = `<script type="application/ld+json">
+        {
+          "@context": "http://xmlns.com/foaf/0.1/",
+          "@id": "#me",
+          "@type": "Person"
+        }
+      </script>`;
+      const match = html.match(jsonLdRegex);
+      assert.ok(match, 'Should match script tag with multiline content');
+      assert.ok(match[1].includes('"@id": "#me"'));
+    });
+
+    it('should not match non-jsonld script tags', () => {
+      const html = '<script type="text/javascript">console.log("test")</script>';
+      const match = html.match(jsonLdRegex);
+      assert.strictEqual(match, null, 'Should not match JavaScript script tag');
+    });
+  });
+
   describe('SAN format variations', () => {
     it('should handle lowercase uri prefix', () => {
       // Some certs might have lowercase