Skip to content

Commit 033da20

Browse files
Shaun Mahony and claude committed
Add Vierstra motif archetypes, fix JASPAR version, improve CIS-BP links
- Add Vierstra non-redundant TF motif clustering v2.0 as a new database: parse MEME-format archetype models grouped by TF family
- Fix JASPAR version display: 2024 → 2026
- Fix CIS-BP: always update urlPattern on existing records so homepage links don't go missing on re-sync
- Add CIS-BP per-entry links via TF identifier (baseId) so match result names link to the correct TF report page
- Add "vierstra" to ReferenceDatabase source enum and DatabaseSelector

Co-Authored-By: Claude Opus 4.6 <[email protected]>
1 parent 476d8ba commit 033da20

File tree

9 files changed

+354
-11
lines changed

9 files changed

+354
-11
lines changed

web/src/app/admin/page.tsx

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@ export default function AdminPage() {
4242
const [hocomocoResult, setHocomocoResult] = useState<string | null>(null);
4343
const [hocomocoCollection, setHocomocoCollection] = useState<string>("H14CORE");
4444

45+
// Vierstra state
46+
const [vierstraSyncing, setVierstraSyncing] = useState(false);
47+
const [vierstraResult, setVierstraResult] = useState<string | null>(null);
48+
4549
const fetchData = useCallback(async () => {
4650
const [jobsRes, dbsRes] = await Promise.all([
4751
fetch("/api/admin/jobs"),
@@ -116,6 +120,30 @@ export default function AdminPage() {
116120
}
117121
};
118122

123+
// Triggers the Vierstra sync endpoint and reports the outcome in the
// vierstraResult state. The syncing flag is raised for the duration of the
// request and always lowered again, whether the request succeeds or throws.
const handleVierstraSync = async () => {
  setVierstraSyncing(true);
  setVierstraResult(null);

  try {
    const response = await fetch("/api/admin/sync-vierstra", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
    });
    const payload = await response.json();

    if (!response.ok) {
      setVierstraResult(`Sync failed: ${payload.error}`);
      return;
    }

    const summary = payload.result;
    setVierstraResult(
      `Sync complete: ${summary.totalStored} archetypes stored from ${summary.families.length} families, ${summary.errors.length} errors`
    );
    // Refresh the admin data after a successful sync.
    fetchData();
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    setVierstraResult(`Sync failed: ${reason}`);
  } finally {
    setVierstraSyncing(false);
  }
};
146+
119147
const handleCisbpSync = async () => {
120148
setCisbpSyncing(true);
121149
setCisbpResult(null);
@@ -363,6 +391,33 @@ export default function AdminPage() {
363391
<p className="text-sm text-gray-600 mt-2">{hocomocoResult}</p>
364392
)}
365393
</div>
394+
395+
{/* Vierstra Sync Controls */}
396+
<div className="border-t border-gray-200 pt-4 mt-4">
397+
<h4 className="text-sm font-medium text-gray-900 mb-3">
398+
Sync Vierstra Motif Archetypes
399+
</h4>
400+
<p className="text-xs text-gray-500 mb-3">
401+
Downloads consensus archetype models from the{" "}
402+
<a
403+
href="https://resources.altius.org/~jvierstra/projects/motif-clustering-v2.0beta/"
404+
target="_blank"
405+
rel="noopener noreferrer"
406+
className="underline hover:text-gray-600"
407+
>
408+
Vierstra non-redundant TF motif clustering v2.0
409+
</a>
410+
. Motifs are grouped by TF family.
411+
</p>
412+
<div className="flex items-center gap-3">
413+
<Button onClick={handleVierstraSync} disabled={vierstraSyncing}>
414+
{vierstraSyncing ? "Syncing..." : "Sync Vierstra Archetypes"}
415+
</Button>
416+
</div>
417+
{vierstraResult && (
418+
<p className="text-sm text-gray-600 mt-2">{vierstraResult}</p>
419+
)}
420+
</div>
366421
</Card>
367422
</div>
368423
);
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import { NextRequest, NextResponse } from "next/server";
2+
import { connectDB } from "@/lib/db/mongoose";
3+
import { syncVierstra } from "@/lib/vierstra/sync";
4+
5+
/**
 * Returns true when the request carries a "stamp-admin" cookie whose value
 * is exactly "authenticated".
 *
 * NOTE(review): the check compares against a fixed string; confirm the cookie
 * is issued and protected (httpOnly / signed) elsewhere so it cannot be forged.
 */
function isAdmin(request: NextRequest): boolean {
  const adminCookie = request.cookies.get("stamp-admin");
  return adminCookie?.value === "authenticated";
}
8+
9+
export async function POST(request: NextRequest) {
10+
if (!isAdmin(request)) {
11+
return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
12+
}
13+
14+
try {
15+
await connectDB();
16+
const result = await syncVierstra();
17+
return NextResponse.json({ success: true, result });
18+
} catch (error) {
19+
return NextResponse.json(
20+
{
21+
error: `Vierstra sync failed: ${
22+
error instanceof Error ? error.message : String(error)
23+
}`,
24+
},
25+
{ status: 500 }
26+
);
27+
}
28+
}

web/src/components/job/DatabaseSelector.tsx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,11 @@ export function DatabaseSelector({ value, onChange }: DatabaseSelectorProps) {
5555
);
5656

5757
// Source display names and order
58-
const sourceOrder = ["jaspar", "hocomoco", "cisbp", "custom"];
58+
const sourceOrder = ["jaspar", "hocomoco", "vierstra", "cisbp", "custom"];
5959
const sourceLabels: Record<string, string> = {
6060
jaspar: "JASPAR",
6161
hocomoco: "HOCOMOCO",
62+
vierstra: "Vierstra Archetypes",
6263
cisbp: "CIS-BP",
6364
custom: "Custom",
6465
};

web/src/lib/cisbp/sync.ts

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ export interface CisbpSyncResult {
1818
const CISBP_BASE_URL =
1919
"https://cisbp.ccbr.utoronto.ca/data/3_00/DataFiles/Bulk_downloads/EntireDataset";
2020

21+
const CISBP_URL_PATTERN =
22+
"https://cisbp.ccbr.utoronto.ca/TFnewreport.php?searchTF={id}";
23+
2124
/**
2225
* Sync CIS-BP motifs by downloading directly from the CIS-BP server.
2326
* Uses streaming ZIP extraction to avoid loading large files into memory as strings.
@@ -208,11 +211,16 @@ async function storeCisbpMotifsFromStream(
208211
source: "cisbp",
209212
description: "Catalog of Inferred Sequence Binding Preferences",
210213
version: "Build 3.00",
211-
urlPattern: "https://cisbp.ccbr.utoronto.ca/TFreport.php?searchTF={id}",
214+
urlPattern: CISBP_URL_PATTERN,
212215
taxonGroups: [],
213216
isActive: true,
214217
});
215218
await refDb.save();
219+
} else {
220+
await ReferenceDatabase.updateOne(
221+
{ _id: refDb._id },
222+
{ version: "Build 3.00", urlPattern: CISBP_URL_PATTERN }
223+
);
216224
}
217225

218226
const dbId = refDb._id as Types.ObjectId;
@@ -258,6 +266,7 @@ async function storeCisbpMotifsFromStream(
258266
motifDocs.push({
259267
databaseRef: dbId,
260268
matrixId: motifId,
269+
baseId: info?.tfId || null,
261270
name: tfName,
262271
dbSource: "CIS-BP",
263272
group: species,
@@ -328,11 +337,16 @@ async function storeCisbpMotifs(
328337
source: "cisbp",
329338
description: "Catalog of Inferred Sequence Binding Preferences",
330339
version: "Build 3.00",
331-
urlPattern: "https://cisbp.ccbr.utoronto.ca/TFreport.php?searchTF={id}",
340+
urlPattern: CISBP_URL_PATTERN,
332341
taxonGroups: [],
333342
isActive: true,
334343
});
335344
await refDb.save();
345+
} else {
346+
await ReferenceDatabase.updateOne(
347+
{ _id: refDb._id },
348+
{ version: "Build 3.00", urlPattern: CISBP_URL_PATTERN }
349+
);
336350
}
337351

338352
const dbId = refDb._id as Types.ObjectId;
@@ -358,6 +372,7 @@ async function storeCisbpMotifs(
358372
motifDocs.push({
359373
databaseRef: dbId,
360374
matrixId: motifId,
375+
baseId: info?.tfId || null,
361376
name: tfName,
362377
dbSource: "CIS-BP",
363378
group: species,

web/src/lib/db/models/ReferenceDatabase.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import mongoose, { Schema, Document } from "mongoose";
33
export interface IReferenceDatabase extends Document {
44
name: string;
55
slug: string;
6-
source: "jaspar" | "cisbp" | "hocomoco" | "custom";
6+
source: "jaspar" | "cisbp" | "hocomoco" | "vierstra" | "custom";
77
description: string;
88
jasparCollection: string | null;
99
version: string | null;
@@ -21,7 +21,7 @@ const ReferenceDatabaseSchema = new Schema<IReferenceDatabase>(
2121
{
2222
name: { type: String, required: true },
2323
slug: { type: String, required: true, unique: true, index: true },
24-
source: { type: String, required: true, enum: ["jaspar", "cisbp", "hocomoco", "custom"] },
24+
source: { type: String, required: true, enum: ["jaspar", "cisbp", "hocomoco", "vierstra", "custom"] },
2525
description: { type: String, default: "" },
2626
jasparCollection: { type: String, default: null },
2727
version: { type: String, default: null },

web/src/lib/jaspar/sync.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ export async function syncJaspar(options: SyncOptions = {}): Promise<SyncResult>
3333
source: "jaspar",
3434
description: `JASPAR ${collection} transcription factor binding profiles`,
3535
jasparCollection: collection,
36-
version: "2024",
36+
version: "2026",
3737
urlPattern: "https://jaspar.elixir.no/matrix/{id}",
3838
taxonGroups: [],
3939
isActive: true,
@@ -143,7 +143,7 @@ export async function syncJaspar(options: SyncOptions = {}): Promise<SyncResult>
143143
lastSyncedAt: new Date(),
144144
motifCount,
145145
taxonGroups: storedTaxonGroups,
146-
version: "2024",
146+
version: "2026",
147147
urlPattern: "https://jaspar.elixir.no/matrix/{id}",
148148
}
149149
);

web/src/lib/vierstra/parser.ts

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/**
2+
* Parser for the Vierstra motif clustering v2.0 MEME-format archetype files.
3+
*
4+
* MEME format:
5+
* MOTIF AC0001:DLX/LHX:Homeodomain AC0001:DLX/LHX:Homeodomain
6+
*
7+
* letter-probability matrix: alength= 4 w= 6 nsites= 20 E= 0
8+
* 0.014812 0.085107 0.008622 0.891459
9+
* ...
10+
*
11+
* The motif ID is structured as "archetypeId:tfNames:family".
12+
*/
13+
14+
export interface VierstraMotifRecord {
  /** Full motif ID as written in the file, e.g. "AC0001:DLX/LHX:Homeodomain". */
  motifId: string;
  /** First colon-separated field of the ID, e.g. "AC0001". */
  archetypeId: string;
  /** Second colon-separated field of the ID, e.g. "DLX/LHX". */
  tfNames: string;
  /** Remaining field(s) of the ID, e.g. "Homeodomain"; "Unknown" when absent. */
  family: string;
  /** Per-position values, one array per base; index i is matrix row i. */
  pfm: { A: number[]; C: number[]; G: number[]; T: number[] };
}

/**
 * Parse the consensus_pwms.meme file into an array of motif records.
 *
 * The file contains a header (MEME version, ALPHABET, strands, Background)
 * followed by MOTIF blocks, each with a letter-probability matrix.
 *
 * Parsing rules:
 * - A motif starts at a line of the form "MOTIF <id> [<alt>]"; only <id> is used.
 * - Its matrix starts on the line after "letter-probability matrix: ..." and
 *   ends at the first blank line, next MOTIF line, "URL " line, or EOF.
 * - A MOTIF block without a matrix is skipped; it never borrows the matrix of
 *   the following motif.
 * - A matrix row is accepted only when its first four columns are all finite
 *   numbers; other rows inside the matrix are ignored.
 * - Motifs that end up with zero accepted rows are not returned.
 *
 * @param content Full text of the MEME file.
 * @returns Parsed motifs in file order.
 */
export function parseMemePwms(content: string): VierstraMotifRecord[] {
  // "MOTIF" followed by whitespace and the ID token; tolerates repeated spaces/tabs.
  const motifHeader = /^MOTIF\s+(\S+)/;
  const matrixHeader = "letter-probability matrix:";

  const motifs: VierstraMotifRecord[] = [];
  const lines = content.split(/\r?\n/).map((raw) => raw.trim());

  let i = 0;
  while (i < lines.length) {
    const header = motifHeader.exec(lines[i]);
    if (header === null) {
      i++;
      continue;
    }

    const motifId = header[1];
    const { archetypeId, tfNames, family } = parseMotifId(motifId);

    // Advance to this motif's matrix header, but never past the next MOTIF
    // line: otherwise a matrix-less motif would claim its successor's matrix.
    i++;
    while (
      i < lines.length &&
      !lines[i].startsWith(matrixHeader) &&
      !motifHeader.test(lines[i])
    ) {
      i++;
    }
    if (i >= lines.length) break;
    if (!lines[i].startsWith(matrixHeader)) {
      // Stopped on the next MOTIF line; let the outer loop handle it.
      continue;
    }

    // Skip the matrix header itself ("letter-probability matrix: alength= 4 w= 6 ...").
    i++;

    const A: number[] = [];
    const C: number[] = [];
    const G: number[] = [];
    const T: number[] = [];

    while (i < lines.length) {
      const row = lines[i];
      if (row === "" || motifHeader.test(row) || row.startsWith("URL ")) break;

      const vals = row.split(/\s+/).slice(0, 4).map(Number);
      // Require all four columns to be numeric so no NaN leaks into the PFM.
      if (vals.length === 4 && vals.every((v) => Number.isFinite(v))) {
        A.push(vals[0]);
        C.push(vals[1]);
        G.push(vals[2]);
        T.push(vals[3]);
      }
      i++;
    }

    if (A.length > 0) {
      motifs.push({
        motifId,
        archetypeId,
        tfNames,
        family,
        pfm: { A, C, G, T },
      });
    }
  }

  return motifs;
}

/**
 * Parse a Vierstra motif ID like "AC0001:DLX/LHX:Homeodomain".
 *
 * - Three or more colon-separated fields: the first is the archetype ID, the
 *   second the TF names, and everything after is re-joined with ":" as the family.
 * - Exactly two fields: family is "Unknown".
 * - No colon: the whole ID is used for both archetypeId and tfNames.
 */
function parseMotifId(id: string): { archetypeId: string; tfNames: string; family: string } {
  const parts = id.split(":");
  if (parts.length >= 3) {
    return {
      archetypeId: parts[0],
      tfNames: parts[1],
      family: parts.slice(2).join(":"),
    };
  } else if (parts.length === 2) {
    return { archetypeId: parts[0], tfNames: parts[1], family: "Unknown" };
  }
  return { archetypeId: id, tfNames: id, family: "Unknown" };
}

0 commit comments

Comments
 (0)