Skip to content

Commit 03230aa

Browse files
Shaun Mahony and claude committed
Add HOCOMOCO v14 as a motif database
Stream-parse the HOCOMOCO annotation JSONL to extract PCM matrices, TF names, species, and TF classification. Supports both H14CORE (full) and H14CORE-CLUSTERED (non-redundant) collections. Motifs are grouped by species (Human/Mouse) and link to hocomoco14.autosome.org. Co-Authored-By: Claude Opus 4.6 <[email protected]>
1 parent 765b77a commit 03230aa

File tree

6 files changed

+380
-3
lines changed

6 files changed

+380
-3
lines changed

web/src/app/admin/page.tsx

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@ export default function AdminPage() {
3737
const [cisbpSyncing, setCisbpSyncing] = useState(false);
3838
const [cisbpResult, setCisbpResult] = useState<string | null>(null);
3939

40+
// HOCOMOCO state
41+
const [hocomocoSyncing, setHocomocoSyncing] = useState(false);
42+
const [hocomocoResult, setHocomocoResult] = useState<string | null>(null);
43+
const [hocomocoCollection, setHocomocoCollection] = useState<string>("H14CORE");
44+
4045
const fetchData = useCallback(async () => {
4146
const [jobsRes, dbsRes] = await Promise.all([
4247
fetch("/api/admin/jobs"),
@@ -86,6 +91,31 @@ export default function AdminPage() {
8691
}
8792
};
8893

94+
// Ask the server to sync the currently selected HOCOMOCO collection and show
// a one-line outcome (summary on success, reason on failure) in the admin UI.
// The "syncing" flag is raised for the whole request and always lowered again.
const handleHocomocoSync = async () => {
  setHocomocoSyncing(true);
  setHocomocoResult(null);
  try {
    const requestInit = {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ collection: hocomocoCollection }),
    };
    const res = await fetch("/api/admin/sync-hocomoco", requestInit);
    const data = await res.json();

    // Non-2xx: the route reports its failure reason in `error`.
    if (!res.ok) {
      setHocomocoResult(`Sync failed: ${data.error}`);
      return;
    }

    const summary = data.result;
    setHocomocoResult(
      `Sync complete: ${summary.totalStored} motifs stored from ${summary.groups.length} groups, ${summary.errors.length} errors`
    );
    // Reload the admin data so the newly synced databases show up.
    fetchData();
  } catch (err) {
    // Network failures and non-JSON responses both end up here.
    const reason = err instanceof Error ? err.message : String(err);
    setHocomocoResult(`Sync failed: ${reason}`);
  } finally {
    setHocomocoSyncing(false);
  }
};
118+
89119
const handleCisbpSync = async () => {
90120
setCisbpSyncing(true);
91121
setCisbpResult(null);
@@ -291,6 +321,48 @@ export default function AdminPage() {
291321
<p className="text-sm text-gray-600 mt-2">{cisbpResult}</p>
292322
)}
293323
</div>
324+
325+
{/* HOCOMOCO Sync Controls */}
326+
<div className="border-t border-gray-200 pt-4 mt-4">
327+
<h4 className="text-sm font-medium text-gray-900 mb-3">
328+
Sync HOCOMOCO Database
329+
</h4>
330+
<p className="text-xs text-gray-500 mb-3">
331+
Downloads motif data from{" "}
332+
<a
333+
href="https://hocomoco14.autosome.org/"
334+
target="_blank"
335+
rel="noopener noreferrer"
336+
className="underline hover:text-gray-600"
337+
>
338+
HOCOMOCO v14
339+
</a>
340+
. Select a collection to sync:
341+
</p>
342+
<div className="flex flex-wrap gap-2 mb-4">
343+
{(["H14CORE", "H14CORE-CLUSTERED"] as const).map((col) => (
344+
<button
345+
key={col}
346+
className={`px-3 py-1 rounded-full text-xs font-medium transition-colors ${
347+
hocomocoCollection === col
348+
? "bg-brand-100 text-brand-700 border border-brand-300"
349+
: "bg-gray-100 text-gray-600 border border-gray-200 hover:bg-gray-200"
350+
}`}
351+
onClick={() => setHocomocoCollection(col)}
352+
>
353+
{col === "H14CORE" ? "CORE (full, ~1595 motifs)" : "CLUSTERED (non-redundant, ~523 motifs)"}
354+
</button>
355+
))}
356+
</div>
357+
<div className="flex items-center gap-3">
358+
<Button onClick={handleHocomocoSync} disabled={hocomocoSyncing}>
359+
{hocomocoSyncing ? "Syncing..." : "Sync HOCOMOCO"}
360+
</Button>
361+
</div>
362+
{hocomocoResult && (
363+
<p className="text-sm text-gray-600 mt-2">{hocomocoResult}</p>
364+
)}
365+
</div>
294366
</Card>
295367
</div>
296368
);
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import { NextRequest, NextResponse } from "next/server";
2+
import { connectDB } from "@/lib/db/mongoose";
3+
import { syncHocomoco } from "@/lib/hocomoco/sync";
4+
5+
/**
 * Reports whether the request carries the admin session cookie.
 *
 * @param request Incoming request whose cookies are inspected.
 * @returns `true` only when a `stamp-admin` cookie is present and its value is
 *   exactly `"authenticated"`; `false` otherwise (including when it is absent).
 */
// NOTE(review): the check compares against a fixed string — confirm the cookie
// is issued in a way clients cannot forge.
function isAdmin(request: NextRequest): boolean {
  const adminCookie = request.cookies.get("stamp-admin");
  return adminCookie?.value === "authenticated";
}
8+
9+
/**
 * Admin-only endpoint that triggers a HOCOMOCO sync.
 *
 * Request body (JSON, optional): `{ "collection": "H14CORE" | "H14CORE-CLUSTERED" }`.
 * Any other value — or a missing, malformed, or non-object body — selects the
 * default `"H14CORE"` collection.
 *
 * Responses:
 *   401 `{ error: "Unauthorized" }` when the admin cookie check fails;
 *   200 `{ success: true, result }` with whatever `syncHocomoco` returned;
 *   500 `{ error: "HOCOMOCO sync failed: …" }` when connecting or syncing throws.
 */
export async function POST(request: NextRequest) {
  if (!isAdmin(request)) {
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
  }

  try {
    await connectDB();

    // The body may be absent, invalid JSON, or valid JSON that is not an object
    // (e.g. `null`). Narrow before reading `collection` so none of these cases
    // throws and turns a valid request into a 500.
    const body: unknown = await request.json().catch(() => null);
    const requested =
      typeof body === "object" && body !== null
        ? (body as { collection?: unknown }).collection
        : undefined;
    const collection =
      requested === "H14CORE-CLUSTERED"
        ? ("H14CORE-CLUSTERED" as const)
        : ("H14CORE" as const);

    const result = await syncHocomoco(collection);
    return NextResponse.json({ success: true, result });
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    return NextResponse.json(
      { error: `HOCOMOCO sync failed: ${reason}` },
      { status: 500 }
    );
  }
}

web/src/components/job/DatabaseSelector.tsx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,10 @@ export function DatabaseSelector({ value, onChange }: DatabaseSelectorProps) {
5555
);
5656

5757
// Source display names and order
58-
const sourceOrder = ["jaspar", "cisbp", "custom"];
58+
const sourceOrder = ["jaspar", "hocomoco", "cisbp", "custom"];
5959
const sourceLabels: Record<string, string> = {
6060
jaspar: "JASPAR",
61+
hocomoco: "HOCOMOCO",
6162
cisbp: "CIS-BP",
6263
custom: "Custom",
6364
};

web/src/lib/db/models/ReferenceDatabase.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import mongoose, { Schema, Document } from "mongoose";
33
export interface IReferenceDatabase extends Document {
44
name: string;
55
slug: string;
6-
source: "jaspar" | "cisbp" | "custom";
6+
source: "jaspar" | "cisbp" | "hocomoco" | "custom";
77
description: string;
88
jasparCollection: string | null;
99
version: string | null;
@@ -21,7 +21,7 @@ const ReferenceDatabaseSchema = new Schema<IReferenceDatabase>(
2121
{
2222
name: { type: String, required: true },
2323
slug: { type: String, required: true, unique: true, index: true },
24-
source: { type: String, required: true, enum: ["jaspar", "cisbp", "custom"] },
24+
source: { type: String, required: true, enum: ["jaspar", "cisbp", "hocomoco", "custom"] },
2525
description: { type: String, default: "" },
2626
jasparCollection: { type: String, default: null },
2727
version: { type: String, default: null },

web/src/lib/hocomoco/parser.ts

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
/**
2+
* Parser for HOCOMOCO v14 annotation JSONL files.
3+
*
4+
* Each line in the annotation JSONL is a self-contained JSON object with all
5+
* motif data: matrix values (PCM/PFM), TF name, species, TF classification,
6+
* and quality rating.
7+
*/
8+
9+
/** One motif extracted from a HOCOMOCO annotation JSONL line. */
export interface HocomocoMotifRecord {
  motifId: string;
  tfName: string;
  quality: string;
  species: string; // "Human" or "Mouse"
  tfClass: string | null;
  family: string | null;
  pfm: { A: number[]; C: number[]; G: number[]; T: number[] };
}

/** True for JSON objects (not `null`, not arrays, not primitives). */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** Returns `value` when it is a non-empty string, otherwise `null`. */
function nonEmptyString(value: unknown): string | null {
  return typeof value === "string" && value !== "" ? value : null;
}

/** True for numbers that are neither NaN nor infinite. */
function isFiniteNumber(value: unknown): value is number {
  return typeof value === "number" && Number.isFinite(value);
}

/**
 * Parse a single line from an H14CORE or H14CORE-CLUSTERED annotation JSONL.
 *
 * Expected JSON fields per line:
 *   name      – motif ID (e.g. "AHR.H14CORE.0.P.B")
 *   tf        – TF gene symbol (e.g. "AHR")
 *   quality   – "A" | "B" | "C" | "D"
 *   pcm       – position count matrix [[A,C,G,T], ...] (row-major)
 *   pfm       – position frequency matrix [[A,C,G,T], ...] (row-major)
 *   original_motif.species – "HUMAN" | "MOUSE"
 *   masterlist_info.tfclass_class  – TF class description
 *   masterlist_info.tfclass_family – TF family name
 *
 * @param line Raw text of one JSONL line; surrounding whitespace is ignored.
 * @returns The parsed record, or `null` when the line is blank, is not valid
 *   JSON, is not a JSON object, lacks a string `name`/`tf`, or has no usable
 *   matrix. Never throws on malformed input.
 */
export function parseAnnotationLine(line: string): HocomocoMotifRecord | null {
  const trimmed = line.trim();
  if (!trimmed) return null;

  let parsed: unknown;
  try {
    parsed = JSON.parse(trimmed);
  } catch {
    return null;
  }
  // JSON.parse also accepts `null`, numbers, strings and arrays; none of those
  // can be a motif record, and property access on `null` would throw.
  if (!isRecord(parsed)) return null;

  const motifId = nonEmptyString(parsed.name);
  const tfName = nonEmptyString(parsed.tf);
  if (!motifId || !tfName) return null;

  // Prefer PCM (counts) over PFM (frequencies); fall back to PFM when PCM is
  // absent, empty, or malformed. The matrix is transposed from row-major
  // [[A,C,G,T], ...] to column-major {A:[], C:[], G:[], T:[]}.
  let pfm: HocomocoMotifRecord["pfm"] | null = null;
  for (const candidate of [parsed.pcm, parsed.pfm]) {
    if (!Array.isArray(candidate)) continue;
    pfm = transposePcm(candidate);
    if (pfm) break;
  }
  if (!pfm) return null;

  // Species: from original_motif.species; anything that is not a string is
  // treated as missing and normalizes to "Unknown".
  const originalMotif = isRecord(parsed.original_motif) ? parsed.original_motif : undefined;
  const rawSpecies = typeof originalMotif?.species === "string" ? originalMotif.species : "";
  const species = normalizeSpecies(rawSpecies);

  // TF classification: missing or empty values become null.
  const masterlistInfo = isRecord(parsed.masterlist_info) ? parsed.masterlist_info : undefined;
  const tfClass = nonEmptyString(masterlistInfo?.tfclass_class);
  const family = nonEmptyString(masterlistInfo?.tfclass_family);

  const quality = typeof parsed.quality === "string" ? parsed.quality : "";

  return { motifId, tfName, quality, species, tfClass, family, pfm };
}

/**
 * Transpose a row-major matrix [[A,C,G,T], ...] (one row per position)
 * to column-major { A: [...], C: [...], G: [...], T: [...] }.
 *
 * @param rows Candidate matrix; each row must be an array whose first four
 *   entries are finite numbers (extra trailing entries are ignored).
 * @returns The transposed matrix, or `null` when `rows` is empty or any row
 *   is not an array, has fewer than four entries, or holds a non-numeric cell.
 */
function transposePcm(
  rows: readonly unknown[]
): { A: number[]; C: number[]; G: number[]; T: number[] } | null {
  if (rows.length === 0) return null;

  const A: number[] = [];
  const C: number[] = [];
  const G: number[] = [];
  const T: number[] = [];

  for (const row of rows) {
    if (!Array.isArray(row) || row.length < 4) return null;
    const cells: readonly unknown[] = row;
    const [a, c, g, t] = cells;
    if (!isFiniteNumber(a) || !isFiniteNumber(c) || !isFiniteNumber(g) || !isFiniteNumber(t)) {
      return null;
    }
    A.push(a);
    C.push(c);
    G.push(g);
    T.push(t);
  }

  return { A, C, G, T };
}

/**
 * Map a raw species label to its display form.
 *
 * "HUMAN"/"MOUSE" (any case, surrounding whitespace ignored) become
 * "Human"/"Mouse"; any other non-blank label is trimmed and capitalized
 * (first letter upper-case, rest lower-case); blank input yields "Unknown".
 */
function normalizeSpecies(raw: string): string {
  const label = raw.trim();
  if (!label) return "Unknown";

  const upper = label.toUpperCase();
  if (upper === "HUMAN") return "Human";
  if (upper === "MOUSE") return "Mouse";
  // Capitalize the trimmed label so leading whitespace cannot defeat it.
  return label.charAt(0).toUpperCase() + label.slice(1).toLowerCase();
}

0 commit comments

Comments
 (0)