Skip to content

Commit 31c5cfe

Browse files
chore(cogstack-cohorter): Publish cohorter images (#74)
Publish cohorter images via new GitHub workflow. Make random data generation configurable at runtime. Allow SNOMED data to be provided at runtime.
1 parent d2941df commit 31c5cfe

7 files changed

Lines changed: 205 additions & 12 deletions

File tree

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
name: ci-build-cohorter
2+
3+
on:
4+
push:
5+
branches: [main]
6+
tags:
7+
- "cohorter-v*.*.*" # e.g., cohorter-v1.2.3
8+
paths:
9+
- "cogstack-cohorter/**"
10+
- ".github/workflows/cogstack-cohorter-docker**"
11+
pull_request:
12+
paths:
13+
- "cogstack-cohorter/**"
14+
- ".github/workflows/cogstack-cohorter-docker**"
15+
16+
jobs:
17+
docker:
18+
runs-on: ubuntu-latest
19+
if: github.event.pull_request.user.login != 'dependabot[bot]' && github.repository == 'CogStack/cogstack-platform'
20+
strategy:
21+
matrix:
22+
include:
23+
- name: nl2dsl
24+
context: cogstack-cohorter/NL2DSL
25+
dockerfile: cogstack-cohorter/NL2DSL/Dockerfile
26+
image: cogstacksystems/cogstack-cohorter-nl2dsl
27+
28+
- name: webapp
29+
context: cogstack-cohorter/WebAPP
30+
dockerfile: cogstack-cohorter/WebAPP/Dockerfile
31+
image: cogstacksystems/cogstack-cohorter-webapp
32+
33+
steps:
34+
- name: Checkout branch
35+
uses: actions/checkout@v4
36+
37+
- name: Login to DockerHub
38+
uses: docker/login-action@v3
39+
with:
40+
username: ${{ secrets.DOCKERHUB_USERNAME }}
41+
password: ${{ secrets.DOCKERHUB_TOKEN }}
42+
43+
- name: Set up Docker Buildx
44+
uses: docker/setup-buildx-action@v3
45+
46+
- name: Extract metadata (tags, labels) for Docker
47+
id: meta
48+
uses: docker/metadata-action@v5
49+
with:
50+
images: ${{ matrix.image }}
51+
tags: |
52+
# latest tag on the default branch
53+
type=raw,value=latest,enable={{is_default_branch}}
54+
# Include all default tags
55+
type=schedule
56+
type=ref,event=branch
57+
type=ref,event=tag
58+
type=ref,event=pr
59+
type=sha
60+
61+
- name: Build and push Docker image
62+
id: push
63+
uses: docker/build-push-action@v6
64+
with:
65+
context: ${{ matrix.context }}
66+
file: ${{ matrix.dockerfile }}
67+
push: ${{ github.event_name != 'pull_request' }}
68+
tags: ${{ steps.meta.outputs.tags }}
69+
labels: ${{ steps.meta.outputs.labels }}
70+
cache-from: type=registry,ref=${{ matrix.image }}:buildcache
71+
cache-to: type=registry,ref=${{ matrix.image }}:buildcache,mode=max
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
node_modules/
2+
client/node_modules/
3+
client-react/node_modules/
4+
server/node_modules/
5+
6+
# Data files are never baked into the image — supply them via volume mount at runtime
7+
server/data/
Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
FROM node:latest
22
WORKDIR /usr/src/app
3-
COPY . .
43

5-
RUN cd /usr/src/app/server/data && tar xzvf snomed_terms_data.tar.gz
4+
COPY . .
5+
RUN cd /usr/src/app/client-react && npm install && npm run build
6+
RUN cd /usr/src/app/server && npm install
67

7-
ARG random
8-
RUN if [ "$random" = "true" ] ; then cd /usr/src/app/server/data && node --max-old-space-size=32768 gen_random_data.js ; fi
8+
COPY entrypoint.sh /entrypoint.sh
9+
RUN sed -i 's/\r$//' /entrypoint.sh && chmod +x /entrypoint.sh
910

10-
RUN cd /usr/src/app/server && npm install
1111
EXPOSE 3000
1212
WORKDIR /usr/src/app/server
13-
CMD ["node", "--max-old-space-size=32768", "server.js"]
13+
ENTRYPOINT ["/entrypoint.sh"]
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#!/bin/sh
2+
set -e
3+
4+
DATA_DIR=/usr/src/app/server/data
5+
6+
# ── Step 1: extract archive if JSON data isn't already present ────────────────
7+
if [ ! -f "$DATA_DIR/snomed_terms.json" ]; then
8+
if [ -f "$DATA_DIR/snomed_terms_data.tar.gz" ]; then
9+
echo "[webapp] Extracting SNOMED data archive..."
10+
tar xzvf "$DATA_DIR/snomed_terms_data.tar.gz" -C "$DATA_DIR"
11+
else
12+
echo "[webapp] ERROR: No data found at $DATA_DIR." >&2
13+
echo "[webapp] Mount a directory containing snomed_terms.json (and related files)" >&2
14+
echo "[webapp] or snomed_terms_data.tar.gz via a Docker volume:" >&2
15+
echo "[webapp] -v /your/data:/usr/src/app/server/data" >&2
16+
exit 1
17+
fi
18+
fi
19+
20+
# ── Step 2 (optional): generate random patient data ───────────────────────────
21+
# Set RANDOM_DATA=true in the container environment to generate synthetic data.
22+
if [ "${RANDOM_DATA}" = "true" ]; then
23+
echo "[webapp] Generating random demo patient data..."
24+
node --max-old-space-size=32768 /usr/src/app/server/gen_random_data.js
25+
fi
26+
27+
# ── Step 3: start the server ──────────────────────────────────────────────────
28+
exec node --max-old-space-size=32768 server.js
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
// Use this script to generate random data for the app
2+
// Run this script with the command:
3+
// node --max-old-space-size=32768 gen_random_data.js
4+
console.log('Generating random data')
5+
const fs = require('fs');
6+
const snomed_terms = require('./data/snomed_terms.json');
7+
8+
// Returns a random integer between min (inclusive) and max (inclusive).
9+
function random_int(min, max) {
10+
min = Math.ceil(min);
11+
max = Math.floor(max);
12+
return Math.floor(Math.random() * (max - min + 1)) + min;
13+
}
14+
15+
const sex_id2code = ['Male', 'Female', 'Unknown']
16+
const eth_id2code = ['Asian', 'Black', 'White', 'Mixed', 'Other', 'Unknown'];
17+
let ptt2age = {};
18+
let ptt2sex = {};
19+
let ptt2eth = {};
20+
let ptt2dod = {};
21+
let cui2ptt_pos = {};
22+
let cui2ptt_tsp = {};
23+
24+
let ptt_num = 100000;
25+
let max_ptt = 1000; // max. number of ptt a term can have
26+
let max_age = 100;
27+
let die_pct = 10; // percentage of died ptt = 1 / die_pct
28+
29+
// generate ptt_num random patient data
30+
for (let i=0;i<ptt_num;i++) {
31+
ptt2age[i] = random_int(0,max_age);
32+
ptt2sex[i] = sex_id2code[random_int(0,sex_id2code.length-1)];
33+
ptt2eth[i] = eth_id2code[random_int(0,eth_id2code.length-1)];
34+
ptt2dod[i] = random_int(0,die_pct) == 0 ? random_int(Math.floor(Date.now()/1000) - (60*60*24*365*10), Math.floor(Date.now()/1000)) : 0;
35+
if (i%100000 == 0) console.log('ptt:', i, `${Math.floor((i/ptt_num)*100)}%`);
36+
}
37+
38+
// for each snomed terms, generate some random mention data
39+
for (let i=0;i<snomed_terms.length;i++) {
40+
if (snomed_terms[i]['str'].search('(disorder)')==-1 && snomed_terms[i]['str'].search('(finding)')==-1 &&
41+
snomed_terms[i]['str'].search('(procedure)')==-1 && snomed_terms[i]['str'].search('(substance)')==-1)
42+
continue;
43+
let picked = {};
44+
cui2ptt_pos[i] = {};
45+
cui2ptt_tsp[i] = {};
46+
for (let j=0;j<random_int(0,max_ptt);j++) {
47+
let ptt = random_int(0, ptt_num-1);
48+
while (picked[ptt]) ptt = random_int(0, ptt_num-1);
49+
picked[ptt] = true;
50+
cui2ptt_pos[i][ptt] = random_int(1,100);
51+
cui2ptt_tsp[i][ptt] = random_int(Math.floor(Date.now()/1000) - (60*60*24*365*10), Math.floor(Date.now()/1000));
52+
}
53+
if (i%100000 == 0) console.log('men:', i, `${Math.floor((i/snomed_terms.length)*100)}%`);
54+
}
55+
56+
// write to files
57+
console.log('Writing to files...')
58+
fs.writeFileSync('data/ptt2age.json', JSON.stringify(ptt2age));
59+
fs.writeFileSync('data/ptt2sex.json', JSON.stringify(ptt2sex));
60+
fs.writeFileSync('data/ptt2eth.json', JSON.stringify(ptt2eth));
61+
fs.writeFileSync('data/ptt2dod.json', JSON.stringify(ptt2dod));
62+
const pos_out = fs.createWriteStream('data/cui2ptt_pos.jsonl', {flags: 'w'});
63+
const tsp_out = fs.createWriteStream('data/cui2ptt_tsp.jsonl', {flags: 'w'});
64+
Object.keys(cui2ptt_pos).forEach( k => { pos_out.write(`{"${snomed_terms[k]['cui']}":` + JSON.stringify(cui2ptt_pos[k]) + '}\n'); });
65+
Object.keys(cui2ptt_tsp).forEach( k => { tsp_out.write(`{"${snomed_terms[k]['cui']}":` + JSON.stringify(cui2ptt_tsp[k]) + '}\n'); });
66+
console.log('Finished generating random data')

cogstack-cohorter/WebAPP/server/server.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ app.use(session({
2828
}))
2929

3030

31-
const NL2DSL_SERVER = process.env.NL2DSL_SERVER || "http://localhost:4000/api/compile";
31+
const NL2DSL_SERVER = process.env.NL2DSL_SERVER || "http://localhost:3002/api/compile";
3232

3333
let port = process.env.PORT || 3000;
3434
console.log('Loading data...');
@@ -181,7 +181,7 @@ for (let i=0;i<all_ptt_cnt;i++) ptt2cui_tsp_arr[i] = {};
181181
}
182182
console.log('Finished reading cui2ptt_tsp');
183183
console.timeEnd('cui2ptt_tsp');
184-
console.log('Loading data...Finsihed');
184+
console.log('Finished loading data');
185185
console.log(`Access the app on port ${port}`);
186186
})();
187187
//========================================================

cogstack-cohorter/docker-compose.yml

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,22 @@ services:
88
- ollama:/root/.ollama
99
restart: unless-stopped
1010

11+
ollama-init:
12+
image: ollama/ollama:latest
13+
container_name: ollama-pull
14+
entrypoint: >
15+
sh -c "
16+
until ollama list > /dev/null 2>&1; do
17+
echo '[ollama-pull] Waiting for Ollama...'; sleep 2;
18+
done &&
19+
ollama pull ${OLLAMA_MODEL:-gpt-oss:20b}
20+
"
21+
environment:
22+
OLLAMA_HOST: "http://ollama:11434"
23+
depends_on:
24+
- ollama
25+
restart: "no"
26+
1127
medcat:
1228
image: cogstacksystems/medcat-service:latest
1329
container_name: cohorter-medcat
@@ -32,20 +48,25 @@ services:
3248
MEDCAT_URL: "http://cohorter-medcat:5000"
3349
ALLOW_ORIGINS: "*"
3450
depends_on:
35-
- ollama
51+
- ollama-init
3652
- medcat
3753
restart: unless-stopped
3854

3955
webapp:
4056
build:
4157
context: ./WebAPP
42-
args:
43-
random: "false" # set to "true" to run gen_random_data.js during build
4458
container_name: cohorter-webapp
4559
ports:
4660
- "3000:3000"
4761
environment:
48-
NL2DSL_URL: "http://cohorter-nl2dsl:3002"
62+
NL2DSL_SERVER: "http://cohorter-nl2dsl:3002/api/compile"
63+
# Set to "true" to generate synthetic patient data on startup.
64+
RANDOM_DATA: "true"
65+
volumes:
66+
# Mount your data directory here. It should contain either:
67+
# - snomed_terms_data.tar.gz (will be auto-extracted on startup), or
68+
# - pre-extracted files: snomed_terms.json (SNOMED term lookup by Concept Unique Identifier) and cui_pt2ch.json (SNOMED CT ontology hierarchy map).
69+
- ./WebAPP/server/data:/usr/src/app/server/data
4970
depends_on:
5071
- medcat
5172
- nl2dsl

0 commit comments

Comments
 (0)