1+ // Use this script to generate random data for the app
2+ // Run this script with the command:
3+ // node --max-old-space-size=32768 gen_random_data.js
// Announce start-up first: loading the term list below happens synchronously
// and the message shows the script is running in the meantime.
console.log('Generating random data');

// File-system module; used at the end of the script to write the output files.
const fs = require('fs');

// Term list loaded from the data directory next to this script. The rest of
// the script indexes it like an array and reads a 'str' and a 'cui' field
// from each entry.
const snomed_terms = require('./data/snomed_terms.json');
7+
/**
 * Draw a random integer from a closed interval.
 *
 * Fractional bounds are tightened inwards before drawing: `min` is rounded
 * up and `max` is rounded down. Both rounded bounds are possible results.
 *
 * @param {number} min - Lower bound (inclusive).
 * @param {number} max - Upper bound (inclusive).
 * @returns {number} A random integer between the rounded bounds.
 */
function random_int(min, max) {
  const low = Math.ceil(min);
  const high = Math.floor(max);
  const span = high - low + 1; // how many integers lie in [low, high]
  return low + Math.floor(Math.random() * span);
}
14+
// Code tables: each patient's sex / ethnicity is one of these labels,
// selected by a random index into the array.
const sex_id2code = ['Male', 'Female', 'Unknown'];
const eth_id2code = ['Asian', 'Black', 'White', 'Mixed', 'Other', 'Unknown'];

// Per-patient lookup tables, keyed by patient index.
const ptt2age = {};
const ptt2sex = {};
const ptt2eth = {};
const ptt2dod = {};

// Per-term lookup tables, keyed by the term's index in the term list; each
// value is itself an object keyed by patient index.
const cui2ptt_pos = {};
const cui2ptt_tsp = {};

// Generation settings.
const ptt_num = 100000; // number of patients to generate
const max_ptt = 1000; // upper limit on the number of patients a single term can have
const max_age = 100; // oldest age a patient can be given
const die_pct = 10; // roughly one patient in die_pct is given a date of death
28+
// Generate ptt_num random patients: an age, a sex, an ethnicity and an
// optional date of death for each patient index.
//
// A date of death is a Unix timestamp in seconds drawn from the ten years
// (of 365 days) leading up to the moment this section starts; 0 means the
// patient has no date of death.
const dod_latest = Math.floor(Date.now() / 1000);
const dod_earliest = dod_latest - 60 * 60 * 24 * 365 * 10;

for (let i = 0; i < ptt_num; i++) {
  ptt2age[i] = random_int(0, max_age);
  ptt2sex[i] = sex_id2code[random_int(0, sex_id2code.length - 1)];
  ptt2eth[i] = eth_id2code[random_int(0, eth_id2code.length - 1)];

  // random_int is inclusive at both ends, so 1..die_pct has exactly die_pct
  // equally likely outcomes: one patient in die_pct is marked as dead.
  // (Drawing from 0..die_pct would give die_pct + 1 outcomes and a lower rate.)
  const has_died = random_int(1, die_pct) === 1;
  ptt2dod[i] = has_died ? random_int(dod_earliest, dod_latest) : 0;

  // Progress report every 100000 patients.
  if (i % 100000 === 0) {
    console.log('ptt:', i, `${Math.floor((i / ptt_num) * 100)} %`);
  }
}
37+
// For each SNOMED term of interest, generate random mention data.
//
// Only terms whose 'str' contains one of the parenthesised tags below are
// used. For every such term (identified by its index in snomed_terms) a random
// number of distinct patients is picked, and each picked patient gets
//   - cui2ptt_pos[term][patient]: a random integer from 1 to 100
//   - cui2ptt_tsp[term][patient]: a random Unix timestamp in seconds from the
//     ten years (of 365 days) leading up to the moment this section starts.
const target_tags = ['(disorder)', '(finding)', '(procedure)', '(substance)'];
const tsp_latest = Math.floor(Date.now() / 1000);
const tsp_earliest = tsp_latest - 60 * 60 * 24 * 365 * 10;

for (let i = 0; i < snomed_terms.length; i++) {
  const term_str = snomed_terms[i]['str'];

  // includes() compares the tag literally, parentheses included.
  // String.prototype.search() must not be used here: it compiles a string
  // argument into a regular expression, where the parentheses become a
  // capture group and a bare "disorder" anywhere in the string would match.
  if (!target_tags.some((tag) => term_str.includes(tag))) {
    continue;
  }

  const ptt_pos = {};
  const ptt_tsp = {};
  cui2ptt_pos[i] = ptt_pos;
  cui2ptt_tsp[i] = ptt_tsp;

  // Draw the number of patients once per term. Re-drawing the bound in the
  // loop condition would end the loop at the first draw that falls below the
  // counter, skewing the counts towards small values. The count is capped at
  // ptt_num so the distinct-patient search below always terminates.
  const mention_ptt_num = Math.min(random_int(0, max_ptt), ptt_num);
  const picked = new Set();
  for (let j = 0; j < mention_ptt_num; j++) {
    // Redraw until we hit a patient this term does not have yet.
    let ptt = random_int(0, ptt_num - 1);
    while (picked.has(ptt)) {
      ptt = random_int(0, ptt_num - 1);
    }
    picked.add(ptt);
    ptt_pos[ptt] = random_int(1, 100);
    ptt_tsp[ptt] = random_int(tsp_earliest, tsp_latest);
  }

  // Progress report; only reached for terms that passed the tag filter.
  if (i % 100000 === 0) {
    console.log('men:', i, `${Math.floor((i / snomed_terms.length) * 100)} %`);
  }
}
55+
// Write the generated tables to files under data/ (resolved against the
// current working directory).
console.log('Writing to files...');

// The per-patient tables are each written as a single JSON document.
fs.writeFileSync('data/ptt2age.json', JSON.stringify(ptt2age));
fs.writeFileSync('data/ptt2sex.json', JSON.stringify(ptt2sex));
fs.writeFileSync('data/ptt2eth.json', JSON.stringify(ptt2eth));
fs.writeFileSync('data/ptt2dod.json', JSON.stringify(ptt2dod));

/**
 * Write a per-term table as JSON Lines: one `{"<cui>": {...}}` object per line.
 *
 * Lines are written synchronously, one at a time, so nothing accumulates in
 * an in-memory stream buffer and the file is complete and closed when this
 * function returns. Each line is produced entirely by JSON.stringify, so the
 * cui key is escaped correctly.
 *
 * @param {string} file_path - Destination file; created or truncated.
 * @param {Object} table - Table keyed by term index (an index into
 *   snomed_terms); each value is the object stored under that term's cui.
 * @throws {Error} If the file cannot be opened or written.
 */
function write_jsonl(file_path, table) {
  const fd = fs.openSync(file_path, 'w');
  try {
    for (const term_idx of Object.keys(table)) {
      const cui = snomed_terms[term_idx]['cui'];
      fs.writeSync(fd, `${JSON.stringify({ [cui]: table[term_idx] })}\n`);
    }
  } finally {
    // Release the descriptor even if a write fails part-way through.
    fs.closeSync(fd);
  }
}

write_jsonl('data/cui2ptt_pos.jsonl', cui2ptt_pos);
write_jsonl('data/cui2ptt_tsp.jsonl', cui2ptt_tsp);

// All writes above are synchronous, so every file is on disk at this point.
console.log('Finished generating random data');
0 commit comments