11import java .net .URL ;
22import java .util .Scanner ;
3- import java .util .ArrayList ;
43import java .util .TreeMap ;
4+ import java .util .TreeSet ;
55import java .io .IOException ;
66import java .io .PrintWriter ;
77import java .io .File ;
88
99public class DownloadDefinitions {
10+ static void printStringArrayCode (Iterable <String > toPrint , String name , PrintWriter out ) {
11+ out .println (" private static final String[] " +name +" =" );
12+ char before = '{' ;
13+ for (String val : toPrint ) {
14+ out .println (" " +before +'"' +val +'"' );
15+ before = ',' ;
16+ }
17+ out .println (" };" );
18+ }
19+ static TreeMap <String ,String > readTSV (Scanner src ) {
20+ TreeMap <String ,String > ans = new TreeMap <String ,String >();
21+ while (src .hasNext ()) {
22+ String line = src .nextLine ();
23+ ans .put (line .replaceAll ("\t [^\t ]*$" ,"" ), line .replaceAll ("[^\t ]*\t " ,"" ));
24+ }
25+ return ans ;
26+ }
27+ static void addTags (TreeMap <String ,String > src , TreeMap <String ,String > dst ) {
28+ for (String key : src .keySet ()) {
29+ String tag = key .split ("\t " )[1 ];
30+ String val = src .get (key );
31+ String old = dst .get (val );
32+ if (old != null && !tag .equals (old ))
33+ throw new RuntimeException ("ERROR: uri " +val +" has multiple tags\n \t - " +old +"\n \t - " +tag );
34+ else if (old == null ) dst .put (val , tag );
35+ }
36+ }
37+
1038 public static void main (String [] args ) throws IOException {
1139 try (PrintWriter dest = new PrintWriter (new File ("edu/virginia/ged5to7/GedcomDefinitions.java" ))) {
1240 dest .println ("/* WARNING: This file is automatically generated and should not be edited by hand */" );
@@ -17,28 +45,17 @@ public static void main(String[] args) throws IOException {
1745
1846
1947 Scanner s ;
20- char before ;
21- ArrayList <String > lines ;
48+ TreeMap < String , String > known ;
49+ TreeMap <String , String > tagOf = new TreeMap < String , String >() ;
2250
2351 // enumerations
2452 s = new Scanner (new URL ("https://github.com/FamilySearch/GEDCOM/raw/main/extracted-files/enumerations.tsv" ).openStream ());
25- lines = new ArrayList <String >();
26- while (s .hasNext ()) lines .add (s .nextLine ());
27- lines .sort (null );
28- dest .println (" private static final String[] enumKeys =" );
29- before = '{' ;
30- for (String line : lines ) {
31- dest .println (" " +before +'"' +line .replaceAll ("\t [^\t ]*$" ,"\" " ));
32- before = ',' ;
33- }
34- dest .println (" };" );
35- dest .println (" private static final String[] enumVals =" );
36- before = '{' ;
37- for (String line : lines ) {
38- dest .println (" " +before +line .replaceAll (".*\t " ,"\" " )+"\" " );
39- before = ',' ;
40- }
41- dest .println (" };" );
53+ known = readTSV (s );
54+ printStringArrayCode (known .keySet (), "enumKeys" , dest );
55+ printStringArrayCode (known .values (), "enumVals" , dest );
56+ printStringArrayCode (new TreeSet <String >(known .values ()), "enumSet" , dest );
57+ addTags (known , tagOf );
58+
4259 dest .println (" /** Looks up the URI of an enumeration based on the GEDCOM 7 spec" );
4360 dest .println (" * @param ctx the URI of the containing structure." );
4461 dest .println (" * use <code>null</code> for an extension." );
@@ -48,38 +65,31 @@ public static void main(String[] args) throws IOException {
4865 dest .println (" public static String enumURI(String ctx, String tag) {" );
4966 dest .println (" if (ctx == null) {" );
5067 dest .println (" String val = \" https://gedcom.io/terms/v7/\" +tag;" );
51- dest .println (" int idx = binarySearch(enumVals , val);" );
68+ dest .println (" int idx = binarySearch(enumSet , val);" );
5269 dest .println (" if (idx < 0) return null;" );
53- dest .println (" return enumVals [idx];" );
70+ dest .println (" return enumSet [idx];" );
5471 dest .println (" } else {" );
5572 dest .println (" String key = ctx+'\\ t'+tag;" );
5673 dest .println (" int idx = binarySearch(enumKeys, key);" );
5774 dest .println (" if (idx < 0) return null;" );
5875 dest .println (" return enumVals[idx];" );
5976 dest .println (" }" );
6077 dest .println (" }" );
78+ dest .println (" public static boolean isStdEnum(String uri) {" );
79+ dest .println (" return binarySearch(enumSet, uri) >= 0;" );
80+ dest .println (" }" );
6181
6282 dest .println ();
6383
6484 // substructures
6585 s = new Scanner (new URL ("https://github.com/FamilySearch/GEDCOM/raw/main/extracted-files/substructures.tsv" ).openStream ());
66- lines = new ArrayList <String >();
67- while (s .hasNext ()) lines .add (s .nextLine ());
68- lines .sort (null );
69- dest .println (" private static final String[] structKeys =" );
70- before = '{' ;
71- for (String line : lines ) {
72- dest .println (" " +before +'"' +line .replaceAll ("\t [^\t ]*$" ,"\" " ));
73- before = ',' ;
74- }
75- dest .println (" };" );
76- dest .println (" private static final String[] structVals =" );
77- before = '{' ;
78- for (String line : lines ) {
79- dest .println (" " +before +line .replaceAll (".*\t " ,"\" " )+"\" " );
80- before = ',' ;
81- }
82- dest .println (" };" );
86+ known = readTSV (s );
87+ known .put ("\t HEAD" , "HEAD pseudostructure" ); //// HARD-CODE based on substructures.tsv implementation
88+ printStringArrayCode (known .keySet (), "structKeys" , dest );
89+ printStringArrayCode (known .values (), "structVals" , dest );
90+ printStringArrayCode (new TreeSet <String >(known .values ()), "structSet" , dest );
91+ addTags (known , tagOf );
92+
8393 dest .println (" /** Looks up the URI of an structure type based on the GEDCOM 7 spec" );
8494 dest .println (" * @param ctx the URI of the containing structure type" );
8595 dest .println (" * use <code>\" \" </code> for a record and <code>null</code> for an extension." );
@@ -89,70 +99,42 @@ public static void main(String[] args) throws IOException {
8999 dest .println (" public static String structURI(String ctx, String tag) {" );
90100 dest .println (" if (ctx == null) {" );
91101 dest .println (" String val = \" https://gedcom.io/terms/v7/\" +tag;" );
92- dest .println (" int idx = binarySearch(structVals , val);" );
102+ dest .println (" int idx = binarySearch(structSet , val);" );
93103 dest .println (" if (idx < 0) return null;" );
94- dest .println (" return structVals [idx];" );
104+ dest .println (" return structSet [idx];" );
95105 dest .println (" } else {" );
96106 dest .println (" String key = ctx+'\\ t'+tag;" );
97107 dest .println (" int idx = binarySearch(structKeys, key);" );
98108 dest .println (" if (idx < 0) return null;" );
99109 dest .println (" return structVals[idx];" );
100110 dest .println (" }" );
101111 dest .println (" }" );
112+ dest .println (" public static boolean isStdStruct(String uri) {" );
113+ dest .println (" return binarySearch(structSet, uri) >= 0;" );
114+ dest .println (" }" );
102115
103- // structure types -- uses same file and substructures above
104- TreeMap <String ,String > knownStructs = new TreeMap <String , String >();
105- for (String line : lines ) {
106- knownStructs .put (line .replaceAll (".*\t " ,"" ), line .replaceAll ("^[^\t ]*\t |\t [^\t ]*$" ,"" ));
107- }
108- dest .println (" private static final String[] knownStructs =" );
109- before = '{' ;
110- for (String uri : knownStructs .keySet ()) {
111- dest .println (" " +before +'"' +uri +'"' );
112- before = ',' ;
113- }
114- dest .println (" };" );
115- dest .println (" private static final String[] uriTag =" );
116- before = '{' ;
117- for (String tag : knownStructs .values ()) {
118- dest .println (" " +before +'"' +tag +'"' );
119- before = ',' ;
120- }
121- dest .println (" };" );
122- dest .println (" /** Looks up the tag of a structure URIbased on the GEDCOM 7 spec" );
116+ printStringArrayCode (tagOf .keySet (), "tagKeys" , dest );
117+ printStringArrayCode (tagOf .values (), "tagVals" , dest );
118+
119+ dest .println (" /** Looks up the tag of a structure URI based on the GEDCOM 7 spec" );
123120 dest .println (" * @param uri the URI of the structure type" );
124121 dest .println (" * @return the tag of the structure type, or <code>null</code> if unknown" );
125122 dest .println (" */" );
126123 dest .println (" public static String structTag(String uri) {" );
127124 dest .println (" if (uri == null) return null;" );
128- dest .println (" int idx = binarySearch(knownStructs , uri);" );
125+ dest .println (" int idx = binarySearch(tagKeys , uri);" );
129126 dest .println (" if (idx < 0) return null;" );
130- dest .println (" return uriTag [idx];" );
127+ dest .println (" return tagVals [idx];" );
131128 dest .println (" }" );
132-
133-
134129
135130 dest .println ();
136131
137132 // payloads
138133 s = new Scanner (new URL ("https://github.com/FamilySearch/GEDCOM/raw/main/extracted-files/payloads.tsv" ).openStream ());
139- lines = new ArrayList <String >();
140- while (s .hasNext ()) lines .add (s .nextLine ());
141- lines .sort (null );
142- dest .println (" private static final String[] payloadKeys =" );
143- before = '{' ;
144- for (String line : lines ) {
145- dest .println (" " +before +'"' +line .replaceAll ("\t [^\t ]*$" ,"\" " ));
146- before = ',' ;
147- }
148- dest .println (" };" );
149- dest .println (" private static final String[] payloadVals =" );
150- before = '{' ;
151- for (String line : lines ) {
152- dest .println (" " +before +line .replaceAll (".*\t " ,"\" " )+"\" " );
153- before = ',' ;
154- }
155- dest .println (" };" );
134+ known = readTSV (s );
135+ printStringArrayCode (known .keySet (), "payloadKeys" , dest );
136+ printStringArrayCode (known .values (), "payloadVals" , dest );
137+
156138 dest .println (" /** Looks up the payload type of a structure based on the GEDCOM 7 spec" );
157139 dest .println (" * @param ctx the URI of the containing structure type" );
158140 dest .println (" * @return the type code (URI or <code>\" Y|<NULL>\" </code> or <code>\" \" </code> or <code>\" @XREF:</code>tag<code>\" </code>) of the payload type, or <code>null</code> if unknown" );
@@ -164,6 +146,25 @@ public static void main(String[] args) throws IOException {
164146 dest .println (" return payloadVals[idx];" );
165147 dest .println (" }" );
166148
149+ // FHISO's language mapping
150+ s = new Scanner (new URL ("https://github.com/fhiso/legacy-format/raw/master/languages.tsv" ).openStream ());
151+ known = readTSV (s );
152+ printStringArrayCode (known .keySet (), "langKeys" , dest );
153+ printStringArrayCode (known .values (), "langVals" , dest );
154+ dest .println (" /** Looks up the language tag type of a language based ELF's mapping" );
155+ dest .println (" * @param lang the 5.5.1 language name" );
156+ dest .println (" * @return the BCP-47 language tag, or <code>null</code> if unknown" );
157+ dest .println (" */" );
158+ dest .println (" public static String langTag(String ctx) {" );
159+ dest .println (" if (ctx == null) return null;" );
160+ dest .println (" int idx = binarySearch(langKeys, ctx, String.CASE_INSENSITIVE_ORDER);" );
161+ dest .println (" if (idx < 0) return null;" );
162+ dest .println (" return langVals[idx].replace(\" *\" ,\" \" );" );
163+ dest .println (" }" );
164+
165+
166+
167+
167168
168169 dest .println ("}" );
169170 }
0 commit comments