|
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Scanner;
import java.util.TreeMap;
import java.util.TreeSet;
8 | 6 |
|
/**
 * Build-time helper that fetches the data files used by the converter and
 * stores them under {@code edu/virginia/ged5to7/config/}.
 *
 * <p>Each download is best-effort: a failure is reported on standard error and
 * the remaining downloads are still attempted.
 */
public class DownloadDefinitions {

    /** Directory (relative to the working directory) that receives every downloaded file. */
    private static final String CONFIG_DIR = "edu/virginia/ged5to7/config/";

    /** Base URL of the TSV files extracted from the FamilySearch GEDCOM repository. */
    private static final String GEDCOM_BASE_URL =
        "https://github.com/FamilySearch/GEDCOM/raw/main/extracted-files/";

    /** Files fetched from {@link #GEDCOM_BASE_URL}, in download order. */
    private static final String[] GEDCOM_FILES = {
        "enumerations.tsv",
        "payloads.tsv",
        "substructures.tsv",
        "cardinalities.tsv",
    };

    /** Location of FHISO's language-name mapping. */
    private static final String FHISO_LANGUAGES_URL =
        "https://github.com/fhiso/legacy-format/raw/master/languages.tsv";

    /** Location of the IANA language subtag registry. */
    private static final String IANA_REGISTRY_URL =
        "https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry";

    /**
     * Opens {@code filename} for writing, creating any missing parent
     * directories first so that a fresh checkout without the config directory
     * does not make every download fail.
     *
     * @param filename path of the file to create or truncate
     * @return an open stream; the caller is responsible for closing it
     * @throws IOException if the parent directory cannot be created or the
     *         file cannot be opened
     */
    private static FileOutputStream openOutput(String filename) throws IOException {
        File parent = new File(filename).getAbsoluteFile().getParentFile();
        // mkdirs() returns false when the directory already exists, hence the isDirectory() re-check.
        if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
            throw new IOException("Unable to create directory " + parent);
        }
        return new FileOutputStream(filename);
    }

    /**
     * Copies the bytes served at {@code urlname} into {@code filename} unchanged.
     * Failures are reported on standard error rather than thrown.
     *
     * @param urlname  URL to read from
     * @param filename path of the file to write
     */
    private static void download(String urlname, String filename) {
        try {
            URL url = new URL(urlname);
            // The source is opened first, so an unreachable URL does not truncate an existing file.
            try (BufferedInputStream in = new BufferedInputStream(url.openStream());
                 FileOutputStream out = openOutput(filename)) {
                byte[] buffer = new byte[8192];
                int count;
                while ((count = in.read(buffer, 0, buffer.length)) != -1) {
                    out.write(buffer, 0, count);
                }
            }
        } catch (IOException ex) {
            System.err.println("Unable to download\n from: "+urlname+ "\n to: "+filename+"\n "+ex);
        }
    }

    /**
     * Extracts one {@code description<TAB>subtag} row for every
     * {@code Description:} line that belongs to a {@code Type: language}
     * record of the registry text.
     *
     * @param registry scanner positioned at the start of the registry text
     * @return the rows, each terminated by a newline; empty if none were found
     */
    private static String extractLanguageDescriptions(Scanner registry) {
        StringBuilder rows = new StringBuilder();
        boolean isLang = false;
        String tag = null;
        while (registry.hasNextLine()) {
            String line = registry.nextLine();
            if (line.startsWith("Type: ")) {
                isLang = line.equals("Type: language");
                tag = null; // a new record starts here; never reuse the previous record's subtag
            } else if (line.startsWith("Subtag: ")) {
                tag = line.substring("Subtag: ".length());
            } else if (isLang && tag != null && line.startsWith("Description: ")) {
                rows.append(line.substring("Description: ".length()))
                    .append('\t').append(tag).append('\n');
            }
        }
        return rows.toString();
    }

    /**
     * Downloads the IANA language subtag registry and writes its language
     * descriptions to {@code filename} as UTF-8 {@code description<TAB>subtag}
     * lines. Failures are reported on standard error rather than thrown.
     *
     * @param filename path of the file to write
     */
    private static void downloadIanaLanguageSubtagRegistry(String filename) {
        try {
            URL url = new URL(IANA_REGISTRY_URL);
            String rows;
            // Decode explicitly as UTF-8 (the output is written as UTF-8 too) and
            // close the scanner, which also closes the network stream.
            try (Scanner from = new Scanner(url.openStream(), "UTF-8")) {
                rows = extractLanguageDescriptions(from);
                // Scanner hides read errors behind hasNextLine() == false; surface them
                // so an interrupted download is not written out as if it were complete.
                IOException readFailure = from.ioException();
                if (readFailure != null) {
                    throw readFailure;
                }
            }
            try (FileOutputStream fos = openOutput(filename)) {
                fos.write(rows.getBytes(StandardCharsets.UTF_8));
            }
        } catch (IOException ex) {
            System.err.println("Unable to download\n from: "+IANA_REGISTRY_URL+"\n to: "+filename+"\n "+ex);
        }
    }

    /**
     * Downloads every definition file into the config directory.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        for (String name : GEDCOM_FILES) {
            download(GEDCOM_BASE_URL + name, CONFIG_DIR + name);
        }
        download(FHISO_LANGUAGES_URL, CONFIG_DIR + "languages.tsv");
        downloadIanaLanguageSubtagRegistry(CONFIG_DIR + "all-languages.tsv");
    }
}
0 commit comments