|
1 | 1 | package com.baeldung.encoding; |
2 | 2 |
|
| 3 | +import java.io.BufferedReader; |
3 | 4 | import java.io.IOException; |
| 5 | +import java.io.InputStreamReader; |
| 6 | +import java.io.Reader; |
| 7 | +import java.nio.charset.*; |
| 8 | +import java.nio.file.Path; |
| 9 | +import java.nio.file.Paths; |
| 10 | +import java.util.ArrayList; |
| 11 | +import java.util.Arrays; |
| 12 | +import java.util.List; |
4 | 13 |
|
5 | 14 | import org.junit.Assert; |
6 | 15 | import org.junit.Test; |
| 16 | +import org.junit.jupiter.api.Assertions; |
| 17 | + |
| 18 | +import static java.nio.file.Files.newInputStream; |
7 | 19 |
|
8 | 20 | public class CharacterEncodingExamplesUnitTest { |
9 | 21 |
|
@@ -58,4 +70,47 @@ public void givenCharacterCh_whenConvertedtoBinaryWithEncodingUTF32_thenProduceR |
58 | 70 | "0 0 10001010 10011110 "); |
59 | 71 | } |
60 | 72 |
|
| 73 | + @Test |
| 74 | + public void givenUTF8String_decodeByUS_ASCII_ReplaceMalformedInputSequence() throws IOException { |
| 75 | + String input = "The façade pattern is a software design pattern."; |
| 76 | + Assertions.assertEquals(CharacterEncodingExamples.decodeText(input, StandardCharsets.US_ASCII, CodingErrorAction.REPLACE), |
| 77 | + "The fa��ade pattern is a software design pattern."); |
| 78 | + } |
| 79 | + |
| 80 | + @Test |
| 81 | + public void givenUTF8String_decodeByUS_ASCII_IgnoreMalformedInputSequence() throws IOException { |
| 82 | + String input = "The façade pattern is a software design pattern."; |
| 83 | + Assertions.assertEquals( |
| 84 | + CharacterEncodingExamples.decodeText(input, StandardCharsets.US_ASCII, CodingErrorAction.IGNORE), |
| 85 | + "The faade pattern is a software design pattern."); |
| 86 | + } |
| 87 | + |
| 88 | + @Test |
| 89 | + public void givenUTF8String_decodeByUS_ASCII_ReportMalformedInputSequence() { |
| 90 | + String input = "The façade pattern is a software design pattern."; |
| 91 | + Assertions.assertThrows(MalformedInputException.class, |
| 92 | + () -> CharacterEncodingExamples.decodeText(input, StandardCharsets.US_ASCII, CodingErrorAction.REPORT)); |
| 93 | + } |
| 94 | + |
| 95 | + @Test |
| 96 | + public void givenTextFile_FindSuitableCandidateEncodings() { |
| 97 | + Path path = Paths.get("src/test/resources/encoding.txt"); |
| 98 | + List<Charset> allCandidateCharSets = Arrays.asList(StandardCharsets.US_ASCII, StandardCharsets.UTF_8, StandardCharsets.ISO_8859_1); |
| 99 | + List<Charset> suitableCharsets = new ArrayList<>(); |
| 100 | + allCandidateCharSets.forEach(charset -> { |
| 101 | + try { |
| 102 | + CharsetDecoder charsetDecoder = charset.newDecoder().onMalformedInput(CodingErrorAction.REPORT); |
| 103 | + Reader reader = new InputStreamReader(newInputStream(path), charsetDecoder); |
| 104 | + BufferedReader bufferedReader = new BufferedReader(reader); |
| 105 | + while (bufferedReader.readLine() != null) { |
| 106 | + } |
| 107 | + suitableCharsets.add(charset); |
| 108 | + } catch (MalformedInputException ignored) { |
| 109 | + } catch (IOException ex) { |
| 110 | + ex.printStackTrace(); |
| 111 | + } |
| 112 | + }); |
| 113 | + Assertions.assertEquals(suitableCharsets, Arrays.asList(StandardCharsets.UTF_8, StandardCharsets.ISO_8859_1)); |
| 114 | + } |
| 115 | + |
61 | 116 | } |
0 commit comments