Skip to content

Commit c05d215

Browse files
authored
BAEL-5196 - Split a comma-separated string while ignoring commas in quotes (eugenp#11432)
* Creating the module 'core-java-string-operations-4' for new string-related code samples. Implemented code samples for the article BAEL-5196 * Including the new module 'core-java-string-operations-4' in the parent project * Fixing spacing in the pom file * Fixing the Maven configuration for our new project core-java-string-operations-4
1 parent d621f5d commit c05d215

5 files changed

Lines changed: 172 additions & 0 deletions

File tree

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
### Relevant Articles:
2+
3+
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project xmlns="http://maven.apache.org/POM/4.0.0"
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5+
<modelVersion>4.0.0</modelVersion>
6+
<artifactId>core-java-string-operations-4</artifactId>
7+
<version>0.1.0-SNAPSHOT</version>
8+
<name>core-java-string-operations-4</name>
9+
<packaging>jar</packaging>
10+
11+
<parent>
12+
<groupId>com.baeldung.core-java-modules</groupId>
13+
<artifactId>core-java-modules</artifactId>
14+
<version>0.0.1-SNAPSHOT</version>
15+
<relativePath>../</relativePath>
16+
</parent>
17+
18+
<dependencies>
19+
<dependency>
20+
<groupId>org.assertj</groupId>
21+
<artifactId>assertj-core</artifactId>
22+
<version>${assertj.version}</version>
23+
<scope>test</scope>
24+
</dependency>
25+
<dependency>
26+
<groupId>com.google.guava</groupId>
27+
<artifactId>guava</artifactId>
28+
<version>${guava.version}</version>
29+
</dependency>
30+
<dependency>
31+
<groupId>com.opencsv</groupId>
32+
<artifactId>opencsv</artifactId>
33+
<version>${opencsv.version}</version>
34+
</dependency>
35+
</dependencies>
36+
37+
<build>
38+
<plugins>
39+
<plugin>
40+
<groupId>org.apache.maven.plugins</groupId>
41+
<artifactId>maven-compiler-plugin</artifactId>
42+
<configuration>
43+
<source>${maven.compiler.source}</source>
44+
<target>${maven.compiler.target}</target>
45+
</configuration>
46+
</plugin>
47+
</plugins>
48+
</build>
49+
50+
<properties>
51+
<maven.compiler.source>11</maven.compiler.source>
52+
<maven.compiler.target>11</maven.compiler.target>
53+
<assertj.version>3.6.1</assertj.version>
54+
<guava.version>31.0.1-jre</guava.version>
55+
<opencsv.version>4.1</opencsv.version>
56+
</properties>
57+
58+
</project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
package com.baeldung.commaseparatedstring;
2+
3+
import java.io.IOException;
4+
import java.io.StringReader;
5+
import java.util.ArrayList;
6+
import java.util.Arrays;
7+
import java.util.List;
8+
import java.util.regex.Pattern;
9+
10+
import com.google.common.base.Splitter;
11+
import com.opencsv.CSVParser;
12+
import com.opencsv.CSVParserBuilder;
13+
import com.opencsv.CSVReader;
14+
import com.opencsv.CSVReaderBuilder;
15+
16+
public class SplitCommaSeparatedString {
17+
18+
public static List<String> splitWithParser(String input) {
19+
20+
List<String> tokens = new ArrayList<String>();
21+
int startPosition = 0;
22+
boolean isInQuotes = false;
23+
for (int currentPosition = 0; currentPosition < input.length(); currentPosition++) {
24+
if (input.charAt(currentPosition) == '\"') {
25+
isInQuotes = !isInQuotes;
26+
} else if (input.charAt(currentPosition) == ',' && !isInQuotes) {
27+
tokens.add(input.substring(startPosition, currentPosition));
28+
startPosition = currentPosition + 1;
29+
}
30+
}
31+
32+
String lastToken = input.substring(startPosition);
33+
if (lastToken.equals(",")) {
34+
tokens.add("");
35+
} else {
36+
tokens.add(lastToken);
37+
}
38+
39+
return tokens;
40+
}
41+
42+
public static List<String> splitWithRegex(String input) {
43+
String[] tokens = input.split(",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)", -1);
44+
return Arrays.asList(tokens);
45+
}
46+
47+
public static List<String> splitWithGuava(String input) {
48+
Pattern pattern = Pattern.compile(",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)");
49+
Splitter splitter = Splitter.on(pattern);
50+
return splitter.splitToList(input);
51+
}
52+
53+
public static List<String[]> splitMultiLineWithOpenCSV(String input) throws IOException {
54+
CSVParser parser = new CSVParserBuilder().withSeparator(',')
55+
.build();
56+
57+
CSVReader reader = new CSVReaderBuilder(new StringReader(input)).withCSVParser(parser)
58+
.build();
59+
60+
List<String[]> list = new ArrayList<>();
61+
list = reader.readAll();
62+
reader.close();
63+
64+
return list;
65+
}
66+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
package com.baeldung.commaseparatedstring;
2+
3+
import static com.baeldung.commaseparatedstring.SplitCommaSeparatedString.splitMultiLineWithOpenCSV;
4+
import static com.baeldung.commaseparatedstring.SplitCommaSeparatedString.splitWithGuava;
5+
import static com.baeldung.commaseparatedstring.SplitCommaSeparatedString.splitWithParser;
6+
import static com.baeldung.commaseparatedstring.SplitCommaSeparatedString.splitWithRegex;
7+
import static org.hamcrest.MatcherAssert.assertThat;
8+
import static org.hamcrest.Matchers.contains;
9+
import static org.hamcrest.Matchers.hasSize;
10+
import static org.junit.Assert.assertArrayEquals;
11+
12+
import java.io.IOException;
13+
import java.util.List;
14+
15+
import org.junit.Test;
16+
17+
public class SplitCommaSeparatedStringUnitTest {
18+
19+
@Test
20+
public void givenSingleLineInput_whenParsing_shouldIgnoreCommasInsideDoubleQuotes() {
21+
String input = "baeldung,tutorial,splitting,text,\"ignoring this comma,\"";
22+
23+
var matcher = contains("baeldung", "tutorial", "splitting", "text", "\"ignoring this comma,\"");
24+
assertThat(splitWithParser(input), matcher);
25+
assertThat(splitWithRegex(input), matcher);
26+
assertThat(splitWithGuava(input), matcher);
27+
}
28+
29+
@Test
30+
public void givenMultiLineInput_whenParsing_shouldIgnoreCommasInsideDoubleQuotes() throws IOException {
31+
String input = "baeldung,tutorial,splitting,text,\"ignoring this comma,\"" + System.lineSeparator()
32+
+ "splitting,a,regular,line,no double quotes";
33+
34+
String[] firstLine = new String[]{"baeldung", "tutorial", "splitting", "text", "ignoring this comma,"};
35+
String[] secondLine = new String[]{"splitting", "a", "regular", "line", "no double quotes"};
36+
37+
List<String[]> result = splitMultiLineWithOpenCSV(input);
38+
39+
assertThat(result, hasSize(2));
40+
assertArrayEquals(firstLine, result.get(0));
41+
assertArrayEquals(secondLine, result.get(1));
42+
}
43+
44+
}

pom.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1304,6 +1304,7 @@
13041304
<module>core-java-modules/core-java-jpms</module>
13051305
<module>core-java-modules/core-java-os</module>
13061306
<module>core-java-modules/core-java-string-operations-3</module>
1307+
<module>core-java-modules/core-java-string-operations-4</module>
13071308
<module>core-java-modules/core-java-time-measurements</module>
13081309
<module>core-java-modules/multimodulemavenproject</module>
13091310
<module>persistence-modules/sirix</module>

0 commit comments

Comments
 (0)