(window.webpackJsonp=window.webpackJsonp||[]).push([[1599],{2007:function(t,e,a){"use strict";a.r(e);var s=a(31),n=Object(s.a)({},(function(){var t=this,e=t.$createElement,a=t._self._c||e;return a("ContentSlotsDistributor",{attrs:{"slot-key":t.$parent.slotKey}},[a("h1",{attrs:{id:"java-pitfalls-performance-issues"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#java-pitfalls-performance-issues"}},[t._v("#")]),t._v(" Java Pitfalls - Performance Issues")]),t._v(" "),a("p",[t._v('This topic describes a number of "pitfalls" (i.e. mistakes that novice java programmers make) that relate to Java application performance.')]),t._v(" "),a("h2",{attrs:{id:"pitfall-string-concatenation-in-a-loop-does-not-scale"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#pitfall-string-concatenation-in-a-loop-does-not-scale"}},[t._v("#")]),t._v(" Pitfall - String concatenation in a loop does not scale")]),t._v(" "),a("p",[t._v("Consider the following code as an illustration:")]),t._v(" "),a("div",{staticClass:"language-java extra-class"},[a("pre",{pre:!0,attrs:{class:"language-java"}},[a("code",[a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("public")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("String")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("joinWords")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("List")]),a("span",{pre:!0,attrs:{class:"token generics"}},[a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("<")]),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("String")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(">")])]),t._v(" words"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("{")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("String")]),t._v(" message "),a("span",{pre:!0,attrs:{class:"token 
operator"}},[t._v("=")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token string"}},[t._v('""')]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("for")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("String")]),t._v(" word "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v(":")]),t._v(" words"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("{")]),t._v("\n message "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("=")]),t._v(" message "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("+")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token string"}},[t._v('" "')]),t._v(" "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("+")]),t._v(" word"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("}")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("return")]),t._v(" message"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("}")]),t._v("\n\n")])])]),a("p",[t._v("Unfortunate this code is inefficient if the "),a("code",[t._v("words")]),t._v(" list is long. 
The root of the problem is this statement:")]),t._v(" "),a("div",{staticClass:"language-java extra-class"},[a("pre",{pre:!0,attrs:{class:"language-java"}},[a("code",[t._v("message "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("=")]),t._v(" message "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("+")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token string"}},[t._v('" "')]),t._v(" "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("+")]),t._v(" word"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n\n")])])]),a("p",[t._v("For each loop iteration, this statement creates a new "),a("code",[t._v("message")]),t._v(" string containing a copy of all characters in the original "),a("code",[t._v("message")]),t._v(" string with extra characters appended to it. This generates a lot of temporary strings, and does a lot of copying.")]),t._v(" "),a("p",[t._v("When we analyse "),a("code",[t._v("joinWords")]),t._v(", assuming that there are N words with an average length of M, we find that O(N) temporary strings are created and O(M.N"),a("sup",[t._v("2")]),t._v(") characters will be copied in the process. 
The N"),a("sup",[t._v("2")]),t._v(" component is particularly troubling.")]),t._v(" "),a("p",[t._v("The recommended approach for this kind of problem"),a("sup",[t._v("1")]),t._v(" is to use a "),a("code",[t._v("StringBuilder")]),t._v(" instead of string concatenation as follows:")]),t._v(" "),a("div",{staticClass:"language-java extra-class"},[a("pre",{pre:!0,attrs:{class:"language-java"}},[a("code",[a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("public")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("String")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("joinWords2")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("List")]),a("span",{pre:!0,attrs:{class:"token generics"}},[a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("<")]),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("String")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(">")])]),t._v(" words"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("{")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("StringBuilder")]),t._v(" message "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("=")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("new")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("StringBuilder")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("for")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("String")]),t._v(" word "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v(":")]),t._v(" 
words"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("{")]),t._v("\n message"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("append")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token string"}},[t._v('" "')]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("append")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),t._v("word"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("}")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("return")]),t._v(" message"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("toString")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("}")]),t._v("\n\n")])])]),a("p",[t._v("The analysis of "),a("code",[t._v("joinWords2")]),t._v(' needs to take account of the overheads of "growing" the '),a("code",[t._v("StringBuilder")]),t._v(" backing array that holds the builder's characters. However, it turns out that the number of new objects created is O(logN) and that the number of characters copied is O(M.N) characters. 
The latter includes characters copied in the final "),a("code",[t._v("toString()")]),t._v(" call.")]),t._v(" "),a("p",[t._v("(It may be possible to tune this further, by creating the "),a("code",[t._v("StringBuilder")]),t._v(" with the correct capacity to start with. However, the overall complexity remains the same.)")]),t._v(" "),a("p",[t._v("Returning to the original "),a("code",[t._v("joinWords")]),t._v(" method, it turns out that the critical statement will be optimized by a typical Java compiler to something like this:")]),t._v(" "),a("div",{staticClass:"language- extra-class"},[a("pre",{pre:!0,attrs:{class:"language-text"}},[a("code",[t._v('\n StringBuilder tmp = new StringBuilder();\n tmp.append(message).append(" ").append(word);\n message = tmp.toString();\n\n')])])]),a("p",[t._v('However, the Java compiler will not "hoist" the '),a("code",[t._v("StringBuilder")]),t._v(" out of the loop, as we did by hand in the code for "),a("code",[t._v("joinWords2")]),t._v(".")]),t._v(" "),a("p",[t._v("Reference:")]),t._v(" "),a("ul",[a("li",[a("a",{attrs:{href:"http://outoffactserror.blogspot.com/2017/03/is-javas-string-operator-in-loop-slow.html",target:"_blank",rel:"noopener noreferrer"}},[t._v("\"Is Java's String '+' operator in a loop slow?\""),a("OutboundLink")],1)])]),t._v(" "),a("p",[a("sup",[t._v("1 - In Java 8 and later, the "),a("code",[t._v("StringJoiner")]),t._v(" class can be used to solve this particular problem. 
However, that is not what this example is "),a("strong",[t._v("really supposed to be about")]),t._v(".")])]),t._v(" "),a("h2",{attrs:{id:"pitfall-using-size-to-test-if-a-collection-is-empty-is-inefficient"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#pitfall-using-size-to-test-if-a-collection-is-empty-is-inefficient"}},[t._v("#")]),t._v(" Pitfall - Using size() to test if a collection is empty is inefficient.")]),t._v(" "),a("p",[t._v("The Java Collections Framework provides two related methods for all "),a("code",[t._v("Collection")]),t._v(" objects:")]),t._v(" "),a("ul",[a("li",[a("a",{attrs:{href:"https://docs.oracle.com/javase/8/docs/api/java/util/Collection.html#size--",target:"_blank",rel:"noopener noreferrer"}},[a("code",[t._v("size()")]),a("OutboundLink")],1),t._v(" returns the number of entries in a "),a("code",[t._v("Collection")]),t._v(", and")]),t._v(" "),a("li",[a("a",{attrs:{href:"https://docs.oracle.com/javase/8/docs/api/java/util/Collection.html#isEmpty--",target:"_blank",rel:"noopener noreferrer"}},[a("code",[t._v("isEmpty()")]),a("OutboundLink")],1),t._v(" method returns true if (and only if) the "),a("code",[t._v("Collection")]),t._v(" is empty.")])]),t._v(" "),a("p",[t._v("Both methods can be used to test for collection emptiness. 
For example:")]),t._v(" "),a("div",{staticClass:"language-java extra-class"},[a("pre",{pre:!0,attrs:{class:"language-java"}},[a("code",[a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("Collection")]),a("span",{pre:!0,attrs:{class:"token generics"}},[a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("<")]),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("String")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(">")])]),t._v(" strings "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("=")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("new")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("ArrayList")]),a("span",{pre:!0,attrs:{class:"token generics"}},[a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("<")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(">")])]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n"),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("boolean")]),t._v(" isEmpty_wrong "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("=")]),t._v(" strings"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("size")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("==")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token number"}},[t._v("0")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token comment"}},[t._v("// Avoid this")]),t._v("\n"),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("boolean")]),t._v(" isEmpty "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("=")]),t._v(" 
strings"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("isEmpty")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token comment"}},[t._v("// Best")]),t._v("\n\n")])])]),a("p",[t._v("While these approaches look the same, some collection implementations do not store the size. For such a collection, the implementation of "),a("code",[t._v("size()")]),t._v(" needs to calculate the size each time it is called. For instance:")]),t._v(" "),a("ul",[a("li",[t._v("A simple linked list class (but not the "),a("code",[t._v("java.util.LinkedList")]),t._v(") might need to traverse the list to count the elements.")]),t._v(" "),a("li",[t._v("The "),a("code",[t._v("ConcurrentHashMap")]),t._v(' class needs to sum the entries in all of the map\'s "segments".')]),t._v(" "),a("li",[t._v("A lazy implementation of a collection might need to realize the entire collection in memory in order to count the elements.")])]),t._v(" "),a("p",[t._v("By contrast, an "),a("code",[t._v("isEmpty()")]),t._v(" method only needs to test if there is "),a("strong",[t._v("at least one")]),t._v(" element in the collection. This does not entail counting the elements.")]),t._v(" "),a("p",[t._v("While "),a("code",[t._v("size() == 0")]),t._v(" is not always less efficient than "),a("code",[t._v("isEmpty()")]),t._v(", it is inconceivable for a properly implemented "),a("code",[t._v("isEmpty()")]),t._v(" to be less efficient than "),a("code",[t._v("size() == 0")]),t._v(". 
Hence "),a("code",[t._v("isEmpty()")]),t._v(" is preferred.")]),t._v(" "),a("h2",{attrs:{id:"pitfall-using-new-to-create-primitive-wrapper-instances-is-inefficient"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#pitfall-using-new-to-create-primitive-wrapper-instances-is-inefficient"}},[t._v("#")]),t._v(" Pitfall - Using 'new' to create primitive wrapper instances is inefficient")]),t._v(" "),a("p",[t._v("The Java language allows you to use "),a("code",[t._v("new")]),t._v(" to create instances of "),a("code",[t._v("Integer")]),t._v(", "),a("code",[t._v("Boolean")]),t._v(" and so on, but it is generally a bad idea. It is better to either use autoboxing (Java 5 and later) or the "),a("code",[t._v("valueOf")]),t._v(" method.")]),t._v(" "),a("div",{staticClass:"language- extra-class"},[a("pre",{pre:!0,attrs:{class:"language-text"}},[a("code",[t._v("\nInteger i1 = new Integer(1); // BAD\n Integer i2 = 2; // BEST (autoboxing)\n Integer i3 = Integer.valueOf(3); // OK\n\n")])])]),a("p",[t._v("The reason that using "),a("code",[t._v("new Integer(int)")]),t._v(" explicitly is a bad idea is that it creates a new object (unless optimized out by the JIT compiler). By contrast, when autoboxing or an explicit "),a("code",[t._v("valueOf")]),t._v(" call is used, the Java runtime will try to reuse an "),a("code",[t._v("Integer")]),t._v(' object from a cache of pre-existing objects. Each time the runtime has a cache "hit", it avoids creating an object. 
This also saves heap memory and reduces GC overheads caused by object churn.')]),t._v(" "),a("p",[t._v("Notes:")]),t._v(" "),a("ol",[a("li",[t._v("In recent Java implementations, autoboxing is implemented by calling "),a("code",[t._v("valueOf")]),t._v(", and there are caches for "),a("code",[t._v("Boolean")]),t._v(", "),a("code",[t._v("Byte")]),t._v(", "),a("code",[t._v("Short")]),t._v(", "),a("code",[t._v("Integer")]),t._v(", "),a("code",[t._v("Long")]),t._v(" and "),a("code",[t._v("Character")]),t._v(".")]),t._v(" "),a("li",[t._v("The caching behavior for the integral types is mandated by the Java Language Specification.")])]),t._v(" "),a("h2",{attrs:{id:"pitfall-interning-strings-so-that-you-can-use-is-a-bad-idea"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#pitfall-interning-strings-so-that-you-can-use-is-a-bad-idea"}},[t._v("#")]),t._v(" Pitfall - Interning strings so that you can use == is a bad idea")]),t._v(" "),a("p",[t._v("When some programmers see this advice:")]),t._v(" "),a("blockquote"),t._v(" "),a("p",[t._v('"Testing strings using '),a("code",[t._v("==")]),t._v(' is incorrect (unless the strings are interned)"')]),t._v(" "),a("p",[t._v("their initial reaction is to intern strings so that they can use "),a("code",[t._v("==")]),t._v(". (After all "),a("code",[t._v("==")]),t._v(" is faster than calling "),a("code",[t._v("String.equals(...)")]),t._v(", isn't it.)")]),t._v(" "),a("p",[t._v("This is the wrong approach, from a number of perspectives:")]),t._v(" "),a("h3",{attrs:{id:"fragility"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#fragility"}},[t._v("#")]),t._v(" Fragility")]),t._v(" "),a("p",[t._v("First of all, you can only safely use "),a("code",[t._v("==")]),t._v(" if you know that "),a("strong",[t._v("all")]),t._v(" of the "),a("code",[t._v("String")]),t._v(" objects you are testing have been interned. The JLS guarantees that String literals in your source code will have been interned. 
However, none of the standard Java SE APIs guarantee to return interned strings, apart from "),a("code",[t._v("String.intern()")]),t._v(" itself. If you miss just one source of "),a("code",[t._v("String")]),t._v(" objects that haven't been interned, your application will be unreliable. That unreliability will manifest itself as false negatives rather than exceptions, which is liable to make it harder to detect.")]),t._v(" "),a("h3",{attrs:{id:"costs-of-using-intern"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#costs-of-using-intern"}},[t._v("#")]),t._v(" Costs of using 'intern()'")]),t._v(" "),a("p",[t._v("Under the hood, interning works by maintaining a hash table that contains previously interned "),a("code",[t._v("String")]),t._v(" objects. Some kind of weak reference mechanism is used so that the interning hash table does not become a storage leak. While the hash table is implemented in native code (unlike "),a("code",[t._v("HashMap")]),t._v(", "),a("code",[t._v("Hashtable")]),t._v(" and so on), the "),a("code",[t._v("intern")]),t._v(" calls are still relatively costly in terms of CPU and memory used.")]),t._v(" "),a("p",[t._v("This cost has to be compared with the saving we are going to get by using "),a("code",[t._v("==")]),t._v(" instead of "),a("code",[t._v("equals")]),t._v('. 
In fact, we are not going to break even unless each interned string is compared with other strings "a few" times.')]),t._v(" "),a("p",[t._v("(Aside: the few situations where interning is worthwhile tend to be about reducing the memory foot print of an application where the same strings recur many times, "),a("strong",[t._v("and")]),t._v(" those strings have a long lifetime.)")]),t._v(" "),a("h3",{attrs:{id:"the-impact-on-garbage-collection"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#the-impact-on-garbage-collection"}},[t._v("#")]),t._v(" The impact on garbage collection")]),t._v(" "),a("p",[t._v("In addition to the direct CPU and memory costs described above, interned Strings impact on the garbage collector performance.")]),t._v(" "),a("p",[t._v('For versions of Java prior to Java 7, interned strings are held in the "PermGen" space which is collected infrequently. If PermGen needs to be collected, this (typically) triggers a full garbage collection. If the PermGen space fills completely, the JVM crashes, even if there was free space in the regular heap spaces.')]),t._v(" "),a("p",[t._v('In Java 7, the string pool was moved out of "PermGen" into the normal heap. However, the hash table is still going to be a long-lived data structure, which is going to cause any interned strings to be long-lived. (Even if the interned string objects were allocated in Eden space they would most likely be promoted before they were collected.)')]),t._v(" "),a("p",[t._v("Thus in all cases, interning a string is going to prolong its lifetime relative to an ordinary string. That will increase the garbage collection overheads over the lifetime of the JVM.")]),t._v(" "),a("p",[t._v("The second issue is that the hash table needs to use a weak reference mechanism of some kind to prevent string interning leaking memory. 
But such a mechanism is more work for the garbage collector.")]),t._v(" "),a("p",[t._v("These garbage collection overheads are difficult to quantify, but there is little doubt that they do exist. If you use "),a("code",[t._v("intern")]),t._v(" extensively, they could be significant.")]),t._v(" "),a("h3",{attrs:{id:"the-string-pool-hashtable-size"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#the-string-pool-hashtable-size"}},[t._v("#")]),t._v(" The string pool hashtable size")]),t._v(" "),a("p",[t._v("According to "),a("a",{attrs:{href:"http://java-performance.info/string-intern-in-java-6-7-8/",target:"_blank",rel:"noopener noreferrer"}},[t._v("this source"),a("OutboundLink")],1),t._v(", from Java 6 onwards, the string pool is implemented as a fixed-size hash table with chains to deal with strings that hash to the same bucket. In early releases of Java 6, the hash table had a (hard-wired) constant size. A tuning parameter ("),a("code",[t._v("-XX:StringTableSize")]),t._v(") was added in a mid-life update to Java 6. Then in a mid-life update to Java 7, the default size of the pool was changed from "),a("code",[t._v("1009")]),t._v(" to "),a("code",[t._v("60013")]),t._v(".")]),t._v(" "),a("p",[t._v("The bottom line is that if you do intend to use "),a("code",[t._v("intern")]),t._v(" intensively in your code, it is "),a("strong",[t._v("advisable")]),t._v(" to pick a version of Java where the hashtable size is tunable and make sure that you tune the size appropriately. Otherwise, the performance of "),a("code",[t._v("intern")]),t._v(" is liable to degrade as the pool gets larger.")]),t._v(" "),a("h3",{attrs:{id:"interning-as-a-potential-denial-of-service-vector"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#interning-as-a-potential-denial-of-service-vector"}},[t._v("#")]),t._v(" Interning as a potential denial of service vector")]),t._v(" "),a("p",[t._v("The hashcode algorithm for strings is well-known. 
If you intern strings supplied by malicious users or applications, this could be used as part of a denial of service (DoS) attack. If the malicious agent arranges that all of the strings it provides have the same hash code, this could lead to an unbalanced hash table and "),a("code",[t._v("O(N)")]),t._v(" performance for "),a("code",[t._v("intern")]),t._v(" ... where "),a("code",[t._v("N")]),t._v(" is the number of collided strings.")]),t._v(" "),a("p",[t._v("(There are simpler / more effective ways to launch a DoS attack against a service. However, this vector could be used if the goal of the DoS attack is to break security, or to evade first-line DoS defences.)")]),t._v(" "),a("h2",{attrs:{id:"pitfall-efficiency-concerns-with-regular-expressions"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#pitfall-efficiency-concerns-with-regular-expressions"}},[t._v("#")]),t._v(" Pitfall - Efficiency concerns with regular expressions")]),t._v(" "),a("p",[t._v("Regular expression matching is a powerful tool (in Java, and in other contexts) but it does have some drawbacks. 
One of these is that regular expression matching tends to be rather expensive.")]),t._v(" "),a("h3",{attrs:{id:"pattern-and-matcher-instances-should-be-reused"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#pattern-and-matcher-instances-should-be-reused"}},[t._v("#")]),t._v(" Pattern and Matcher instances should be reused")]),t._v(" "),a("p",[t._v("Consider the following example:")]),t._v(" "),a("div",{staticClass:"language-java extra-class"},[a("pre",{pre:!0,attrs:{class:"language-java"}},[a("code",[a("span",{pre:!0,attrs:{class:"token comment"}},[t._v("/**\n * Test if all strings in a list consist of English letters and numbers.\n * @param strings the list to be checked\n * @return 'true' if and only if all strings satisfy the criteria\n * @throws NullPointerException if 'strings' is 'null' or a 'null' element.\n */")]),t._v("\n"),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("public")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("boolean")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("allAlphanumeric")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("List")]),a("span",{pre:!0,attrs:{class:"token generics"}},[a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("<")]),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("String")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(">")])]),t._v(" strings"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("{")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("for")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("String")]),t._v(" s "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v(":")]),t._v(" strings"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v(" 
"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("{")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("if")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("!")]),t._v("s"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("matches")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token string"}},[t._v('"[A-Za-z0-9]*"')]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("{")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("return")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token boolean"}},[t._v("false")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("}")]),t._v(" \n "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("}")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("return")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token boolean"}},[t._v("true")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("}")]),t._v("\n\n")])])]),a("p",[t._v("This code is correct, but it is inefficient. The problem is in the "),a("code",[t._v("matches(...)")]),t._v(" call. 
Under the hood, "),a("code",[t._v('s.matches("[A-Za-z0-9]*")')]),t._v(" is equivalent to this:")]),t._v(" "),a("div",{staticClass:"language-java extra-class"},[a("pre",{pre:!0,attrs:{class:"language-java"}},[a("code",[a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("Pattern")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("matches")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),t._v("s"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(",")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token string"}},[t._v('"[A-Za-z0-9]*"')]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v("\n\n")])])]),a("p",[t._v("which is in turn equivalent to")]),t._v(" "),a("div",{staticClass:"language-java extra-class"},[a("pre",{pre:!0,attrs:{class:"language-java"}},[a("code",[a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("Pattern")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("compile")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token string"}},[t._v('"[A-Za-z0-9]*"')]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("matcher")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),t._v("s"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("matches")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v("\n\n")])])]),a("p",[t._v("The "),a("code",[t._v('Pattern.compile("[A-Za-z0-9]*")')]),t._v(" call parses the regular expression, analyzes it, and constructs a 
"),a("code",[t._v("Pattern")]),t._v(" object that holds the data structure that will be used by the regex engine. This is a non-trivial computation. Then a "),a("code",[t._v("Matcher")]),t._v(" object is created to wrap the "),a("code",[t._v("s")]),t._v(" argument. Finally we call "),a("code",[t._v("match()")]),t._v(" to do the actual pattern matching.")]),t._v(" "),a("p",[t._v("The problem is that this work is all repeated for each loop iteration. The solution is to restructure the code as follows:")]),t._v(" "),a("div",{staticClass:"language-java extra-class"},[a("pre",{pre:!0,attrs:{class:"language-java"}},[a("code",[a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("private")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("static")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("Pattern")]),t._v(" ALPHA_NUMERIC "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("=")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("Pattern")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("compile")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token string"}},[t._v('"[A-Za-z0-9]*"')]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n\n"),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("public")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("boolean")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("allAlphanumeric")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("List")]),a("span",{pre:!0,attrs:{class:"token generics"}},[a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("<")]),a("span",{pre:!0,attrs:{class:"token 
class-name"}},[t._v("String")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(">")])]),t._v(" strings"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("{")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("Matcher")]),t._v(" matcher "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("=")]),t._v(" ALPHA_NUMERIC"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("matcher")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token string"}},[t._v('""')]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("for")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("String")]),t._v(" s "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v(":")]),t._v(" strings"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("{")]),t._v("\n matcher"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("reset")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),t._v("s"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("if")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("!")]),t._v("matcher"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token 
function"}},[t._v("matches")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("{")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("return")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token boolean"}},[t._v("false")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("}")]),t._v(" \n "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("}")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("return")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token boolean"}},[t._v("true")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("}")]),t._v("\n\n")])])]),a("p",[t._v("Note that the "),a("a",{attrs:{href:"http://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html",target:"_blank",rel:"noopener noreferrer"}},[t._v("javadoc"),a("OutboundLink")],1),t._v(" for "),a("code",[t._v("Pattern")]),t._v(" states:")]),t._v(" "),a("blockquote"),t._v(" "),a("p",[t._v("Instances of this class are immutable and are safe for use by multiple concurrent threads. Instances of the "),a("code",[t._v("Matcher")]),t._v(" class are not safe for such use.")]),t._v(" "),a("h3",{attrs:{id:"don-t-use-match-when-you-should-use-find"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#don-t-use-match-when-you-should-use-find"}},[t._v("#")]),t._v(" Don't use match() when you should use find()")]),t._v(" "),a("p",[t._v("Suppose you want to test if a string "),a("code",[t._v("s")]),t._v(" contains three or more digits in a row. 
You can express this in various ways including:")]),t._v(" "),a("div",{staticClass:"language- extra-class"},[a("pre",{pre:!0,attrs:{class:"language-text"}},[a("code",[t._v('\n if (s.matches(".*[0-9]{3}.*")) {\n System.out.println("matches");\n }\n\n')])])]),a("p",[t._v("or")]),t._v(" "),a("div",{staticClass:"language- extra-class"},[a("pre",{pre:!0,attrs:{class:"language-text"}},[a("code",[t._v('\n if (Pattern.compile("[0-9]{3}").matcher(s).find()) {\n System.out.println("matches");\n }\n\n')])])]),a("p",[t._v('The first one is more concise, but it is also likely to be less efficient. On the face of it, the first version is going to try to match the entire string against the pattern. Furthermore, since ".*" is a "greedy" pattern, the pattern matcher is likely to advance "eagerly" to the end of the string, and then backtrack until it finds a match.')]),t._v(" "),a("p",[t._v("By contrast, the second version will search from left to right and will stop searching as soon as it finds the 3 digits in a row.")]),t._v(" "),a("h3",{attrs:{id:"use-more-efficient-alternatives-to-regular-expressions"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#use-more-efficient-alternatives-to-regular-expressions"}},[t._v("#")]),t._v(" Use more efficient alternatives to regular expressions")]),t._v(" "),a("p",[t._v("Regular expressions are a powerful tool, but they should not be your only tool. A lot of tasks can be done more efficiently in other ways. For example:")]),t._v(" "),a("div",{staticClass:"language- extra-class"},[a("pre",{pre:!0,attrs:{class:"language-text"}},[a("code",[t._v('\nPattern.compile("ABC").matcher(s).find()\n\n')])])]),a("p",[t._v("does the same thing as:")]),t._v(" "),a("div",{staticClass:"language- extra-class"},[a("pre",{pre:!0,attrs:{class:"language-text"}},[a("code",[t._v('\ns.contains("ABC")\n\n')])])]),a("p",[t._v("except that the latter is a lot more efficient.
(Even if you can amortize the cost of compiling the regular expression.)")]),t._v(" "),a("p",[t._v("Often, the non-regex form is more complicated. For example, the test performed by the "),a("code",[t._v("matches()")]),t._v(" call in the earlier "),a("code",[t._v("allAlphanumeric")]),t._v(" method can be rewritten as:")]),t._v(" "),a("div",{staticClass:"language- extra-class"},[a("pre",{pre:!0,attrs:{class:"language-text"}},[a("code",[t._v("\npublic boolean matches(String s) {\n for (char c : s) {\n if ((c >= 'A' && c <= 'Z') ||\n (c >= 'a' && c <= 'z') ||\n (c >= '0' && c <= '9')) {\n return false;\n }\n }\n return true;\n }\n\n")])])]),a("p",[t._v("Now that is more code than using a "),a("code",[t._v("Matcher")]),t._v(", but it is also going to be significantly faster.")]),t._v(" "),a("h3",{attrs:{id:"catastrophic-backtracking"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#catastrophic-backtracking"}},[t._v("#")]),t._v(" Catastrophic Backtracking")]),t._v(" "),a("p",[t._v("(This is potentially a problem with all implementations of regular expressions, but we will mention it here because it is a pitfall for "),a("code",[t._v("Pattern")]),t._v(" usage.)")]),t._v(" "),a("p",[t._v("Consider this (contrived) example:")]),t._v(" "),a("div",{staticClass:"language-java extra-class"},[a("pre",{pre:!0,attrs:{class:"language-java"}},[a("code",[a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("Pattern")]),t._v(" pat "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("=")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("Pattern")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("compile")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token string"}},[t._v('"(A+)+B"')]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token
punctuation"}},[t._v(";")]),t._v("\n"),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("System")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),t._v("out"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("println")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),t._v("pat"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("matcher")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token string"}},[t._v('"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB"')]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("matches")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n"),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("System")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),t._v("out"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("println")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),t._v("pat"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("matcher")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token string"}},[t._v('"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC"')]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token 
function"}},[t._v("matches")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n\n")])])]),a("p",[t._v("The first "),a("code",[t._v("println")]),t._v(" call will quickly print "),a("code",[t._v("true")]),t._v(". The second one will print "),a("code",[t._v("false")]),t._v(". Eventually. Indeed, if you experiment with the code above, you will see that each time you add an "),a("code",[t._v("A")]),t._v(" before the "),a("code",[t._v("C")]),t._v(", the time taken will double.")]),t._v(" "),a("p",[t._v("This behavior is an example of "),a("strong",[t._v("catastrophic backtracking")]),t._v(". The pattern matching engine that implements the regex matching is fruitlessly trying all of the "),a("strong",[t._v("possible")]),t._v(" ways that the pattern "),a("strong",[t._v("might")]),t._v(" match.")]),t._v(" "),a("p",[t._v("Let us look at what "),a("code",[t._v("(A+)+B")]),t._v(' actually means. Superficially, it seems to say "one or more '),a("code",[t._v("A")]),t._v(" characters followed by a "),a("code",[t._v("B")]),t._v(' value", but in reality it says one or more groups, each of which consists of one or more '),a("code",[t._v("A")]),t._v(" characters.
So, for example:")]),t._v(" "),a("ul",[a("li",[t._v("'AB' matches one way only: '(A)B'")]),t._v(" "),a("li",[t._v("'AAB' matches two ways: '(AA)B' or '(A)(A)B'")]),t._v(" "),a("li",[t._v("'AAAB' matches four ways: '(AAA)B' or '(AA)(A)B' or '(A)(AA)B' or '(A)(A)(A)B'")]),t._v(" "),a("li",[t._v("and so on")])]),t._v(" "),a("p",[t._v("In other words, the number of possible matches is 2"),a("sup",[t._v("N-1")]),t._v(" where N is the number of "),a("code",[t._v("A")]),t._v(" characters.")]),t._v(" "),a("p",[t._v("The above example is clearly contrived, but patterns that exhibit this kind of performance characteristic (i.e. "),a("code",[t._v("O(2^N)")]),t._v(" or "),a("code",[t._v("O(N^K)")]),t._v(" for a large "),a("code",[t._v("K")]),t._v(") arise frequently when ill-considered regular expressions are used. There are some standard remedies:")]),t._v(" "),a("ul",[a("li",[t._v("Avoid nesting repeating patterns within other repeating patterns.")]),t._v(" "),a("li",[t._v("Avoid using too many repeating patterns.")]),t._v(" "),a("li",[t._v("Use non-backtracking repetition as appropriate.")]),t._v(" "),a("li",[t._v("Don't use regexes for complicated parsing tasks. (Write a proper parser instead.)")])]),t._v(" "),a("p",[t._v('Finally, beware of situations where a user or an API client can supply a regex string with pathological characteristics.
That can lead to accidental or deliberate "denial of service".')]),t._v(" "),a("p",[t._v("References:")]),t._v(" "),a("ul",[a("li",[t._v("The "),a("a",{attrs:{href:"http://stackoverflow.com/documentation/regex",target:"_blank",rel:"noopener noreferrer"}},[t._v("Regular Expressions"),a("OutboundLink")],1),t._v(" tag, particularly "),a("a",{attrs:{href:"http://stackoverflow.com/documentation/regex/977/backtracking#t=201610010339131361163",target:"_blank",rel:"noopener noreferrer"}},[t._v("http://stackoverflow.com/documentation/regex/977/backtracking#t=201610010339131361163"),a("OutboundLink")],1),t._v(" and "),a("a",{attrs:{href:"http://stackoverflow.com/documentation/regex/4527/when-you-should-not-use-regular-expressions#t=201610010339593564913",target:"_blank",rel:"noopener noreferrer"}},[t._v("http://stackoverflow.com/documentation/regex/4527/when-you-should-not-use-regular-expressions#t=201610010339593564913"),a("OutboundLink")],1)]),t._v(" "),a("li",[a("a",{attrs:{href:"https://blog.codinghorror.com/regex-performance/",target:"_blank",rel:"noopener noreferrer"}},[t._v('"Regex Performance"'),a("OutboundLink")],1),t._v(" by Jeff Atwood.")]),t._v(" "),a("li",[a("a",{attrs:{href:"http://andreas.haufler.info/2013/09/how-to-kill-java-with-regular-expression.html",target:"_blank",rel:"noopener noreferrer"}},[t._v('"How to kill Java with a Regular Expression"'),a("OutboundLink")],1),t._v(" by Andreas Haufler.")])]),t._v(" "),a("h2",{attrs:{id:"pitfall-small-reads-writes-on-unbuffered-streams-are-inefficient"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#pitfall-small-reads-writes-on-unbuffered-streams-are-inefficient"}},[t._v("#")]),t._v(" Pitfall - Small reads / writes on unbuffered streams are inefficient")]),t._v(" "),a("p",[t._v("Consider the following code to copy one file to another:")]),t._v(" "),a("div",{staticClass:"language-java extra-class"},[a("pre",{pre:!0,attrs:{class:"language-java"}},[a("code",[a("span",{pre:!0,attrs:{class:"token 
keyword"}},[t._v("import")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token namespace"}},[t._v("java"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),t._v("io"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")])]),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("*")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n\n"),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("public")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("class")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("FileCopy")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("{")]),t._v("\n\n "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("public")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("static")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("void")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("main")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("String")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("[")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("]")]),t._v(" args"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("throws")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("Exception")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("{")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("try")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("InputStream")]),t._v(" is "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("=")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("new")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token 
class-name"}},[t._v("FileInputStream")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),t._v("args"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("[")]),a("span",{pre:!0,attrs:{class:"token number"}},[t._v("0")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("]")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("OutputStream")]),t._v(" os "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("=")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("new")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("FileOutputStream")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),t._v("args"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("[")]),a("span",{pre:!0,attrs:{class:"token number"}},[t._v("1")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("]")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("{")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("int")]),t._v(" octet"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("while")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),t._v("octet "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("=")]),t._v(" is"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("read")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token 
punctuation"}},[t._v(")")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("!=")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("-")]),a("span",{pre:!0,attrs:{class:"token number"}},[t._v("1")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("{")]),t._v("\n os"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("write")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),t._v("octet"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("}")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("}")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("}")]),t._v("\n"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("}")]),t._v("\n\n")])])]),a("p",[t._v("(We have deliberately omitted normal argument checking, error reporting and so on because they are not relevant to the "),a("strong",[t._v("point")]),t._v(" of this example.)")]),t._v(" "),a("p",[t._v("If you compile the above code and use it to copy a huge file, you will notice that it is very slow. In fact, it will be at least a couple of orders of magnitude slower than the standard OS file copy utilities.")]),t._v(" "),a("p",[t._v("("),a("strong",[t._v("Add actual performance measurements here!")]),t._v(")")]),t._v(" "),a("p",[t._v("The primary reason that the example above is slow (in the large file case) is that it is performing one-byte reads and one-byte writes on unbuffered byte streams. The simple way to improve performance is to wrap the streams with buffered streams.
For example:")]),t._v(" "),a("div",{staticClass:"language-java extra-class"},[a("pre",{pre:!0,attrs:{class:"language-java"}},[a("code",[a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("import")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token namespace"}},[t._v("java"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),t._v("io"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")])]),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("*")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n\n"),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("public")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("class")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("FileCopy")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("{")]),t._v("\n\n "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("public")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("static")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("void")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("main")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("String")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("[")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("]")]),t._v(" args"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("throws")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("Exception")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("{")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("try")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("InputStream")]),t._v(" is 
"),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("=")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("new")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("BufferedInputStream")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("new")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("FileInputStream")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),t._v("args"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("[")]),a("span",{pre:!0,attrs:{class:"token number"}},[t._v("0")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("]")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("OutputStream")]),t._v(" os "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("=")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("new")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("BufferedOutputStream")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("new")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("FileOutputStream")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),t._v("args"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("[")]),a("span",{pre:!0,attrs:{class:"token number"}},[t._v("1")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("]")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token 
punctuation"}},[t._v("{")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("int")]),t._v(" octet"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("while")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),t._v("octet "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("=")]),t._v(" is"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("read")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("!=")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("-")]),a("span",{pre:!0,attrs:{class:"token number"}},[t._v("1")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("{")]),t._v("\n os"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("write")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),t._v("octet"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("}")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("}")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("}")]),t._v("\n"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("}")]),t._v("\n\n")])])]),a("p",[t._v("These small changes will improve data copy rate by "),a("strong",[t._v("at least")]),t._v(" a couple of orders of magnitude, depending on various 
platform-related factors. The buffered stream wrappers cause the data to be read and written in larger chunks. The instances both have buffers implemented as byte arrays.")]),t._v(" "),a("li",[t._v("\nWith `is`, data is read from the file into the buffer a few kilobytes at a time. When `read()` is called, the implementation will typically return a byte from the buffer. It will only read from the underlying input stream if the buffer has been emptied.\n")]),t._v(" "),a("li",[t._v("\nThe behavior for `os` is analogous. Calls to `os.write(int)` write single bytes into the buffer. Data is only written to the output stream when the buffer is full, or when `os` is flushed or closed.\n")]),t._v(" "),a("h3",{attrs:{id:"what-about-character-based-streams"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#what-about-character-based-streams"}},[t._v("#")]),t._v(" What about character-based streams?")]),t._v(" "),a("p",[t._v("As you should be aware, Java I/O provides different APIs for reading and writing binary and text data.")]),t._v(" "),a("ul",[a("li",[a("code",[t._v("InputStream")]),t._v(" and "),a("code",[t._v("OutputStream")]),t._v(" are the base APIs for stream-based binary I/O")]),t._v(" "),a("li",[a("code",[t._v("Reader")]),t._v(" and "),a("code",[t._v("Writer")]),t._v(" are the base APIs for stream-based text I/O.")])]),t._v(" "),a("p",[t._v("For text I/O, "),a("code",[t._v("BufferedReader")]),t._v(" and "),a("code",[t._v("BufferedWriter")]),t._v(" are the equivalents for "),a("code",[t._v("BufferedInputStream")]),t._v(" and "),a("code",[t._v("BufferedOutputStream")]),t._v(".")]),t._v(" "),a("h3",{attrs:{id:"why-do-buffered-streams-make-this-much-difference"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#why-do-buffered-streams-make-this-much-difference"}},[t._v("#")]),t._v(" Why do buffered streams make this much difference?")]),t._v(" "),a("p",[t._v("The real reason that buffered streams help performance is to do with the way that an application talks to 
the operating system:")]),t._v(" "),a("li",[t._v("\nJava method calls in a Java application, or native procedure calls in the JVM's native runtime libraries are fast. They typically take a couple of machine instructions and have minimal performance impact.\n")]),t._v(" "),a("li",[t._v('\nBy contrast, JVM runtime calls to the operating system are not fast. They involve something known as a "syscall". The typical pattern for a syscall is as follows:\n'),a("ol",[t._v("\n- Put the syscall arguments into registers.\n- Execute a SYSENTER trap instruction.\n- The trap handler switches to privileged state and changes the virtual memory mappings. Then it dispatches to the code to handle the specific syscall.\n- The syscall handler checks the arguments, taking care that it isn't being told to access memory that the user process should not see.\n"),a("li",[t._v("The syscall specific work is performed. In the case of a `read` syscall, this may involve:\n"),a("ol",[t._v("\n- checking that there is data to be read at the file descriptor's current position\n- calling the file system handler to fetch the required data from disk (or wherever it is stored) into the buffer cache,\n- copying data from the buffer cache to the JVM-supplied address\n- adjusting the file descriptor position (i.e. the stream pointer)\n")])]),t._v("\n- Return from the syscall. This entails changing VM mappings again and switching out of privileged state.\n")])]),t._v(" "),a("p",[t._v("As you can imagine, performing a single syscall can take thousands of machine instructions. Conservatively, it takes "),a("strong",[t._v("at least")]),t._v(" two orders of magnitude longer than a regular method call. (Probably three or more.)")]),t._v(" "),a("p",[t._v("Given this, the reason that buffered streams make a big difference is that they drastically reduce the number of syscalls. Instead of doing a syscall for each "),a("code",[t._v("read()")]),t._v(" call, the buffered input stream reads a large amount of data into a buffer as required.
Most "),a("code",[t._v("read()")]),t._v(" calls on the buffered stream do some simple bounds checking and return a "),a("code",[t._v("byte")]),t._v(" that was read previously. Similar reasoning applies in the output stream case, and also the character stream cases.")]),t._v(" "),a("p",[t._v("(Some people think that buffered I/O performance comes from the mismatch between the read request size and the size of a disk block, disk rotational latency and things like that. In fact, a modern OS uses a number of strategies to ensure that the application "),a("strong",[t._v("typically")]),t._v(" doesn't need to wait for the disk. This is not the real explanation.)")]),t._v(" "),a("h3",{attrs:{id:"are-buffered-streams-always-a-win"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#are-buffered-streams-always-a-win"}},[t._v("#")]),t._v(" Are buffered streams always a win?")]),t._v(" "),a("p",[t._v('Not always. Buffered streams are definitely a win if your application is going to do lots of "small" reads or writes. However, if your application only needs to perform large reads or writes to / from a large '),a("code",[t._v("byte[]")]),t._v(" or "),a("code",[t._v("char[]")]),t._v(", then buffered streams will give you no real benefits. Indeed there might even be a (tiny) performance penalty.")]),t._v(" "),a("h3",{attrs:{id:"is-this-the-fastest-way-to-copy-a-file-in-java"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#is-this-the-fastest-way-to-copy-a-file-in-java"}},[t._v("#")]),t._v(" Is this the fastest way to copy a file in Java?")]),t._v(" "),a("p",[t._v("No it isn't. When you use Java's stream-based APIs to copy a file, you incur the cost of at least one extra memory-to-memory copy of the data. It is possible to avoid this if you use the NIO "),a("code",[t._v("ByteBuffer")]),t._v(" and "),a("code",[t._v("Channel")]),t._v(" APIs.
("),a("strong",[t._v("Add a link to a separate example here.")]),t._v(")")]),t._v(" "),a("h2",{attrs:{id:"pitfall-the-overheads-of-creating-log-messages"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#pitfall-the-overheads-of-creating-log-messages"}},[t._v("#")]),t._v(" Pitfall - The overheads of creating log messages")]),t._v(" "),a("p",[a("code",[t._v("TRACE")]),t._v(" and "),a("code",[t._v("DEBUG")]),t._v(' log levels are there to be able to convey high detail about the operation of the given code at runtime. Setting the log level above these is usually recommended, however some care must be taken for these statements to not affect performance even when seemingly "turned off".')]),t._v(" "),a("p",[t._v("Consider this log statement:")]),t._v(" "),a("div",{staticClass:"language-java extra-class"},[a("pre",{pre:!0,attrs:{class:"language-java"}},[a("code",[a("span",{pre:!0,attrs:{class:"token comment"}},[t._v("// Processing a request of some kind, logging the parameters")]),t._v("\nLOG"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("debug")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token string"}},[t._v('"Request coming from "')]),t._v(" "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("+")]),t._v(" myInetAddress"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("toString")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v(" \n "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("+")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token string"}},[t._v('" parameters: "')]),t._v(" "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("+")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("Arrays")]),a("span",{pre:!0,attrs:{class:"token 
punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("toString")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),t._v("veryLongParamArray"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n\n")])])]),a("p",[t._v("Even when the log level is set to "),a("code",[t._v("INFO")]),t._v(", arguments passed to "),a("code",[t._v("debug()")]),t._v(" will be evaluated on each execution of the line. This makes it unnecessarily expensive on several counts:")]),t._v(" "),a("ul",[a("li",[a("code",[t._v("String")]),t._v(" concatenation: multiple "),a("code",[t._v("String")]),t._v(" instances will be created")]),t._v(" "),a("li",[a("code",[t._v("InetAddress")]),t._v(" might even do a DNS lookup.")]),t._v(" "),a("li",[t._v("the "),a("code",[t._v("veryLongParamArray")]),t._v(" might be very long - creating a String out of it consumes memory and takes time")])]),t._v(" "),a("h3",{attrs:{id:"solution"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#solution"}},[t._v("#")]),t._v(" Solution")]),t._v(" "),a("p",[t._v("Most logging frameworks provide means to create log messages using fixed strings and object references. The log message will be evaluated only if the message is actually logged. 
Example:")]),t._v(" "),a("div",{staticClass:"language-java extra-class"},[a("pre",{pre:!0,attrs:{class:"language-java"}},[a("code",[a("span",{pre:!0,attrs:{class:"token comment"}},[t._v("// No toString() evaluation, no string concatenation if debug is disabled")]),t._v("\nLOG"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("debug")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token string"}},[t._v('"Request coming from {} parameters: {}"')]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(",")]),t._v(" myInetAddress"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(",")]),t._v(" parameters"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n\n")])])]),a("p",[t._v("This works very well as long as all parameters can be converted to strings using "),a("a",{attrs:{href:"https://docs.oracle.com/javase/8/docs/api/java/lang/String.html#valueOf-java.lang.Object-",target:"_blank",rel:"noopener noreferrer"}},[t._v("String.valueOf(Object)"),a("OutboundLink")],1),t._v(". 
If the log message computation is more complex, the log level can be checked before logging:")]),t._v(" "),a("div",{staticClass:"language-java extra-class"},[a("pre",{pre:!0,attrs:{class:"language-java"}},[a("code",[a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("if")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),t._v("LOG"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("isDebugEnabled")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("{")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token comment"}},[t._v("// Argument expression evaluated only when DEBUG is enabled")]),t._v("\n LOG"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("debug")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token string"}},[t._v('"Request coming from {}, parameters: {}"')]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(",")]),t._v(" myInetAddress"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(",")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("Arrays")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("toString")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),t._v("veryLongParamArray"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("}")]),t._v("\n\n")])])]),a("p",[t._v("Here, "),a("code",[t._v("LOG.debug()")]),t._v(" with the costly 
"),a("code",[t._v("Arrays.toString(Object[])")]),t._v(" computation is processed only when "),a("code",[t._v("DEBUG")]),t._v(" is actually enabled.")]),t._v(" "),a("h2",{attrs:{id:"pitfall-calling-new-string-string-is-inefficient"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#pitfall-calling-new-string-string-is-inefficient"}},[t._v("#")]),t._v(" Pitfall - Calling 'new String(String)' is inefficient")]),t._v(" "),a("p",[t._v("Using "),a("code",[t._v("new String(String)")]),t._v(" to duplicate a string is inefficient and almost always unnecessary.")]),t._v(" "),a("ul",[a("li",[t._v("String objects are immutable, so there is no need to copy them to protect against changes.")]),t._v(" "),a("li",[t._v("In some older versions of Java, "),a("code",[t._v("String")]),t._v(" objects can share backing arrays with other "),a("code",[t._v("String")]),t._v(" objects. In those versions, it is possible to leak memory by creating a (small) substring of a (large) string and retaining it. However, from Java 7 onwards, "),a("code",[t._v("String")]),t._v(" backing arrays are not shared.")])]),t._v(" "),a("p",[t._v("In the absence of any tangible benefit, calling "),a("code",[t._v("new String(String)")]),t._v(" is simply wasteful:")]),t._v(" "),a("ul",[a("li",[t._v("Making the copy takes CPU time.")]),t._v(" "),a("li",[t._v("The copy uses more memory which increases the application's memory footprint and / or increases GC overheads.")]),t._v(" "),a("li",[t._v("Operations like "),a("code",[t._v("equals(Object)")]),t._v(" and "),a("code",[t._v("hashCode()")]),t._v(" can be slower if String objects are copied.")])]),t._v(" "),a("h2",{attrs:{id:"pitfall-calling-system-gc-is-inefficient"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#pitfall-calling-system-gc-is-inefficient"}},[t._v("#")]),t._v(" Pitfall - Calling System.gc() is inefficient")]),t._v(" "),a("p",[t._v("It is (almost always) a bad idea to call "),a("code",[t._v("System.gc()")]),t._v(".")]),t._v(" "),a("p",[t._v("The 
javadoc for the "),a("code",[t._v("gc()")]),t._v(" method specifies the following:")]),t._v(" "),a("blockquote"),t._v(" "),a("p",[t._v('"Calling the '),a("code",[t._v("gc")]),t._v(' method suggests that the Java Virtual Machine expend effort toward recycling unused objects in order to make the memory they currently occupy available for quick reuse. When control returns from the method call, the Java Virtual Machine has made a best effort to reclaim space from all discarded objects."')]),t._v(" "),a("p",[t._v("There are a couple of important points that can be drawn from this:")]),t._v(" "),a("li",[t._v('\nThe use of the word "suggests" rather than (say) "tells" means that the JVM is free to ignore the suggestion. The default JVM behavior (recent releases) is to follow the suggestion, but this can be overridden by setting `-XX:+DisableExplicitGC` when launching the JVM.\n')]),t._v(" "),a("li",[t._v('\nThe phrase "a best effort to reclaim space from all discarded objects" implies that calling `gc` will trigger a "full" garbage collection.\n')]),t._v(" "),a("p",[t._v("So why is calling "),a("code",[t._v("System.gc()")]),t._v(" a bad idea?")]),t._v(" "),a("p",[t._v('First, running a full garbage collection is expensive. A full GC involves visiting and "marking" every object that is still reachable; i.e. every object that is not garbage. If you trigger this when there isn\'t much garbage to be collected, then the GC does a lot of work for relatively little benefit.')]),t._v(" "),a("p",[t._v('Second, a full garbage collection is liable to disturb the "locality" properties of the objects that are not collected. Objects that are allocated by the same thread at roughly the same time tend to be allocated close together in memory. This is good. Objects that are allocated at the same time are likely to be related; i.e. reference each other. 
If your application uses those references, then the chances are that memory access will be faster because of various memory and page caching effects. Unfortunately, a full garbage collection tends to move objects around so that objects that were once close are now further apart.')]),t._v(" "),a("p",[t._v("Third, running a full garbage collection is liable to make your application pause until the collection is complete. While this is happening, your application will be non-responsive.")]),t._v(" "),a("p",[t._v("In fact, the best strategy is to let the JVM decide when to run the GC, and what kind of collection to run. If you don't interfere, the JVM will choose a time and collection type that optimizes throughput or minimizes GC pause times.")]),t._v(" "),a("p",[t._v('At the beginning we said "... (almost always) a bad idea ...". In fact there are a couple of scenarios where it '),a("strong",[t._v("might")]),t._v(" be a good idea:")]),t._v(" "),a("li",[t._v("\nIf you are implementing a unit test for some code that is garbage collection sensitive (e.g. something involving finalizers or weak / soft / phantom references) then calling `System.gc()` may be necessary.\n")]),t._v(" "),a("li",[t._v('\nIn some interactive applications, there can be particular points in time where the user won\'t care if there is a garbage collection pause. One example is a game where there are natural pauses in the "play"; e.g. 
when loading a new level.\n')]),t._v(" "),a("h2",{attrs:{id:"pitfall-over-use-of-primitive-wrapper-types-is-inefficient"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#pitfall-over-use-of-primitive-wrapper-types-is-inefficient"}},[t._v("#")]),t._v(" Pitfall - Over-use of primitive wrapper types is inefficient")]),t._v(" "),a("p",[t._v("Consider these two pieces of code:")]),t._v(" "),a("div",{staticClass:"language-java extra-class"},[a("pre",{pre:!0,attrs:{class:"language-java"}},[a("code",[a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("int")]),t._v(" a "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("=")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token number"}},[t._v("1000")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n"),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("int")]),t._v(" b "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("=")]),t._v(" a "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("+")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token number"}},[t._v("1")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n\n")])])]),a("p",[t._v("and")]),t._v(" "),a("div",{staticClass:"language-java extra-class"},[a("pre",{pre:!0,attrs:{class:"language-java"}},[a("code",[a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("Integer")]),t._v(" a "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("=")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token number"}},[t._v("1000")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n"),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("Integer")]),t._v(" b "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("=")]),t._v(" a "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("+")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token number"}},[t._v("1")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n\n")])])]),a("p",[t._v("Question: Which version is 
more efficient?")]),t._v(" "),a("p",[t._v("Answer: The two versions look almost identical, but the first version is a lot more efficient than the second one.")]),t._v(" "),a("p",[t._v("The second version is using a representation for the numbers that uses more space, and is relying on auto-boxing and auto-unboxing behind the scenes. In fact the second version is directly equivalent to the following code:")]),t._v(" "),a("div",{staticClass:"language-java extra-class"},[a("pre",{pre:!0,attrs:{class:"language-java"}},[a("code",[a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("Integer")]),t._v(" a "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("=")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("Integer")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("valueOf")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token number"}},[t._v("1000")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token comment"}},[t._v("// box 1000")]),t._v("\n"),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("Integer")]),t._v(" b "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("=")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("Integer")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("valueOf")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),t._v("a"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("intValue")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token 
operator"}},[t._v("+")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token number"}},[t._v("1")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token comment"}},[t._v("// unbox 1000, add 1, box 1001")]),t._v("\n\n")])])]),a("p",[t._v("Comparing this to the other version that uses "),a("code",[t._v("int")]),t._v(", there are clearly three extra method calls when "),a("code",[t._v("Integer")]),t._v(" is used. In the case of "),a("code",[t._v("valueOf")]),t._v(", the calls are each going to create and initialize a new "),a("code",[t._v("Integer")]),t._v(" object. All of this extra boxing and unboxing work is likely to make the second version an order of magnitude slower than the first one.")]),t._v(" "),a("p",[t._v("In addition to that, the second version is allocating objects on the heap in each "),a("code",[t._v("valueOf")]),t._v(" call. While the space utilization is platform specific, it is likely to be in the region of 16 bytes for each "),a("code",[t._v("Integer")]),t._v(" object. By contrast, the "),a("code",[t._v("int")]),t._v(" version needs zero extra heap space, assuming that "),a("code",[t._v("a")]),t._v(" and "),a("code",[t._v("b")]),t._v(" are local variables.")]),t._v(" "),a("p",[t._v("Another big reason why primitives are faster than their boxed equivalents is how their respective array types are laid out in memory.")]),t._v(" "),a("p",[t._v("If you take "),a("code",[t._v("int[]")]),t._v(" and "),a("code",[t._v("Integer[]")]),t._v(" as an example, in the case of an "),a("code",[t._v("int[]")]),t._v(" the "),a("code",[t._v("int")]),t._v(" "),a("strong",[t._v("values")]),t._v(" are contiguously laid out in memory. 
But in the case of an "),a("code",[t._v("Integer[]")]),t._v(" it's not the values that are laid out, but references (pointers) to "),a("code",[t._v("Integer")]),t._v(" objects, which in turn contain the actual "),a("code",[t._v("int")]),t._v(" values.")]),t._v(" "),a("p",[t._v("Besides being an extra level of indirection, this can be a big hit to cache locality when iterating over the values. In the case of an "),a("code",[t._v("int[]")]),t._v(" the CPU could fetch all the values in the array, into its cache at once, because they are contiguous in memory. But in the case of an "),a("code",[t._v("Integer[]")]),t._v(" the CPU potentially has to do an additional memory fetch for each element, since the array only contains references to the actual values.")]),t._v(" "),a("p",[t._v("In short, using primitive wrapper types is relatively expensive in both CPU and memory resources. Using them unnecessarily is inefficient.")]),t._v(" "),a("h2",{attrs:{id:"pitfall-iterating-a-map-s-keys-can-be-inefficient"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#pitfall-iterating-a-map-s-keys-can-be-inefficient"}},[t._v("#")]),t._v(" Pitfall - Iterating a Map's keys can be inefficient")]),t._v(" "),a("p",[t._v("The following example code is slower than it needs to be :")]),t._v(" "),a("div",{staticClass:"language-java extra-class"},[a("pre",{pre:!0,attrs:{class:"language-java"}},[a("code",[a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("Map")]),a("span",{pre:!0,attrs:{class:"token generics"}},[a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("<")]),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("String")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(",")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("String")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(">")])]),t._v(" map "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("=")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token 
keyword"}},[t._v("new")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("HashMap")]),a("span",{pre:!0,attrs:{class:"token generics"}},[a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("<")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(">")])]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v(" \n"),a("span",{pre:!0,attrs:{class:"token keyword"}},[t._v("for")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("String")]),t._v(" key "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v(":")]),t._v(" map"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("keySet")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),t._v(" "),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("{")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token class-name"}},[t._v("String")]),t._v(" value "),a("span",{pre:!0,attrs:{class:"token operator"}},[t._v("=")]),t._v(" map"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(".")]),a("span",{pre:!0,attrs:{class:"token function"}},[t._v("get")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("(")]),t._v("key"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(")")]),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v(";")]),t._v("\n "),a("span",{pre:!0,attrs:{class:"token comment"}},[t._v("// Do something with key and value")]),t._v("\n"),a("span",{pre:!0,attrs:{class:"token punctuation"}},[t._v("}")]),t._v("\n\n")])])]),a("p",[t._v("That is because it requires a map lookup (the "),a("code",[t._v("get()")]),t._v(" 
method) for each key in the map. This lookup may not be efficient (in a HashMap, it entails calling "),a("code",[t._v("hashCode")]),t._v(" on the key, then looking up the correct bucket in internal data structures, and sometimes even calling "),a("code",[t._v("equals")]),t._v("). On a large map, this may not be a trivial overhead.")]),t._v(" "),a("p",[t._v("The correct way of avoiding this is to iterate on the map's entries, which is detailed in the "),a("a",{attrs:{href:"http://stackoverflow.com/documentation/java/90/collections/5856/iterating-over-collections#t=201608260922476000177",target:"_blank",rel:"noopener noreferrer"}},[t._v("Collections topic"),a("OutboundLink")],1),t._v(".")]),t._v(" "),a("h4",{attrs:{id:"remarks"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#remarks"}},[t._v("#")]),t._v(" Remarks")]),t._v(" "),a("p",[t._v('This topic describes some "micro" Java coding practices that are inefficient. In most cases, the inefficiencies are relatively small, but it is still worth avoiding them if possible.')])])}),[],!1,null,null,null);e.default=n.exports}}]);