Skip to content

Commit 72b89bc

Browse files
authored
Add Monte Carlo Tree Search Algorithm (TheAlgorithms#2588)
1 parent 2b7a977 commit 72b89bc

1 file changed

Lines changed: 179 additions & 0 deletions

File tree

Searches/MonteCarloTreeSearch.java

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
package Searches;
2+
3+
import java.util.Collections;
4+
import java.util.ArrayList;
5+
import java.util.Comparator;
6+
import java.util.Random;
7+
8+
/**
9+
* Monte Carlo Tree Search (MCTS) is a heuristic search algorithm
10+
* used in decition taking problems especially games.
11+
*
12+
* See more: https://en.wikipedia.org/wiki/Monte_Carlo_tree_search,
13+
* https://www.baeldung.com/java-monte-carlo-tree-search
14+
*/
15+
public class MonteCarloTreeSearch {
16+
public class Node {
17+
Node parent;
18+
ArrayList <Node> childNodes;
19+
boolean isPlayersTurn; // True if it is the player's turn.
20+
boolean playerWon; // True if the player won; false if the opponent won.
21+
int score;
22+
int visitCount;
23+
24+
public Node() {}
25+
26+
public Node(Node parent, boolean isPlayersTurn) {
27+
this.parent = parent;
28+
childNodes = new ArrayList<>();
29+
this.isPlayersTurn = isPlayersTurn;
30+
playerWon = false;
31+
score = 0;
32+
visitCount = 0;
33+
}
34+
}
35+
36+
static final int WIN_SCORE = 10;
37+
static final int TIME_LIMIT = 500; // Time the algorithm will be running for (in milliseconds).
38+
39+
public static void main(String[] args) {
40+
MonteCarloTreeSearch mcts = new MonteCarloTreeSearch();
41+
42+
mcts.monteCarloTreeSearch(mcts.new Node(null, true));
43+
}
44+
45+
/**
46+
* Explores a game tree using Monte Carlo Tree Search (MCTS)
47+
* and returns the most promising node.
48+
*
49+
* @param rootNode Root node of the game tree.
50+
* @return The most promising child of the root node.
51+
*/
52+
public Node monteCarloTreeSearch(Node rootNode) {
53+
Node winnerNode;
54+
double timeLimit;
55+
56+
// Expand the root node.
57+
addChildNodes(rootNode, 10);
58+
59+
timeLimit = System.currentTimeMillis() + TIME_LIMIT;
60+
61+
// Explore the tree until the time limit is reached.
62+
while (System.currentTimeMillis() < timeLimit) {
63+
Node promisingNode;
64+
65+
// Get a promising node using UCT.
66+
promisingNode = getPromisingNode(rootNode);
67+
68+
// Expand the promising node.
69+
if (promisingNode.childNodes.size() == 0) {
70+
addChildNodes(promisingNode, 10);
71+
}
72+
73+
simulateRandomPlay(promisingNode);
74+
}
75+
76+
winnerNode = getWinnerNode(rootNode);
77+
printScores(rootNode);
78+
System.out.format("\nThe optimal node is: %02d\n", rootNode.childNodes.indexOf(winnerNode) + 1);
79+
80+
return winnerNode;
81+
}
82+
83+
public void addChildNodes(Node node, int childCount) {
84+
for (int i = 0; i < childCount; i++) {
85+
node.childNodes.add(new Node(node, !node.isPlayersTurn));
86+
}
87+
}
88+
89+
/**
90+
* Uses UCT to find a promising child node to be explored.
91+
*
92+
* UCT: Upper Confidence bounds applied to Trees.
93+
*
94+
* @param rootNode Root node of the tree.
95+
* @return The most promising node according to UCT.
96+
*/
97+
public Node getPromisingNode(Node rootNode) {
98+
Node promisingNode = rootNode;
99+
100+
// Iterate until a node that hasn't been expanded is found.
101+
while (promisingNode.childNodes.size() != 0) {
102+
double uctIndex = Double.MIN_VALUE;
103+
int nodeIndex = 0;
104+
105+
// Iterate through child nodes and pick the most promising one
106+
// using UCT (Upper Confidence bounds applied to Trees).
107+
for (int i = 0; i < promisingNode.childNodes.size(); i++) {
108+
Node childNode = promisingNode.childNodes.get(i);
109+
double uctTemp;
110+
111+
// If child node has never been visited
112+
// it will have the highest uct value.
113+
if (childNode.visitCount == 0) {
114+
nodeIndex = i;
115+
break;
116+
}
117+
118+
uctTemp = ((double) childNode.score / childNode.visitCount)
119+
+ 1.41 * Math.sqrt(Math.log(promisingNode.visitCount) / (double) childNode.visitCount);
120+
121+
if (uctTemp > uctIndex) {
122+
uctIndex = uctTemp;
123+
nodeIndex = i;
124+
}
125+
}
126+
127+
promisingNode = promisingNode.childNodes.get(nodeIndex);
128+
}
129+
130+
return promisingNode;
131+
}
132+
133+
/**
134+
* Simulates a random play from a nodes current state
135+
* and back propagates the result.
136+
*
137+
* @param promisingNode Node that will be simulated.
138+
*/
139+
public void simulateRandomPlay(Node promisingNode) {
140+
Random rand = new Random();
141+
Node tempNode = promisingNode;
142+
boolean isPlayerWinner;
143+
144+
// The following line randomly determines whether the simulated play is a win or loss.
145+
// To use the MCTS algorithm correctly this should be a simulation of the nodes' current
146+
// state of the game until it finishes (if possible) and use an evaluation function to
147+
// determine how good or bad the play was.
148+
// e.g. Play tic tac toe choosing random squares until the game ends.
149+
promisingNode.playerWon = (rand.nextInt(6) == 0);
150+
151+
isPlayerWinner = promisingNode.playerWon;
152+
153+
// Back propagation of the random play.
154+
while (tempNode != null) {
155+
tempNode.visitCount++;
156+
157+
// Add wining scores to bouth player and opponent depending on the turn.
158+
if ((tempNode.isPlayersTurn && isPlayerWinner) ||
159+
(!tempNode.isPlayersTurn && !isPlayerWinner)) {
160+
tempNode.score += WIN_SCORE;
161+
}
162+
163+
tempNode = tempNode.parent;
164+
}
165+
}
166+
167+
public Node getWinnerNode(Node rootNode) {
168+
return Collections.max(rootNode.childNodes, Comparator.comparing(c -> c.score));
169+
}
170+
171+
public void printScores(Node rootNode) {
172+
System.out.println("N.\tScore\t\tVisits");
173+
174+
for (int i = 0; i < rootNode.childNodes.size(); i++) {
175+
System.out.println(String.format("%02d\t%d\t\t%d", i + 1,
176+
rootNode.childNodes.get(i).score, rootNode.childNodes.get(i).visitCount));
177+
}
178+
}
179+
}

0 commit comments

Comments
 (0)