Where's error in implementation of minimax alg

2019-09-13 10:07发布

问题:

There is one little problem with it's implementation for a Tic-Tac-Toe game. For the following combination:

['x', 'o', 'e',  
 'o', ' e', 'e',  
 'e', ' e', 'e']  

the best choice would be

['x', 'o', 'e',  
 'o', ' x', 'e',  
 'e', ' e', 'e']  

but it returns as I suppose the nearest suitable one:

['x', 'o', 'x',  
 'o', ' e', 'e',  
 'e', ' e', 'e']   

And in this case AI loses. Here is the code:

var board = ['x', 'o', 'e', 'o', 'e', 'e', 'e', 'e', 'e'];
var signPlayer = 'o';
var signAI = (signPlayer === 'x') ? 'o' : 'x';

game = {
    over: function(board) {
        for (var i = 0; i < board.length; i += 3) {
            if (board[i] === board[i + 1] && board[i + 1] === board[i + 2]) {
                return board[i] !== 'e' ? board[i] : false;
            }
        }
        for (var j = 0; j < 3; j++) {
            if (board[j] === board[j + 3] && board[j + 3] === board[j + 6]) {
                return board[j] !== 'e' ? board[j] : false;
            }
        }
        if ((board[4] === board[0] && board[4] === board[8]) || 
        (board[4] === board[2] && board[4] === board[6])) {
            return board[4] !== 'e' ? board[4] : false;
        }
        return ( board.every(function(element) {
            return element !== 'e';
        })) 
    },
    winner: function(board) {
        return game.over(board);
    },
    possible_moves: function(board, sign) {
        var testBoard = [], 
        nextBoard;
        for (var i = 0; i < board.length; i++) {
            nextBoard = board.slice();
            if (nextBoard[i] === 'e') {
                nextBoard[i] = sign;
                testBoard.push(nextBoard);
            }
        }
        return testBoard;
    }
}

function moveScore(board) {
    var result = game.winner(board);

    if (result === signPlayer) {
        return -100;
    }
    if (result === signAI) {
        return +100;
    }
    return 0;
    //Game is a draw
}

function max(board) {

    if (game.over(board)) {
        return board;
    }
    var newGame = [];
    var bestMove = [];
    var score;
    var best_score = -Infinity;
    var movesArray = game.possible_moves(board, signAI);

    for (var i = 0; i < movesArray.length; i++) {
        newGame = movesArray[i].slice();
        score = moveScore(min(newGame));
        if (score > best_score) {
            best_score = score;
            bestMove = newGame;
        }
    }
    return bestMove;
}

function min(board) {

    if (game.over(board)) {
        return board;
    }
    var newGame = [];
    var worstMove = [];
    var score;
    var worst_score = +Infinity;
    var movesArray = game.possible_moves(board, signPlayer);

    for (var i = 0; i < movesArray.length; i++) {
        newGame = movesArray[i].slice();
        score = moveScore(max(newGame));
        if (score < worst_score) {
            worst_score = score;
            worstMove = newGame;
        }
    }
    return worstMove;
}

max(board);

回答1:

There are the following issues:

  • The over method gives wrong output for some boards, like for instance for this board:

    ['e', 'e', 'e', 'o', 'o', 'o', 'x', 'x', 'e']
    

    It will actually stop looking after it finds the three e values in the first three elements and return false, i.e. it does not see the win on the second row for o. To fix, change this line:

    return board[i] !== 'e' ? board[i] : false;
    

    to:

    if (board[i] !== 'e') return board[i];
    

    This will make the function continue with the other checks if it finds three e in a row. Similar fixes are necessary in the other loops (except the very last one).

  • Although the minimax algorithm visits the nodes in the search tree succesfully, it does not carry the found leaf-score (0, -100 or 100) back up in the search tree. Instead you recalculate each position's score by just looking at a static board configuration, ignoring the best/worst score you could get from the recursive call. To fix this, let the min and max function not only return the best move, but also the score associated with it. So replace this:

    return bestMove;
    

    with:

    return [best_score, bestMove];
    

    And then you pick up the score from the recursive call, if you replace this:

    score = moveScore(min(newGame));
    

    with:

    score = min(newGame)[0];
    

    You need to do a similar change for the case where the game is over. Replace this:

    if (game.over(board)) {
        return board;
    }
    

    with:

    if (game.over(board)) {
        return [moveScore(board), []];
    }
    

    Note that this is the only right moment to call moveScore. The second element of the array should be the best move, but as there is no move, it is better to just use an empty array for that.

  • This is a minor issue: you don't actually use the best move you get from the main call to max. With the above change, you could get both the best move and its score in the main call:

    [score, nextBoard] = max(board);
    

Here is your corrected code, with some additional code at the end to allow a game to be played by clicking on a 3x3 grid. For that purpose I have changed the code e to a space, as it looks better on a printed board:

var board = ['x', 'o', ' ', 'o', ' ', ' ', ' ', ' ', ' '];
var signPlayer = 'o';
var signAI = (signPlayer === 'x') ? 'o' : 'x';

var game = {
    over: function(board) {
        for (var i = 0; i < board.length; i += 3) {
            if (board[i] === board[i + 1] && board[i + 1] === board[i + 2]) {
                //return board[i] !== ' ' ? board[i] : false;
                if (board[i] !== ' ') return board[i];
            }
        }
        for (var j = 0; j < 3; j++) {
            if (board[j] === board[j + 3] && board[j + 3] === board[j + 6]) {
                //return board[j] !== ' ' ? board[j] : false;
                if (board[j] !== ' ') return board[j];
            }
        }
        if ((board[4] === board[0] && board[4] === board[8]) || 
                (board[4] === board[2] && board[4] === board[6])) {
            //return board[4] !== ' ' ? board[4] : false;
            if (board[4] !== ' ') return board[4];
        }
        return ( board.every(function(element) {
            return element !== ' ';
        })) 
    },
    winner: function(board) {
        return game.over(board);
    },
    possible_moves: function(board, sign) {
        var testBoard = [], 
        nextBoard;
        for (var i = 0; i < board.length; i++) {
            nextBoard = board.slice();
            if (nextBoard[i] === ' ') {
                nextBoard[i] = sign;
                testBoard.push(nextBoard);
            }
        }
        return testBoard;
    }
}

function moveScore(board) {
    var result = game.winner(board);

    if (result === signPlayer) {
        return -100;
    }
    if (result === signAI) {
        return +100;
    }
    return 0;
    //Game is a draw
}

function max(board) {
    //if (game.over(board)) {
    //    return board;
    //}
    if (game.over(board)) {
        return [moveScore(board), []];
    }
    var newGame = [];
    var bestMove = [];
    var score;
    var best_score = -Infinity;
    var movesArray = game.possible_moves(board, signAI);
    for (var i = 0; i < movesArray.length; i++) {
        newGame = movesArray[i].slice();
        //score = moveScore(min(newGame));
        score = min(newGame)[0];
        if (score > best_score) {
            best_score = score;
            bestMove = newGame;
        }
    }
    //return bestMove;
    return [best_score, bestMove];
}

function min(board) {
    //if (game.over(board)) {
    //    return board;
    //}
    if (game.over(board)) {
        return [moveScore(board), []];
    }
    var newGame = [];
    var worstMove = [];
    var score;
    var worst_score = +Infinity;
    var movesArray = game.possible_moves(board, signPlayer);

    for (var i = 0; i < movesArray.length; i++) {
        newGame = movesArray[i].slice();
        //score = moveScore(max(newGame));
        score = max(newGame)[0];
        if (score < worst_score) {
            worst_score = score;
            worstMove = newGame;
        }
    }
    //return worstMove;
    return [worst_score, worstMove];
}

// Extra code for adding a simple GUI

var board = [' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '];
var score = null;

var tds = Array.from(document.querySelectorAll('td'));
var table = document.querySelector('table');
var span = document.querySelector('span');

function display(board, score) {
    board.forEach( (v, i) => tds[i].textContent = v );
    span.textContent = score;
}
display(board);

table.onclick = function (e) {
    var i = tds.indexOf(e.target);
    if (i == -1 || board[i] !== ' ' || game.over(board)) return;
    board[i] = signPlayer;
    display(board);
    [score, board] = max(board, 1);
    display(board, score);
}
td { border: 1px solid; width: 20px; text-align: center; cursor: hand }
tr { height: 25px; v-align: middle }
<table>
<tr><td></td><td></td><td></td></tr>
<tr><td></td><td></td><td></td></tr>
<tr><td></td><td></td><td></td></tr>
</table>
<div>
Score: <span></span>
</div>

Final note

I have just made the corrections to make it work, but note there are several ways to improve the efficiency. This you can do by using an alpha-beta search, tracking scores for already evaluated boards, while mapping similar boards by translations (turning, mirroring), and mutating boards instead of creating a new board each time you play a move.