Gomoku (Connect Five) Minimax 算法 AI 不在乎输

Gomoku (Connect Five) Minimax algorithm AI doesn't care about losing

提问人:John Smith 提问时间:7/20/2023 更新时间:7/20/2023 访问量:99

问:

这是我上一个问题的延续。

所以,我认为我已经取得了很大的进步。人工智能现在似乎通常会做出最好的行动,并争取胜利,但我遇到的最后一个问题是它似乎并不在乎失败。也就是说,如果它有一个分叉,或者连续 4 个,它就会获胜,但如果玩家连续有 3 个(或 4 个),它不会做出阻止他们获胜的举动。HUMAN

更奇怪的是,有时如果我将深度设置为较低的值,它将防止损失,但如果我增加深度则不会。这是我当前的代码,有一个示例板,它无法找到最佳移动(以防止下一回合丢失):

const ROWS = 9;
const COLS = 9;
const LEN = 5;

const EMPTY = 0;
const HUMAN = 1;
const COMP = 2;

const WINNING_MOVE = 100000;

function checkDirection(grid, who, currChain, sRow, sCol, incRow, incCol) {
  let newChain = 0;

  while (currChain + newChain < LEN) {
    const row = sRow + (incRow * (newChain + 1));
    const col = sCol + (incCol * (newChain + 1));

    if (grid[row * COLS + col] !== who) {
      break;
    }
    
    newChain++;
  }

  return newChain;
}

function lineCheck(grid, who, sRow, sCol, mRow, mCol) {
  let chain = 1;

  chain += checkDirection(grid, who, 0, sRow, sCol, mRow, mCol);
  chain += checkDirection(grid, who, chain, sRow, sCol, -mRow, -mCol);

  return chain >= LEN;
}

function isWinningMove(grid, who, row, col) {    
  return lineCheck(grid, who, row, col, 1, 0) || 
         lineCheck(grid, who, row, col, 0, 1) || 
         lineCheck(grid, who, row, col, 1, 1) || 
         lineCheck(grid, who, row, col, -1, 1);
}

function getTile(grid, row, col) {
  if (row < 0 || col < 0 || row >= ROWS || col >= COLS) {
    return -1;
  }

  return grid[row * COLS + col];
}

function hasNeighbor(board, row, col) {
  if (getTile(board, row - 1, col - 1) > 0) { return true; }
  if (getTile(board, row - 1, col + 1) > 0) { return true; }
  if (getTile(board, row + 1, col - 1) > 0) { return true; }
  if (getTile(board, row + 1, col + 1) > 0) { return true; }

  if (getTile(board, row - 1, col) > 0) { return true; }
  if (getTile(board, row + 1, col) > 0) { return true; }

  if (getTile(board, row, col - 1) > 0) { return true; }
  if (getTile(board, row, col + 1) > 0) { return true; }

  return false;
}

function minimax(board, depth, alpha, beta, player, latestRow, latestCol) {
  if (depth === 0) {
    const val = evaluateBoard(board, latestRow, latestCol);

    return [ val, latestRow * COLS + latestCol ]; // returns a pair (value, move)
  }

  const opponent = player === COMP ? HUMAN : COMP;

  // player argument should be opponent, and return statement should be different per player
  if (isWinningMove(board, opponent, latestRow, latestCol)) {
    const multiplier = player === COMP ? 1 : -1;

    return [ WINNING_MOVE * multiplier, latestRow * COLS + latestCol ];
  }

  let bestMove = -1;

  if (player === COMP) {
    let maxEval = Number.MIN_SAFE_INTEGER;

    for (let row = 0; row < ROWS; row++) {
      for (let col = 0; col < COLS; col++) {
        const idx = row * COLS + col;
        const tileValue = board[idx];

        if (tileValue > 0 || !hasNeighbor(board, row, col)) { continue; }

        board[idx] = player;
        const evaluation = minimax(board, depth - 1, alpha, beta, HUMAN, row, col)[0];
        board[idx] = tileValue;

        if (evaluation > maxEval) {
          maxEval = evaluation;
          bestMove = idx;
        }

        alpha = Math.max(alpha, evaluation);

        if (beta <= alpha) {
          return [ maxEval, bestMove ];
        }
      }
    }

    return [ maxEval, bestMove ];
  } else {
    let minEval = Number.MAX_SAFE_INTEGER;

    for (let row = 0; row < ROWS; row++) {
      for (let col = 0; col < COLS; col++) {
        const idx = row * COLS + col;
        const tileValue = board[idx];

        if (tileValue > 0 || !hasNeighbor(board, row, col)) { continue; }

        board[idx] = player;
        const evaluation = minimax(board, depth - 1, alpha, beta, COMP, row, col)[0];
        board[idx] = tileValue;

        if (evaluation < minEval) {
          minEval = evaluation;
          bestMove = idx; // Also track best move for HUMAN.
        }

        beta = Math.min(beta, evaluation);

        if (beta <= alpha) {
          return [ minEval, bestMove ];
        }
      }
    }

    return [ minEval, bestMove ];
  }
}

function evaluatePlayerBoard(grid, who, latestRow, latestCol) {
  let idx = 0;
  let score = 0;

  if (isWinningMove(grid, who, latestRow, latestCol)) {
    return WINNING_MOVE;
  }

  for (let row = 0; row < ROWS; row++) {
    for (let col = 0; col < COLS; col++) {
      if (grid[idx] !== who) { idx++; continue; }

      if (getTile(grid, row - 1, col - 1) === who) { score++; }
      if (getTile(grid, row - 1, col + 1) === who) { score++; }
      if (getTile(grid, row + 1, col - 1) === who) { score++; }
      if (getTile(grid, row + 1, col + 1) === who) { score++; }

      if (getTile(grid, row - 1, col) === who) { score++; }
      if (getTile(grid, row + 1, col) === who) { score++; }

      if (getTile(grid, row, col - 1) === who) { score++; }
      if (getTile(grid, row, col + 1) === who) { score++; }
      
      // if (getTile(grid, row, col) === who) { score++; }

      idx++;
    } 
  }

  return score;
}

function evaluateBoard(grid, latestRow, latestCol) {
  return evaluatePlayerBoard(grid, COMP, latestRow, latestCol) // COMP is maximizing
       - evaluatePlayerBoard(grid, HUMAN, latestRow, latestCol); // HUMAN is minimizing
}

function getBestMove(board, maxDepth) {
  for (let depth = 1; depth <= maxDepth; depth++) {
    const [ evaluation, move ] = minimax(board, depth, Number.MIN_SAFE_INTEGER, Number.MAX_SAFE_INTEGER, COMP, -1, -1);

    // if we found a winning move already, return early
    // otherwise, keep iterating until we reach max depth
    if (evaluation > 10000 || depth === maxDepth) {
      return move;
    }
  }

  return 0; // should never run
}

const exampleBoard = [
  0, 0, 0, 0, 0, 0, 0, 0, 0, // 0-8
  0, 0, 0, 0, 0, 0, 0, 0, 0, // 9-17
  0, 0, 0, 0, 0, 0, 2, 0, 0, // 18-26
  0, 0, 2, 2, 0, 1, 0, 0, 0, // 27-35
  0, 0, 0, 2, 1, 0, 0, 0, 0, // 36-44
  0, 0, 0, 1, 0, 0, 0, 0, 0, // 45-53
  0, 0, 1, 0, 0, 0, 0, 0, 0, // 54-62
  0, 0, 0, 0, 0, 0, 0, 0, 0, // 63-71
  0, 0, 0, 0, 0, 0, 0, 0, 0, // 72-80
];

console.log(getBestMove(exampleBoard, 3));

我相信它应该是记录(以防止下一回合丢失),但它是记录6420

JavaScript 人工智能 Minimax Gomoku

评论


答:

1赞 trincot 7/20/2023 #1

代码的这一部分存在逻辑错误:

  // player argument should be opponent, and return statement should be different per player
  if (isWinningMove(board, opponent, latestRow, latestCol)) {
    const multiplier = player === COMP ? 1 : -1;

    return [ WINNING_MOVE * multiplier, latestRow * COLS + latestCol ];
  }

当输入这个区块时,我们知道是赢了,而不是赢了,所以乘数应该基于 。如果 是最大化玩家,我们应该乘以 1,否则乘以 -1。opponentplayeropponentopponent

不是问题,但作为最佳一步返回是没有意义的,因为当前玩家没有最佳动作(他们输掉了比赛),当然也不是他们的动作,所以这无关紧要。(可以对块进行相同的评论)latestRow * COLS + latestCollatestRow * COLS + latestColdepth == 0

修复:

  // player argument should be opponent, and return statement should be different per player
  if (isWinningMove(board, opponent, latestRow, latestCol)) {
    const multiplier = opponent === COMP ? 1 : -1;

    return [ WINNING_MOVE * multiplier, -1 ];
  }

评论

1赞 John Smith 7/20/2023
啊,多么愚蠢的错误!!非常感谢您抓住它!现在似乎效果很好!万岁!非常感谢!
0赞 John Smith 7/20/2023
哎呀,这确实有很大帮助,但出于某种原因,AI 现在只有在深度至少为 4 时才有效。知道为什么会这样吗?当深度为 1 到 3 时,您可以通过单行 5 立即获胜,计算机甚至不会试图阻止您。从好的方面来说,深度为 4 或 5 时的 AI 实际上相当不错!我尝试了很多不同的获胜方式,但都失败了
0赞 trincot 7/20/2023
我知道您希望它完美运行,但 Stack Overflow 不是展示一段代码并期望在答案中识别所有错误的正确位置。只要坚持下去,使用一个好的调试器和单元测试。如果你不能让它工作,并想依靠这个社区,那么我只能说:就你遇到的特定问题提出一个新问题。
0赞 John Smith 7/20/2023
奥基多基。感谢所有的帮助!