Gomoku (Connect Five) Minimax算法没有找到明显的胜利

Gomoku (Connect Five) Minimax algorithm not finding obvious win

提问人:John Smith 提问时间:7/19/2023 最后编辑:John Smith 更新时间:7/20/2023 访问量:83

问:

我正在开发一个非常简单的连接五 (gomoku) AI,以便在 Javascript 中使用 minimax 和 alpha beta 修剪来娱乐。我刚刚在网上遵循了一些教程,但由于某种原因,我无法完全让它工作。我想我在某处有一个逻辑错误,因为在下面的代码中,如果 AI 在第二行和第三列中放置一个片段,则会出现一个分叉,但它没有被发现为 .bestMove

奇怪的是,如果我设置一个断点,该位置 () 通常被发现为 ,但无论出于何种原因,minimax 函数永远不会计算为该移动,即使我相信它显然应该是。也就是说,如果 AI 在那里放置一个棋子,它基本上是下一回合的直接胜利,因为它会导致多个方向的胜利。row: 1, col: 2winningMovebestMove

也就是说,如果 AI 在白色 2 上移动,那么在下一个 AI 移动中可以有垂直获胜或水平获胜,因为人类只能阻止其中之一:

enter image description here

const ROWS = 9;
const COLS = 9;
const LEN = 5;

const EMPTY = 0;
const HUMAN = 1;
const COMP = 2;

function checkDirection(grid, who, currChain, sRow, sCol, incRow, incCol) {
  let newChain = 0;

  while (currChain + newChain < LEN) {
    const row = sRow + (incRow * (newChain + 1));
    const col = sCol + (incCol * (newChain + 1));

    if (grid[row * COLS + col] !== who) {
      break;
    }
    
    newChain++;
  }

  return newChain;
}

function lineCheck(grid, who, sRow, sCol, mRow, mCol) {
  let chain = 1;

  chain += checkDirection(grid, who, 0, sRow, sCol, mRow, mCol);
  chain += checkDirection(grid, who, chain, sRow, sCol, -mRow, -mCol);

  return chain >= LEN;
}

function isWinningMove(grid, who, row, col) {    
  return lineCheck(grid, who, row, col, 1, 0) || 
         lineCheck(grid, who, row, col, 0, 1) || 
         lineCheck(grid, who, row, col, 1, 1) || 
         lineCheck(grid, who, row, col, -1, 1);
}

function getTile(grid, row, col) {
  if (row < 0 || col < 0 || row >= ROWS || col >= COLS) {
    return -1;
  }

  return grid[row * COLS + col];
}

function hasNeighbor(board, row, col) {
  if (getTile(board, row - 1, col - 1) > 0) { return true; }
  if (getTile(board, row - 1, col + 1) > 0) { return true; }
  if (getTile(board, row + 1, col - 1) > 0) { return true; }
  if (getTile(board, row + 1, col + 1) > 0) { return true; }

  if (getTile(board, row - 1, col) > 0) { return true; }
  if (getTile(board, row + 1, col) > 0) { return true; }

  if (getTile(board, row, col - 1) > 0) { return true; }
  if (getTile(board, row, col + 1) > 0) { return true; }

  return false;
}

let bestMove = Number.MIN_SAFE_INTEGER;

function minimax(board, depth, alpha, beta, player, latestRow, latestCol) {
  if (depth === 0) {
    return evaluatePlayerBoard(board, player, player === COMP ? HUMAN : COMP, latestRow, latestCol);
  }

  if (isWinningMove(board, player, latestRow, latestCol)) {
    return 1000000;
  }

  if (player === COMP) {
    let maxEval = Number.MIN_SAFE_INTEGER;

    for (let row = 0; row < ROWS; row++) {
      for (let col = 0; col < COLS; col++) {
        const idx = row * COLS + col;
        const tileValue = board[idx];

        if (tileValue > 0 || !hasNeighbor(board, row, col)) { continue; }

        board[idx] = player;
        const evaluation = minimax(board, depth - 1, alpha, beta, HUMAN, row, col);
        board[idx] = tileValue;

        if (evaluation > maxEval) {
          maxEval = evaluation;
          bestMove = idx;          
        }

        alpha = Math.max(alpha, evaluation);

        if (beta <= alpha) {
          return maxEval;
        }
      }
    }

    return maxEval;
  } else {
    let minEval = Number.MAX_SAFE_INTEGER;

    for (let row = 0; row < ROWS; row++) {
      for (let col = 0; col < COLS; col++) {
        const idx = row * COLS + col;
        const tileValue = board[idx];

        if (tileValue > 0 || !hasNeighbor(board, row, col)) { continue; }

        board[idx] = player;
        const evaluation = minimax(board, depth - 1, alpha, beta, COMP, row, col);
        board[idx] = tileValue;

        if (evaluation < minEval) {
          minEval = evaluation;
        }

        beta = Math.min(beta, evaluation);

        if (beta <= alpha) {
          return minEval;
        }
      }
    }

    return minEval;
  }
}

function evaluatePlayerBoard(grid, who, latestRow, latestCol) {
  let idx = 0;
  let score = 0;

  if (isWinningMove(grid, who, latestRow, latestCol)) {
    return 1000000;
  }

  for (let row = 0; row < ROWS; row++) {
    for (let col = 0; col < COLS; col++) {
      if (grid[idx] !== who) { idx++; continue; }

      if (getTile(grid, row - 1, col - 1) === who) { score++; }
      if (getTile(grid, row - 1, col + 1) === who) { score++; }
      if (getTile(grid, row + 1, col - 1) === who) { score++; }
      if (getTile(grid, row + 1, col + 1) === who) { score++; }

      if (getTile(grid, row - 1, col) === who) { score++; }
      if (getTile(grid, row + 1, col) === who) { score++; }

      if (getTile(grid, row, col - 1) === who) { score++; }
      if (getTile(grid, row, col + 1) === who) { score++; }
      
      if (getTile(grid, row, col) === who) { score++; }

      idx++;
    } 
  }

  return score;
}

function evaluateBoard(grid, you, opponent, latestRow, latestCol) {
  return evaluatePlayerBoard(grid, you, latestRow, latestCol) - evaluatePlayerBoard(grid, opponent, latestRow, latestCol);
}

const exampleBoard = [
  0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 2, 0, 2, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 2, 0, 0, 0, 0, 0, 0,
  0, 0, 2, 0, 0, 0, 0, 0, 0,
  0, 0, 2, 0, 0, 0, 0, 0, 0,
  0, 0, 1, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0,
];

minimax(exampleBoard, 2, Number.MIN_SAFE_INTEGER, Number.MAX_SAFE_INTEGER, COMP, -1, -1);

console.log(bestMove);

您可以运行上面的代码段,并看到它被记录而不是 ,即使这显然是更好的举动,因为它会导致立即获胜。201111

JavaScript 人工智能 Minimax Gomoku

评论

0赞 Ouroborus 7/19/2023
在猜测中,在搜索时,它不会计算给定移动可以赢得多少场胜利,只是计算获胜是可能的。
0赞 John Smith 7/19/2023
@Ouroborus这是正确性/正常工作的必要条件?我只是假设,既然它会看到将棋子放在索引 11 将导致保证获胜,即使他们试图阻止,这将是最高价值的选项。这难道不比一场可能被阻止的胜利更“有价值”吗?
0赞 Ouroborus 7/19/2023
我认为这在一定程度上取决于深度的设置。
0赞 John Smith 7/19/2023
好吧,即使我将深度设置为 6(这个胜利条件应该只需要深度为 2,对吧?),它似乎仍然没有找到它
0赞 Ouroborus 7/19/2023
胜利发生在第三步。双方的排名都计入其中。不知道为什么找不到 6。

答:

1赞 trincot 7/20/2023 #1

有几个问题:

  • 通过传递 2 个 player 参数,而函数只需要一个 player 参数。因此,参数被函数误解,结果是无用的。const val = evaluatePlayerBoard(board, player, player === COMP ? HUMAN : COMP, latestRow, latestCol);

  • 可能与上述内容有关:您有一个从未被调用的函数,但它确实需要两个 player 参数。我猜你打算从 .evaluateBoardminimax

  • 仍然返回一个相对于第一个玩家参数的分数(正数更好),但由于你有一个最大化玩家和最小化玩家,分数的符号不应该由这个函数的参数动态确定,而是以这样一种方式“硬编码”,即 COMP 总是得到正分,而 HUMAN 得到负分。所以实际上应该根本没有玩家参数,只需调用 with as 参数,第二次调用 as 参数。evaluateBoardevaluateBoardevaluatePlayerBoardCOMPHUMAN

  • minimax使用错误的 player 参数进行调用。应该是对手下了最后一步棋,因为这是作为参数传递的棋。isWinningMove

  • 从 2 开始,您只允许 COMP 的移动和 HUMAN 的返回移动。然后你评估这个位置。到那时还没有赢。您应该从至少 3 开始depthdepth

  • 作为一个全局变量,你有时会得到 COMP 更深移动的最佳移动,因为无论深度是多少,你都会覆盖它。但这种更深层次的举动并不是你想要识别的举动。最佳做法是不要为此使用全局变量。相反,make 返回找到的作为相应的移动。您可以将两者组合到一个数组(或普通对象)中,如下所示: 。这意味着你必须在几个地方更改你的代码:所有语句现在都应该返回一个数组,所有调用都应该期待一个数组作为返回值,并选择它们感兴趣的部分(值或移动)。bestMoveminimaxreturn [maxEval, bestMove]returnminimaxminimax

  • 当看到深度为零,并通过调用检测到胜利时,它总是返回 1000000,但如果最后一步是由 HUMAN 进行的,它应该返回 -1000000。因此,将此逻辑移动到两个块中。minimaxisWinningMoveifelse

  • 问题不大,但它只需要再增加一行,这样也可以返回初始调用者的最佳移动。我只会添加它。minimaxHUMAN

下面是代码的更正版本:

const ROWS = 9;
const COLS = 9;
const LEN = 5;

const EMPTY = 0;
const HUMAN = 1;
const COMP = 2;

function checkDirection(grid, who, currChain, sRow, sCol, incRow, incCol) {
  let newChain = 0;

  while (currChain + newChain < LEN) {
    const row = sRow + (incRow * (newChain + 1));
    const col = sCol + (incCol * (newChain + 1));

    if (grid[row * COLS + col] !== who) {
      break;
    }
    
    newChain++;
  }

  return newChain;
}

function lineCheck(grid, who, sRow, sCol, mRow, mCol) {
  let chain = 1;

  chain += checkDirection(grid, who, 0, sRow, sCol, mRow, mCol);
  chain += checkDirection(grid, who, chain, sRow, sCol, -mRow, -mCol);

  return chain >= LEN;
}

function isWinningMove(grid, who, row, col) {    
  return lineCheck(grid, who, row, col, 1, 0) || 
         lineCheck(grid, who, row, col, 0, 1) || 
         lineCheck(grid, who, row, col, 1, 1) || 
         lineCheck(grid, who, row, col, -1, 1);
}

function getTile(grid, row, col) {
  if (row < 0 || col < 0 || row >= ROWS || col >= COLS) {
    return -1;
  }

  return grid[row * COLS + col];
}

function hasNeighbor(board, row, col) {
  if (getTile(board, row - 1, col - 1) > 0) { return true; }
  if (getTile(board, row - 1, col + 1) > 0) { return true; }
  if (getTile(board, row + 1, col - 1) > 0) { return true; }
  if (getTile(board, row + 1, col + 1) > 0) { return true; }

  if (getTile(board, row - 1, col) > 0) { return true; }
  if (getTile(board, row + 1, col) > 0) { return true; }

  if (getTile(board, row, col - 1) > 0) { return true; }
  if (getTile(board, row, col + 1) > 0) { return true; }

  return false;
}

// Removed global bestMove definition from here

function minimax(board, depth, alpha, beta, player, latestRow, latestCol) {
  if (depth === 0) {
    // Fixed: Call different function and don't pass player arguments:
    const val = evaluateBoard(board, latestRow, latestCol);
    return [val, -1]; // Now returns a pair (value, move)
  }

  const opponent = player === COMP ? HUMAN : COMP; // Moved this expression here
  let bestMove = -1; // Is now a local variable -- not a global.
  if (player === COMP) {
    // Fixed: player argument should be opponent, and return statement should be different per player
    if (isWinningMove(board, opponent, latestRow, latestCol)) {
      // Minimax returns an array now: value, move
      return [1000000, -1]; // Positive for COMP, negative for HUMAN.
    }
    let maxEval = Number.MIN_SAFE_INTEGER;

    for (let row = 0; row < ROWS; row++) {
      for (let col = 0; col < COLS; col++) {
        const idx = row * COLS + col;
        const tileValue = board[idx];

        if (tileValue > 0 || !hasNeighbor(board, row, col)) { continue; }

        board[idx] = player;
        // As minimax returns an array now, get the first element of it
        const evaluation = minimax(board, depth - 1, alpha, beta, HUMAN, row, col)[0];
        board[idx] = tileValue;

        if (evaluation > maxEval) {
          maxEval = evaluation;
          bestMove = idx;        
        }

        alpha = Math.max(alpha, evaluation);

        if (beta <= alpha) {
          // Minimax returns an array now: value, move
          return [maxEval, bestMove];
        }
      }
    }
    // Minimax returns an array now: value, move
    return [maxEval, bestMove];
  } else {
    // Fixed: player argument should be opponent, and return statement should be different per player
    if (isWinningMove(board, opponent, latestRow, latestCol)) {
      // Minimax returns an array now: value, move
      return [-1000000, -1]; // Positive for COMP, negative for HUMAN.
    }
    let minEval = Number.MAX_SAFE_INTEGER;

    for (let row = 0; row < ROWS; row++) {
      for (let col = 0; col < COLS; col++) {
        const idx = row * COLS + col;
        const tileValue = board[idx];

        if (tileValue > 0 || !hasNeighbor(board, row, col)) { continue; }

        board[idx] = player;
        // As minimax returns an array now, get the first element of it
        const evaluation = minimax(board, depth - 1, alpha, beta, COMP, row, col)[0];
        board[idx] = tileValue;

        if (evaluation < minEval) {
          minEval = evaluation;
          bestMove = idx; // Also track best move for HUMAN.
        }

        beta = Math.min(beta, evaluation);

        if (beta <= alpha) {
          // Minimax returns an array now: value, move
          return [minEval, bestMove];
        }
      }
    }
    // Minimax returns an array now: value, move
    return [minEval, bestMove];
  }
}

function evaluatePlayerBoard(grid, who, latestRow, latestCol) {
  let idx = 0;
  let score = 0;
  if (isWinningMove(grid, who, latestRow, latestCol)) {
    return 1000000;
  }
  for (let row = 0; row < ROWS; row++) {
    for (let col = 0; col < COLS; col++) {
      if (grid[idx] !== who) { idx++; continue; }

      if (getTile(grid, row - 1, col - 1) === who) { score++; }
      if (getTile(grid, row - 1, col + 1) === who) { score++; }
      if (getTile(grid, row + 1, col - 1) === who) { score++; }
      if (getTile(grid, row + 1, col + 1) === who) { score++; }

      if (getTile(grid, row - 1, col) === who) { score++; }
      if (getTile(grid, row + 1, col) === who) { score++; }

      if (getTile(grid, row, col - 1) === who) { score++; }
      if (getTile(grid, row, col + 1) === who) { score++; }
      
      if (getTile(grid, row, col) === who) { score++; }

      idx++;
    } 
  }
  return score;
}

function evaluateBoard(grid, latestRow, latestCol) { // Removed player paramaters
  // Hardcoded the player arguments, as COMP is maximizing, and HUMAN is minimizing.
  return evaluatePlayerBoard(grid, COMP, latestRow, latestCol) 
       - evaluatePlayerBoard(grid, HUMAN, latestRow, latestCol);
}

const exampleBoard = [
  0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 2, 0, 2, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 2, 0, 0, 0, 0, 0, 0,
  0, 0, 2, 0, 0, 0, 0, 0, 0,
  0, 0, 2, 0, 0, 0, 0, 0, 0,
  0, 0, 1, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0,
];

// The depth should be set at least at 3 to detect the win.
// As minimax returns an array now, get the move part of it (at index 1)
const bestMove = minimax(exampleBoard, 3, Number.MIN_SAFE_INTEGER, Number.MAX_SAFE_INTEGER, COMP, -1, -1)[1];    
console.log(bestMove); // 11

免责声明:我只验证了您的代码以解决您提出的问题。可能还需要解决其他问题。

评论

1赞 John Smith 7/20/2023
哦,我非常感谢你!!这太有帮助了,你不知道。我真的被困在了这一点上,事后看来,我在尝试各种不同的事情时犯了一些愚蠢的错误,但你也抓住了一堆会难倒我一段时间的事情。谢谢,谢谢,谢谢,谢谢,谢谢!!
0赞 John Smith 7/20/2023
嗨,我玩得更多了,我注意到如果你将深度从 3 设置为 5,它会将最佳移动从 更改为 ,但我认为这是一个较弱的移动,因为它不会立即获胜?你知道为什么会发生这种情况吗?我以为,如果越深入,它会做出更好的举动,不是吗?112121
0赞 trincot 7/20/2023
发生这种情况是因为代码中没有逻辑可以优先于快速获胜而不是较慢的胜利。您可以通过减少从递归调用返回的值的绝对值来实现这一目标。这要求您在绝对值中有足够的空间来减小它,因此您必须避免分数仅为 1。因此,也许可以计算出 10 的倍数。
0赞 John Smith 7/20/2023
感谢您的回复!!你认为我可以乘以吗?那行得通吗?因为那样速赢会更有价值吗?就像做退货而不是1000000(depth + 1)[1000000 * (depth + 1), -1];return [1000000, -1];
1赞 John Smith 7/20/2023
啊,好吧,对不起。好的,谢谢!我会调查还有什么可能出错的地方,然后如果我不太明白,可能会在几个小时内发布一个新问题。