提问人:John Smith 提问时间:7/20/2023 更新时间:7/20/2023 访问量:99
Gomoku (Connect Five) Minimax 算法 AI 不在乎输
Gomoku (Connect Five) Minimax algorithm AI doesn't care about losing
问:
这是我上一个问题的延续。
所以,我认为我已经取得了很大的进步。人工智能现在似乎通常会做出最好的行动,并争取胜利,但我遇到的最后一个问题是它似乎并不在乎失败。也就是说,如果它有一个分叉,或者连续 4 个,它就会获胜,但如果玩家连续有 3 个(或 4 个),它不会做出阻止他们获胜的举动。HUMAN
更奇怪的是,有时如果我将深度设置为较低的值,它将防止损失,但如果我增加深度则不会。这是我当前的代码,有一个示例板,它无法找到最佳移动(以防止下一回合丢失):
const ROWS = 9;
const COLS = 9;
const LEN = 5;
const EMPTY = 0;
const HUMAN = 1;
const COMP = 2;
const WINNING_MOVE = 100000;
function checkDirection(grid, who, currChain, sRow, sCol, incRow, incCol) {
let newChain = 0;
while (currChain + newChain < LEN) {
const row = sRow + (incRow * (newChain + 1));
const col = sCol + (incCol * (newChain + 1));
if (grid[row * COLS + col] !== who) {
break;
}
newChain++;
}
return newChain;
}
function lineCheck(grid, who, sRow, sCol, mRow, mCol) {
let chain = 1;
chain += checkDirection(grid, who, 0, sRow, sCol, mRow, mCol);
chain += checkDirection(grid, who, chain, sRow, sCol, -mRow, -mCol);
return chain >= LEN;
}
function isWinningMove(grid, who, row, col) {
return lineCheck(grid, who, row, col, 1, 0) ||
lineCheck(grid, who, row, col, 0, 1) ||
lineCheck(grid, who, row, col, 1, 1) ||
lineCheck(grid, who, row, col, -1, 1);
}
function getTile(grid, row, col) {
if (row < 0 || col < 0 || row >= ROWS || col >= COLS) {
return -1;
}
return grid[row * COLS + col];
}
function hasNeighbor(board, row, col) {
if (getTile(board, row - 1, col - 1) > 0) { return true; }
if (getTile(board, row - 1, col + 1) > 0) { return true; }
if (getTile(board, row + 1, col - 1) > 0) { return true; }
if (getTile(board, row + 1, col + 1) > 0) { return true; }
if (getTile(board, row - 1, col) > 0) { return true; }
if (getTile(board, row + 1, col) > 0) { return true; }
if (getTile(board, row, col - 1) > 0) { return true; }
if (getTile(board, row, col + 1) > 0) { return true; }
return false;
}
function minimax(board, depth, alpha, beta, player, latestRow, latestCol) {
if (depth === 0) {
const val = evaluateBoard(board, latestRow, latestCol);
return [ val, latestRow * COLS + latestCol ]; // returns a pair (value, move)
}
const opponent = player === COMP ? HUMAN : COMP;
// player argument should be opponent, and return statement should be different per player
if (isWinningMove(board, opponent, latestRow, latestCol)) {
const multiplier = player === COMP ? 1 : -1;
return [ WINNING_MOVE * multiplier, latestRow * COLS + latestCol ];
}
let bestMove = -1;
if (player === COMP) {
let maxEval = Number.MIN_SAFE_INTEGER;
for (let row = 0; row < ROWS; row++) {
for (let col = 0; col < COLS; col++) {
const idx = row * COLS + col;
const tileValue = board[idx];
if (tileValue > 0 || !hasNeighbor(board, row, col)) { continue; }
board[idx] = player;
const evaluation = minimax(board, depth - 1, alpha, beta, HUMAN, row, col)[0];
board[idx] = tileValue;
if (evaluation > maxEval) {
maxEval = evaluation;
bestMove = idx;
}
alpha = Math.max(alpha, evaluation);
if (beta <= alpha) {
return [ maxEval, bestMove ];
}
}
}
return [ maxEval, bestMove ];
} else {
let minEval = Number.MAX_SAFE_INTEGER;
for (let row = 0; row < ROWS; row++) {
for (let col = 0; col < COLS; col++) {
const idx = row * COLS + col;
const tileValue = board[idx];
if (tileValue > 0 || !hasNeighbor(board, row, col)) { continue; }
board[idx] = player;
const evaluation = minimax(board, depth - 1, alpha, beta, COMP, row, col)[0];
board[idx] = tileValue;
if (evaluation < minEval) {
minEval = evaluation;
bestMove = idx; // Also track best move for HUMAN.
}
beta = Math.min(beta, evaluation);
if (beta <= alpha) {
return [ minEval, bestMove ];
}
}
}
return [ minEval, bestMove ];
}
}
function evaluatePlayerBoard(grid, who, latestRow, latestCol) {
let idx = 0;
let score = 0;
if (isWinningMove(grid, who, latestRow, latestCol)) {
return WINNING_MOVE;
}
for (let row = 0; row < ROWS; row++) {
for (let col = 0; col < COLS; col++) {
if (grid[idx] !== who) { idx++; continue; }
if (getTile(grid, row - 1, col - 1) === who) { score++; }
if (getTile(grid, row - 1, col + 1) === who) { score++; }
if (getTile(grid, row + 1, col - 1) === who) { score++; }
if (getTile(grid, row + 1, col + 1) === who) { score++; }
if (getTile(grid, row - 1, col) === who) { score++; }
if (getTile(grid, row + 1, col) === who) { score++; }
if (getTile(grid, row, col - 1) === who) { score++; }
if (getTile(grid, row, col + 1) === who) { score++; }
// if (getTile(grid, row, col) === who) { score++; }
idx++;
}
}
return score;
}
function evaluateBoard(grid, latestRow, latestCol) {
return evaluatePlayerBoard(grid, COMP, latestRow, latestCol) // COMP is maximizing
- evaluatePlayerBoard(grid, HUMAN, latestRow, latestCol); // HUMAN is minimizing
}
function getBestMove(board, maxDepth) {
for (let depth = 1; depth <= maxDepth; depth++) {
const [ evaluation, move ] = minimax(board, depth, Number.MIN_SAFE_INTEGER, Number.MAX_SAFE_INTEGER, COMP, -1, -1);
// if we found a winning move already, return early
// otherwise, keep iterating until we reach max depth
if (evaluation > 10000 || depth === maxDepth) {
return move;
}
}
return 0; // should never run
}
const exampleBoard = [
0, 0, 0, 0, 0, 0, 0, 0, 0, // 0-8
0, 0, 0, 0, 0, 0, 0, 0, 0, // 9-17
0, 0, 0, 0, 0, 0, 2, 0, 0, // 18-26
0, 0, 2, 2, 0, 1, 0, 0, 0, // 27-35
0, 0, 0, 2, 1, 0, 0, 0, 0, // 36-44
0, 0, 0, 1, 0, 0, 0, 0, 0, // 45-53
0, 0, 1, 0, 0, 0, 0, 0, 0, // 54-62
0, 0, 0, 0, 0, 0, 0, 0, 0, // 63-71
0, 0, 0, 0, 0, 0, 0, 0, 0, // 72-80
];
console.log(getBestMove(exampleBoard, 3));
我相信它应该是记录(以防止下一回合丢失),但它是记录64
20
答:
1赞
trincot
7/20/2023
#1
代码的这一部分存在逻辑错误:
// player argument should be opponent, and return statement should be different per player
if (isWinningMove(board, opponent, latestRow, latestCol)) {
const multiplier = player === COMP ? 1 : -1;
return [ WINNING_MOVE * multiplier, latestRow * COLS + latestCol ];
}
当输入这个区块时,我们知道是赢了,而不是赢了,所以乘数应该基于 。如果 是最大化玩家,我们应该乘以 1,否则乘以 -1。opponent
player
opponent
opponent
不是问题,但作为最佳一步返回是没有意义的,因为当前玩家没有最佳动作(他们输掉了比赛),当然也不是他们的动作,所以这无关紧要。(可以对块进行相同的评论)latestRow * COLS + latestCol
latestRow * COLS + latestCol
depth == 0
修复:
// player argument should be opponent, and return statement should be different per player
if (isWinningMove(board, opponent, latestRow, latestCol)) {
const multiplier = opponent === COMP ? 1 : -1;
return [ WINNING_MOVE * multiplier, -1 ];
}
评论
1赞
John Smith
7/20/2023
啊,多么愚蠢的错误!!非常感谢您抓住它!现在似乎效果很好!万岁!非常感谢!
0赞
John Smith
7/20/2023
哎呀,这确实有很大帮助,但出于某种原因,AI 现在只有在深度至少为 4 时才有效。知道为什么会这样吗?当深度为 1 到 3 时,您可以通过单行 5 立即获胜,计算机甚至不会试图阻止您。从好的方面来说,深度为 4 或 5 时的 AI 实际上相当不错!我尝试了很多不同的获胜方式,但都失败了
0赞
trincot
7/20/2023
我知道您希望它完美运行,但 Stack Overflow 不是展示一段代码并期望在答案中识别所有错误的正确位置。只要坚持下去,使用一个好的调试器和单元测试。如果你不能让它工作,并想依靠这个社区,那么我只能说:就你遇到的特定问题提出一个新问题。
0赞
John Smith
7/20/2023
奥基多基。感谢所有的帮助!
评论