Skip to content

Commit c71cdef

Browse files
committed
Merge branch 'global_ab_3' into global_ab_4
2 parents c90db88 + db6861b commit c71cdef

File tree

2 files changed

+121
-58
lines changed

2 files changed

+121
-58
lines changed

src/AI/AlphaBeta.hs

+118-58
Original file line numberDiff line numberDiff line change
@@ -90,14 +90,15 @@ scoreMove (ScoreMoveInput {..}) = do
9090
let AlphaBeta params rules eval = smiAi
9191
score <- Monitoring.timed "ai.score.move" $ do
9292
let board' = applyMoveActions (pmResult smiMove) smiBoard
93-
score <- doScore rules eval smiCache params smiGameId (opposite smiSide) smiDepth board' smiAlpha smiBeta
93+
score <- doScore rules eval smiCache params smiGameId (opposite smiSide) smiDepth board' smiGlobalInterval smiAlpha smiBeta
9494
`catchError` (\(e :: Error) -> do
9595
$info "doScore: move {}, depth {}: {}" (show smiMove, dpTarget smiDepth, show e)
9696
throwError e
9797
)
9898
$info "Check: {} ([{} - {}], depth {}) => {}" (show smiMove, show smiAlpha, show smiBeta, dpTarget smiDepth, show score)
9999
return score
100100

101+
restrictInterval smiGlobalInterval smiSide score
101102
return (smiMove, score)
102103

103104
scoreMoveGroup :: (GameRules rules, Evaluator eval) => [ScoreMoveInput rules eval] -> Checkers [(PossibleMove, Score)]
@@ -373,7 +374,8 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
373374
, dpReductedMode = False
374375
}
375376
$info "Preselecting; number of possible moves = {}, depth = {}" (length moves, dpTarget simple)
376-
options <- scoreMoves' False moves simple (loose, win)
377+
globalInterval <- liftIO $ atomically $ newTVar (loose, win)
378+
options <- scoreMoves' False moves simple globalInterval (loose, win)
377379
let key = if maximize
378380
then negate . snd
379381
else snd
@@ -385,14 +387,16 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
385387
-- return result
386388

387389
depthDriver :: [PossibleMove] -> Checkers DepthIterationOutput
388-
depthDriver moves =
390+
depthDriver moves = do
391+
globalInterval <- liftIO $ atomically $ newTVar (loose, win)
389392
let input = DepthIterationInput {
390393
diiParams = params,
391394
diiMoves = moves,
392395
diiPrevResult = Nothing,
396+
diiGlobalInterval = globalInterval,
393397
diiSortKeys = Nothing
394398
}
395-
in case abBaseTime params of
399+
case abBaseTime params of
396400
Nothing -> do
397401
(result, _) <- go input
398402
return result
@@ -445,7 +449,7 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
445449
Just keys -> return keys
446450
Nothing -> preselect preselectDepth diiMoves
447451
let sortedMoves = map snd $ sortOn fst $ zip sortKeys diiMoves
448-
result <- widthController True True diiPrevResult sortedMoves dp' =<< initInterval
452+
result <- widthController True True diiPrevResult sortedMoves dp' diiGlobalInterval =<< initInterval
449453
-- In some corner cases, there might be 1 or 2 possible moves,
450454
-- so the timeout would allow us to calculate with very big depth;
451455
-- too big depth does not decide anything in such situations.
@@ -493,22 +497,22 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
493497
| s > 100 = 5
494498
| otherwise = 2
495499

496-
nextInterval :: (Score, Score) -> (Score, Score)
497-
nextInterval (alpha, beta) =
500+
nextInterval :: Score -> (Score, Score) -> (Score, Score)
501+
nextInterval good (alpha, beta) =
498502
let width = (beta - alpha)
499503
width' = selectScale width `scaleScore` width
500-
alpha' = prevScore alpha
501-
beta' = nextScore beta
504+
alpha' = min good (prevScore alpha)
505+
beta' = max good (nextScore beta)
502506
in if maximize
503507
then (beta', max beta' (beta' + width'))
504508
else (min alpha' (alpha' - width'), alpha')
505509

506-
prevInterval :: (Score, Score) -> (Score, Score)
507-
prevInterval (alpha, beta) =
510+
prevInterval :: Score -> (Score, Score) -> (Score, Score)
511+
prevInterval bad (alpha, beta) =
508512
let width = (beta - alpha)
509513
width' = selectScale width `scaleScore` width
510-
alpha' = prevScore alpha
511-
beta' = nextScore beta
514+
alpha' = min bad (prevScore alpha)
515+
beta' = max bad (nextScore beta)
512516
in if minimize
513517
then (beta', max beta' (beta' + width'))
514518
else (min alpha' (alpha' - width'), alpha')
@@ -518,24 +522,26 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
518522
-> Maybe DepthIterationOutput -- ^ Results of previous depth iteration
519523
-> [PossibleMove]
520524
-> DepthParams
525+
-> TVar (Score, Score) -- ^ Global (alpha, beta)
521526
-> (Score, Score) -- ^ (Alpha, Beta)
522527
-> Checkers DepthIterationOutput
523-
widthController allowNext allowPrev prevResult moves dp interval@(alpha,beta) =
528+
widthController allowNext allowPrev prevResult moves dp globalInterval localInterval = do
529+
interval@(alpha, beta) <- getRestrictedInterval globalInterval localInterval
524530
if alpha == beta
525531
then do
526532
$info "Empty scores interval: [{}]. We have to think that all moves have this score." (Single alpha)
527533
return [(move, alpha) | move <- moves]
528534
else do
529-
results <- widthIteration prevResult moves dp interval
530-
let (good, badScore, badMoves) = selectBestEdge interval moves results
535+
results <- widthIteration prevResult moves dp globalInterval interval
536+
let (goodScore, good, badScore, badMoves) = selectBestEdge interval moves results
531537
(bestMoves, bestResults) = unzip good
532538
if length badMoves == length moves
533539
then
534540
if allowPrev
535541
then do
536-
let interval' = prevInterval interval
542+
let interval' = prevInterval badScore interval
537543
$info "All moves are `too bad'; consider worse scores interval: [{} - {}]" interval'
538-
widthController False True prevResult badMoves dp interval'
544+
widthController False True prevResult badMoves dp globalInterval interval'
539545
else do
540546
$info "All moves are `too bad' ({}), but we have already checked worse interval; so this is the real score." (Single badScore)
541547
return [(move, badScore) | move <- moves]
@@ -548,15 +554,20 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
548554
_ ->
549555
if allowNext
550556
then do
551-
let interval'@(alpha',beta') = nextInterval interval
557+
let interval'@(alpha',beta') = nextInterval goodScore interval
552558
$info "Some moves ({} of them) are `too good'; consider better scores interval: [{} - {}]" (length bestMoves, alpha', beta')
553-
widthController True False prevResult bestMoves dp interval'
559+
widthController True False prevResult bestMoves dp globalInterval interval'
554560
else do
555561
$info "Some moves ({} of them) are `too good'; but we have already checked better interval; so this is the real score" (Single $ length bestMoves)
556562
return bestResults
557563

558-
scoreMoves :: Bool -> [PossibleMove] -> DepthParams -> (Score, Score) -> Checkers [Either Error (PossibleMove, Score)]
559-
scoreMoves byOne moves dp (alpha, beta) = do
564+
scoreMoves :: Bool
565+
-> [PossibleMove]
566+
-> DepthParams
567+
-> TVar (Score, Score) -- ^ Global interval
568+
-> (Score, Score) -- ^ Local interval
569+
-> Checkers [Either Error (PossibleMove, Score)]
570+
scoreMoves byOne moves dp globalInterval (localAlpha, localBeta) = do
560571
let var = aichData handle
561572
let processor = aichProcessor handle
562573
let inputs = [
@@ -568,8 +579,9 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
568579
smiDepth = dp,
569580
smiBoard = board,
570581
smiMove = move,
571-
smiAlpha = alpha,
572-
smiBeta = beta
582+
smiGlobalInterval = globalInterval,
583+
smiAlpha = localAlpha,
584+
smiBeta = localBeta
573585
} | move <- moves ]
574586

575587
n = length moves
@@ -580,18 +592,29 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
580592

581593
results <- process' processor groups
582594
return $ concatE (map length groups) results
583-
584-
scoreMoves' :: Bool -> [PossibleMove] -> DepthParams -> (Score, Score) -> Checkers DepthIterationOutput
585-
scoreMoves' byOne moves dp (alpha, beta) = do
586-
results <- scoreMoves byOne moves dp (alpha, beta)
595+
596+
scoreMoves' :: Bool
597+
-> [PossibleMove]
598+
-> DepthParams
599+
-> TVar (Score, Score)
600+
-> (Score, Score)
601+
-> Checkers DepthIterationOutput
602+
scoreMoves' byOne moves dp globalInterval localInterval = do
603+
results <- scoreMoves byOne moves dp globalInterval localInterval
587604
case sequence results of
588605
Right result -> return result
589606
Left err -> throwError err
590607

591-
widthIteration :: Maybe DepthIterationOutput -> [PossibleMove] -> DepthParams -> (Score, Score) -> Checkers DepthIterationOutput
592-
widthIteration prevResult moves dp (alpha, beta) = do
608+
widthIteration :: Maybe DepthIterationOutput
609+
-> [PossibleMove]
610+
-> DepthParams
611+
-> TVar (Score, Score)
612+
-> (Score, Score)
613+
-> Checkers DepthIterationOutput
614+
widthIteration prevResult moves dp globalInterval localInterval = do
615+
(alpha, beta) <- getRestrictedInterval globalInterval localInterval
593616
$info "`- Considering scores interval: [{} - {}], depth = {}" (alpha, beta, dpTarget dp)
594-
results <- scoreMoves False moves dp (alpha, beta)
617+
results <- scoreMoves False moves dp globalInterval (alpha, beta)
595618
joinResults prevResult results
596619

597620
joinResults :: Maybe DepthIterationOutput -> [Either Error (PossibleMove, Score)] -> Checkers DepthIterationOutput
@@ -612,7 +635,14 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
612635
let (good, bad) = if maximize then (beta, alpha) else (alpha, beta)
613636
goodResults = [(move, (goodMoves, score)) | (move, (goodMoves, score)) <- zip moves results, score >= good]
614637
badResults = [move | (move, (_, score)) <- zip moves results, score <= bad]
615-
in (goodResults, bad, badResults)
638+
scores = map snd results
639+
badScore = if maximize
640+
then minimum scores
641+
else maximum scores
642+
goodScore = if maximize
643+
then maximum scores
644+
else minimum scores
645+
in (goodScore, goodResults, bad, badResults)
616646

617647
select :: DepthIterationOutput -> Checkers AiOutput
618648
select pairs = do
@@ -631,10 +661,11 @@ doScore :: (GameRules rules, Evaluator eval)
631661
-> Side
632662
-> DepthParams
633663
-> Board
664+
-> TVar (Score, Score)
634665
-> Score -- ^ Alpha
635666
-> Score -- ^ Beta
636667
-> Checkers Score
637-
doScore rules eval var params gameId side dp board alpha beta = do
668+
doScore rules eval var params gameId side dp board globalInterval alpha beta = do
638669
initState <- mkInitState
639670
out <- evalStateT (cachedScoreAB var params input) initState
640671
return $ soScore out
@@ -645,14 +676,36 @@ doScore rules eval var params gameId side dp board alpha beta = do
645676
let timeout = case abBaseTime params of
646677
Nothing -> Nothing
647678
Just sec -> Just $ TimeSpec (fromIntegral sec) 0
648-
return $ ScoreState rules eval gameId [loose] M.empty now timeout
679+
return $ ScoreState rules eval gameId globalInterval [loose] M.empty now timeout
649680

650681
clamp :: Ord a => a -> a -> a -> a
651682
clamp alpha beta score
652683
| score < alpha = alpha
653684
| score > beta = beta
654685
| otherwise = score
655686

687+
restrictInterval :: MonadIO m => TVar (Score, Score) -> Side -> Score -> m ()
688+
restrictInterval var side score = liftIO $ atomically $ do
689+
(globalAlpha, globalBeta) <- readTVar var
690+
when (globalAlpha < score && score < globalBeta) $
691+
if side == First -- maximize
692+
then writeTVar var (score, globalBeta)
693+
else writeTVar var (globalAlpha, score)
694+
695+
getRestrictedInterval :: (MonadIO m, HasLogger m, HasLogContext m) => TVar (Score, Score) -> (Score, Score) -> m (Score, Score)
696+
getRestrictedInterval global (localAlpha, localBeta) = do
697+
(globalAlpha, globalBeta) <- liftIO $ atomically $ readTVar global
698+
let alpha1 = max globalAlpha localAlpha
699+
beta1 = min globalBeta localBeta
700+
if alpha1 <= beta1
701+
then do
702+
$trace "Restrict: Global [{}, {}] x Local [{}, {}] => [{}, {}]"
703+
(globalAlpha, globalBeta, localAlpha, localBeta, alpha1, beta1)
704+
return (alpha1, beta1)
705+
else do
706+
let mid = (alpha1 + beta1) `divideScore` 2
707+
return (mid, mid)
708+
656709
-- | Calculate score of the board.
657710
-- This uses the cache. It is called in the recursive call also.
658711
cachedScoreAB :: forall rules eval. (GameRules rules, Evaluator eval)
@@ -676,7 +729,7 @@ cachedScoreAB var params input = do
676729
-- AB-section: alpha <= result <= beta. So here we clamp the value
677730
-- that we got from cache.
678731
case itemBound item of
679-
Exact -> return $ Just $ ScoreOutput (clamp alpha beta score) False
732+
Exact -> return $ Just $ ScoreOutput score False
680733
Alpha -> if score <= alpha
681734
then return $ Just $ ScoreOutput alpha False
682735
else return Nothing
@@ -776,16 +829,12 @@ scoreAB :: forall rules eval. (GameRules rules, Evaluator eval)
776829
-> ScoreInput
777830
-> ScoreM rules eval ScoreOutput
778831
scoreAB var params input
779-
| alpha == beta = do
780-
$verbose "Alpha == Beta == {}, return it" (Single $ show alpha)
781-
quiescene <- checkQuiescene
782-
return $ ScoreOutput alpha quiescene
783-
784832
| isTargetDepth dp = do
785833
-- target depth is achieved, calculate score of current board directly
786834
evaluator <- gets ssEvaluator
787835
let score0 = evalBoard' evaluator board
788-
$verbose " X Side: {}, A = {}, B = {}, score0 = {}" (show side, show alpha, show beta, show score0)
836+
(alpha, beta) <- getRestrictedInterval'
837+
$trace " X Side: {}, A = {}, B = {}, score0 = {}" (show side, show alpha, show beta, show score0)
789838
quiescene <- checkQuiescene
790839
return $ ScoreOutput score0 quiescene
791840

@@ -799,6 +848,7 @@ scoreAB var params input
799848
return out
800849
Nothing -> do
801850

851+
(alpha, beta) <- getRestrictedInterval'
802852
moves <- lift $ getPossibleMoves var side board
803853
let quiescene = isQuiescene moves
804854
let worst
@@ -811,7 +861,7 @@ scoreAB var params input
811861
$verbose "{}`—No moves left." (Single indent)
812862
return $ ScoreOutput worst True
813863
else
814-
if dpStaticMode dp && isQuiescene moves
864+
if dpStaticMode dp && quiescene
815865
-- In static mode, we are considering forced moves only.
816866
-- If we have reached a quiescene, then that's all.
817867
then do
@@ -822,25 +872,27 @@ scoreAB var params input
822872
let best
823873
| dpStaticMode dp = evalBoard' evaluator board
824874
| otherwise = worst
825-
826-
push best
827-
$verbose "{}V Side: {}, A = {}, B = {}" (indent, show side, show alpha, show beta)
828-
rules <- gets ssRules
829-
dp' <- updateDepth params moves dp
830-
let prevMove = siPrevMove input
831-
moves' <- sortMoves params var side dp board prevMove moves
832-
let depths = correspondingDepths (length moves') score0 quiescene dp'
833-
-- let depths = repeat dp'
834-
out <- iterateMoves $ zip3 [1..] moves' depths
835-
pop
836-
return out
875+
if alpha == beta
876+
then return $ ScoreOutput best quiescene
877+
else do
878+
push best
879+
$verbose "{}V Side: {}, A = {}, B = {}" (indent, show side, show alpha, show beta)
880+
rules <- gets ssRules
881+
dp' <- updateDepth params moves dp
882+
let prevMove = siPrevMove input
883+
moves' <- sortMoves params var side dp board prevMove moves
884+
let depths = correspondingDepths (length moves') score0 quiescene dp'
885+
-- let depths = repeat dp'
886+
out <- iterateMoves $ zip3 [1..] moves' depths
887+
pop
888+
return out
837889

838890
where
839891

840892
side = siSide input
841893
dp = siDepth input
842-
alpha = siAlpha input
843-
beta = siBeta input
894+
localAlpha = siAlpha input
895+
localBeta = siBeta input
844896
board = siBoard input
845897

846898
canReduceDepth :: Score -> Bool -> Bool
@@ -849,8 +901,8 @@ scoreAB var params input
849901
not (dpReductedMode dp) &&
850902
dpCurrent dp >= 4 &&
851903
quiescene &&
852-
score0 > alpha &&
853-
score0 < beta &&
904+
score0 > localAlpha &&
905+
score0 < localBeta &&
854906
score0 > -10 &&
855907
score0 < 10
856908

@@ -868,6 +920,7 @@ scoreAB var params input
868920
checkFutility = do
869921
evaluator <- gets ssEvaluator
870922
quiescene <- checkQuiescene
923+
(alpha, beta) <- getRestrictedInterval'
871924
let score0 = evalBoard' evaluator board
872925
best = if maximize then alpha else beta
873926
isBad = if maximize
@@ -883,6 +936,11 @@ scoreAB var params input
883936
then return $ Just $ ScoreOutput score0 quiescene
884937
else return Nothing
885938

939+
getRestrictedInterval' = do
940+
globalInterval <- gets ssGlobalInterval
941+
result@(alpha, beta) <- getRestrictedInterval globalInterval (localAlpha, localBeta)
942+
return result
943+
886944
evalBoard' :: eval -> Board -> Score
887945
evalBoard' evaluator board = result
888946
where
@@ -964,6 +1022,7 @@ scoreAB var params input
9641022
go (input : inputs) = do
9651023
out <- cachedScoreAB var params input
9661024
let score = soScore out
1025+
(alpha, beta) <- getRestrictedInterval'
9671026
if maximize && score >= beta || minimize && score <= alpha
9681027
then go inputs
9691028
else return out
@@ -984,6 +1043,7 @@ scoreAB var params input
9841043
evaluator <- gets ssEvaluator
9851044
rules <- gets ssRules
9861045
best <- getBest
1046+
let (alpha, beta) = (localAlpha, localBeta)
9871047
let input' = input {
9881048
siSide = opposite side
9891049
, siAlpha = if maximize

0 commit comments

Comments
 (0)